[mpich-discuss] Killed by Signal 9!!!!???

Burlen Loring burlen.loring at kitware.com
Mon Jul 28 08:24:47 CDT 2008


Hi,
In my experience a number of things could cause this. e.g. I have seen 
this if I run out of memory. The kernel steps in and kills the process. 
There might be some info in dmesg, have you looked? Might be something 
entirely different as well, such as too many open files. There are a 
bunch of settings you can tweak take a look at your environment settings 
via ulimit -a.
Burlen

Gholamreza Sobhaninejad wrote:
> Dear All,
>
> Hi,
> This is a prototype program which I am going to implement it in 
> another program. I want to send objects. The program works very well 
> when 'shapeset' is small. But when I increase number of CPU, and 
> Shapeset size , I get the following error message:
>
> "rank 0 in job 12  localhost.localdomain_60694   caused collective 
> abort of all ranks
>   exit status of rank 0: killed by signal 9 "
>
> I have a master process which works on Rank 0 and Slaves which are 
> working on Other ranks.
> Master serialize (write all the information in a char buffer) a Shape 
> object and send it through MPI::Send. Slaves are reciving the Objects 
> and Deserialize it and call a function pointer to operate on the object.
> Sorry the code is little bit long.
>
> I really appreciate your help. I need some advices. I am working on 
> Linux-Fedora with dual core cpu.
>
> Sincerely,
>
> -- 
> Gholamreza Sobhaninejad(Reza)
> PhD candidate, University of Tokyo
> Earthquake Research Institute
> Tel: 090-9807-2477
>
>
> This is a code that I am trying to run it:
> ------------------------------------------------------------------------
> head.h :
> #include <mpi.h>
> #include <stdio.h>
> #include <cstdlib>
> #include <iostream>
> #include <string>
> #include <sstream>
> #include <fstream>
> #include <vector>
> #include <time.h>
>
> using namespace std;
>
> const char delim = ' ';
>
> #include "Node.h"
> #include "Shape.h"
>
> #include "MPI_Process.h"
>
> ------------------------------------------------------------------------
> Node.h
> #ifndef _NODE_H_
> #define _NODE_H_
>
> static const string Node_head="<\\Node>";
> static const string Node_tail="<Node\\>";
>
> class Node
> {
>
> public:
>     int id;
>     double x,y,z;
>     ///---- Constructur, Destructor, Copy Constructor
>
>     Node():x(0),y(0),z(0),id(0) {}
>     ~Node() {}
>     Node(int ID, double X=0, double Y=0, double Z=0) {id = ID; x=X; 
> y=Y; z=Z;}
>     Node(const Node& anode) {id=anode.id <http://anode.id>; x=anode.x; 
> y= anode.y; z=anode.z;}
>
>     //---- Mutator and Accessors
>     void SetID(int ID) {id = ID;}
>     void Set(int ID, double X=0, double Y=0, double Z=0) {id = ID; 
> x=X; y=Y; z=Z;}
>
>     Node Get() {return *this;}
>
>
>
>
>     //---- Serialization and De serialization
>     void Serialize(stringstream& out)
>     {
>         out << delim << Node_head << delim << this->id << delim << 
> this->x << delim
>             << this->y << delim << this->z << delim <<Node_tail << '\n';
>     }
>
>     void DeSerialize(stringstream& in)
>     {
>         string tmp;
>
>         in >> tmp >> this->id >>this->x >> this->y >> this->z >>tmp;
>
>     }
>
>
> };
>
> #endif
> ---------------------------------------------------------------------------------------------
> MPI_Process.h :
> #ifndef _MPI_PROCESS_H_
> #define _MPI_PROCESS_H_
>
> ///---- Message Passing Tags
>
> #define WORKTAG 1
> #define LENTAG  3
> #define DIETAG 2
> #define SLAVE_GET_LENGTH 4
> #define SLAVE_WORK_FINISH 5
>
> class MPI_Process
> {
> private:
>     int* source_ID;
>     int* current_ID;
>     int* num_Process ;
>     int* length ;
>
>     MPI::Status status;
>
>     void Probe_Message_Length();
>
> public:
>     MPI_Process(void);
>     ~MPI_Process(void);
>
>     void Kill_Process(int rank);
>
>     //-- Set & Get
>
>     void Set_Num_Proc(int i);
>     int  Get_Num_Proc();
>
>     void Set_SourceID(int i);
>     int Get_SourceID();
>
>     void Set_CurrentID(int i);
>     int  Get_CurrentID();
>
>     int Send_Serializable_Object     ( void* Object ,
>                                        int  rank    ,
>                                        void (*pt2function)(void* 
> Object,stringstream& stream)
>                                       );
>
>     int Recieve_Serializable_Object  ( void* Object,
>                                        void (*pt2function)(void* 
> Object,stringstream& stream)
>                                      );
>
> };
>
>
> #endif
> ----------------------------------------------------------------------------------
> Shape.h :
>
> #ifndef _SHAPE_H_
> #define _SHAPE_H_
>
>
> static const string Shape_head="<\\Shape>";
> static const string Shape_tail="<Shape\\>";
>
> string    Int2Str(int num);
>
>
> class Shape
> {
> public :
>     int id;
>     vector<Node> nodeset;
>
>     Shape() : id(0) {}
>     ~Shape() {nodeset.clear();}
>
>     Shape(const Shape& aShape);
>
>     void SetID(int ID) {id = ID;}
>     void MakeShape();
>     void Serialize(stringstream& out);
>     void DeSerialize(stringstream& in);
>
>     void Write2File()
>     {
>         stringstream ss;
>
>         ofstream out( ("Shape" + Int2Str(this->id)+".shp").c_str());
>             this->Serialize(ss);
>             out.write(ss.str().c_str(), ss.str().size());
>         out.close();
>         ss.clear();
>     }
>
>
>     static void Wrapper_To_MPI_SEND (void* Object, stringstream& out);
>
>     static void Wrapper_To_MPI_RECIVE (void* Object, stringstream& in);
>    
> };
>
> #endif
> -------------------------------------------------------------------------------------------
> MPI_Process.cpp :
>
> #include "head.h"
> #include <string.h>
> MPI_Process::MPI_Process(void){}
>
> MPI_Process::~MPI_Process(void){}
>
> void MPI_Process::Set_Num_Proc(int i)
> {
>     this->num_Process = new int;
>     *this->num_Process = i;
> }
>
> int MPI_Process::Get_Num_Proc()
> {
>     return *this->num_Process;
> }
>
> void MPI_Process::Set_SourceID(int i)
> {
>      this->source_ID  = new int;
>      *this->source_ID = i;
> }
>
> int MPI_Process::Get_SourceID()
> {
>     return *this->source_ID;
> }
>
> void MPI_Process::Set_CurrentID(int i)
> {
>     this->current_ID = new int;
>     *this->current_ID = i;
> }
>
> void MPI_Process::Probe_Message_Length()
> {
>     MPI::COMM_WORLD.Probe(*this->source_ID,WORKTAG,this->status);
>     this->length = new int(status.Get_count(MPI::CHAR) );
> }
>
>
> int MPI_Process::Send_Serializable_Object(void* Object, int rank, void 
> (*pt2function)(void* Object,stringstream& stream))
> {
>     stringstream * stream;
>     char* buffer;
>    
>         int message_length;
>    
>     stream = new stringstream();
>    
>     //-- Serialize   
>     pt2function(Object,*stream);
>    
>     message_length = stream->str().length();
>     buffer = new char[message_length];
>
>        
>     strcpy(buffer,stream->str().c_str());
>
>     MPI::COMM_WORLD.Send(buffer,message_length,MPI::CHAR,rank,WORKTAG);
>        
>        
>         delete stream;
>         delete[] buffer;
>       
>         return message_length ;
> }
>
> int MPI_Process::Recieve_Serializable_Object(void* Object, void 
> (*pt2function)(void* Object,stringstream& stream))
> {   
>     stringstream * stream;
>     char* buffer;
>
>          
>     this->Probe_Message_Length();
>    
>     buffer = new char[ *this->length ];
>        
>         if(*this->length == 1 ) cout << buffer <<endl;
>
>
>
>     MPI::COMM_WORLD.Recv(buffer,*this->length,MPI::CHAR, 0 ,WORKTAG);
>    
>     if(buffer[0] =='0')
>     {
>                 delete stream;
>                 delete[] buffer;
>        
>                 return 0;
>     }
>     else
>     {
>         stream = new stringstream();   
>
>         stream->str(buffer);
>
>         pt2function(Object,*stream);
>
>         stream->clear();
>                 delete stream;
>                 delete[] buffer;
>                
>                
>         return *this->length;
>          }
>
> }
>
> void MPI_Process::Kill_Process(int rank)
> {
>     char *buffer;
>         buffer = new char[1];
>         buffer[0]='0';
>         cout << "Process No: " << rank << "is killed" << '\n';
>         MPI::COMM_WORLD.Send(&buffer,1,MPI::CHAR,rank,WORKTAG);
>         delete[] buffer;
>     
> //MPI::COMM_WORLD.Sendrecv(&buffer,1,MPI::CHAR,rank,WORKTAG,&tmp,1,MPI::INT,rank,WORKTAG);
> }
> ----------------------------------------------------------------------------------------------------------------------
> Shape.h :
> #include "head.h"
>
>
> Shape::Shape(const Shape& aShape)
> {
>     /*this->id = aShape.id;
>    
>     for(vector<Node>::iterator i=aShape.nodeset.begin() ; 
> i!=aShape.nodeset.end(); i++)
>         this->nodeset.push_back(*i);*/
> }
> string    Int2Str(int num)
> {
>     stringstream ss;
>     ss << num;
>     return ss.str();
> }
>
> void Shape::MakeShape()
> {
>     Node n;
>     for(register int i=0;i<10;i++)
>     {
>         //n.Set(i,0.01*i,1.0*i,2.0*i);
>         n.Set(rand()%100,rand()%100,rand()%100,rand()%100);
>         this->nodeset.push_back(n);
>     }
> }
>
> void Shape::Serialize(stringstream& out)
> {
>     cout << "Start of Serialization Processes NO " << 
> MPI::COMM_WORLD.Get_rank()    <<endl;
>
>     out << Shape_head << '\n' <<delim << this->id << delim << (int) 
> this->nodeset.size() << '\n';
>     for(vector<Node>::iterator i=this->nodeset.begin(); i!= 
> this->nodeset.end();i++)
>         i->Serialize(out);
>     out << Shape_tail  <<'\n' ;
> }
>
> void Shape::DeSerialize(stringstream& in)
> {
>     cout << "Start of DeSerialization Processes NO " << 
> MPI::COMM_WORLD.Get_rank()    <<endl;
>
>     string tmp;
>     int    number_of_nodes;
>     in >> tmp >> this->id >> number_of_nodes;
>     this->nodeset.resize(number_of_nodes);
>     for(vector<Node>::iterator i=this->nodeset.begin(); 
> i!=this->nodeset.end();i++)
>         i->DeSerialize(in);
>     in >> tmp;   
> }
>
>
>
> void Shape::Wrapper_To_MPI_SEND(void* Object, stringstream& out)
> {
>     Shape* myself = (Shape*) Object;
>     myself->Serialize(out);
> }
>
>
> void Shape::Wrapper_To_MPI_RECIVE(void *Object, std::stringstream& in)
> {
>     Shape* myself = (Shape*) Object;
>     myself->DeSerialize(in);
>     myself->Write2File();
> }
>
>
> -------------------------------------------------------------------------------------
>
> main.cpp :
> #include "head.h"
>
>
>
> void Slave()
> {
>
>     MPI_Process *slave_process = new MPI_Process();
>    
>     slave_process->Set_SourceID(0);
>     slave_process->Set_CurrentID(MPI::COMM_WORLD.Get_rank());
>    
>     Shape shape;
>
>     while(slave_process->Recieve_Serializable_Object( (void*) &shape, 
> Shape::Wrapper_To_MPI_RECIVE)  != 0)
>     {
>         cout << "Slave No: " << MPI::COMM_WORLD.Get_size() << " 
> Finished Work." <<endl;
>     }
>     cout << "Slave No: " << MPI::COMM_WORLD.Get_size() << " Quit." 
> <<endl;
> }
>
>  void Master(vector<Shape>& shapeset)
> {
>     MPI_Process* master_Process = new MPI_Process();
>    
>     int check(0),rank(1),nproc,counter(0);
>
>     master_Process ->Set_Num_Proc(MPI::COMM_WORLD.Get_size());
>     master_Process->Set_CurrentID(0);
>     master_Process->Set_SourceID(0);
>     nproc = master_Process->Get_Num_Proc();
>        
>        
>        
>         vector<Shape>::iterator i(shapeset.begin());
>         cout << shapeset.size() << endl;
>        
>     for(counter; counter < (int) shapeset.size();counter++,rank++)
>     {
>             if(i!=shapeset.end())
>             {
>         if(rank > nproc-1) rank =1;
>         cout << "Master Send a Message with Lenght of :" <<
>         master_Process->Send_Serializable_Object((void*) &(*i) ,rank, 
> Shape::Wrapper_To_MPI_SEND)
>         << " to CPU Rank:" << rank <<endl;
>                 i++;
>             }
>     }
>
>     cout <<" Start of Turning off System!" << endl;
>
>     for(rank=1; rank< nproc ;  ++rank)
>     {
>             cout << "sending Kill Message to Proc: " << rank <<endl;
>         master_Process->Kill_Process(rank);
>     }
> }
>
>
>
>
>
> int main(int argc, char ** argv)
> {
>     srand( (int) time(NULL));
>    
>
>     int rank,nproc,nwork;
>
>     //--- Start of MPI CALL
>    
>     MPI::Status status;
>     MPI::Init(argc,argv);
>
>     nproc=MPI::COMM_WORLD.Get_size();
>     rank = MPI::COMM_WORLD.Get_rank();
>    
>     if(rank==0)
>     {
>         vector<Shape> shapeset;
>
>         shapeset.resize(10000);
>         int j=0;
>         for( vector<Shape>::iterator i=shapeset.begin(); 
> i!=shapeset.end(); i++, ++j)
>             i->SetID(j);
>
>         for( vector<Shape>::iterator i=shapeset.begin(); 
> i!=shapeset.end(); i++)
>             i->MakeShape();
>        
>         cout << "Shape Initialization Finished" << endl;
>
>         cout << "Processor NO:  " << rank << " Started" <<endl;
>
>         Master(shapeset);
>     }
>     else
>     {
>         cout << "Processor NO:  " << rank << " Started" <<endl;
>         Slave();
>     }
>        
>     MPI::Finalize();
>
>
>     return 0;
> }
>
>
>


-- 
Burlen Loring
Kitware, Inc.
R&D Engineer
28 Corporate Drive
Clifton Park, NY 12065-8662
Phone: 518-371-3971 x137




More information about the mpich-discuss mailing list