[mpich-discuss] SGE & Hydra Problem

Ursula Winkler ursula.winkler at uni-graz.at
Wed Sep 15 02:43:34 CDT 2010


Pavan Balaji wrote:
> % mpiexec -verbose /bin/true
>   
mpiexec options:
----------------
  Base path: /installadmin/mpich2/test/intel/bin/
  Bootstrap server: (null)
  Debug level: 1
  Enable X: -1

  Global environment:
  -------------------
    REMOTEHOST=ZID178.KFUNIGRAZ.AC.AT
    
MANPATH=/installadmin/sge/man:/software/mpich2/test/intel/share/man:/software/intel/intel_fce_111/man:/software/intel/intel_cce_111/man:/installadmin/sge/man:/usr/share/man/en:/usr/share/man:/usr/local/share/man
    CONSOLE=/dev/console
    SELINUX_INIT=YES
    
INTEL_LICENSE_FILE=/software/intel/intel_fce_111/licenses:/opt/intel/licenses:/usr/people/edvz/winkl/intel/licenses:/software/intel/intel_cce_111/licenses:/software/intel/licenses:/usr/people/edvz/winkl/intel/licenses
    HOST=b00
    TERM=xterm
    HISTSIZE=1000
    SSH_CLIENT=143.50.128.178 34576 22
    SSH_TTY=/dev/pts/3
    GROUP=edvz
    
LD_LIBRARY_PATH=/installadmin/mpich2/test/intel/lib:/software/intel/intel_fce_111/lib/intel64:/software/intel/intel_cce_111/lib/intel64
    LS_COLORS=no
    INIT_VERSION=sysvinit-2.86
    HOSTTYPE=x86_64-linux
    AUTOBOOT=YES
    MAIL=/var/spool/mail/winkl
    runlevel=3
    RUNLEVEL=3
    INPUTRC=/etc/inputrc
    PWD=/usr/people/edvz/winkl/MPI-Test
    SGE_ACCOUNT=sge
    LANG=en_US.UTF-8
    previous=N
    PREVLEVEL=N
    REQNAME=test_nodes.b2
    SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass
    MPI=/installadmin/mpich2/test/intel
    SHLVL=2
    SGE_CWD_PATH=/usr/people/edvz/winkl/MPI-Test
    OSTYPE=linux
    BOOT_IMAGE=2.6.18-194.11.3
    MPIHOME=/installadmin/mpich2/test/intel
    VENDOR=unknown
    MACHTYPE=x86_64
    CVS_RSH=ssh
    SSH_CONNECTION=143.50.128.178 34576 143.50.10.40 22
    LESSOPEN=|/usr/bin/lesspipe.sh %s
    G_BROKEN_FILENAMES=1
    _=/installadmin/mpich2/test/intel/bin/mpiexec


    Proxy information:
    *********************
      Proxy ID:  1
      -----------------
        Proxy name: b79
        Process count: 2
        Start PID: 0

        Proxy exec list:
        ....................
          Exec: /bin/true; Process count: 2
      Proxy ID:  2
      -----------------
        Proxy name: b51
        Process count: 2
        Start PID: 2

        Proxy exec list:
        ....................
          Exec: /bin/true; Process count: 2

==================================================================================================

[mpiexec at b79] Timeout set to -1 (-1 means infinite)
[mpiexec at b79] Got a control port string of b79:45593

Proxy launch args: /installadmin/mpich2/test/intel/bin/hydra_pmi_proxy 
--control-port b79:45593 --debug --demux poll --pgid 0 --enable-stdin 1 
--proxy-id

[mpiexec at b79] PMI FD: (null); PMI PORT: (null); PMI ID/RANK: -1
Arguments being passed to proxy 0:
--version 1.3b1 --interface-env-name MPICH_INTERFACE_NAME --hostname b79 
--global-core-count 4 --global-process-count 4 --auto-cleanup 1 
--pmi-rank -1 --pmi-kvsname kvs_13018_0 --pmi-process-mapping 
(vector,(0,2,2)) --global-inherited-env 40 
'REMOTEHOST=ZID178.KFUNIGRAZ.AC.AT' 
'MANPATH=/installadmin/sge/man:/software/mpich2/test/intel/share/man:/software/intel/intel_fce_111/man:/software/intel/intel_cce_111/man:/installadmin/sge/man:/usr/share/man/en:/usr/share/man:/usr/local/share/man' 
'CONSOLE=/dev/console' 'SELINUX_INIT=YES' 
'INTEL_LICENSE_FILE=/software/intel/intel_fce_111/licenses:/opt/intel/licenses:/usr/people/edvz/winkl/intel/licenses:/software/intel/intel_cce_111/licenses:/software/intel/licenses:/usr/people/edvz/winkl/intel/licenses' 
'HOST=b00' 'TERM=xterm' 'HISTSIZE=1000' 'SSH_CLIENT=143.50.128.178 34576 
22' 'SSH_TTY=/dev/pts/3' 'GROUP=edvz' 
'LD_LIBRARY_PATH=/installadmin/mpich2/test/intel/lib:/software/intel/intel_fce_111/lib/intel64:/software/intel/intel_cce_111/lib/intel64' 
'LS_COLORS=no' 'INIT_VERSION=sysvinit-2.86' 'HOSTTYPE=x86_64-linux' 
'AUTOBOOT=YES' 'MAIL=/var/spool/mail/winkl' 'runlevel=3' 'RUNLEVEL=3' 
'INPUTRC=/etc/inputrc' 'PWD=/usr/people/edvz/winkl/MPI-Test' 
'SGE_ACCOUNT=sge' 'LANG=en_US.UTF-8' 'previous=N' 'PREVLEVEL=N' 
'REQNAME=test_nodes.b2' 
'SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass' 
'MPI=/installadmin/mpich2/test/intel' 'SHLVL=2' 
'SGE_CWD_PATH=/usr/people/edvz/winkl/MPI-Test' 'OSTYPE=linux' 
'BOOT_IMAGE=2.6.18-194.11.3' 'MPIHOME=/installadmin/mpich2/test/intel' 
'VENDOR=unknown' 'MACHTYPE=x86_64' 'CVS_RSH=ssh' 
'SSH_CONNECTION=143.50.128.178 34576 143.50.10.40 22' 
'LESSOPEN=|/usr/bin/lesspipe.sh %s' 'G_BROKEN_FILENAMES=1' 
'_=/installadmin/mpich2/test/intel/bin/mpiexec' --global-user-env 0 
--global-system-env 0 --start-pid 0 --proxy-core-count 2 --exec 
--exec-appnum 0 --exec-proc-count 2 --exec-local-env 0 --exec-wdir 
/usr/people/edvz/winkl/MPI-Test --exec-args 1 /bin/true

[mpiexec at b79] PMI FD: (null); PMI PORT: (null); PMI ID/RANK: -1
Arguments being passed to proxy 1:
--version 1.3b1 --interface-env-name MPICH_INTERFACE_NAME --hostname b51 
--global-core-count 4 --global-process-count 4 --auto-cleanup 1 
--pmi-rank -1 --pmi-kvsname kvs_13018_0 --pmi-process-mapping 
(vector,(0,2,2)) --global-inherited-env 40 
'REMOTEHOST=ZID178.KFUNIGRAZ.AC.AT' 
'MANPATH=/installadmin/sge/man:/software/mpich2/test/intel/share/man:/software/intel/intel_fce_111/man:/software/intel/intel_cce_111/man:/installadmin/sge/man:/usr/share/man/en:/usr/share/man:/usr/local/share/man' 
'CONSOLE=/dev/console' 'SELINUX_INIT=YES' 
'INTEL_LICENSE_FILE=/software/intel/intel_fce_111/licenses:/opt/intel/licenses:/usr/people/edvz/winkl/intel/licenses:/software/intel/intel_cce_111/licenses:/software/intel/licenses:/usr/people/edvz/winkl/intel/licenses' 
'HOST=b00' 'TERM=xterm' 'HISTSIZE=1000' 'SSH_CLIENT=143.50.128.178 34576 
22' 'SSH_TTY=/dev/pts/3' 'GROUP=edvz' 
'LD_LIBRARY_PATH=/installadmin/mpich2/test/intel/lib:/software/intel/intel_fce_111/lib/intel64:/software/intel/intel_cce_111/lib/intel64' 
'LS_COLORS=no' 'INIT_VERSION=sysvinit-2.86' 'HOSTTYPE=x86_64-linux' 
'AUTOBOOT=YES' 'MAIL=/var/spool/mail/winkl' 'runlevel=3' 'RUNLEVEL=3' 
'INPUTRC=/etc/inputrc' 'PWD=/usr/people/edvz/winkl/MPI-Test' 
'SGE_ACCOUNT=sge' 'LANG=en_US.UTF-8' 'previous=N' 'PREVLEVEL=N' 
'REQNAME=test_nodes.b2' 
'SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass' 
'MPI=/installadmin/mpich2/test/intel' 'SHLVL=2' 
'SGE_CWD_PATH=/usr/people/edvz/winkl/MPI-Test' 'OSTYPE=linux' 
'BOOT_IMAGE=2.6.18-194.11.3' 'MPIHOME=/installadmin/mpich2/test/intel' 
'VENDOR=unknown' 'MACHTYPE=x86_64' 'CVS_RSH=ssh' 
'SSH_CONNECTION=143.50.128.178 34576 143.50.10.40 22' 
'LESSOPEN=|/usr/bin/lesspipe.sh %s' 'G_BROKEN_FILENAMES=1' 
'_=/installadmin/mpich2/test/intel/bin/mpiexec' --global-user-env 0 
--global-system-env 0 --start-pid 2 --proxy-core-count 2 --exec 
--exec-appnum 0 --exec-proc-count 2 --exec-local-env 0 --exec-wdir 
/usr/people/edvz/winkl/MPI-Test --exec-args 1 /bin/true

[mpiexec at b79] Launch arguments: 
/installadmin/mpich2/test/intel/bin/hydra_pmi_proxy --control-port 
b79:45593 --debug --demux poll --pgid 0 --enable-stdin 1 --proxy-id 0
[mpiexec at b79] Launch arguments: /installadmin/sge/bin/lx24-amd64/qrsh 
-inherit -V b51 /installadmin/mpich2/test/intel/bin/hydra_pmi_proxy 
--control-port b79:45593 --debug --demux poll --pgid 0 --enable-stdin 1 
--proxy-id 1


>
> % /installadmin/sge/bin/lx24-amd64/qrsh -inherit -V b56 
> /installadmin/mpich2/test/intel/bin/hydra_pmi_proxy --control-port 
> b73:52298 --debug --demux poll --pgid 0 --enable-stdin 1 --proxy-id 1
>   
error: "qrsh" called with option "-inherit", but "JOB_ID" not set in 
environment

export JOB_ID=158269
[root at b00 ~]# /installadmin/sge/bin/lx24-amd64/qrsh -inherit -V b56 
/installadmin/mpich2/test/intel/bin/hydra_pmi_proxy --control-port 
b73:52298 --debug --demux poll --pgid 0 --enable-stdin 1 --proxy-id 1
error: executing task of job 158269 failed: missing "SGE_TASK_ID" in 
environment

I do not know what value SGE_TASK_ID should be set to, so I always get an
error of the form "error: executing task of job 158275 failed".




More information about the mpich-discuss mailing list