[mpich2-commits] r7483 - in mpich2/trunk: doc/userguide maint src/pm src/pm/remshell
balaji at mcs.anl.gov
balaji at mcs.anl.gov
Mon Nov 22 18:16:40 CST 2010
Author: balaji
Date: 2010-11-22 18:16:40 -0600 (Mon, 22 Nov 2010)
New Revision: 7483
Added:
mpich2/trunk/src/pm/remshell/
mpich2/trunk/src/pm/remshell/Makefile.sm
mpich2/trunk/src/pm/remshell/configure.in
mpich2/trunk/src/pm/remshell/mpich2prereq
mpich2/trunk/src/pm/remshell/mpiexec.c
Modified:
mpich2/trunk/doc/userguide/user.tex.vin
mpich2/trunk/maint/checkbuilds.in
mpich2/trunk/src/pm/Makefile.sm
Log:
Revert r7471 and r7479 (removal of remshell). Bill is still
maintaining it.
Modified: mpich2/trunk/doc/userguide/user.tex.vin
===================================================================
--- mpich2/trunk/doc/userguide/user.tex.vin 2010-11-22 21:13:56 UTC (rev 7482)
+++ mpich2/trunk/doc/userguide/user.tex.vin 2010-11-23 00:16:40 UTC (rev 7483)
@@ -741,6 +741,29 @@
but for standard error.
\end{description}
+\subsection{Restrictions of the remshell Process Management Environment}
+\label{sec:restrictions-remshell}
+
+The \texttt{remshell} ``process manager'' provides a very simple version of
+\texttt{mpiexec} that makes use of the secure shell command (\texttt{ssh}) to
+start processes on a collection of machines. As this is intended primarily as
+an illustration of how to build a version of \texttt{mpiexec} that works with
+other process managers, it does not implement all of the features of the other
+\texttt{mpiexec} programs described in this document. In particular, it
+ignores the command line options that control the environment variables given
+to the MPI programs. It does support the same output labeling features
+provided by the \texttt{gforker} version of \texttt{mpiexec}.
+However, this version of \texttt{mpiexec} can be used
+much like the \texttt{mpirun} for the \texttt{ch\_p4} device in MPICH-1 to run
+programs on a collection of machines that allow remote shells. A file by the
+name of \texttt{machines} should contain the names of machines on which
+processes can be run, one machine name per line. There must be enough
+machines listed to satisfy the requested number of processes; you can list the
+same machine name multiple times if necessary.
+
+For more complex needs or for faster startup, we recommend the use of the
+\texttt{mpd} process manager.
+
\subsection{Using MPICH2 with SLURM and PBS}
\label{sec:external_pm}
Modified: mpich2/trunk/maint/checkbuilds.in
===================================================================
--- mpich2/trunk/maint/checkbuilds.in 2010-11-22 21:13:56 UTC (rev 7482)
+++ mpich2/trunk/maint/checkbuilds.in 2010-11-23 00:16:40 UTC (rev 7483)
@@ -66,7 +66,7 @@
@with_array = (
'logging;none;rlog',
'pmi;simple;smpd', #; uni no longer supported
- 'pm;gforker;mpd',
+ 'pm;gforker;mpd', #;remshell
'namepublisher;no;file;mpd', #;ldap:ldapserver',
'device;ch3;ch3:sock',
);
Modified: mpich2/trunk/src/pm/Makefile.sm
===================================================================
--- mpich2/trunk/src/pm/Makefile.sm 2010-11-22 21:13:56 UTC (rev 7482)
+++ mpich2/trunk/src/pm/Makefile.sm 2010-11-23 00:16:40 UTC (rev 7483)
@@ -1,7 +1,7 @@
# SUBDIRS_pm_name are the names that @ pm_name @ can take in SUBDIRS
# (except for util, which is included so that simplemake will process
# the Makefile.sm in that directory)
-SUBDIRS_pm_name = mpd smpd gforker
+SUBDIRS_pm_name = mpd smpd gforker remshell
SUBDIRS = @pm_name@ @other_pm_names@ .
# Remove PMPILIBNAME from the common make variables for the mpid
# directories
Property changes on: mpich2/trunk/src/pm/remshell
___________________________________________________________________
Added: svn:ignore
+ Makefile
Makefile.in
configure
configure.lineno
config.*
remshellconf.h.in
remshellconf.h
autom4te*
mpiexec
*.gcov
*.gcda
*.gcno
Copied: mpich2/trunk/src/pm/remshell/Makefile.sm (from rev 7470, mpich2/trunk/src/pm/remshell/Makefile.sm)
===================================================================
--- mpich2/trunk/src/pm/remshell/Makefile.sm (rev 0)
+++ mpich2/trunk/src/pm/remshell/Makefile.sm 2010-11-23 00:16:40 UTC (rev 7483)
@@ -0,0 +1,37 @@
+mpiexec_SOURCES = mpiexec.c
+mpiexec_LDADD = -L../util -lmpiexec
+INCLUDES = -I../../include -I${master_top_srcdir}/src/include \
+ -I../../pmi/simple -I${master_top_srcdir}/src/pmi/simple \
+ -I${master_top_srcdir}/src/pm/util
+install_BIN = mpiexec
+
+clean-local:
+ @-(cd ../util && $(MAKE) clean)
+distclean-local:
+ @-rm -f remshellconf.h
+
+# Use the mpich2-build-install target to include mpiexec in the build bin
+# directory (all pm's require these targets)
+mpich2-build-install: install
+mpich2-build-uninstall: uninstall
+
+# A special alternate installation target when using multiple process managers
+install-alt: mpiexec
+ @if [ ! -d $(DESTDIR)${bindir} ] ; then \
+ echo "mkdir -p $(DESTDIR)${bindir} " ;\
+ mkdir -p $(DESTDIR)${bindir} ;\
+ fi
+ $(INSTALL_PROGRAM) $(INSTALL_STRIP_FLAG) mpiexec $(DESTDIR)${bindir}/mpiexec.remshell
+
+doc_sources = mpiexec.txt
+DOCDESTDIRS = html:www/www1,man:man/man1,latex:doc/refman
+docargs_ADD = ${master_top_srcdir}/doc/mansrc/cmdnotes
+doc_HTML_SOURCES = ${doc_sources}
+doc_MAN_SOURCES = ${doc_sources}
+doc_LATEX_SOURCES = ${doc_sources}
+
+# We use a dummy dependency to ensure that we always go to the util
+# directory to see if anything needs to be done
+../util/libmpiexec.a: dummy
+dummy:
+ cd ../util && $(MAKE)
Copied: mpich2/trunk/src/pm/remshell/configure.in (from rev 7470, mpich2/trunk/src/pm/remshell/configure.in)
===================================================================
--- mpich2/trunk/src/pm/remshell/configure.in (rev 0)
+++ mpich2/trunk/src/pm/remshell/configure.in 2010-11-23 00:16:40 UTC (rev 7483)
@@ -0,0 +1,255 @@
+dnl Process this file with autoconf to produce a configure script.
+dnl
+dnl aclocal_cache.m4, included by sowing/confdb/aclocal.m4, fixes
+dnl bugs in autoconf caching.
+dnl
+dnl The file name here refers to a file in the source being configured
+AC_INIT(mpiexec.c)
+
+dnl Definitions will be placed in this file rather than in the DEFS variable
+AC_CONFIG_HEADER(remshellconf.h)
+AH_TOP([/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2001 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+#ifndef REMSHELLCONF_H_INCLUDED
+#define REMSHELLCONF_H_INCLUDED
+])
+AH_BOTTOM([#endif])
+dnl
+dnl Set the directory that contains support scripts such as install-sh and
+dnl config.guess
+AC_CONFIG_AUX_DIR(../../../confdb)
+dnl
+echo "RUNNING CONFIGURE FOR THE REMSHELL PM"
+dnl
+dnl Use AC_ARG_ENABLE to look for --enable-feature and AC_ARG_WITH to look for
+dnl --with-capability
+dnl
+dnl Enable better caching control
+PAC_ARG_CACHING
+dnl
+AC_ARG_ENABLE(echo,
+[--enable-echo - Turn on strong echoing. The default is enable=no.] ,set -x)
+
+AC_ARG_ENABLE(onsig,
+[--enable-onsig - Control the handling of processes that signal (e.g., SEGV)
+ using ptrace. Disabled by default],,enable_onsig=no)
+AC_ARG_ENABLE(newsession,
+[--enable-newsession - Create a new process group session if standard in is
+ not connected to a terminal],,enable_newsession=yes)
+dnl With options
+dnl
+dnl First check that we have a clean build if we are doing a VPATH build
+PAC_VPATH_CHECK()
+dnl
+dnl Process any enable or with values
+
+dnl This test is complicated by the fact that top_srcdir is not set until
+dnl the very end of configure. Instead, we get it ourselves
+if test -z "$top_srcdir" ; then
+ use_top_srcdir=$srcdir
+else
+ use_top_srcdir=$top_srcdir
+fi
+
+PAC_LOAD_BASE_CACHE
+
+dnl The recommended order for items in a configure.in file is
+dnl check for programs
+dnl check for libraries
+dnl check for header files
+dnl check for typedefs
+dnl check for structures
+dnl check for compiler characteristics
+dnl check for library functions
+dnl check for system services
+dnl Here are some sample tests
+dnl
+dnl Determine the executable and object file extensions. These
+dnl are needed for some operations under cygwin
+AC_EXEEXT
+AC_OBJEXT
+
+dnl Find a C compiler
+PAC_PROG_CC
+
+PAC_ARG_STRICT
+AC_PROG_INSTALL
+PAC_PROG_MKDIR_P
+PAC_PROG_MAKE
+dnl AC_CHECK_PROGS(AR,ar)
+dnl AC_PROG_RANLIB
+# Turn off creating shared libraries for any utility routines. If we *are*
+# building shared libraries, make sure that we create a static executable
+case "$ENABLE_SHLIB" in
+ none) ;;
+ gcc)
+ LDFLAGS="$LDFLAGS -static"
+ ;;
+ libtool)
+ ;;
+ *)
+ ;;
+esac
+
+ENABLE_SHLIB=none
+AC_SUBST(ENABLE_SHLIB)
+dnl
+dnl check for compiler characteristics
+AC_C_CONST
+AC_C_VOLATILE
+AC_C_RESTRICT
+AC_C_INLINE
+dnl
+dnl check for library functions
+dnl AC_CHECK_FUNCS(strtol time gettimeofday)
+AC_CHECK_FUNCS(snprintf)
+if test "$ac_cv_func_snprintf" = "yes" ; then
+ PAC_FUNC_NEEDS_DECL([#include <stdio.h>],snprintf)
+fi
+AC_CHECK_FUNCS(strdup)
+if test "$ac_cv_func_strdup" = "yes" ; then
+ # Do we need to declare strdup?
+ PAC_FUNC_NEEDS_DECL([#include <string.h>],strdup)
+fi
+
+if test "$enable_onsig" = "yes" ; then
+ AC_CHECK_FUNCS(ptrace)
+ # It isn't enough to find ptrace. We also need the ptrace
+ # parameters, which some systems, such as IRIX, do not define.
+ if test "$ac_cv_func_ptrace" = yes ; then
+ AC_CACHE_CHECK([for ptrace named parameters],
+pac_cv_has_ptrace_parms,[
+ AC_TRY_COMPILE([
+#include <sys/types.h>
+#include <sys/ptrace.h>],[int i = PTRACE_CONT;],pac_cv_has_ptrace_parms=yes,
+pac_cv_has_ptrace_parms=no)])
+ if test "$pac_cv_has_ptrace_parms" = "yes" ; then
+ AC_DEFINE(HAVE_PTRACE_CONT,,[Define if ptrace parameters available])
+ fi
+ fi
+fi
+# Check for the functions needed to create a new session.
+# Cygwin has setsid but not getsid
+AC_CHECK_FUNCS(setsid isatty getsid)
+if test "$enable_newsession" = "yes" ; then
+ AC_DEFINE(USE_NEW_SESSION,1,[Define if mpiexec should create a new process group session])
+fi
+dnl
+dnl Check for special compile characteristics
+dnl
+dnl Is there libnsl needed for gethostbyname?
+dnl AC_SEARCH_LIBS(gethostbyname,nsl)
+AC_SEARCH_LIBS(socketpair,socket)
+dnl
+dnl Look for Standard headers
+AC_HEADER_STDC
+dnl Check for a specific header
+AC_CHECK_HEADERS(sys/types.h signal.h sys/ptrace.h)
+dnl
+dnl Check for functions. This invokes another test if the function is
+dnl found. The braces around the second test are essential.
+dnl AC_CHECK_FUNC(setpgrp,[AC_FUNC_SETPGRP])
+AC_CHECK_FUNCS(strsignal)
+dnl
+dnl Check for signal handlers
+AC_CHECK_FUNCS(sigaction signal sigset)
+if test "$ac_cv_func_sigaction" = "yes" ; then
+ AC_MSG_CHECKING(for struct sigaction)
+ AC_TRY_COMPILE([#include <signal.h>],[
+struct sigaction act; sigaddset( &act.sa_mask, SIGINT );],sigaction_ok="yes",sigaction_ok="no")
+ AC_MSG_RESULT($sigaction_ok)
+ if test "$sigaction_ok" = "no" ; then
+ AC_MSG_CHECKING(for struct sigaction with _POSIX_SOURCE)
+ AC_TRY_COMPILE([#define _POSIX_SOURCE
+#include <signal.h>],[
+struct sigaction act; sigaddset( &act.sa_mask, SIGINT );],sigaction_with_posix_ok="yes",sigaction_with_posix_ok="no")
+ AC_MSG_RESULT($sigaction_with_posix_ok)
+ if test "$sigaction_with_posix_ok" = "yes" ; then
+ sigaction_ok=yes
+ sigaction_needs_posix=yes
+ fi
+ fi
+fi
+dnl
+# Decide on the signal handler to use
+if test "$ac_cv_func_sigaction" = "yes" -a "$sigaction_ok" = "yes" ; then
+ if test "$sigaction_needs_posix" = yes ; then
+ AC_DEFINE(NEEDS_POSIX_FOR_SIGACTION,1,[Define if _POSIX_SOURCE needed to get sigaction])
+ fi
+ AC_DEFINE(USE_SIGACTION,1,[Define if sigaction should be used to set signals])
+elif test "$ac_cv_func_signal" = "yes" ; then
+ AC_DEFINE(USE_SIGNAL,1,[Define if signal should be used to set signals])
+fi
+dnl
+dnl Setup other replaceable values
+MPILIBNAME=${MPILIBNAME:-"mpich"}
+dnl
+dnl
+dnl Configure the pm utilities for mpiexec2 (which will eventually replace
+dnl mpiexec)
+PAC_SUBDIR_CACHE
+dnl
+dnl You can't configure a directory that isn't a strict subdirectory
+dnl reliably. For example, configure will set the cache file incorrectly
+dnl unless you set it explicitly
+dnl if test "$cache_file" != "/dev/null" ; then
+dnl # convert the cachefile to an absolute path
+dnl cachedir=`echo $cache_file | sed -e 's%/[^/]*$%%'`
+dnl fname=`basename $cache_file`
+dnl if test -d $cachedir ; then
+dnl cachedir=`(cd $cachedir && pwd)`
+dnl else
+dnl cachedir=`pwd`
+dnl fi
+dnl if test -f "$cachedir/$fname" ; then
+dnl cache_file=$cachedir/$fname
+dnl fi
+dnl fi
+
+if test -z "$master_top_srcdir" ; then
+ master_top_srcdir='${srcdir}/../../..'
+fi
+
+dnl See setup_pm. The pm/util directory must also be configured
+PAC_CONFIG_SUBDIR(../util,,AC_ERROR(pm/utils configure failed))
+
+AC_CHECK_HEADERS(string.h sys/time.h unistd.h stdlib.h sys/socket.h wait.h errno.h)
+
+AC_PATH_PROG(SSH,ssh)
+if test -z "$SSH" ; then
+ AC_PATH_PROG(RSH,rsh)
+ if test -n "$RSH" ; then
+ remshell="$RSH"
+ fi
+else
+ remshell="$SSH"
+fi
+
+AC_SUBST(MPILIBNAME)
+AC_SUBST(CC)
+AC_SUBST(CFLAGS)
+AC_SUBST(CC_SHL)
+AC_SUBST(C_LINK_SHL)
+AC_SUBST(master_top_srcdir)
+dnl
+dnl Dependency handling
+AC_SUBST(MAKE_DEPEND_C)
+dnl
+AC_DEFINE_UNQUOTED(DEFAULT_MACHINES_PATH,"$eval_datadir:.",[Define the search path for machines files])
+AC_DEFINE_UNQUOTED(DEFAULT_REMOTE_SHELL,"$remshell",[Define the default remote shell program to use])
+dnl
+dnl Etags
+AC_SUBST(ETAGS)
+AC_SUBST(ETAGSADD)
+export ETAGS
+export ETAGSADD
+dnl
+dnl Other autoconf variables that may be imported from the calling environment
+AC_SUBST(DOCTEXT)
+PAC_UPDATE_BASE_CACHE
+
+dnl Generate the Makefiles from Makefile.in
+AC_OUTPUT(Makefile)
Copied: mpich2/trunk/src/pm/remshell/mpich2prereq (from rev 7470, mpich2/trunk/src/pm/remshell/mpich2prereq)
===================================================================
--- mpich2/trunk/src/pm/remshell/mpich2prereq (rev 0)
+++ mpich2/trunk/src/pm/remshell/mpich2prereq 2010-11-23 00:16:40 UTC (rev 7483)
@@ -0,0 +1,10 @@
+#! /bin/sh
+# Check that we are using the simple PMI implementation
+# (Selecting multiple PM's may require incompatible PMI implementations,
+# e.g., remshell and SMPD.)
+if [ -z "$PM_REQUIRES_PMI" ] ; then
+ PM_REQUIRES_PMI=simple
+elif [ "$PM_REQUIRES_PMI" != "simple" ] ; then
+ echo "remshell requires the simple PMI implementation; $PM_REQUIRES_PMI has already been selected"
+ exit 1
+fi
Copied: mpich2/trunk/src/pm/remshell/mpiexec.c (from rev 7470, mpich2/trunk/src/pm/remshell/mpiexec.c)
===================================================================
--- mpich2/trunk/src/pm/remshell/mpiexec.c (rev 0)
+++ mpich2/trunk/src/pm/remshell/mpiexec.c 2010-11-23 00:16:40 UTC (rev 7483)
@@ -0,0 +1,451 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2004 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+/* OWNER=gropp */
+
+/* An example mpiexec program that uses a remote shell program to create
+ new processes on the selected hosts.
+
+ This code also shows how to use the pmutil routines (in ../util)
+ to provide many of the services required by mpiexec
+
+ Steps:
+ 1. Read and process the command line. Build a ProcessList. (A ProcessList
+ may have one entry for a request to create n separate processes)
+
+ 2. Convert the ProcessList into a ProcessTable. In the forker mpiexec,
+ this simply expands the requested number of processes into an
+ array with one entry per process. These entries contain information
+ on both the setup of the processes and the file descriptors used for
+ stdin,out,err, and for the PMI calls.
+
+ 3. (Optionally) allow the forked processes to use a host:port to
+ contact this program, rather than just sharing a pipe. This allows the
+ forker to start other programs, such as debuggers.
+
+ 4. Establish a signal handler for SIGCHLD. This will allow us to
+ get information about process termination; in particular, the exit
+ status.
+
+ 5. Start the programs.
+
+ 6. Process input from the programs; send stdin given to this process
+ to the selected processes (usually rank 0 or everyone). Handle all
+ PMI commands, including spawn. Another "input" is the expiration of the
+ specified timelimit for the run, if any.
+
+ 7. Process rundown commands and handle any abnormal termination.
+
+ 8. Wait for any processes to exit; gather the exit status and reason
+ for exit (if abnormal, such as signaled with SEGV or BUS)
+
+ 9. Release all resources and compute the exit status for this program
+ (using one of several approaches, such as taking the maximum of the
+ exit statuses).
+
+ Special Case to support Singleton Init:
+ To support a singleton init of a process that then wants to
+ create processes with MPI_Comm_spawn(_multiple), a special form of
+ mpiexec is supported:
+
+ mpiexec -pmi_args <port> <interfacename> <securitykey> <pid>
+
+ The singleton process (in a routine in simple_pmi.c) forks a process and
+ execs mpiexec with these arguments, where port is the port to which
+ mpiexec should connect, interfacename is the name of the network interface
+ (BUG: may not be correctly set as mpd currently ignores it), securitykey
+ is a place-holder for a key used by the singleton init process to verify
+ that the process connecting on the port is the one that was intended, and
+ pid is the pid of the singleton init process.
+
+ FIXME: The above has not been implemented yet.
+*/
+
+#include "remshellconf.h"
+#include <stdio.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <stdlib.h>
+
+#include "pmutil.h"
+#include "process.h"
+#include "cmnargs.h"
+#include "pmiserv.h"
+#include "ioloop.h"
+#include "labelout.h"
+#include "rm.h"
+#include "simple_pmiutil.h"
+#include "env.h" /* MPIE_Putenv */
+/* mpimem.h contains prototypes for MPIU_Strncpy etc. */
+/* We no longer can use these because they are MPI device specific */
+/* #include "mpimem.h" */
+
+typedef struct { PMISetup pmiinfo; IOLabelSetup labelinfo; } SetupInfo;
+
+/* Forward declarations */
+int mypreamble( void *, ProcessState* );
+int mypostfork( void *, void *, ProcessState* );
+int mypostamble( void *, void *, ProcessState* );
+int myspawn( ProcessWorld *, void * );
+
+static int AddEnvSetToCmdLine( const char *, const char *, const char ** );
+
+/* Set printFailure to 1 to get an explanation of the failure reason
+ for each process when a process fails */
+static int printFailure = 0;
+
+#ifndef MAX_PORT_STRING
+#define MAX_PORT_STRING 1024
+#endif
+
+/* Entry point for the remshell mpiexec.
+
+ Sequence, as coded below: initialize the global process universe (pUniv),
+ apply defaults from the environment and then the command line, assign
+ hosts to the processes, set up the PMI listener port, start the
+ processes with MPIE_ForkProcesses (using the mypreamble/mypostfork/
+ mypostamble hooks), run MPIE_IOLoop until it returns, wait for the
+ processes, and compute the return value.
+
+ Return value: the status from MPIE_ProcessGetExitStatus, or 1 when that
+ status is 0 but the run ended because the timeout expired. Setup
+ failures (and the unsupported singleton case) call exit(1) directly.
+
+ Note that envp is common but not standard */
+int main( int argc, char *argv[], char *envp[] )
+{
+ int rc;
+ int erc = 0; /* Other (exceptional) return codes */
+ int reason, signaled = 0;
+ SetupInfo s;
+ char portString[MAX_PORT_STRING];
+
+ /* MPIE_ProcessInit initializes the global pUniv */
+ MPIE_ProcessInit();
+ /* Set a default for the universe size */
+ pUniv.size = 64;
+
+ /* Set defaults for any arguments that are options. Also check the
+ environment for special options, such as debugging. Set
+ some defaults in pUniv */
+ MPIE_CheckEnv( &pUniv, 0, 0 );
+ IOLabelCheckEnv( );
+
+ /* Handle the command line arguments. Use the routine from util/cmnargs.c
+ to fill in the universe */
+ MPIE_Args( argc, argv, &pUniv, 0, 0 );
+ /* If there were any soft arguments, we need to handle them now.
+ NOTE(review): a zero return is treated as the failure case here, unlike
+ the setup calls below where a nonzero return is the failure case -
+ confirm against MPIE_InitWorldWithSoft */
+ rc = MPIE_InitWorldWithSoft( &pUniv.worlds[0], pUniv.size );
+ if (!rc) {
+ MPIU_Error_printf( "Unable to process soft arguments\n" );
+ exit(1);
+ }
+
+ if (pUniv.fromSingleton) {
+ /* The MPI process is already running. We create a simple entry
+ for a single process rather than creating the process */
+ MPIE_SetupSingleton( &pUniv );
+ }
+
+
+ rc = MPIE_ChooseHosts( &pUniv.worlds[0], MPIE_ReadMachines, 0 );
+ if (rc) {
+ MPIU_Error_printf( "Unable to assign hosts to processes\n" );
+ exit(1);
+ }
+
+ if (MPIE_Debug) MPIE_PrintProcessUniverse( stdout, &pUniv );
+
+ DBG_PRINTF( ("timeout_seconds = %d\n", pUniv.timeout) );
+
+ /* Get the common port for creating PMI connections to the created
+ processes. The port name is written into portString, which must stay
+ alive for as long as s.pmiinfo.portName is used */
+ rc = PMIServSetupPort( &pUniv, portString, sizeof(portString) );
+ if (rc) {
+ MPIU_Error_printf( "Unable to setup port for listener\n" );
+ exit(1);
+ }
+ s.pmiinfo.portName = portString;
+
+#ifdef USE_MPI_STAGE_EXECUTABLES
+ /* Hook for later use in staging executables.
+ The body below is placeholder pseudo-code ("?stageExes", "...") and
+ will not compile if USE_MPI_STAGE_EXECUTABLES is defined */
+ if (?stageExes) {
+ rc = MPIE_StageExecutables( &pUniv.worlds[0] );
+ if (!rc) ...;
+ }
+#endif
+
+ PMIServInit(myspawn,&s);
+ s.pmiinfo.pWorld = &pUniv.worlds[0];
+ PMISetupNewGroup( pUniv.worlds[0].nProcess, 0 );
+ MPIE_ForwardCommonSignals();
+ if (!pUniv.fromSingleton) {
+ MPIE_ForkProcesses( &pUniv.worlds[0], envp, mypreamble, &s,
+ mypostfork, 0, mypostamble, 0 );
+ }
+ else {
+ /* FIXME: The singleton code goes here */
+ MPIU_Error_printf( "Singleton init not supported\n" );
+ exit(1);
+ }
+ reason = MPIE_IOLoop( pUniv.timeout );
+
+ if (reason == IOLOOP_TIMEOUT) {
+ /* Exited due to timeout. Generate an error message and
+ terminate the children. Timeouts above 60 seconds are reported
+ in whole minutes (integer division), others in seconds */
+ if (pUniv.timeout > 60) {
+ MPIU_Error_printf( "Timeout of %d minutes expired; job aborted\n",
+ pUniv.timeout / 60 );
+ }
+ else {
+ MPIU_Error_printf( "Timeout of %d seconds expired; job aborted\n",
+ pUniv.timeout );
+ }
+ erc = 1;
+ MPIE_KillUniverse( &pUniv );
+ }
+
+ /* Wait for all processes to exit and gather information on them.
+ We do this through the SIGCHLD handler. We also bound the length
+ of time that we wait to 2 seconds.
+ */
+ MPIE_WaitForProcesses( &pUniv, 2 );
+
+ /* Compute the return code (max for now) */
+ rc = MPIE_ProcessGetExitStatus( &signaled );
+
+ /* Optionally provide detailed information about failed processes.
+ Reasons are always printed when signaled is set; for other
+ nonzero statuses only when printFailure is set */
+ if ( (rc && printFailure) || signaled)
+ MPIE_PrintFailureReasons( stderr );
+
+ /* If the processes exited normally (or were already gone) but we
+ had an exceptional exit, such as a timeout, use the erc value */
+ if (!rc && erc) rc = erc;
+
+ return( rc );
+}
+
+void mpiexec_usage( const char *msg )
+{
+ if (msg) {
+ MPIU_Error_printf( msg );
+ if (msg[strlen(msg)-1] != '\n') {
+ MPIU_Error_printf( "\n" );
+ }
+ }
+ MPIU_Usage_printf( "Usage: mpiexec %s\n", MPIE_ArgDescription() );
+ exit( -1 );
+}
+
+/* Pre-fork hook handed to MPIE_ForkProcesses: redirect stdout and stderr
+   to a handler and set up the PMI sockets.
+
+   data   - the SetupInfo supplied by the caller of MPIE_ForkProcesses
+   pState - state of the process about to be created
+
+   Returns the result of PMISetupSockets. */
+int mypreamble( void *data, ProcessState *pState )
+{
+    SetupInfo *setup = (SetupInfo *)data;
+    int status;
+
+    IOLabelSetupFDs( &setup->labelinfo );
+    status = PMISetupSockets( 1, &setup->pmiinfo );
+
+    /* Initialization data has to travel over the socket; the environment
+       cannot be used for it */
+    pState->initWithEnv = 0;
+
+    return status;
+}
+
+/* Post-fork hook handed to MPIE_ForkProcesses.
+
+   Close one side of each pipe pair and replace stdout/err with the pipes,
+   then rewrite the application's command line so that the program is
+   started on pState->hostname through the remote shell:
+
+       ssh -Y HOSTNAME  SET-PMI_PORT ;  SET-PMI_ID ;  [SET-PMI_DEBUG ;]
+           EXENAME ORIGINAL-ARGS...
+
+   where each SET-... group is produced by AddEnvSetToCmdLine.
+
+   predata - the SetupInfo supplied by the caller of MPIE_ForkProcesses
+   data    - unused
+   pState  - the process being started; pState->app is modified in place
+
+   Returns 0.  Calls exit(1) if no hostname is available or if the new
+   argument vector cannot be allocated. */
+int mypostfork( void *predata, void *data, ProcessState *pState )
+{
+    SetupInfo *s = (SetupInfo *)predata;
+    int curarg=0;
+
+    IOLabelSetupInClient( &s->labelinfo );
+    PMISetupInClient( 1, &s->pmiinfo );
+
+    /* Now, we *also* change the process state to insert the
+       interposed remote shell routine.  This is probably not
+       where we want this in the final version (because MPIE_ExecProgram
+       does a lot under the assumption that the started program will
+       know what to do with new environment variables), but this
+       will allow us to start. */
+    {
+        ProcessApp *app = pState->app;
+        const char **newargs = 0;
+        char *pmiDebugStr = 0;
+        int j;
+        char rankStr[12];
+        /* Worst-case number of entries placed in front of the original
+           arguments: "-Y" and the hostname (2), three environment
+           settings of up to four words each in csh format (12), and the
+           executable name (1).  One more slot is needed for the
+           terminating null pointer. */
+        const int maxPrefixArgs = 2 + 3*4 + 1;
+
+        if (!pState->hostname) {
+            MPIU_Error_printf( "No hostname avaliable for %s\n", app->exename );
+            exit(1);
+        }
+
+        /* Insert into app->args */
+        newargs = (const char **) MPIU_Malloc(
+            (app->nArgs + maxPrefixArgs + 1) * sizeof(char *) );
+        if (!newargs) {
+            MPIU_Error_printf( "Unable to allocate memory for the remote shell command line\n" );
+            exit(1);
+        }
+
+        snprintf( rankStr, sizeof(rankStr), "%d", pState->id );
+        /* Terminate explicitly in case snprintf is a replacement that
+           does not always do so */
+        rankStr[sizeof(rankStr)-1] = 0;
+        curarg = 0;
+        newargs[curarg++] = MPIU_Strdup( "-Y" );
+
+        newargs[curarg++] = pState->hostname;
+        curarg += AddEnvSetToCmdLine( "PMI_PORT", s->pmiinfo.portName,
+                                      newargs + curarg );
+        curarg += AddEnvSetToCmdLine( "PMI_ID", rankStr, newargs + curarg );
+        pmiDebugStr = getenv( "PMI_DEBUG" );
+        if (pmiDebugStr) {
+            /* Use this to help debug the connection process */
+            curarg += AddEnvSetToCmdLine( "PMI_DEBUG", pmiDebugStr,
+                                          newargs + curarg );
+        }
+
+        /* The original program and its arguments follow the settings */
+        newargs[curarg++] = app->exename;
+        for (j=0; j<app->nArgs; j++) {
+            newargs[j+curarg] = app->args[j];
+        }
+        newargs[j+curarg] = 0;
+        /* NOTE(review): the remote shell is hard-coded here; configure
+           defines DEFAULT_REMOTE_SHELL but it is not used - confirm
+           whether it should be */
+        app->exename = MPIU_Strdup( "/usr/bin/ssh" );
+
+        app->args = newargs;
+        app->nArgs += curarg;
+
+        if (MPIE_Debug) {
+            printf( "cmd = %s\n", app->exename ); fflush(stdout);
+            printf( "Number of args = %d\n", app->nArgs );
+            for (j=0; j<app->nArgs; j++) {
+                printf( "argv[%d] = %s\n", j, app->args[j] ); fflush(stdout);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* Hook handed to MPIE_ForkProcesses that completes the server-side setup:
+   close one side of the pipe pair and register a handler for the I/O.
+
+   predata - the SetupInfo supplied by the caller of MPIE_ForkProcesses
+   data    - unused
+   pState  - the process whose I/O and PMI connection are being finished
+
+   Always returns 0. */
+int mypostamble( void *predata, void *data, ProcessState *pState )
+{
+    SetupInfo *setup = (SetupInfo *)predata;
+
+    IOLabelSetupFinishInServer( &setup->labelinfo, pState );
+    PMISetupFinishInServer( 1, &setup->pmiinfo, pState );
+    return 0;
+}
+
+/* Spawn callback registered with PMIServInit.
+
+   Appends pWorld to the end of the universe's list of worlds, records it
+   as the current world in the SetupInfo passed as data, marks its
+   processes as spawned (PMI_SPAWNED=1), and starts them with
+   MPIE_ForkProcesses using the same hooks as the initial world.
+
+   Always returns 0. */
+int myspawn( ProcessWorld *pWorld, void *data )
+{
+    SetupInfo *setup = (SetupInfo *)data;
+    ProcessWorld **tail;
+
+    /* Advance to the null link that ends the world list and hang the
+       new world there */
+    for (tail = &(pUniv.worlds); *tail; tail = &(*tail)->nextWorld)
+        ;
+    *tail = pWorld;
+
+    /* Fork Processes may call a routine that is passed the setup data but
+       not pWorld; this makes sure that all routines can access the
+       current world */
+    setup->pmiinfo.pWorld = pWorld;
+
+    /* FIXME: This should be part of the PMI initialization in the clients */
+    MPIE_Putenv( pWorld, "PMI_SPAWNED=1" );
+
+    MPIE_ForkProcesses( pWorld, 0, mypreamble, setup,
+                        mypostfork, 0, mypostamble, 0 );
+    return 0;
+}
+
+/* Temp test for the replacement for the simple "spawn == fork" */
+
+/*
+ * Approach:
+ * Processes are created using a remote shell program. This requires
+ * changing the command line from
+ *
+ * a.out args ...
+ *
+ * to
+ *
+ * remshell-program remshell-args /bin/sh -c PMI_PORT=string &&
+ * export PMI_PORT && PMI_ID=rank-in-world && export PMI_ID &&
+ * a.out args
+ *
+ * (the export PMI_PORT=string syntax is not valid in all versions of sh)
+ *
+ * Using PMI_ID ensures that we correctly identify each process (this was
+ * a major problem in the setup used by the p4 device in MPICH1).
+ * Using environment variables instead of command line arguments keeps
+ * the command line clean.
+ *
+ * Two alternatives should be considered
+ * 1) Use an intermediate manager. This would allow us to set up the
+ * environment as well:
+ * remshell-program remshell-args manager -port string
+ * One possibility for the manager is the mpd manager
+ * 2) Use the secure server (even the same one as in MPICH1); then
+ * there is no remote shell command.
+ *
+ * We can handle the transformation of the command line by adding a step
+ * to the postfork routine; this is called after the fork but before the
+ * exec, and it can change the command line by making a copy of the app
+ * structure, changing the command line, and setting the pState structure
+ * to point to this new app (after the fork, these changes are visible only
+ * to the forked process).
+ *
+ * Enhancements:
+ * Allow the code to avoid the remote shell if the process is being created
+ * on the local host.
+ *
+ * Handle the use of -l username and -n options to remshell
+ * (-n makes stdin /dev/null, necessary for backgrounding).
+ * (-l username allows login to hosts where the user's username is
+ * different)
+ *
+ * Provide an option to add a backslash before any - to deal with the
+ * serious bug in the GNU inetutils remote shell programs that process
+ * *all* arguments on the remote shell command line, even those for the
+ * *program*!
+ *
+ * To best support the errcodes return from MPI_Comm_spawn,
+ * we need a way to communicate the array of error codes back to the
+ * spawn and spawn multiple commands. Query: how is that done in
+ * PMI?
+ *
+ */
+
+/* Append the words of a shell command that sets one environment variable
+   to an argument vector under construction.
+
+   envName  - name of the variable
+   envValue - value to give it
+   args     - where to store the new entries; the caller must provide room
+              for four pointers
+
+   Two forms are produced, each ending with a ";" separator:
+       csh format:  setenv NAME VALUE ;      (4 entries)
+       sh format:   export NAME=VALUE ;      (3 entries)
+
+   The form is chosen once, on the first call, from the basename of the
+   local SHELL environment variable: Bourne-compatible shells select the
+   sh format; anything else, or an unset SHELL, selects the csh format.
+   (This presumes the shell on the remote host matches the local one.)
+
+   Every entry stored is a copy made with MPIU_Strdup.
+
+   Returns the number of entries stored in args. */
+static int AddEnvSetToCmdLine( const char *envName, const char *envValue,
+                               const char **args )
+{
+    /* Shells that accept "export NAME=VALUE" */
+    static const char *const shNames[] = {
+        "bash", "sh", "ash", "dash", "ksh", "zsh"
+    };
+    static int useCSHFormat = -1;
+    int nArgs = 0;
+
+    /* Determine the shell type the first time */
+    if (useCSHFormat == -1) {
+        const char *shell = getenv( "SHELL" );
+
+        /* Default is to assume csh (setenv) format */
+        useCSHFormat = 1;
+        if (shell) {
+            const char *sname = strrchr( shell, '/' );
+            size_t i;
+
+            /* Compare only the last path component */
+            sname = sname ? sname + 1 : shell;
+            for (i=0; i<sizeof(shNames)/sizeof(shNames[0]); i++) {
+                /* Each comparison needs the explicit == 0: a bare strcmp
+                   result is true for every name that does NOT match */
+                if (strcmp( sname, shNames[i] ) == 0) {
+                    useCSHFormat = 0;
+                    break;
+                }
+            }
+        }
+    }
+
+    if (useCSHFormat) {
+        args[nArgs++] = MPIU_Strdup( "setenv" );
+        args[nArgs++] = MPIU_Strdup( envName );
+        args[nArgs++] = MPIU_Strdup( envValue );
+        args[nArgs++] = MPIU_Strdup( ";" );
+    }
+    else {
+        /* NAME=VALUE is assembled in a fixed-size buffer using the
+           size-bounded MPIU string routines */
+        char tmpBuf[1024];
+        args[nArgs++] = MPIU_Strdup( "export" );
+        MPIU_Strncpy( tmpBuf, envName, sizeof(tmpBuf) );
+        MPIU_Strnapp( tmpBuf, "=", sizeof(tmpBuf) );
+        MPIU_Strnapp( tmpBuf, envValue, sizeof(tmpBuf) );
+        args[nArgs++] = MPIU_Strdup( tmpBuf );
+        args[nArgs++] = MPIU_Strdup( ";" );
+    }
+    return nArgs;
+}
More information about the mpich2-commits
mailing list