[mpich2-commits] r7418 - in mpich2/branches/release/mpich2-1.3.x: . confdb maint src/include src/mpi/coll src/mpi/comm src/mpi/debugger src/mpi/errhan src/mpi/group src/mpi/init src/mpi/rma src/mpi/romio/adio/ad_pvfs2 src/mpi/romio/adio/common src/mpi/romio/adio/include src/mpi/romio/mpi-io src/mpid/ch3/channels/nemesis/nemesis/include src/mpid/ch3/channels/nemesis/nemesis/netmod/elan src/mpid/ch3/channels/nemesis/nemesis/netmod/gm src/mpid/ch3/channels/nemesis/nemesis/netmod/mx src/mpid/ch3/channels/nemesis/nemesis/netmod/nd src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad src/mpid/ch3/channels/nemesis/nemesis/netmod/none src/mpid/ch3/channels/nemesis/nemesis/netmod/psm src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp src/mpid/ch3/channels/nemesis/nemesis/src src/mpid/ch3/channels/nemesis/src src/mpid/ch3/include src/mpid/ch3/src src/mpid/ch3/util/ftb src/mpid/dcmfd/src/pt2pt src/mpl/src src/pm/hydra src/pm/hydra/examples src/pm/hydra/include src/pm/hydra/pm src/pm/hydra/pm/pmiserv src/pm/hydra/tools src/pm/hydra/tools/bootstrap/external src/pm/hydra/tools/bootstrap/src src/pm/hydra/tools/bootstrap/utils src/pm/hydra/tools/ftb src/pm/hydra/ui src/pm/hydra/ui/mpich src/pm/hydra/utils src/util src/util/instrm src/util/param test/mpi/errors/spawn test/mpi/group test/mpi/perf

balaji at mcs.anl.gov balaji at mcs.anl.gov
Sat Nov 6 10:02:45 CDT 2010


Author: balaji
Date: 2010-11-06 10:02:44 -0500 (Sat, 06 Nov 2010)
New Revision: 7418

Added:
   mpich2/branches/release/mpich2-1.3.x/src/include/mpiinstr.h
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/Makefile.mk
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.h
   mpich2/branches/release/mpich2-1.3.x/src/util/instrm/instr.c
   mpich2/branches/release/mpich2-1.3.x/test/mpi/group/gtranksperf.c
Removed:
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/Makefile.mk
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.h
Modified:
   mpich2/branches/release/mpich2-1.3.x/
   mpich2/branches/release/mpich2-1.3.x/Makefile.sm
   mpich2/branches/release/mpich2-1.3.x/confdb/
   mpich2/branches/release/mpich2-1.3.x/confdb/aclocal_mpi.m4
   mpich2/branches/release/mpich2-1.3.x/configure.in
   mpich2/branches/release/mpich2-1.3.x/maint/Version
   mpich2/branches/release/mpich2-1.3.x/maint/genparams
   mpich2/branches/release/mpich2-1.3.x/maint/simplemake.in
   mpich2/branches/release/mpich2-1.3.x/src/include/mpihandlemem.h
   mpich2/branches/release/mpich2-1.3.x/src/include/mpiimpl.h
   mpich2/branches/release/mpich2-1.3.x/src/include/mpiimplthread.h
   mpich2/branches/release/mpich2-1.3.x/src/include/mpiu_ex.h
   mpich2/branches/release/mpich2-1.3.x/src/include/mpiutil.h
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allgather.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allgatherv.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allreduce.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/bcast.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/gather.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/gatherv.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/red_scat.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/red_scat_block.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/reduce.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/scatter.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/comm_group.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/comm_remote_group.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/mpicomm.h
   mpich2/branches/release/mpich2-1.3.x/src/mpi/debugger/dbginit.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/errhan/errnames.txt
   mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_difference.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_excl.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_incl.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_intersection.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_range_excl.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_range_incl.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_translate_ranks.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_union.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/group/grouputil.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/init/initthread.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/accumulate.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/get.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/put.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_get_group.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_lock.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_unlock.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/ad_pvfs2/ad_pvfs2_common.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/ad_end.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/ad_init.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/flatten.c
   mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/include/adio.h
   mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/include/adio_extern.h
   mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/mpi-io/open.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/include/mpid_nem_nets.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/elan/elan_init.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/gm/gm_init.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_alloc.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_impl.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_init.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_poll.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_probe.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_send.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_ep_util.cpp
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_impl.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_sm.cpp
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_alloc.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_impl.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_init.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_probe.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_send.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/none/none.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/psm/psm_init.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/socksm.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_impl.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_init.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_send.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_network.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/src/ch3_finalize.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidftb.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidimpl.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidpkt.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidpre.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidrma.h
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_connection.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_recv_req.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_port.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_rma_ops.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_rma_sync.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_abort.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_iprobe.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_probe.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/util/ftb/ftb.c
   mpich2/branches/release/mpich2-1.3.x/src/mpid/dcmfd/src/pt2pt/mpidi_irecv.c
   mpich2/branches/release/mpich2-1.3.x/src/mpl/src/mplstr.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/Makefile.am
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/README
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/autogen.sh
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/configure.in
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/examples/
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/hydra-doxygen.cfg.in
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/include/
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/mpich2prereq
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip_cb.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/Makefile.mk
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/ui/
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/ui/mpich/mpiexec.c
   mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/utils/
   mpich2/branches/release/mpich2-1.3.x/src/util/createshlib.in
   mpich2/branches/release/mpich2-1.3.x/src/util/instrm/Makefile.sm
   mpich2/branches/release/mpich2-1.3.x/src/util/param/params.yml
   mpich2/branches/release/mpich2-1.3.x/test/mpi/errors/spawn/testlist
   mpich2/branches/release/mpich2-1.3.x/test/mpi/group/Makefile.sm
   mpich2/branches/release/mpich2-1.3.x/test/mpi/group/gtranks.c
   mpich2/branches/release/mpich2-1.3.x/test/mpi/group/testlist
   mpich2/branches/release/mpich2-1.3.x/test/mpi/perf/manyrma.c
   mpich2/branches/release/mpich2-1.3.x/winconfigure.wsf
Log:
Merging the following changesets from trunk:

r7355, r7356, r7357, r7358, r7359, r7366, r7367, r7371, r7372, r7373,
r7374, r7375, r7376, r7377, r7378, r7379, r7380, r7381, r7382, r7383,
r7384, r7385, r7386, r7387, r7388, r7389, r7390, r7391, r7392, r7393,
r7394, r7395, r7396, r7397, r7398, r7399, r7400, r7401, r7402, r7406,
r7407, r7408, r7409, r7411, r7412, r7413, r7414, r7415, r7416.

This includes all changes after 1.3, not including the non-blocking
collectives, which are a 1.4 change.



Property changes on: mpich2/branches/release/mpich2-1.3.x
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt:5050
/mpich2/branches/dev/ckpt2:5057-6537
/mpich2/branches/dev/ftb:5661-5730
/mpich2/branches/dev/lapi:5817
/mpich2/branches/dev/wintcp_async_progress:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2:5406
   + /mpich2/branches/dev/ckpt:5050
/mpich2/branches/dev/ckpt2:5057-6537
/mpich2/branches/dev/ftb:5661-5730
/mpich2/branches/dev/lapi:5817
/mpich2/branches/dev/wintcp_async_progress:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2:5406
/mpich2/trunk:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Modified: mpich2/branches/release/mpich2-1.3.x/Makefile.sm
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/Makefile.sm	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/Makefile.sm	2010-11-06 15:02:44 UTC (rev 7418)
@@ -43,6 +43,9 @@
 clean-preamble:
 	${MAKE} cleanlibs
 
+clean-postamble:
+	cd examples && ${MAKE} clean
+
 cleanobjsandlibs:
 	${MAKE} clean && cd test && ${MAKE} clean
 	rm -f lib/newconfig
@@ -153,6 +156,7 @@
 libf${MPILIBNAME}_so_LIBS   = -L. @LDFLAGS_DEPS@ -l$(MPILIBNAME) @LIB_DEPS@
 lib${MPILIBNAME}f90_so_LIBS = -L. @LDFLAGS_DEPS@ -l$(MPILIBNAME) @LIB_DEPS@
 lib${MPICXXLIBNAME}_so_LIBS = -L. @LDFLAGS_DEPS@ -l$(MPILIBNAME) @LIB_DEPS@
+lib${MPICXXLIBNAME}_so_LINKER = @CXX_SHL@
 
 install_INCLUDE = src/include/mpi.h
 


Property changes on: mpich2/branches/release/mpich2-1.3.x/confdb
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt2/confdb:5180,5182,5196,5198
/mpich2/branches/dev/ftb/confdb:5661-5730
/mpich2/branches/dev/lapi/confdb:5817
/mpich2/branches/dev/wintcp_async_progress/confdb:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/confdb:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/confdb:5406
   + /mpich2/branches/dev/ckpt2/confdb:5180,5182,5196,5198
/mpich2/branches/dev/ftb/confdb:5661-5730
/mpich2/branches/dev/lapi/confdb:5817
/mpich2/branches/dev/wintcp_async_progress/confdb:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/confdb:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/confdb:5406
/mpich2/trunk/confdb:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Modified: mpich2/branches/release/mpich2-1.3.x/confdb/aclocal_mpi.m4
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/confdb/aclocal_mpi.m4	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/confdb/aclocal_mpi.m4	2010-11-06 15:02:44 UTC (rev 7418)
@@ -301,22 +301,22 @@
 
 	ibmmpi)
 	AC_CHECK_PROGS(MPCC,mpcc)
-	AC_CHECK_PROGS(MPXLF,mpxlf)
+	AC_CHECK_PROGS(MPXLF,mpxlf mpfort)
 	if test -z "$MPCC" -o -z "$MPXLF" ; then
-	    AC_MSG_ERROR([Could not find IBM MPI compilation scripts.  Either mpcc or mpxlf is missing])
+	    AC_MSG_ERROR([Could not find IBM MPI compilation scripts.  Either mpcc or mpxlf/mpfort is missing])
 	fi
 	if test -z "$TESTCC" ; then TESTCC=${CC-xlC} ; fi
 	if test -z "$TESTF77" ; then TESTF77=${F77-xlf}; fi
-	CC=mpcc; F77=mpxlf
+	CC=mpcc; F77=$MPXLF
 	# There is no mpxlf90, but the options langlvl and free can
 	# select the Fortran 90 version of xlf
 	if test "$enable_f90" != no ; then
-	    AC_CHECK_PROGS(MPIXLF90,mpxlf90)
+	    AC_CHECK_PROGS(MPIXLF90,mpxlf90 mpfort)
 	    if test -z "$TESTFC" ; then TESTFC=${FC-xlf90}; fi
             if test "X$MPIXLF90" != "X" ; then 
-	        FC="mpxlf90"
+	        FC="$MPIXLF90"
 	    else
-	    	FC="mpxlf -qlanglvl=90ext -qfree=f90"
+	    	FC="$MPXLF -qlanglvl=90ext -qfree=f90"
 	    fi
 	fi
 	MPILIBNAME=""

Modified: mpich2/branches/release/mpich2-1.3.x/configure.in
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/configure.in	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/configure.in	2010-11-06 15:02:44 UTC (rev 7418)
@@ -135,7 +135,6 @@
     AC_MSG_ERROR([Version information not found. Configuration aborted.])
 fi
 AC_SUBST(MPICH2_RELEASE_DATE)
-
 # Produce a numeric version assuming the following format:
 # Version: [MAJ].[MIN].[REV][EXT][EXT_NUMBER]
 # Example: 1.0.7rc1 has
@@ -330,6 +329,7 @@
                    compiler flags, i.e. MPICH2LIB_CFLAGS, MPICH2LIB_CXXFLAGS,
                    MPICH2LIB_FFLAGS, and MPICH2LIB_FCFLAGS.
         debug    - Synonym for dbg
+        instr    - Enable instrumentation
         log      - Enable debug event logging
         mem      - Memory usage tracing
         meminit  - Preinitialize memory associated structures and unions to
@@ -740,7 +740,9 @@
             MPI_DEFAULT_FOPTS="-$option"
             MPI_DEFAULT_FCOPTS="-$option"
         else
+	    IFS="$save_IFS"
             AC_MSG_WARN([Unknown value $option for --enable-fast])
+	    IFS=","
         fi
         ;;
         none|no)
@@ -751,7 +753,9 @@
         enable_append_ndebug=no
         ;;
         *)
+	IFS="$save_IFS"
         AC_MSG_WARN([Unknown value $option for --enable-fast])
+	IFS=","
         ;;
     esac
 done
@@ -1260,6 +1264,9 @@
 	handle)
 	AC_DEFINE(MPICH_DEBUG_HANDLES,1,[Define to enable handle checking])
 	;;
+	instr)
+	perform_instr=yes
+	;;
 	meminit)
 	perform_meminit=yes
 	;;
@@ -1284,12 +1291,15 @@
 	perform_dbglog=yes
 	enable_append_g=yes
 	perform_meminit=yes
+	perform_instr=yes
 	perform_dbgmutex=yes
 	perform_mutexnesting=yes
 	perform_handlealloc=yes
 	;;
 	*)
-	AC_MSG_WARN([Unknown value $enable_g for enable-g])
+	IFS=$save_IFS
+	AC_MSG_WARN([Unknown value $option for enable-g])
+	IFS=","
 	;;
     esac
 done
@@ -1311,8 +1321,11 @@
     AC_DEFINE(MPICH_DEBUG_MEMINIT,1,[Define to enable preinitialization of memory used by structures and unions])
 fi
 if test "$perform_handlealloc" = yes ; then
-   AC_DEFINE(MPICH_DEBUG_HANDLEALLOC,1,[Define to enable checking of handles still allocated at MPI_Finalize])
+    AC_DEFINE(MPICH_DEBUG_HANDLEALLOC,1,[Define to enable checking of handles still allocated at MPI_Finalize])
 fi
+if test "$perform_instr" = yes ; then
+    AC_DEFINE(USE_MPIU_INSTR,1,[Define this to enable internal instrumentation] )
+fi
 
 if test -n "$perform_memtracing" ; then
     enable_g_mem=yes
@@ -1729,6 +1742,15 @@
     PAC_PROG_F77
 fi
 
+if test "$enable_f77" = "yes" -a "$F77" = "" ; then
+   # No Fortran 77 compiler found; abort
+   AC_MSG_ERROR([No Fortran 77 compiler found. If you don't need to
+   build any Fortran programs, you can disable Fortran support using
+   --disable-f77 and --disable-fc. If you do want to build Fortran
+   programs, you need to install a Fortran compiler such as gfortran
+   or ifort before you can proceed.])
+fi
+
 if test "$enable_f77" = yes ; then
     # Check if $MPI_DEFAULT_FOPTS is valid with $F77
     if test "$enable_default_optimize" = "yes" \
@@ -1904,7 +1926,17 @@
     elif test "$pac_cv_prog_fc_works" = no; then
         AC_MSG_WARN([Use --disable-fc to keep configure from searching for a Fortran 90 compiler])
     fi
+
+    if test "$enable_fc" = "yes" -a "$FC" = "" ; then
+       # No Fortran 90 compiler found; abort
+       AC_MSG_ERROR([No Fortran 90 compiler found. If you don't need
+       to build any Fortran 90 programs, you can disable Fortran 90
+       support using --disable-fc. If you do want to build Fortran 90
+       programs, you need to install a Fortran 90 compiler such as
+       gfortran or ifort before you can proceed.])
+    fi
 fi
+
 if test "$enable_fc" = "yes" -a "$enable_f77" != "yes" ; then
    # Fortran 90 support requires compatible Fortran 77 support
    AC_MSG_ERROR([


Property changes on: mpich2/branches/release/mpich2-1.3.x/maint/Version
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/maint/Version:5050
/mpich2/branches/dev/ckpt2/maint/Version:5057-6537
/mpich2/branches/dev/ftb/maint/Version:5661-5730
/mpich2/branches/dev/lapi/maint/Version:5817
/mpich2/branches/dev/wintcp_async_progress/maint/Version:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/maint/Version:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/maint/Version:5406
/mpich2/trunk/src/pm/hydra/VERSION:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Modified: mpich2/branches/release/mpich2-1.3.x/maint/genparams
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/maint/genparams	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/maint/genparams	2010-11-06 15:02:44 UTC (rev 7418)
@@ -347,10 +347,35 @@
 
 close(PARAM_C);
 
-print PARAM_HDR "\n#endif /* $hdr_guard */\n";
+print PARAM_HDR <<EOT;
+
+/* TODO: this should be defined elsewhere */
+#define ${ns}_assert MPIU_Assert
+
+/* helper macros for safely getting the default value of a parameter */
+EOT
+my @type_field = (
+    ['INT','i_val'],
+    ['DOUBLE','d_val'],
+    ['BOOLEAN','i_val'],
+    ['STRING','s_val'],
+);
+foreach my $tuple (@type_field) {
+    my ($type,$field) = @$tuple;
+    print PARAM_HDR <<EOT;
+#define ${uc_ns}_GET_DEFAULT_${type}(p_suffix_,out_ptr_)                                               \\
+    do {                                                                                               \\
+        ${ns}_assert(${uc_ns}_TYPE_${type} == ${ns}_params[${uc_ns}_ID_##p_suffix_].default_val.type); \\
+        *(out_ptr_) = ${ns}_params[${uc_ns}_ID_##p_suffix_].default_val.${field};                      \\
+    } while (0)
+EOT
+}
+print PARAM_HDR <<EOT;
+
+#endif /* $hdr_guard */
+EOT
 close(PARAM_HDR);
 
-
 ########################################################################
 # helper subroutines
 

Modified: mpich2/branches/release/mpich2-1.3.x/maint/simplemake.in
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/maint/simplemake.in	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/maint/simplemake.in	2010-11-06 15:02:44 UTC (rev 7418)
@@ -194,6 +194,7 @@
 $quiet = 0;
 
 %libdir = ();
+%liblinker = ();
 #
 # While simplemake doesn't support ext->other (e.g., .c to .s), 
 # the rules have been added.
@@ -849,6 +850,13 @@
 	    print "Setting libdir{$1} to $2/\n" if $debug;
             $libdir{$1}  = "$2/";
         }
+	elsif (/^lib([@\${}\(\)\w-]*)_so_LINKER\s*=\s*(\S+)\s*$/) {
+	    $libLinker{$1} = $2;
+	    # This selects a specific format for specifing the linker 
+	    # when installing the shared library
+	    print "setting installExtraArgs $1 to --clink=$2\n" if $debug;
+	    $installExtraArgs{$1} = "--clink=\"$2\"";
+	}
 	elsif (/^install_local_DIR\s*=\s*(.*)\s*$/) {
 	    $install_local_dirs = $1;
 	}
@@ -1942,6 +1950,8 @@
     $depadd_all = "";
     %altCompile = ();
     %altCompileSources = ();
+    %libLinker = ();
+    %installExtraArgs = ();
 
     $gSubdirSMVarsSeen = "";
 
@@ -3369,6 +3379,10 @@
     my $newlibname = $_[1];
     my $libdir = $_[2];
 
+    # Some shared libraries may need to be built with a compiler other than 
+    # the C compiler.  This will let us define that.
+    my $linkerOption = "";
+
     my $libbasename = $libname;
     $libbasename =~ s/^lib//;
     $libbasename =~ s/\.la$//;
@@ -3379,9 +3393,14 @@
 	return;
     }
 
+    # If the linker should not be the C compiler, this selects it
+    if (defined($libLinker{$libbasename})) {
+	$linkerOption = "--clink=\"" . $libLinker{$libbasename} . "\"";
+    }
+
     if ($libdir eq "" || $newlibname eq "" || $libname eq "") {
 	# If we don't have these names, we cannot create a valid library
-	print STDOUT "Unable to create shared libary target (no directory\n\
+	print STDOUT "Unable to create shared library target (no directory\n\
 library, or new library name)\n" if $debug;
 	return;
     }
@@ -3409,7 +3428,7 @@
     if (!defined($usertargets{"$libdir/$newlibname"})) {
 	print FD "# Build the shared library from the shared object files\n";
 	print FD "$libdir/$newlibname: $libdir/$libname
-\t(cd $libdir && \$(CREATESHLIB) --mode=link -version-info \"\$(ABIVERSION)\" -o $libname $exports -rpath \$(libdir) $otherlibs -ldflags \"\$(LDFLAGS)\" \$(LIBS))\n";
+\t(cd $libdir && \$(CREATESHLIB) --mode=link $linkerOption -version-info \"\$(ABIVERSION)\" -o $libname $exports -rpath \$(libdir) $otherlibs -ldflags \"\$(LDFLAGS)\" \$(LIBS))\n";
     }
     # If there is a profiling library, we need to build it
     # now, as part of this target, to handle the case where the
@@ -3445,7 +3464,7 @@
 	    $otherlibs = $shared_libraries_libs{$libbasename};
 	}
 
-	print FD "\t(cd $libdir && \$(CREATESHLIB) --mode=link -version-info \"\$(ABIVERSION)\" -o $libname $exports -rpath \$(libdir) $otherlibs);\\\n";
+	print FD "\t(cd $libdir && \$(CREATESHLIB) --mode=link $linkerOption -version-info \"\$(ABIVERSION)\" -o $libname $exports -rpath \$(libdir) $otherlibs);\\\n";
 
 	print FD "\tfi\n";
     }
@@ -3967,7 +3986,22 @@
 	    if ($this_install_method eq "") {
 		$this_install_method = '$(INSTALL)';
 		}
-	    print FD "\t$this_install_method $file \${DESTDIR}\${$dir}/$destfile$newline";
+	    $extraArgs = "";
+	    #print STDOUT "DEBUG: file = $file\n";
+	    if (defined($installExtraArgs{$file})) {
+		$extraArgs = $installExtraArgs{$file};
+	    }
+	    if ($kind eq "SHLIB" && $extraArgs eq "") {
+		# This is a hack 
+		$basefile = $file;
+		$basefile =~ s/^.*\/lib//;
+		$basefile =~ s/\.[^\.]*$//;
+		#print STDOUT "DEBUG: basefile = $basefile\n";
+		if (defined($installExtraArgs{$basefile})) {
+		    $extraArgs = $installExtraArgs{$basefile};
+		}
+	    }
+	    print FD "\t$this_install_method $extraArgs $file \${DESTDIR}\${$dir}/$destfile$newline";
 	}
     }
 
@@ -4043,7 +4077,22 @@
 		else {
 		    $this_install_choice = $install_methods{$kind};
 		}
-		print FD "\tif [ -s $file ] ; then $this_install_choice $file \${DESTDIR}\${$dir}/$destfile ; fi$newline";
+		$extraArgs = "";
+		#print STDOUT "DEBUG: file = $file\n";
+		if (defined($installExtraArgs{$file})) {
+		    $extraArgs = $installExtraArgs{$file};
+		}
+		if ($kind eq "SHLIB" && $extraArgs eq "") {
+		    # This is a hack 
+		    $basefile = $file;
+		    $basefile =~ s/^.*\/lib//;
+		    $basefile =~ s/\.[^\.]*$//;
+		    #print STDOUT "DEBUG: basefile = $basefile\n";
+		    if (defined($installExtraArgs{$basefile})) {
+			$extraArgs = $installExtraArgs{$basefile};
+		    }
+		}
+		print FD "\tif [ -s $file ] ; then $this_install_choice $extraArgs $file \${DESTDIR}\${$dir}/$destfile ; fi$newline";
 	    }
 	}
     }
@@ -4424,12 +4473,12 @@
 	# We always use a "newalldeps" incase there is a failure 
 	# creating the new list of dependency files.
 	print FD "\trm -f \$(DEPS_DIR)/newalldeps$newline";
-	my %sawFile = ();
+	%sawFile = ();
 	foreach $lib (keys(%libraries)) {
 	    foreach $sourcefile (split(/\s+/,&ExpandMakeVars($libraries{$lib}))) {
 		if (defined($sawFile{$sourcefile})) { next; }
 		$sawFile{$sourcefile} = 1;
-		my $srcdirloc = '$(srcdir)/';
+		$srcdirloc = '$(srcdir)/';
 		$ext = $sourcefile;
 		$ext =~ s/^.*\.//g;
 		$sourcebasename = $sourcefile;

Modified: mpich2/branches/release/mpich2-1.3.x/src/include/mpihandlemem.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/include/mpihandlemem.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/include/mpihandlemem.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -313,6 +313,8 @@
  * It is also assumed that any object being reference counted via these macros
  * will have a valid value in the handle field, even if it is
  * HANDLE_SET_KIND(0, HANDLE_KIND_INVALID) */
+/* TODO profile and examine the assembly that is generated for this if() on Blue
+ * Gene (and elsewhere).  We may need to mark it unlikely(). */
 #define MPIU_Object_add_ref(objptr_)                           \
     do {                                                       \
         int handle_kind_ = HANDLE_GET_KIND((objptr_)->handle); \

Modified: mpich2/branches/release/mpich2-1.3.x/src/include/mpiimpl.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/include/mpiimpl.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/include/mpiimpl.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -369,6 +369,9 @@
 int MPIU_Handle_free( void *((*)[]), int );
 */
 /* Convert Handles to objects for MPI types that have predefined objects */
+/* TODO examine generated assembly for this construct, it's probably suboptimal
+ * on Blue Gene.  An if/else if/else might help the compiler out.  It also lets
+ * us hint that one case is likely(), usually the BUILTIN case. */
 #define MPID_Getb_ptr(kind,a,bmsk,ptr)                                  \
 {                                                                       \
    switch (HANDLE_GET_KIND(a)) {                                        \
@@ -996,6 +999,8 @@
     int          idx_of_first_lpid;
     MPID_Group_pmap_t *lrank_to_lpid; /* Array mapping a local rank to local 
 					 process number */
+    int          is_local_dense_monotonic; /* see NOTE-G1 */
+
     /* We may want some additional data for the RMA syncrhonization calls */
   /* Other, device-specific information */
 #ifdef MPID_DEV_GROUP_DECL
@@ -1003,6 +1008,18 @@
 #endif
 } MPID_Group;
 
+/* NOTE-G1: is_local_dense_monotonic will be true iff the group meets the
+ * following criteria:
+ * 1) the lpids are all in the range [0,size-1], i.e. a subset of comm world
+ * 2) the pids are sequentially numbered in increasing order, without any gaps,
+ *    stride, or repetitions
+ *
+ * This additional information allows us to handle the common case (insofar as
+ * group ops are common) for MPI_Group_translate_ranks where group2 is
+ * group_of(MPI_COMM_WORLD), or some simple subset.  This is an important use
+ * case for many MPI tool libraries, such as Scalasca.
+ */
+
 extern MPIU_Object_alloc_t MPID_Group_mem;
 /* Preallocated group objects */
 #define MPID_GROUP_N_BUILTIN 1
@@ -1256,6 +1273,7 @@
    the device may need to create a new contextid */
 int MPIR_Get_contextid( MPID_Comm *, MPIR_Context_id_t *context_id );
 int MPIR_Get_contextid_sparse(MPID_Comm *comm_ptr, MPIR_Context_id_t *context_id, int ignore_id);
+void MPIR_Free_contextid( MPIR_Context_id_t );
 
 /* ------------------------------------------------------------------------- */
 
@@ -1479,7 +1497,9 @@
     MPID_Attribute *attributes;
     MPID_Group *start_group_ptr; /* group passed in MPI_Win_start */
     int start_assert;            /* assert passed to MPI_Win_start */
-    MPI_Comm    comm;         /* communicator of window (dup) */
+    MPID_Comm *comm_ptr;         /* Pointer to comm of window (dup) */
+    int         myrank;          /* Rank of this process in comm (used to 
+				    detect operations on self) */
 #ifdef USE_THREADED_WINDOW_CODE
     /* These were causing compilation errors.  We need to figure out how to
        integrate threads into MPICH2 before including these fields. */
@@ -1900,6 +1920,9 @@
 #include "mpierror.h"
 #include "mpierrs.h"
 
+/* Definitions for instrumentation (currently used within RMA code) */
+#include "mpiinstr.h"
+
 /* FIXME: This routine is only used within mpi/src/err/errutil.c and 
    smpd.  We may not want to export it.  */
 void MPIR_Err_print_stack(FILE *, int);
@@ -3140,32 +3163,7 @@
    file (mpiimpl.h). */
 #include "mpidpost.h"
 
-/* ------------------------------------------------------------------------- */
-/* FIXME: Also for mpicoll.h, in src/mpi/coll?  */
-/* ------------------------------------------------------------------------- */
-/* thresholds to switch between long and short vector algorithms for
-   collective operations */ 
-/* FIXME: Should there be a way to (a) update/compute these at configure time
-   and (b) provide runtime control?  Should these be MPIR_xxx_DEFAULT 
-   instead? */
-#define MPIR_BCAST_SHORT_MSG          12288
-#define MPIR_BCAST_LONG_MSG           524288
-#define MPIR_BCAST_MIN_PROCS          8
-#define MPIR_REDSCAT_COMMUTATIVE_LONG_MSG 524288
-#define MPIR_REDSCAT_NONCOMMUTATIVE_SHORT_MSG 512
-#define MPIR_ALLGATHER_SHORT_MSG      81920
-#define MPIR_ALLGATHER_LONG_MSG       524288
-#define MPIR_REDUCE_SHORT_MSG         2048
-#define MPIR_ALLREDUCE_SHORT_MSG      2048
-#define MPIR_GATHER_VSMALL_MSG        1024
-#define MPIR_SCATTER_SHORT_MSG        2048  /* for intercommunicator scatter */
-#define MPIR_GATHER_SHORT_MSG         2048  /* for intercommunicator scatter */
-#define MPIR_GATHERV_MIN_PROCS        32
-
-/* For pipelined collectives */
-#define MPIR_ALLGATHERV_PIPELINE_MSGSIZE   32768
-
-/* TODO convert all cut-over constants above to parameters */
+/* tunable parameter values */
 #include "mpich_param_vals.h"
 
 /* Tags for point to point operations which implement collective and other

Modified: mpich2/branches/release/mpich2-1.3.x/src/include/mpiimplthread.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/include/mpiimplthread.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/include/mpiimplthread.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -458,7 +458,7 @@
     }                                           \
     MPIU_THREAD_CS_EXIT(INITFLAG,)
 
-#elif !defined(MPICH_IS_THREADED)
+#elif !defined(MPID_DEVICE_DEFINES_THREAD_CS) /* && !defined(MPICH_IS_THREADED) */
 
 /* These provide a uniform way to perform a first-use initialization
    in a thread-safe way.  See the web page or mpidtime.c for the generic
@@ -475,7 +475,7 @@
 #define MPIU_THREADSAFE_INIT_CLEAR(_var) _var=0
 #define MPIU_THREADSAFE_INIT_BLOCK_END(_var)
 
-#endif  /* MPICH_IS_THREADED */
+#endif
 
 /* Helper definitions for the default macro definitions */
 #if defined(MPICH_IS_THREADED) && !defined(MPID_DEVICE_DEFINES_THREAD_CS)
@@ -887,7 +887,7 @@
 #error Unrecognized thread granularity
 #endif
 
-#elif !defined(MPICH_IS_THREAED)
+#elif !defined(MPID_DEVICE_DEFINES_THREAD_CS) /* && !defined(MPICH_IS_THREADED) */
 #define MPIU_THREAD_CS_INIT
 #define MPIU_THREAD_CS_FINALIZE
 #define MPIU_THREAD_CS_ENTER(_name,_context)
@@ -928,6 +928,8 @@
 
 # elif MPIU_THREAD_GRANULARITY == MPIU_THREAD_GRANULARITY_PER_OBJECT
 
+#include "opa_primitives.h"
+
 typedef OPA_int_t MPID_cc_t;
 
 /* implies no barrier, since this routine should only be used for request

Copied: mpich2/branches/release/mpich2-1.3.x/src/include/mpiinstr.h (from rev 7416, mpich2/trunk/src/include/mpiinstr.h)
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/include/mpiinstr.h	                        (rev 0)
+++ mpich2/branches/release/mpich2-1.3.x/src/include/mpiinstr.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -0,0 +1,82 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*  
+ *  (C) 2010 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#ifndef MPIINSTR_H_INCLUDED
+#define MPIINSTR_H_INCLUDED
+
+#ifdef USE_MPIU_INSTR
+
+#define MPIU_INSTR_TYPE_DURATION 1
+
+typedef struct MPIU_INSTR_Generic_t {
+    int instrType;
+    void *next;
+    int count;
+    const char *desc;
+    int (*toStr)( char *buf, size_t maxlen, void *handlePtr );
+} MPIU_INSTR_Generic_t;
+
+#define MPIU_INSTR_MAX_DATA 8
+typedef struct MPIU_INSTR_Duration_count_t {
+    int         instrType;
+    void       *next;
+    int         count;          /* Number of times in duration */
+    const char *desc;           /* Character string describing duration */
+    int (*toStr)( char *buf, size_t maxlen, void *handlePtr );
+    MPID_Time_t ttime,          /* Time in duration */
+                curstart;       /* Time of entry into current duration */
+    int nitems;                 /* Number of items in data */
+    int data[MPIU_INSTR_MAX_DATA]; /* Used to hold additional data */
+    } MPIU_INSTR_Duration_count;
+
+/* Prototypes for visible routines */
+int MPIU_INSTR_AddHandle( void * );
+int MPIU_INSTR_ToStr_Duration_Count( char *, size_t, void * );
+
+/* Definitions for including instrumentation in files*/
+
+#define MPIU_INSTR_DURATION_DECL(name_) \
+    struct MPIU_INSTR_Duration_count_t MPIU_INSTR_HANDLE_##name_ = { 0 };
+#define MPIU_INSTR_DURATION_EXTERN_DECL(name_) \
+    extern struct MPIU_INSTR_Duration_count_t MPIU_INSTR_HANDLE_##name_;
+/* FIXME: Need a generic way to zero the time */
+#define MPIU_INSTR_DURATION_INIT(name_,nitems_,desc_)	\
+    MPIU_INSTR_HANDLE_##name_.count = 0; \
+    MPIU_INSTR_HANDLE_##name_.desc = (const char *)MPIU_Strdup( desc_ ); \
+    memset( &MPIU_INSTR_HANDLE_##name_.ttime,0,sizeof(MPID_Time_t));\
+    MPIU_INSTR_HANDLE_##name_.toStr = MPIU_INSTR_ToStr_Duration_Count;\
+    MPIU_INSTR_HANDLE_##name_.nitems = nitems_;\
+    memset( MPIU_INSTR_HANDLE_##name_.data,0,MPIU_INSTR_MAX_DATA*sizeof(int));\
+    MPIU_INSTR_AddHandle( &MPIU_INSTR_HANDLE_##name_ );
+#define MPIU_INSTR_DURATION_START(name_) \
+    MPID_Wtime( &MPIU_INSTR_HANDLE_##name_.curstart )
+#define MPIU_INSTR_DURATION_END(name_) \
+    do { \
+    MPID_Time_t curend; MPID_Wtime( &curend );\
+    MPID_Wtime_acc( &MPIU_INSTR_HANDLE_##name_.curstart, \
+    		    &curend, \
+		    &MPIU_INSTR_HANDLE_##name_.ttime );\
+    MPIU_INSTR_HANDLE_##name_.count++; } while(0)
+
+#define MPIU_INSTR_DURATION_INCR(name_,idx_,incr_) \
+    MPIU_INSTR_HANDLE_##name_.data[idx_] += incr_;
+#define MPIU_INSTR_DURATION_MAX(name_,idx_,incr_) \
+    MPIU_INSTR_HANDLE_##name_.data[idx_] = \
+	incr_ > MPIU_INSTR_HANDLE_##name_.data[idx_] ? \
+	incr_ : MPIU_INSTR_HANDLE_##name_.data[idx_];
+
+#else
+/* Define null versions of macros (these are empty statements) */
+#define MPIU_INSTR_DURATION_DECL(name_)
+#define MPIU_INSTR_DURATION_EXTERN_DECL(name_)
+#define MPIU_INSTR_DURATION_INIT(name_,nitems_,desc_)
+#define MPIU_INSTR_DURATION_START(name_)
+#define MPIU_INSTR_DURATION_END(name_)
+#define MPIU_INSTR_DURATION_INCR(name_,idx_,incr_)
+#define MPIU_INSTR_DURATION_MAX(name_,idx_,incr_)
+
+#endif /* USE_MPIU_INSTR */ 
+
+#endif

Modified: mpich2/branches/release/mpich2-1.3.x/src/include/mpiu_ex.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/include/mpiu_ex.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/include/mpiu_ex.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -306,7 +306,31 @@
     pOverlapped->pfnFailure = pfnFailure;
 }
 
+/*
+    MPIU_ExReInitOverlapped
 
+    Re-initialize the success and failure callback function fields.
+    The hEvent field of the OVERLAPPED is reset (via ResetEvent), since it
+    will be reused.
+
+    Returns TRUE if the event reset succeeds, FALSE otherwise.
+*/
+
+static
+inline
+BOOL
+MPIU_ExReInitOverlapped(
+    MPIU_EXOVERLAPPED* pOverlapped,
+    MPIU_ExCompletionRoutine pfnSuccess,
+    MPIU_ExCompletionRoutine pfnFailure
+    )
+{
+    pOverlapped->pfnSuccess = pfnSuccess;
+    pOverlapped->pfnFailure = pfnFailure;
+
+    return(ResetEvent(pOverlapped->ov.hEvent));
+}
+
 /*
     MPIU_ExPostOverlapped
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/include/mpiutil.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/include/mpiutil.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/include/mpiutil.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -172,4 +172,27 @@
 #define MPIU_UNIQUE_IMPL2_(prefix_,line_) MPIU_UNIQUE_IMPL3_(prefix_,line_)
 #define MPIU_UNIQUE_IMPL3_(prefix_,line_) prefix_##line_
 
+/* These likely/unlikely macros provide static branch prediction hints to the
+ * compiler, if such hints are available.  Simply wrap the relevant expression in
+ * the macro, like this:
+ *
+ * if (unlikely(ptr == NULL)) {
+ *     // ... some unlikely code path ...
+ * }
+ *
+ * They should be used sparingly, especially in upper-level code.  It's easy to
+ * incorrectly estimate branching likelihood, while the compiler can often do a
+ * decent job if left to its own devices.
+ *
+ * These macros are not namespaced because the namespacing is cumbersome.
+ */
+/* safety guard for now, add a configure check in the future */
+#if defined(__GNUC__) && (__GNUC__ >= 3)
+#  define unlikely(x_) __builtin_expect(!!(x_),0)
+#  define likely(x_)   __builtin_expect(!!(x_),1)
+#else
+#  define unlikely(x_) (x_)
+#  define likely(x_)   (x_)
+#endif
+
 #endif /* !defined(MPIUTIL_H_INCLUDED) */

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allgather.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allgather.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allgather.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -119,7 +119,7 @@
     MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );
 
     tot_bytes = (MPI_Aint)recvcount * comm_size * type_size;
-    if ((tot_bytes < MPIR_ALLGATHER_LONG_MSG) && !(comm_size & (comm_size - 1))) {
+    if ((tot_bytes < MPIR_PARAM_ALLGATHER_LONG_MSG_SIZE) && !(comm_size & (comm_size - 1))) {
 
         /* Short or medium size message and power-of-two no. of processes. Use
          * recursive doubling algorithm */   
@@ -420,7 +420,7 @@
 #endif /* MPID_HAS_HETERO */
     }
 
-    else if (tot_bytes < MPIR_ALLGATHER_SHORT_MSG) {
+    else if (tot_bytes < MPIR_PARAM_ALLGATHER_SHORT_MSG_SIZE) {
         /* Short message and non-power-of-two no. of processes. Use
          * Bruck algorithm (see description above). */
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allgatherv.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allgatherv.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allgatherv.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -110,7 +110,7 @@
     MPID_Datatype_get_extent_macro( recvtype, recvtype_extent );
     MPID_Datatype_get_size_macro(recvtype, recvtype_size);
 
-    if ((total_count*recvtype_size < MPIR_ALLGATHER_LONG_MSG) &&
+    if ((total_count*recvtype_size < MPIR_PARAM_ALLGATHER_LONG_MSG_SIZE) &&
         !(comm_size & (comm_size - 1))) {
         /* Short or medium size message and power-of-two no. of processes. Use
          * recursive doubling algorithm */   
@@ -477,7 +477,7 @@
 
     }
 
-    else if (total_count*recvtype_size < MPIR_ALLGATHER_SHORT_MSG) {
+    else if (total_count*recvtype_size < MPIR_PARAM_ALLGATHER_SHORT_MSG_SIZE) {
         /* Short message and non-power-of-two no. of processes. Use
          * Bruck algorithm (see description above). */
  
@@ -601,8 +601,8 @@
 	for (i = 1; i < comm_size; i++)
 	    if (min > recvcounts[i])
                 min = recvcounts[i];
-	if (min * recvtype_extent < MPIR_ALLGATHERV_PIPELINE_MSGSIZE)
-	    min = MPIR_ALLGATHERV_PIPELINE_MSGSIZE / recvtype_extent;
+	if (min * recvtype_extent < MPIR_PARAM_ALLGATHERV_PIPELINE_MSG_SIZE)
+	    min = MPIR_PARAM_ALLGATHERV_PIPELINE_MSG_SIZE / recvtype_extent;
         /* Handle the case where the datatype extent is larger than
          * the pipeline size. */
         if (!min)

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allreduce.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allreduce.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/allreduce.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -22,6 +22,8 @@
 #undef MPI_Allreduce
 #define MPI_Allreduce PMPI_Allreduce
 
+/* The order of entries in this table must match the definitions in 
+   mpi.h.in */
 MPI_User_function *MPIR_Op_table[] = { MPIR_MAXF, MPIR_MINF, MPIR_SUM,
                                        MPIR_PROD, MPIR_LAND,
                                        MPIR_BAND, MPIR_LOR, MPIR_BOR,
@@ -342,7 +344,7 @@
            using recursive doubling in that case.) */
 
         if (newrank != -1) {
-            if ((count*type_size <= MPIR_ALLREDUCE_SHORT_MSG) ||
+            if ((count*type_size <= MPIR_PARAM_ALLREDUCE_SHORT_MSG_SIZE) ||
                 (HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN) ||  
                 (count < pof2)) { /* use recursive doubling */
                 mask = 0x1;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/bcast.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/bcast.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/bcast.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -814,7 +814,7 @@
 
     nbytes = type_size * count;
 
-    if ((nbytes < MPIR_BCAST_SHORT_MSG) || (comm_ptr->local_size < MPIR_BCAST_MIN_PROCS))
+    if ((nbytes < MPIR_PARAM_BCAST_SHORT_MSG_SIZE) || (comm_ptr->local_size < MPIR_PARAM_BCAST_MIN_PROCS))
     {
         /* send to intranode-rank 0 on the root's node */
         if (comm_ptr->node_comm != NULL &&
@@ -847,12 +847,12 @@
                                       buffer, count, datatype, 0, comm_ptr->node_comm);
         }
     }
-    else /* (nbytes > MPIR_BCAST_SHORT_MSG) && (comm_ptr->size >= MPIR_BCAST_MIN_PROCS) */
+    else /* (nbytes >= MPIR_PARAM_BCAST_SHORT_MSG_SIZE) && (comm_ptr->local_size >= MPIR_PARAM_BCAST_MIN_PROCS) */
     {
         /* supposedly...
            smp+doubling good for pof2
            reg+ring better for non-pof2 */
-        if (nbytes < MPIR_BCAST_LONG_MSG && MPIU_is_pof2(comm_ptr->local_size, NULL))
+        if (nbytes < MPIR_PARAM_BCAST_LONG_MSG_SIZE && MPIU_is_pof2(comm_ptr->local_size, NULL))
         {
             /* medium-sized msg and pof2 np */
 
@@ -1015,19 +1015,19 @@
 
     nbytes = type_size * count;
 
-    if ((nbytes < MPIR_BCAST_SHORT_MSG) || (comm_size < MPIR_BCAST_MIN_PROCS))
+    if ((nbytes < MPIR_PARAM_BCAST_SHORT_MSG_SIZE) || (comm_size < MPIR_PARAM_BCAST_MIN_PROCS))
     {
         mpi_errno = MPIR_Bcast_binomial(buffer, count, datatype, root, comm_ptr);
         if (mpi_errno) MPIU_ERR_POP(mpi_errno);
     }
-    else /* (nbytes >= MPIR_BCAST_SHORT_MSG) && (comm_size >= MPIR_BCAST_MIN_PROCS) */
+    else /* (nbytes >= MPIR_PARAM_BCAST_SHORT_MSG_SIZE) && (comm_size >= MPIR_PARAM_BCAST_MIN_PROCS) */
     {
-        if ((nbytes < MPIR_BCAST_LONG_MSG) && (MPIU_is_pof2(comm_size, NULL)))
+        if ((nbytes < MPIR_PARAM_BCAST_LONG_MSG_SIZE) && (MPIU_is_pof2(comm_size, NULL)))
         {
             mpi_errno = MPIR_Bcast_scatter_doubling_allgather(buffer, count, datatype, root, comm_ptr);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
         }
-        else /* (nbytes >= MPIR_BCAST_LONG_MSG) || !(comm_size_is_pof2) */
+        else /* (nbytes >= MPIR_PARAM_BCAST_LONG_MSG_SIZE) || !(comm_size_is_pof2) */
         {
             /* We want the ring algorithm whether or not we have a
                topologically aware communicator.  Doing inter/intra-node

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/gather.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/gather.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/gather.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -136,12 +136,12 @@
 
 	/* If the message is smaller than the threshold, we will copy
 	 * our message in there too */
-	if (nbytes < MPIR_GATHER_VSMALL_MSG) tmp_buf_size++;
+	if (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE) tmp_buf_size++;
 
 	tmp_buf_size *= nbytes;
 
 	/* For zero-ranked root, we don't need any temporary buffer */
-	if ((rank == root) && (!root || (nbytes >= MPIR_GATHER_VSMALL_MSG)))
+	if ((rank == root) && (!root || (nbytes >= MPIR_PARAM_GATHER_VSMALL_MSG_SIZE)))
 	    tmp_buf_size = 0;
 
 	if (tmp_buf_size) {
@@ -157,7 +157,7 @@
 		if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 	    }
         }
-	else if (tmp_buf_size && (nbytes < MPIR_GATHER_VSMALL_MSG))
+	else if (tmp_buf_size && (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE))
 	{
             /* copy from sendbuf into tmp_buf */
             mpi_errno = MPIR_Localcopy(sendbuf, sendcnt, sendtype,
@@ -196,7 +196,7 @@
 						  &status);
                             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 			}
-			else if (nbytes < MPIR_GATHER_VSMALL_MSG) {
+			else if (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE) {
 			    mpi_errno = MPIC_Recv(tmp_buf, recvblks * nbytes, MPI_BYTE,
 						  src, MPIR_GATHER_TAG, comm, &status);
                             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -235,7 +235,7 @@
 			if (relative_src + mask > comm_size)
 			    recvblks -= (relative_src + mask - comm_size);
 
-			if (nbytes < MPIR_GATHER_VSMALL_MSG)
+			if (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE)
 			    offset = mask * nbytes;
 			else
 			    offset = (mask - 1) * nbytes;
@@ -260,7 +260,7 @@
                                           MPIR_GATHER_TAG, comm);
                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
                 }
-                else if (nbytes < MPIR_GATHER_VSMALL_MSG) {
+                else if (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE) {
 		    mpi_errno = MPIC_Send(tmp_buf, curr_cnt, MPI_BYTE, dst,
 					  MPIR_GATHER_TAG, comm);
                     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -293,7 +293,7 @@
             mask <<= 1;
         }
 
-        if ((rank == root) && root && (nbytes < MPIR_GATHER_VSMALL_MSG) && copy_blks)
+        if ((rank == root) && root && (nbytes < MPIR_PARAM_GATHER_VSMALL_MSG_SIZE) && copy_blks)
 	{
             /* reorder and copy from tmp_buf into recvbuf */
 	    mpi_errno = MPIR_Localcopy(tmp_buf,
@@ -470,7 +470,7 @@
         nbytes = sendtype_size * sendcnt * local_size;
     }
 
-    if (nbytes < MPIR_GATHER_SHORT_MSG)
+    if (nbytes < MPIR_PARAM_GATHER_INTER_SHORT_MSG_SIZE)
     {
         if (root == MPI_ROOT)
 	{

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/gatherv.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/gatherv.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/gatherv.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -65,7 +65,6 @@
     MPI_Aint       extent;
     int            i, reqs;
     int min_procs;
-    char *min_procs_str;
     MPI_Request *reqarray;
     MPI_Status *starray;
     MPIU_CHKLMEM_DECL(2);
@@ -135,20 +134,11 @@
                irrelevant here. */
             comm_size = comm_ptr->local_size;
 
-	    /* FIXME:  Do not use getenv, particularly each time the
-               routine is called.  Instead, use the parameter routines */
-            min_procs_str = getenv("MPICH2_GATHERV_MIN_PROCS");
-            /* FIXME: atoi does not indicate any errors and should not be
-               used unless there is a separate test for correctness */
-            if (min_procs_str != NULL)
-                min_procs = atoi(min_procs_str);
-            else
-                min_procs = comm_size + 1; /* Disable ssend if env not set */
-
+            min_procs = MPIR_PARAM_GATHERV_INTER_SSEND_MIN_PROCS;
             if (min_procs == -1)
                 min_procs = comm_size + 1; /* Disable ssend */
-            else if (min_procs == 0)
-                min_procs = MPIR_GATHERV_MIN_PROCS; /* Use the default value */
+            else if (min_procs == 0) /* backwards compatibility, use default value */
+                MPIR_PARAM_GET_DEFAULT_INT(GATHERV_INTER_SSEND_MIN_PROCS,&min_procs);
 
             if (comm_size >= min_procs) {
                 mpi_errno = MPIC_Ssend(sendbuf, sendcnt, sendtype, root, 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/red_scat.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/red_scat.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/red_scat.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -367,7 +367,7 @@
      * a user-passed in buffer */
     MPID_Ensure_Aint_fits_in_pointer(total_count * MPIR_MAX(true_extent, extent));
 
-    if ((is_commutative) && (nbytes < MPIR_REDSCAT_COMMUTATIVE_LONG_MSG)) {
+    if ((is_commutative) && (nbytes < MPIR_PARAM_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE)) {
         /* commutative and short. use recursive halving algorithm */
 
         /* allocate temp. buffer to receive incoming data */
@@ -582,7 +582,7 @@
         }
     }
     
-    if (is_commutative && (nbytes >= MPIR_REDSCAT_COMMUTATIVE_LONG_MSG)) {
+    if (is_commutative && (nbytes >= MPIR_PARAM_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE)) {
 
         /* commutative and long message, or noncommutative and long message.
            use (p-1) pairwise exchanges */ 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/red_scat_block.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/red_scat_block.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/red_scat_block.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -370,7 +370,7 @@
      * a user-passed in buffer */
     MPID_Ensure_Aint_fits_in_pointer(total_count * MPIR_MAX(true_extent, extent));
 
-    if ((is_commutative) && (nbytes < MPIR_REDSCAT_COMMUTATIVE_LONG_MSG)) {
+    if ((is_commutative) && (nbytes < MPIR_PARAM_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE)) {
         /* commutative and short. use recursive halving algorithm */
 
         /* allocate temp. buffer to receive incoming data */
@@ -577,7 +577,7 @@
         }
     }
     
-    if (is_commutative && (nbytes >= MPIR_REDSCAT_COMMUTATIVE_LONG_MSG)) {
+    if (is_commutative && (nbytes >= MPIR_PARAM_REDSCAT_COMMUTATIVE_LONG_MSG_SIZE)) {
 
         /* commutative and long message, or noncommutative and long message.
            use (p-1) pairwise exchanges */ 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/reduce.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/reduce.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/reduce.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -838,7 +838,7 @@
     while (pof2 <= comm_size) pof2 <<= 1;
     pof2 >>=1;
 
-    if ((count*type_size > MPIR_REDUCE_SHORT_MSG) &&
+    if ((count*type_size > MPIR_PARAM_REDUCE_SHORT_MSG_SIZE) &&
         (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) && (count >= pof2)) {
         /* do a reduce-scatter followed by gather to root. */
         mpi_errno = MPIR_Reduce_redscat_gather(sendbuf, recvbuf, count, datatype, op, root, comm_ptr);

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/scatter.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/scatter.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/coll/scatter.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -424,7 +424,7 @@
         nbytes = recvtype_size * recvcnt * local_size;
     }
 
-    if (nbytes < MPIR_SCATTER_SHORT_MSG) {
+    if (nbytes < MPIR_PARAM_SCATTER_INTER_SHORT_MSG_SIZE) {
         if (root == MPI_ROOT) {
             /* root sends all data to rank 0 on remote group and returns */
             mpi_errno = MPIC_Send(sendbuf, sendcnt*remote_size,

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/comm_group.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/comm_group.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/comm_group.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -33,6 +33,7 @@
     int mpi_errno = MPI_SUCCESS;
     MPID_VCR   *local_vcr;
     int i, lpid, n;
+    int comm_world_size = MPIR_Process.comm_world->local_size;
 
     /* Create a group if necessary and populate it with the
        local process ids */
@@ -47,12 +48,19 @@
 	}
 	else
 	    local_vcr = comm_ptr->vcr;
-	
+
+        (*group_ptr)->is_local_dense_monotonic = TRUE;
 	for (i=0; i<n; i++) {
 	    (void) MPID_VCR_Get_lpid( local_vcr[i], &lpid );
 	    (*group_ptr)->lrank_to_lpid[i].lrank = i;
 	    (*group_ptr)->lrank_to_lpid[i].lpid  = lpid;
+            if (lpid > comm_world_size ||
+                (i > 0 && (*group_ptr)->lrank_to_lpid[i-1].lpid != (lpid-1)))
+            {
+                (*group_ptr)->is_local_dense_monotonic = FALSE;
+            }
 	}
+
 	(*group_ptr)->size		 = n;
         (*group_ptr)->rank		 = comm_ptr->rank;
         (*group_ptr)->idx_of_first_lpid = -1;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/comm_remote_group.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/comm_remote_group.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/comm_remote_group.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -43,6 +43,7 @@
             (*group_ptr)->lrank_to_lpid[i].lrank = i;
             (void) MPID_VCR_Get_lpid( comm_ptr->vcr[i], &lpid );
             (*group_ptr)->lrank_to_lpid[i].lpid  = lpid;
+            /* TODO calculate is_local_dense_monotonic */
         }
         (*group_ptr)->size = n;
         (*group_ptr)->rank = MPI_UNDEFINED;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/mpicomm.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/mpicomm.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/comm/mpicomm.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -5,9 +5,9 @@
  */
 
 /* Function prototypes for communicator helper functions */
-/* The MPIR_Get_contextid routine is in mpiimpl.h so that the device 
-   may use it */
+/* The MPIR_Get_contextid and MPIR_Free_contextid routines are declared in
+   mpiimpl.h so that the device may use them */
 /* int MPIR_Get_contextid( MPID_Comm *, MPIR_Context_id_t * ); */
 int MPIR_Get_intercomm_contextid( MPID_Comm *, MPIR_Context_id_t *, MPIR_Context_id_t * );
-void MPIR_Free_contextid( MPIR_Context_id_t );
+/* void MPIR_Free_contextid( MPIR_Context_id_t ); */
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/debugger/dbginit.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/debugger/dbginit.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/debugger/dbginit.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -365,7 +365,7 @@
     p->context_id = context_id;
     p->next       = MPIR_Sendq_head;
     MPIR_Sendq_head = p;
-    MPIU_THREAD_CS_EXIT(HANDLE,req)
+    MPIU_THREAD_CS_EXIT(HANDLE,req);
 }
 
 void MPIR_Sendq_forget( MPID_Request *req )

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/errhan/errnames.txt
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/errhan/errnames.txt	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/errhan/errnames.txt	2010-11-06 15:02:44 UTC (rev 7418)
@@ -280,6 +280,7 @@
 **fileamoderead:Cannot use MPI_MODE_CREATE or MPI_MODE_EXCL with \
  MPI_MODE_RDONLY 
 **fileamodeseq:Cannot specify MPI_MODE_SEQUENTIAL with MPI_MODE_RDWR
+**fileamodediff:amode must be the same on all processors
 **filename:Invalid file name
 **filename %s:Invalid file name %s
 **filenamelong:Pathname too long

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_difference.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_difference.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_difference.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -160,6 +160,7 @@
 		k++;
 	    }
 	}
+        /* TODO calculate is_local_dense_monotonic */
     }
 
     MPIU_OBJ_PUBLISH_HANDLE(*newgroup, new_group_ptr->handle);

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_excl.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_excl.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_excl.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -144,6 +144,7 @@
 
     new_group_ptr->size = size - n;
     new_group_ptr->idx_of_first_lpid = -1;
+    /* TODO calculate is_local_dense_monotonic */
 
     MPIU_OBJ_PUBLISH_HANDLE(*newgroup, new_group_ptr->handle);
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_incl.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_incl.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_incl.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -125,6 +125,7 @@
     }
     new_group_ptr->size = n;
     new_group_ptr->idx_of_first_lpid = -1;
+    /* TODO calculate is_local_dense_monotonic */
 
     MPIU_OBJ_PUBLISH_HANDLE(*newgroup, new_group_ptr->handle);
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_intersection.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_intersection.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_intersection.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -148,14 +148,21 @@
     }
     /* --END ERROR HANDLING-- */
     new_group_ptr->rank = MPI_UNDEFINED;
+    new_group_ptr->is_local_dense_monotonic = TRUE;
     k = 0;
     for (i=0; i<size1; i++) {
 	if (group_ptr1->lrank_to_lpid[i].flag) {
+            int lpid = group_ptr1->lrank_to_lpid[i].lpid;
 	    new_group_ptr->lrank_to_lpid[k].lrank = k;
-	    new_group_ptr->lrank_to_lpid[k].lpid = 
-		group_ptr1->lrank_to_lpid[i].lpid;
+	    new_group_ptr->lrank_to_lpid[k].lpid = lpid;
 	    if (i == group_ptr1->rank) 
 		new_group_ptr->rank = k;
+            if (lpid > MPIR_Process.comm_world->local_size ||
+                (k > 0 && new_group_ptr->lrank_to_lpid[k-1].lpid != (lpid-1)))
+            {
+                new_group_ptr->is_local_dense_monotonic = FALSE;
+            }
+
 	    k++;
 	}
     }

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_range_excl.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_range_excl.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_range_excl.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -180,6 +180,8 @@
 	}
     }
 
+    /* TODO calculate is_local_dense_monotonic */
+
     MPIU_OBJ_PUBLISH_HANDLE(*newgroup, new_group_ptr->handle);
 
     /* ... end of body of routine ... */

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_range_incl.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_range_incl.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_range_incl.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -153,6 +153,8 @@
 	    }
 	}
     }
+
+    /* TODO calculate is_local_dense_monotonic */
     MPIU_OBJ_PUBLISH_HANDLE(*newgroup, new_group_ptr->handle);
 
     /* ... end of body of routine ... */

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_translate_ranks.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_translate_ranks.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_translate_ranks.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -28,51 +28,73 @@
 #define FUNCNAME MPIR_Group_translate_ranks_impl
 #undef FCNAME
 #define FCNAME MPIU_QUOTE(FUNCNAME)
-void MPIR_Group_translate_ranks_impl(MPID_Group *group_ptr1, int n, int *ranks1,
-                                    MPID_Group *group_ptr2, int *ranks2)
+void MPIR_Group_translate_ranks_impl(MPID_Group *gp1, int n, int *ranks1,
+                                     MPID_Group *gp2, int *ranks2)
 {
     int i, g2_idx, l1_pid, l2_pid;
-    
+
+    MPIU_DBG_MSG_S(OTHER,VERBOSE,"gp2->is_local_dense_monotonic=%s\n", (gp2->is_local_dense_monotonic ? "TRUE" : "FALSE"));
+
     /* Initialize the output ranks */
     for (i=0; i<n; i++)
-	ranks2[i] = MPI_UNDEFINED;
+        ranks2[i] = MPI_UNDEFINED;
 
-    /* We may want to optimize for the special case of group2 is 
-       a dup of MPI_COMM_WORLD, or more generally, has rank == lpid 
-       for everything within the size of group2.  NOT DONE YET */
-    g2_idx = group_ptr2->idx_of_first_lpid;
-    if (g2_idx < 0) {
-	MPIR_Group_setup_lpid_list( group_ptr2 );
-	g2_idx = group_ptr2->idx_of_first_lpid;
+    if (gp2->size > 0 && gp2->is_local_dense_monotonic) {
+        /* g2 probably == group_of(MPI_COMM_WORLD); use fast, constant-time lookup */
+        int lpid_offset = gp2->lrank_to_lpid[0].lpid;
+
+        MPIU_Assert(lpid_offset >= 0);
+        for (i = 0; i < n; ++i) {
+            int g1_lpid;
+
+            if (ranks1[i] == MPI_PROC_NULL) {
+                ranks2[i] = MPI_PROC_NULL;
+                continue;
+            }
+            /* "adjusted" lpid from g1 */
+            g1_lpid = gp1->lrank_to_lpid[ranks1[i]].lpid - lpid_offset;
+            if ((g1_lpid >= 0) && (g1_lpid < gp2->size)) {
+                ranks2[i] = g1_lpid;
+            }
+            /* else leave UNDEFINED */
+        }
     }
-    if (g2_idx >= 0) {
-	/* g2_idx can be < 0 if the g2 group is empty */
-	l2_pid = group_ptr2->lrank_to_lpid[g2_idx].lpid;
-	for (i=0; i<n; i++) {
-	    if (ranks1[i] == MPI_PROC_NULL) {
-		ranks2[i] = MPI_PROC_NULL;
-		continue;
-	    }
-	    l1_pid = group_ptr1->lrank_to_lpid[ranks1[i]].lpid;
-	    /* Search for this l1_pid in group2.  Use the following
-	       optimization: start from the last position in the lpid list
-	       if possible.  A more sophisticated version could use a 
-	       tree based or even hashed search to speed the translation. */
-	    if (l1_pid < l2_pid || g2_idx < 0) {
-		/* Start over from the beginning */
-		g2_idx = group_ptr2->idx_of_first_lpid;
-		l2_pid = group_ptr2->lrank_to_lpid[g2_idx].lpid;
-	    }
-	    while (g2_idx >= 0 && l1_pid > l2_pid) {
-		g2_idx = group_ptr2->lrank_to_lpid[g2_idx].next_lpid;
-		if (g2_idx >= 0)
-		    l2_pid = group_ptr2->lrank_to_lpid[g2_idx].lpid;
-		else
-		    l2_pid = -1;
+    else {
+        /* general, slow path; lookup time is dependent on the user-provided rank values! */
+        g2_idx = gp2->idx_of_first_lpid;
+        if (g2_idx < 0) {
+            MPIR_Group_setup_lpid_list( gp2 );
+            g2_idx = gp2->idx_of_first_lpid;
+        }
+        if (g2_idx >= 0) {
+            /* g2_idx can be < 0 if the g2 group is empty */
+            l2_pid = gp2->lrank_to_lpid[g2_idx].lpid;
+            for (i=0; i<n; i++) {
+                if (ranks1[i] == MPI_PROC_NULL) {
+                    ranks2[i] = MPI_PROC_NULL;
+                    continue;
+                }
+                l1_pid = gp1->lrank_to_lpid[ranks1[i]].lpid;
+                /* Search for this l1_pid in group2.  Use the following
+                   optimization: start from the last position in the lpid list
+                   if possible.  A more sophisticated version could use a 
+                   tree based or even hashed search to speed the translation. */
+                if (l1_pid < l2_pid || g2_idx < 0) {
+                    /* Start over from the beginning */
+                    g2_idx = gp2->idx_of_first_lpid;
+                    l2_pid = gp2->lrank_to_lpid[g2_idx].lpid;
+                }
+                while (g2_idx >= 0 && l1_pid > l2_pid) {
+                    g2_idx = gp2->lrank_to_lpid[g2_idx].next_lpid;
+                    if (g2_idx >= 0)
+                        l2_pid = gp2->lrank_to_lpid[g2_idx].lpid;
+                    else
+                        l2_pid = -1;
+                }
+                if (l1_pid == l2_pid)
+                    ranks2[i] = gp2->lrank_to_lpid[g2_idx].lrank;
             }
-	    if (l1_pid == l2_pid)
-		ranks2[i] = group_ptr2->lrank_to_lpid[g2_idx].lrank;
-	}
+        }
     }
 }
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_union.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_union.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/group/group_union.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -191,6 +191,9 @@
 	    k++;
 	}
     }
+
+    /* TODO calculate is_local_dense_monotonic */
+
     MPIU_OBJ_PUBLISH_HANDLE(*newgroup, new_group_ptr->handle);
 
     /* ... end of body of routine ... */

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/group/grouputil.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/group/grouputil.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/group/grouputil.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -86,14 +86,19 @@
     /* Make sure that there is no question that the list of ranks sorted
        by pids is marked as uninitialized */
     (*new_group_ptr)->idx_of_first_lpid = -1;
+
+    (*new_group_ptr)->is_local_dense_monotonic = FALSE;
     return mpi_errno;
 }
 /*
  * return value is the first index in the list
  *
- * This sorts an lpid array by lpid value, using a simple merge sort
+ * This "sorts" an lpid array by lpid value, using a simple merge sort
  * algorithm.
  *
+ * In actuality, it does not reorder the elements of maparray (these must remain
+ * in group rank order).  Instead it builds the traversal order (in increasing
+ * lpid order) through the maparray given by the "next_lpid" fields.
  */
 static int MPIR_Mergesort_lpidarray( MPID_Group_pmap_t maparray[], int n )
 {

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/init/initthread.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/init/initthread.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/init/initthread.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -10,6 +10,9 @@
 #ifdef HAVE_CRTDBG_H
 #include <crtdbg.h>
 #endif
+#ifdef HAVE_USLEEP
+#include <unistd.h>
+#endif
 
 /* -- Begin Profiling Symbol Block for routine MPI_Init_thread */
 #if defined(HAVE_PRAGMA_WEAK)

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/accumulate.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/accumulate.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/accumulate.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -126,7 +126,7 @@
                 MPID_Datatype_committed_ptr(datatype_ptr, mpi_errno);
             }
 
-	    MPID_Comm_get_ptr(win_ptr->comm, comm_ptr);
+	    comm_ptr = win_ptr->comm_ptr;
 	    MPIR_ERRTEST_SEND_RANK(comm_ptr, target_rank, mpi_errno);
 
             if (mpi_errno != MPI_SUCCESS) goto fn_fail;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/get.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/get.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/get.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -122,7 +122,7 @@
                 MPID_Datatype_committed_ptr(datatype_ptr, mpi_errno);
             }
 
-	    MPID_Comm_get_ptr(win_ptr->comm, comm_ptr);
+	    comm_ptr = win_ptr->comm_ptr;
 	    MPIR_ERRTEST_SEND_RANK(comm_ptr, target_rank, mpi_errno);
 	    
             if (mpi_errno != MPI_SUCCESS) goto fn_fail;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/put.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/put.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/put.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -122,7 +122,7 @@
                 MPID_Datatype_committed_ptr(datatype_ptr, mpi_errno);
             }
 
-	    MPID_Comm_get_ptr(win_ptr->comm, comm_ptr);
+	    comm_ptr = win_ptr->comm_ptr;
 	    MPIR_ERRTEST_SEND_RANK(comm_ptr, target_rank, mpi_errno);
 	    
             if (mpi_errno != MPI_SUCCESS) goto fn_fail;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_get_group.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_get_group.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_get_group.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -98,7 +98,7 @@
 #   endif /* HAVE_ERROR_CHECKING */
 
     /* ... body of routine ...  */
-    MPID_Comm_get_ptr( win_ptr->comm, win_comm_ptr );
+    win_comm_ptr = win_ptr->comm_ptr;
 
     mpi_errno = MPIR_Comm_group_impl(win_comm_ptr, &group_ptr);
     if (mpi_errno != MPI_SUCCESS) goto fn_fail;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_lock.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_lock.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_lock.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -117,7 +117,7 @@
 						  MPI_ERR_OTHER, 
 						  "**locktype", 0 );
 
-            MPID_Comm_get_ptr( win_ptr->comm, comm_ptr );
+	    comm_ptr = win_ptr->comm_ptr;
             MPIR_ERRTEST_SEND_RANK(comm_ptr, rank, mpi_errno);
 
             if (mpi_errno) goto fn_fail;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_unlock.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_unlock.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/rma/win_unlock.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -87,7 +87,7 @@
 	    /* If win_ptr is not valid, it will be reset to null */
             if (mpi_errno) goto fn_fail;
 
-            MPID_Comm_get_ptr( win_ptr->comm, comm_ptr );
+	    comm_ptr = win_ptr->comm_ptr;
             MPIR_ERRTEST_SEND_RANK(comm_ptr, rank, mpi_errno);
 
             if (mpi_errno) goto fn_fail;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/ad_pvfs2/ad_pvfs2_common.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/ad_pvfs2/ad_pvfs2_common.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/ad_pvfs2/ad_pvfs2_common.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -108,7 +108,41 @@
 
 int ADIOI_PVFS2_error_convert(int pvfs_error)
 {
-    return MPI_UNDEFINED;
+    switch(pvfs_error)
+    {
+	case PVFS_EPERM:
+	case PVFS_EACCES:
+	    return MPI_ERR_ACCESS;
+	case PVFS_ENOENT:
+	case PVFS_ENXIO:
+	case PVFS_ENODEV:
+	    return MPI_ERR_NO_SUCH_FILE;
+	case PVFS_EIO:
+	    return MPI_ERR_IO;
+	case PVFS_EEXIST:
+	    return MPI_ERR_FILE_EXISTS;
+	case PVFS_ENOTDIR: /* ??? */
+	case PVFS_EISDIR: /* ??? */
+	case PVFS_ENAMETOOLONG:
+	    return MPI_ERR_BAD_FILE;
+	case PVFS_EINVAL:
+	    return MPI_ERR_FILE;
+	case PVFS_EFBIG: /* ??? */
+	case PVFS_ENOSPC:
+	    return MPI_ERR_NO_SPACE;
+	case PVFS_EROFS:
+	    return MPI_ERR_READ_ONLY;
+	case PVFS_ENOSYS:
+	    return MPI_ERR_UNSUPPORTED_OPERATION;
+	    /* PVFS does not support quotas */
+	case EDQUOT:
+	    return MPI_ERR_QUOTA;
+	case PVFS_ENOMEM:
+	    return MPI_ERR_INTERN;
+	default:
+	    return MPI_UNDEFINED;
+    }
+
 }
 
 /* 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/ad_end.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/ad_end.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/ad_end.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -55,6 +55,8 @@
     if( ADIOI_syshints != MPI_INFO_NULL)
 	    MPI_Info_free(&ADIOI_syshints);
 
+    MPI_Op_free(&ADIO_same_amode);
+
     *error_code = MPI_SUCCESS;
 }
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/ad_init.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/ad_init.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/ad_init.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -23,6 +23,8 @@
 
 MPI_Info ADIOI_syshints = MPI_INFO_NULL;
 
+MPI_Op ADIO_same_amode=MPI_OP_NULL;
+
 #if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE)
 int ADIOI_Direct_read = 0, ADIOI_Direct_write = 0;
 #endif
@@ -31,6 +33,20 @@
 
 MPI_Errhandler ADIOI_DFLT_ERR_HANDLER = MPI_ERRORS_RETURN;
 
+
+static void my_consensus(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype)
+{
+    int i, *in, *inout;
+    in = (int*)invec;
+    inout = (int*)inoutvec;
+
+    for (i=0; i< *len; i++) {
+        if (in[i] != inout[i])
+	    inout[i] = ADIO_AMODE_NOMATCH;
+    }
+    return;
+}
+
 void ADIO_Init(int *argc, char ***argv, int *error_code)
 {
 #if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE)
@@ -111,4 +127,5 @@
 #endif
 
     *error_code = MPI_SUCCESS;
+    MPI_Op_create(my_consensus, 1, &ADIO_same_amode);
 }

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/flatten.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/flatten.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/common/flatten.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -1023,6 +1023,12 @@
     ADIO_Offset *opt_blocklens;
     ADIO_Offset *opt_indices;
 
+    /* short-circuit: there is nothing to do if there are
+     * 	- 1 block:  what can we remove?
+     * 	- 2 blocks: either both blocks are data (and not zero) 
+     * 		or one block is the UB or LB */
+    if (flat_type->count <= 2) return;
+
     opt_blocks = 2; /* LB and UB */
     for (i=1; i < flat_type->count -1; i++) {
         if(flat_type->blocklens[i] != 0)

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/include/adio.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/include/adio.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/include/adio.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -262,6 +262,8 @@
 #define ADIO_APPEND             128
 #define ADIO_SEQUENTIAL         256
 
+#define ADIO_AMODE_NOMATCH  ~(ADIO_CREATE|ADIO_RDONLY|ADIO_WRONLY|ADIO_RDWR|ADIO_DELETE_ON_CLOSE|ADIO_UNIQUE_OPEN|ADIO_EXCL|ADIO_APPEND|ADIO_SEQUENTIAL)
+
 /* file-pointer types */
 #define ADIO_EXPLICIT_OFFSET     100
 #define ADIO_INDIVIDUAL          101

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/include/adio_extern.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/include/adio_extern.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/adio/include/adio_extern.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -25,3 +25,5 @@
 extern MPI_Errhandler ADIOI_DFLT_ERR_HANDLER;
 
 extern MPI_Info ADIOI_syshints;
+
+extern MPI_Op ADIO_same_amode;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/mpi-io/open.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/mpi-io/open.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpi/romio/mpi-io/open.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -23,6 +23,10 @@
 #include "mpioprof.h"
 #endif
 
+/* for user-defined reduce operator */
+#include "adio_extern.h"
+
+
 extern int ADIO_Init_keyval;
 
 /*@
@@ -42,7 +46,7 @@
 int MPI_File_open(MPI_Comm comm, char *filename, int amode, 
                   MPI_Info info, MPI_File *fh)
 {
-    int error_code, file_system, flag, /* tmp_amode, */rank;
+    int error_code, file_system, flag, tmp_amode=0, rank;
     char *tmp;
     MPI_Comm dupcomm;
     ADIOI_Fns *fsops;
@@ -100,25 +104,24 @@
 					  "**fileamodeseq", 0);
 	goto fn_fail;
     }
-    /* --END ERROR HANDLING-- */
 
-/* check if amode is the same on all processes */
     MPI_Comm_dup(comm, &dupcomm);
 
-/*  
-    Removed this check because broadcast is too expensive. 
-    tmp_amode = amode;
-    MPI_Bcast(&tmp_amode, 1, MPI_INT, 0, dupcomm);
-    if (amode != tmp_amode) {
-	FPRINTF(stderr, "MPI_File_open: amode must be the same on all processes\n");
-	MPI_Abort(MPI_COMM_WORLD, 1);
-    }
-*/
-
 /* check if ADIO has been initialized. If not, initialize it */
     MPIR_MPIOInit(&error_code);
     if (error_code != MPI_SUCCESS) goto fn_fail;
 
+/* check if amode is the same on all processes */
+    MPI_Allreduce(&amode, &tmp_amode, 1, MPI_INT, ADIO_same_amode, dupcomm);
+
+    if (tmp_amode == ADIO_AMODE_NOMATCH) {
+	error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
+			myname, __LINE__, MPI_ERR_AMODE,
+			"**fileamodediff", 0);
+	goto fn_fail;
+    }
+    /* --END ERROR HANDLING-- */
+
     file_system = -1;
 
     /* resolve file system type from file name; this is a collective call */

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/include/mpid_nem_nets.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/include/mpid_nem_nets.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/include/mpid_nem_nets.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -21,6 +21,8 @@
 typedef int (* MPID_nem_net_module_vc_init_t)(MPIDI_VC_t *vc);
 typedef int (* MPID_nem_net_module_vc_destroy_t)(MPIDI_VC_t *vc);
 typedef int (* MPID_nem_net_module_vc_terminate_t)(MPIDI_VC_t *vc);
+typedef int (* MPID_nem_net_module_anysource_iprobe_t)(int tag, MPID_Comm *comm, int context_offset, int *flag,
+                                                       MPI_Status *status);
 
 typedef void (* MPID_nem_net_module_vc_dbg_print_sendq_t)(FILE *stream, MPIDI_VC_t *vc);
 
@@ -39,6 +41,7 @@
     MPID_nem_net_module_vc_init_t vc_init;
     MPID_nem_net_module_vc_destroy_t vc_destroy;
     MPID_nem_net_module_vc_terminate_t vc_terminate;
+    MPID_nem_net_module_anysource_iprobe_t anysource_iprobe;
 } MPID_nem_netmod_funcs_t;
 
 extern MPID_nem_net_module_vc_dbg_print_sendq_t  MPID_nem_net_module_vc_dbg_print_sendq;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/elan/elan_init.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/elan/elan_init.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/elan/elan_init.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -21,7 +21,8 @@
     MPID_nem_elan_connect_to_root,
     MPID_nem_elan_vc_init,
     MPID_nem_elan_vc_destroy,
-    MPID_nem_elan_vc_terminate
+    MPID_nem_elan_vc_terminate,
+    NULL /* anysource iprobe */
 };
 
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/gm/gm_init.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/gm/gm_init.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/gm/gm_init.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -15,7 +15,8 @@
     MPID_nem_gm_connect_to_root,
     MPID_nem_gm_vc_init,
     MPID_nem_gm_vc_destroy,
-    MPID_nem_gm_vc_terminate
+    MPID_nem_gm_vc_terminate,
+    NULL /* anysource iprobe */
 };
 
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_alloc.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_alloc.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_alloc.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -30,7 +30,8 @@
 	curr_req->next = next_req;
 	curr_req = next_req;
     }
-
+    curr_req->next = NULL;
+   
  fn_exit:
     return mpi_errno;
  fn_fail: ATTRIBUTE((unused))
@@ -97,6 +98,7 @@
 	   curr_req->next = next_req;
 	   curr_req = next_req;
        }       
+      curr_req->next = NULL;
    }
 
  fn_exit:

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_impl.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_impl.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_impl.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -6,6 +6,11 @@
 
 #ifndef MX_MODULE_IMPL_H
 #define MX_MODULE_IMPL_H
+#ifdef USE_PMI2_API
+#include "pmi2.h"
+#else
+#include "pmi.h"
+#endif
 #include <myriexpress.h>
 #include "mx_extensions.h"
 #include "mpid_nem_impl.h"
@@ -42,6 +47,8 @@
 int MPID_nem_mx_probe(MPIDI_VC_t *vc,  int source, int tag, MPID_Comm *comm, int context_offset, MPI_Status *status);
 int MPID_nem_mx_iprobe(MPIDI_VC_t *vc,  int source, int tag, MPID_Comm *comm, int context_offset, int *flag, MPI_Status *status);
 
+int MPID_nem_mx_anysource_iprobe(int tag, MPID_Comm *comm, int context_offset, int *flag, MPI_Status *status);
+
 /* Callback routine for unex msgs in MX */
 mx_unexp_handler_action_t MPID_nem_mx_get_adi_msg(void *context,mx_endpoint_addr_t source,
 						  uint64_t match_info,uint32_t length,void *data);
@@ -175,12 +182,12 @@
         ((_match) |= (((uint64_t)((_tag)&(NEM_MX_MAX_TAG))) << SHIFT_TAG)); \
 }while(0)
 #define NEM_MX_SET_SRC(_match, _src) do {                      \
-        MPIU_Assert(_src >= 0)&&(_src<=(NEM_MX_MAX_RANK)));    \
+        MPIU_Assert((_src >= 0)&&(_src<=(NEM_MX_MAX_RANK)));   \
         ((_match) |= (((uint64_t)(_src)) << SHIFT_RANK));      \
 }while(0)
-#define NEM_MX_SET_CTXT(_match, _ctxt) do {                    \
-        MPIU_Assert(_ctxt >= 0)&&(_ctxt<=(NEM_MX_MAX_CTXT)));  \
-       ((_match) |= (((uint64_t)(_ctxt)) << SHIFT_CTXT));      \
+#define NEM_MX_SET_CTXT(_match, _ctxt) do {                     \
+        MPIU_Assert((_ctxt >= 0)&&(_ctxt<=(NEM_MX_MAX_CTXT)));  \
+        ((_match) |= (((uint64_t)(_ctxt)) << SHIFT_CTXT));      \
 }while(0)
 #define NEM_MX_SET_PGRANK(_match, _pg_rank)  do {               \
 	((_match) |= (((uint64_t)(_pg_rank)) << SHIFT_PGRANK));	\

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_init.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_init.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_init.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -15,7 +15,8 @@
     MPID_nem_mx_connect_to_root,
     MPID_nem_mx_vc_init,
     MPID_nem_mx_vc_destroy,
-    MPID_nem_mx_vc_terminate
+    MPID_nem_mx_vc_terminate,
+    MPID_nem_mx_anysource_iprobe
 };
 
 static MPIDI_Comm_ops_t comm_ops = {
@@ -240,15 +241,23 @@
    VC_FIELD(vc, remote_connected) = 0;
 #else
    {
-       char business_card[MPID_NEM_MAX_NETMOD_STRING_LEN];
-       int ret;
-       
-       mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card, MPID_NEM_MAX_NETMOD_STRING_LEN, vc->pg);
+       char *business_card;
+       int   val_max_sz;
+       int   ret;
+#ifdef USE_PMI2_API
+       val_max_sz = PMI2_MAX_VALLEN;
+#else
+       mpi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
+#endif 
+       business_card = (char *)MPIU_Malloc(val_max_sz); 
+       mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card,val_max_sz, vc->pg);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        
        mpi_errno = MPID_nem_mx_get_from_bc (business_card, &VC_FIELD(vc, remote_endpoint_id), &VC_FIELD(vc, remote_nic_id));
        if (mpi_errno)    MPIU_ERR_POP (mpi_errno);
- 
+
+       MPIU_Free(business_card);
+       
        ret = mx_connect(MPID_nem_mx_local_endpoint,VC_FIELD(vc, remote_nic_id),VC_FIELD(vc, remote_endpoint_id),
 			MPID_NEM_MX_FILTER,MX_INFINITE,&(VC_FIELD(vc, remote_endpoint_addr)));
        MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_connect", "**mx_connect %s", mx_strerror (ret));

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_poll.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_poll.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_poll.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -23,10 +23,10 @@
     s->mx_req_ptr = (_mx_req);						       \
     HASH_ADD(hh1, mpid_nem_mx_asreqs, mpid_req_ptr, sizeof(MPID_Request*), s); \
 }while(0)
-#define MPID_NEM_MX_GET_REQ_FROM_HASH(_mpi_req_ptr,_mx_req) do{		                                 \
-    mpid_nem_mx_hash_t *s;						                                 \
-    HASH_FIND(hh1, mpid_nem_mx_asreqs, &(_mpi_req_ptr), sizeof(MPID_Request*), s);                       \
-    if(s){HASH_DELETE(hh1, mpid_nem_mx_asreqs, s); (_mx_req) = s->mx_req_ptr; } else {(_mx_req) = NULL;} \
+#define MPID_NEM_MX_GET_REQ_FROM_HASH(_mpi_req_ptr,_mx_req) do{		                                              \
+    mpid_nem_mx_hash_t *s;						                                              \
+    HASH_FIND(hh1, mpid_nem_mx_asreqs, &(_mpi_req_ptr), sizeof(MPID_Request*), s);                                    \
+    if(s){HASH_DELETE(hh1, mpid_nem_mx_asreqs, s); (_mx_req) = s->mx_req_ptr; MPIU_Free(s);} else {(_mx_req) = NULL;} \
 }while(0)
 
 static mpid_nem_mx_hash_t *mpid_nem_mx_connreqs ATTRIBUTE((unused, used))= NULL; 
@@ -328,8 +328,10 @@
      }
      else if (kind == MPID_REQUEST_RECV)	       
      {
-       MPID_nem_mx_internal_req_t *adi_req = &(myreq->nem_mx_req);
+       MPID_nem_mx_internal_req_t *adi_req = &(myreq->nem_mx_req);	
        MPIU_Assert(status.code != MX_STATUS_TRUNCATED);	       	
+       if(adi_req->vc == NULL)
+	  mx_get_endpoint_addr_context(status.source,(void **)(&(adi_req->vc))); 
        if (status.msg_length <= sizeof(MPIDI_CH3_PktGeneric_t))
        {
 	 MPID_nem_handle_pkt(adi_req->vc,(char *)&(adi_req->pending_pkt),(MPIDI_msg_sz_t)(status.msg_length));
@@ -432,6 +434,8 @@
       {
       	 MPID_nem_mx_internal_req_t *adi_req = &(myreq->nem_mx_req);
 	 MPIU_Assert(status.code != MX_STATUS_TRUNCATED);	       	
+	 if(adi_req->vc == NULL)
+	   mx_get_endpoint_addr_context(status.source,(void **)(&(adi_req->vc))); 
 	 if (status.msg_length <= sizeof(MPIDI_CH3_PktGeneric_t))
 	 {
 	   MPID_nem_handle_pkt(adi_req->vc,(char *)&(adi_req->pending_pkt),(MPIDI_msg_sz_t)(status.msg_length));
@@ -552,8 +556,23 @@
   MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, userbuf_sz, dt_ptr, dt_true_lb);
   /*fprintf(stdout," ===> userbuf_size is %i, msg_length is %i, xfer_length is %i\n",userbuf_sz,status.msg_length,status.xfer_length); */
   
-  if (status.msg_length <=  userbuf_sz) {
-    data_sz = req->dev.recv_data_sz;
+  if (status.msg_length <=  userbuf_sz) 
+  {
+     data_sz = status.xfer_length;
+     /* the sent message was truncated */
+     if (status.msg_length != status.xfer_length )
+     {
+	MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
+					    "message truncated on receiver side, real_msg_sz=" 
+					    MPIDI_MSG_SZ_FMT ", expected_msg_sz="
+					    MPIDI_MSG_SZ_FMT,
+					    status.xfer_length, status.msg_length));
+	req->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, 
+						     MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE,
+						     "**truncate", "**truncate %d %d %d %d", 
+						     req->status.MPI_SOURCE, req->status.MPI_TAG, 
+						     req->dev.recv_data_sz, userbuf_sz );
+     }
   }
   else
   {

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_probe.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_probe.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_probe.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -22,17 +22,6 @@
     uint32_t result;
 
     NEM_MX_DIRECT_MATCH(match_info,0,source,comm->context_id + context_offset);
-    /*
-    NEM_MX_SET_CTXT(match_info,comm->context_id + context_offset);
-    if( source  == MPI_ANY_SOURCE)
-    {
-	NEM_MX_SET_ANYSRC(match_info);
-	NEM_MX_SET_ANYSRC(match_mask);	
-    }
-    else
-      NEM_MX_SET_SRC(match_info,source);	
-    */
-
     if (tag == MPI_ANY_TAG)
     {
 	NEM_MX_SET_ANYTAG(match_info);
@@ -71,8 +60,6 @@
     mx_status_t mx_status;
     uint32_t result;
 
-    NEM_MX_DIRECT_MATCH(match_info,0,source,comm->context_id + context_offset);
-    /*
     NEM_MX_SET_CTXT(match_info,comm->context_id + context_offset);
     if( source  == MPI_ANY_SOURCE)
     {
@@ -81,8 +68,6 @@
     }
     else
 	NEM_MX_SET_SRC(match_info,source);	
-    */
-
     if (tag == MPI_ANY_TAG)
     {
 	NEM_MX_SET_ANYTAG(match_info);
@@ -90,7 +75,6 @@
     }
     else
         NEM_MX_SET_TAG(match_info,tag);
-
     
     ret = mx_iprobe(MPID_nem_mx_local_endpoint,match_info,match_mask,&mx_status,&result);
     MPIU_Assert(ret == MX_SUCCESS);
@@ -114,3 +98,11 @@
 
 
 
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_mx_anysource_iprobe
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_mx_anysource_iprobe(int tag, MPID_Comm *comm, int context_offset, int *flag, MPI_Status *status)
+{
+    return MPID_nem_mx_iprobe(NULL, MPI_ANY_SOURCE, tag, comm, context_offset, flag, status);
+}

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_send.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_send.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/mx/mx_send.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -15,7 +15,7 @@
 {
     int mpi_errno = MPI_SUCCESS;
     mx_request_t  mx_request; 
-    mx_segment_t  mx_iov[3];
+    mx_segment_t  mx_iov[2];
     uint32_t      num_seg = 1;
     mx_return_t   ret;
     uint64_t      match_info = 0;        
@@ -127,16 +127,13 @@
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPID_nem_mx_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *header, MPIDI_msg_sz_t hdr_sz)
 {
-    mx_segment_t  mx_iov[MX_MAX_SEGMENTS];
-    uint32_t      num_seg = 1;
-    int           mpi_errno = MPI_SUCCESS;
-    mx_request_t  mx_request;
-    mx_return_t   ret;
-    uint64_t      match_info;
-    MPIDI_msg_sz_t data_sz;
-    int            dt_contig;
-    MPI_Aint       dt_true_lb;
-    MPID_Datatype *dt_ptr;
+    mx_segment_t   mx_iov[2];
+    uint32_t       num_seg = 1;
+    int            mpi_errno = MPI_SUCCESS;
+    mx_request_t   mx_request;
+    mx_return_t    ret;
+    uint64_t       match_info;
+    MPIDI_msg_sz_t last;
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MX_SENDNONCONTIGMSG);    
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MX_SENDNONCONTIGMSG);    
@@ -157,6 +154,19 @@
     mx_iov[0].segment_length  = sizeof(MPIDI_CH3_PktGeneric_t);
     num_seg = 1;
 
+    MPIU_Assert(sreq->dev.segment_first == 0);
+    last = sreq->dev.segment_size;
+    if (last > 0)
+    {
+        sreq->dev.tmpbuf = MPIU_Malloc((size_t)sreq->dev.segment_size);
+        MPID_Segment_pack(sreq->dev.segment_ptr,sreq->dev.segment_first, &last,(char *)(sreq->dev.tmpbuf));
+        MPIU_Assert(last == sreq->dev.segment_size);
+        mx_iov[1].segment_ptr = (char *)(sreq->dev.tmpbuf);
+        mx_iov[1].segment_length = (uint32_t)last;
+        num_seg++;
+    }
+   
+    /*
     MPIDI_Datatype_get_info(sreq->dev.user_count,sreq->dev.datatype, dt_contig, data_sz, dt_ptr,dt_true_lb);
     if(data_sz)
     {
@@ -183,6 +193,7 @@
 	    num_seg++;
 	}
     }
+   */
    
     MPIU_Assert(num_seg <= MX_MAX_SEGMENTS);    
     ret = mx_isend(MPID_nem_mx_local_endpoint,mx_iov,num_seg,VC_FIELD(vc,remote_endpoint_addr),match_info,(void *)sreq,&mx_request);

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_ep_util.cpp
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_ep_util.cpp	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_ep_util.cpp	2010-11-06 15:02:44 UTC (rev 7418)
@@ -44,6 +44,7 @@
     MPIU_ExInitOverlapped(&((*pconn_hnd)->recv_ov), NULL, NULL);
     MPIU_ExInitOverlapped(&((*pconn_hnd)->send_ov), NULL, NULL);
 
+	(*pconn_hnd)->npending_ops = 0;
     (*pconn_hnd)->zcp_in_progress = 0;
 
     /* Create an endpoint - listen conns don't need an endpoint */

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_impl.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_impl.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_impl.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -160,9 +160,14 @@
      * in a credit packet
      */
     int recv_credits;
+	/* Currently tracking only pending sends...
+	 * FIXME: Can we get this info from send_credits?
+	 */
+	int npending_ops;
     /* Is a Flow control pkt pending ? */
     int fc_pkt_pending;
-    /* FIXME: Make sure that we only have 1 pending RDMA read */
+
+	/* FIXME: Make sure that we only have 1 pending RDMA read */
     /* FIXME: Move rdma fields to another struct */
     /* Once we finish invalidating a MW - use these credits as send_credits */
 
@@ -184,7 +189,9 @@
 typedef struct MPID_Nem_nd_block_op_hnd_{
     /* For EX blocking ops */
     MPIU_EXOVERLAPPED ex_ov;
+	MPID_Nem_nd_conn_hnd_t conn_hnd;
 } *MPID_Nem_nd_block_op_hnd_t;
+#define MPID_NEM_ND_BLOCK_OP_HND_INVALID NULL
 #define MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(hnd) (MPIU_EX_GET_OVERLAPPED_PTR(&(hnd->ex_ov)))
 
 #define MPID_NEM_ND_CONN_HND_INVALID    NULL

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_sm.cpp
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_sm.cpp	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/nd/nd_sm.cpp	2010-11-06 15:02:44 UTC (rev 7418)
@@ -34,11 +34,13 @@
 static int __cdecl passive_quiescent_handler(MPIU_EXOVERLAPPED *recv_ov);
 static int __cdecl gen_ex_fail_handler(MPIU_EXOVERLAPPED *ov);
 static int __cdecl block_op_handler(MPIU_EXOVERLAPPED *ov);
+static int __cdecl manual_event_handler(MPIU_EXOVERLAPPED *ov);
 }
 
 static inline int MPID_Nem_nd_handle_posted_sendq_head_req(MPIDI_VC_t *vc, int *req_complete);
 static int process_pending_req(MPID_Nem_nd_conn_hnd_t conn_hnd);
 int MPID_Nem_nd_update_fc_info(MPID_Nem_nd_conn_hnd_t conn_hnd, MPID_Nem_nd_msg_t *pmsg);
+int MPID_Nem_nd_sm_block(MPID_Nem_nd_block_op_hnd_t op_hnd);
 
 #undef FUNCNAME
 #define FUNCNAME MPID_Nem_nd_sm_init
@@ -69,13 +71,15 @@
     goto fn_exit;
 }
 
+/* Initialize a blocking op that waits until all pending ops on the conn complete */
 #undef FUNCNAME
 #define FUNCNAME MPID_Nem_nd_block_op_init
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_Nem_nd_block_op_init(MPID_Nem_nd_block_op_hnd_t *phnd)
+int MPID_Nem_nd_block_op_init(MPID_Nem_nd_block_op_hnd_t *phnd, MPID_Nem_nd_conn_hnd_t conn_hnd)
 {
     int mpi_errno = MPI_SUCCESS;
+	HRESULT hr;
     OVERLAPPED *pov;
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_ND_BLOCK_OP_INIT);
     MPIU_CHKPMEM_DECL(1);
@@ -83,11 +87,23 @@
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_ND_BLOCK_OP_INIT);
 
     MPIU_Assert(phnd != NULL);
+	MPIU_Assert(MPID_NEM_ND_CONN_HND_IS_VALID(conn_hnd));
 
     MPIU_CHKPMEM_MALLOC(*phnd, MPID_Nem_nd_block_op_hnd_t, sizeof(struct MPID_Nem_nd_block_op_hnd_), mpi_errno, "Block op hnd");
 
-    MPIU_ExInitOverlapped(&((*phnd)->ex_ov), block_op_handler, block_op_handler);
+	(*phnd)->conn_hnd = conn_hnd;
 
+	if(conn_hnd->npending_ops <= 1){
+		/* Call the block op handlers only when the last pending event is over 
+		 * Note that the event handler gets called AFTER the event
+		 */
+		MPIU_ExInitOverlapped(&((*phnd)->ex_ov), block_op_handler, block_op_handler);
+	}
+	else{
+		/* Handle manual events with the event handler */
+		MPIU_ExInitOverlapped(&((*phnd)->ex_ov), manual_event_handler, manual_event_handler);
+	}
+
     pov = MPIU_EX_GET_OVERLAPPED_PTR(&((*phnd)->ex_ov));
 
     /* Executive initializes event to NULL - So create events after initializing the 
@@ -96,6 +112,12 @@
     pov->hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
     MPIU_ERR_CHKANDJUMP((pov->hEvent == NULL), mpi_errno, MPI_ERR_OTHER, "**intern");
 
+	/* Get notification for all events on CQ */
+	hr = MPID_Nem_nd_dev_hnd_g->p_cq->Notify(ND_CQ_NOTIFY_ANY, MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR((*phnd)));
+	MPIU_ERR_CHKANDJUMP2((hr != ND_PENDING) && FAILED(hr),
+            mpi_errno, MPI_ERR_OTHER, "**nd_write", "**nd_write %s %d",
+            _com_error(hr).ErrorMessage(), hr);
+
     MPIU_CHKPMEM_COMMIT();
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_ND_BLOCK_OP_INIT);
@@ -129,8 +151,45 @@
     goto fn_exit;
 }
 
+#undef FUNCNAME
+#define FUNCNAME MPID_Nem_nd_block_op_reinit
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_Nem_nd_block_op_reinit(MPID_Nem_nd_block_op_hnd_t op_hnd)
+{
+    int mpi_errno = MPI_SUCCESS;
+    OVERLAPPED *pov;
+    BOOL ret;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_ND_BLOCK_OP_REINIT);
 
-/* 
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_ND_BLOCK_OP_REINIT);
+
+	MPIU_Assert(op_hnd != MPID_NEM_ND_BLOCK_OP_HND_INVALID);
+	MPIU_Assert(MPID_NEM_ND_CONN_HND_IS_VALID(op_hnd->conn_hnd));
+
+	/* Re-initialize the ex ov */
+	if(op_hnd->conn_hnd->npending_ops <= 1){
+		ret = MPIU_ExReInitOverlapped(&(op_hnd->ex_ov), block_op_handler, block_op_handler);
+		MPIU_ERR_CHKANDJUMP((ret == FALSE), mpi_errno, MPI_ERR_OTHER, "**intern");
+	}
+	else{
+		ret = MPIU_ExReInitOverlapped(&(op_hnd->ex_ov), manual_event_handler, manual_event_handler);
+		MPIU_ERR_CHKANDJUMP((ret == FALSE), mpi_errno, MPI_ERR_OTHER, "**intern");
+	}
+
+    pov = MPIU_EX_GET_OVERLAPPED_PTR(&(op_hnd->ex_ov));
+    MPIU_Assert(pov->hEvent != NULL);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_ND_BLOCK_OP_REINIT);
+    return mpi_errno;
+ fn_fail:
+    MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "failed, mpi_errno = %d", mpi_errno);
+    goto fn_exit;
+}
+
+
+/*
 #undef FUNCNAME
 #define FUNCNAME MPID_Nem_nd_conn_block_op_reinit
 #undef FCNAME
@@ -159,6 +218,7 @@
     MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "failed, mpi_errno = %d", mpi_errno);
     goto fn_exit;
 }
+
 */
 
 #define FUNCNAME MPID_Nem_nd_conn_msg_bufs_init
@@ -184,25 +244,35 @@
     MSGBUF_FREEQ_INIT(conn_hnd);
 
     /* Register the sendq & recvq with adapter - We block while registering memory */
-    mpi_errno = MPID_Nem_nd_block_op_init(&rsbuf_op_hnd);
+    mpi_errno = MPID_Nem_nd_block_op_init(&rsbuf_op_hnd, conn_hnd);
     if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     hr = MPID_Nem_nd_dev_hnd_g->p_ad->RegisterMemory(conn_hnd->rsbuf, sizeof(conn_hnd->rsbuf), MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(rsbuf_op_hnd), &(conn_hnd->rsbuf_hmr));
     if(hr == ND_PENDING){
-        SIZE_T nb;
+		/* Manual event */
+		conn_hnd->npending_ops++;
+		mpi_errno = MPID_Nem_nd_sm_block(rsbuf_op_hnd);
+		if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+		/*
         hr = MPID_Nem_nd_dev_hnd_g->p_ad->GetOverlappedResult(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(rsbuf_op_hnd), &nb, TRUE);
+		*/
     }
     MPIU_ERR_CHKANDJUMP2(FAILED(hr),
         mpi_errno, MPI_ERR_OTHER, "**nd_listen", "**nd_listen %s %d",
         _com_error(hr).ErrorMessage(), hr);
 
-    mpi_errno = MPID_Nem_nd_block_op_init(&ssbuf_op_hnd);
+    mpi_errno = MPID_Nem_nd_block_op_init(&ssbuf_op_hnd, conn_hnd);
     if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     hr = MPID_Nem_nd_dev_hnd_g->p_ad->RegisterMemory(conn_hnd->ssbuf, sizeof(conn_hnd->ssbuf), MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(ssbuf_op_hnd), &(conn_hnd->ssbuf_hmr));
     if(hr == ND_PENDING){
-        SIZE_T nb;
+		/* Manual event */
+		conn_hnd->npending_ops++;
+		mpi_errno = MPID_Nem_nd_sm_block(ssbuf_op_hnd);
+		if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+		/*
         hr = MPID_Nem_nd_dev_hnd_g->p_ad->GetOverlappedResult(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(ssbuf_op_hnd), &nb, TRUE);
+		*/
     }
     MPIU_ERR_CHKANDJUMP2(FAILED(hr),
         mpi_errno, MPI_ERR_OTHER, "**nd_listen", "**nd_listen %s %d",
@@ -263,13 +333,18 @@
     if(is_blocking){
         MPID_Nem_nd_block_op_hnd_t op_hnd;
 
-        mpi_errno = MPID_Nem_nd_block_op_init(&op_hnd);
+        mpi_errno = MPID_Nem_nd_block_op_init(&op_hnd, lconn_hnd);
         if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
         
         hr = new_conn_hnd->p_conn->Accept(new_conn_hnd->p_ep, NULL, 0, MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(op_hnd));
         if(hr == ND_PENDING){
-            SIZE_T nb;
+			/* Manual event */
+			lconn_hnd->npending_ops++;
+			mpi_errno = MPID_Nem_nd_sm_block(op_hnd);
+			if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+			/*
             hr = new_conn_hnd->p_conn->GetOverlappedResult(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(op_hnd), &nb, TRUE);
+			*/
         }
         MPIU_ERR_CHKANDJUMP2(FAILED(hr),
             mpi_errno, MPI_ERR_OTHER, "**nd_accept", "**nd_accept %s %d",
@@ -497,14 +572,9 @@
     if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     if(is_blocking){
-        mpi_errno = MPID_Nem_nd_block_op_init(&op_hnd);
+		mpi_errno = MPID_Nem_nd_block_op_init(&op_hnd, conn_hnd);
         if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
-        hr = MPID_Nem_nd_dev_hnd_g->p_cq->Notify(ND_CQ_NOTIFY_ANY, MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(op_hnd));
-        MPIU_ERR_CHKANDJUMP2((hr != ND_PENDING) && FAILED(hr),
-            mpi_errno, MPI_ERR_OTHER, "**nd_write", "**nd_write %s %d",
-            _com_error(hr).ErrorMessage(), hr);
-
         MPIU_CHKPMEM_MALLOC(pmsg_result, MPID_Nem_nd_msg_result_t *, sizeof(MPID_Nem_nd_msg_result_t ), mpi_errno, "block send op result");
         INIT_MSGRESULT(pmsg_result, free_msg_result_handler, free_msg_result_handler);
         pnd_result = &(pmsg_result->result);
@@ -522,16 +592,21 @@
     MPIU_ERR_CHKANDJUMP2((hr != ND_PENDING) && FAILED(hr),
         mpi_errno, MPI_ERR_OTHER, "**nd_write", "**nd_write %s %d",
         _com_error(hr).ErrorMessage(), hr);
+
+	/* Increment the number of pending ops on conn */
+	conn_hnd->npending_ops++;
+
     if(is_blocking){
-        int nresults;
-        SIZE_T nb=0;
-        ND_RESULT *presult;
-        hr = MPID_Nem_nd_dev_hnd_g->p_cq->GetOverlappedResult(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(op_hnd), &nb, TRUE);
-        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "Sent %d bytes", nb);
-        MPIU_ERR_CHKANDJUMP2(FAILED(hr),
-            mpi_errno, MPI_ERR_OTHER, "**nd_write", "**nd_write %s %d",
-            _com_error(hr).ErrorMessage(), hr);
+		/* Block till all current pending ops complete */
+		mpi_errno = MPID_Nem_nd_sm_block(op_hnd);
+		if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
+		/* No pending ops */
+		MPIU_Assert(conn_hnd->npending_ops == 0);
+
+		mpi_errno = MPID_Nem_nd_block_op_finalize(&op_hnd);
+		if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
         /*
         nresults = MPID_Nem_nd_dev_hnd_g->p_cq->GetResults(&presult, 1);
         MPIU_ERR_CHKANDJUMP2(FAILED(presult->Status),
@@ -540,7 +615,7 @@
             */
         MPIU_CHKPMEM_COMMIT();
     }
-    
+
     if(was_fc_pkt){
         MPID_NEM_ND_CONN_DECR_SCREDITS(conn_hnd);
     }
@@ -964,9 +1039,9 @@
         nresults = pcq->GetResults(nd_results, 1);
 
         if(nresults == 0){
-            /* An error */
-            *pstatus = FALSE;
-            break;
+			/* No pending op in cq */
+			*pstatus = FALSE;
+			break;
         }
         /* An Event completed */
         *pstatus = TRUE;
@@ -1157,14 +1232,19 @@
     }
 
     /* Unregister user memory */
-    mpi_errno = MPID_Nem_nd_block_op_init(&dereg_op_hnd);
+    mpi_errno = MPID_Nem_nd_block_op_init(&dereg_op_hnd, conn_hnd);
     if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     hr = MPID_Nem_nd_dev_hnd_g->p_ad->DeregisterMemory(conn_hnd->zcp_recv_sge.hMr,
             MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(dereg_op_hnd));
     if(hr == ND_PENDING){
-        SIZE_T nb;
+		/* Manual event */
+		conn_hnd->npending_ops++;
+		mpi_errno = MPID_Nem_nd_sm_block(dereg_op_hnd);
+		if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+		/*
         hr = MPID_Nem_nd_dev_hnd_g->p_ad->GetOverlappedResult(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(dereg_op_hnd), &nb, TRUE);
+		*/
     }
     MPIU_ERR_CHKANDJUMP2(FAILED(hr),
         mpi_errno, MPI_ERR_OTHER, "**nd_read", "**nd_read %s %d",
@@ -1217,14 +1297,19 @@
     if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     /* Deregister memory */
-    mpi_errno = MPID_Nem_nd_block_op_init(&dereg_op_hnd);
+    mpi_errno = MPID_Nem_nd_block_op_init(&dereg_op_hnd, conn_hnd);
     if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     hr = MPID_Nem_nd_dev_hnd_g->p_ad->DeregisterMemory(conn_hnd->zcp_send_mr_hnd,
             MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(dereg_op_hnd));
     if(hr == ND_PENDING){
-        SIZE_T nb;
+		/* Manual event */
+		conn_hnd->npending_ops++;
+		mpi_errno = MPID_Nem_nd_sm_block(dereg_op_hnd);
+		if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+		/*
         hr = MPID_Nem_nd_dev_hnd_g->p_ad->GetOverlappedResult(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(dereg_op_hnd), &nb, TRUE);
+		*/
     }
     MPIU_ERR_CHKANDJUMP2(FAILED(hr),
         mpi_errno, MPI_ERR_OTHER, "**nd_write", "**nd_write %s %d",
@@ -1417,6 +1502,7 @@
     MPIU_Assert(pmsg != NULL);
 
     MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Send succeeded...");
+	conn_hnd->npending_ops--;
 
     if(conn_hnd->vc != NULL){
         /* Increment number of available send credits only when a credit packet is recvd */
@@ -1463,6 +1549,7 @@
     MPIU_Assert(pmsg != NULL);
 
     MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Send succeeded...");
+	conn_hnd->npending_ops--;
 
     if(conn_hnd->vc != NULL){
         /* Increment number of available send credits only when a credit packet is recvd */
@@ -1875,6 +1962,7 @@
                 MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "Received DATA PKT (len = %d, credits = %d)",udata_len, pmsg->hdr.credits));
 
                 /* The msg just contains the type and udata */
+				/* FIXME: We need to keep track of incomplete recv reqs on the conn */
                 mpi_errno = MPID_nem_handle_pkt(conn_hnd->vc, pmsg->buf, udata_len);
                 if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
@@ -1919,13 +2007,18 @@
                 conn_hnd->zcp_recv_sge.Length = rreq->dev.iov[rreq->dev.iov_offset].MPID_IOV_LEN;
                 conn_hnd->zcp_recv_sge.pAddr = rreq->dev.iov[rreq->dev.iov_offset].MPID_IOV_BUF;
                 /* Registering the local IOV */
-                mpi_errno = MPID_Nem_nd_block_op_init(&zcp_op_hnd);
+                mpi_errno = MPID_Nem_nd_block_op_init(&zcp_op_hnd, conn_hnd);
                 if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
                 hr = MPID_Nem_nd_dev_hnd_g->p_ad->RegisterMemory(conn_hnd->zcp_recv_sge.pAddr, conn_hnd->zcp_recv_sge.Length, MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(zcp_op_hnd), &(conn_hnd->zcp_recv_sge.hMr));
                 if(hr == ND_PENDING){
-                    SIZE_T nb;
+					/* Manual event */
+					conn_hnd->npending_ops++;
+					mpi_errno = MPID_Nem_nd_sm_block(zcp_op_hnd);
+					if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+					/*
                     hr = MPID_Nem_nd_dev_hnd_g->p_ad->GetOverlappedResult(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(zcp_op_hnd), &nb, TRUE);
+					*/
                 }
                 MPIU_ERR_CHKANDJUMP2(FAILED(hr),
                     mpi_errno, MPI_ERR_OTHER, "**nd_read", "**nd_read %s %d",
@@ -1987,13 +2080,18 @@
 
     /* FIXME: We shouldn't block here */
     /* Block and complete the connect() */
-    mpi_errno = MPID_Nem_nd_block_op_init(&op_hnd);
+    mpi_errno = MPID_Nem_nd_block_op_init(&op_hnd, conn_hnd);
     if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
 
     hr = conn_hnd->p_conn->CompleteConnect(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(op_hnd));
     if(hr == ND_PENDING){
-        SIZE_T nb;
+		/* Manual event */
+		conn_hnd->npending_ops++;
+		mpi_errno = MPID_Nem_nd_sm_block(op_hnd);
+		if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+		/*
         hr = MPID_Nem_nd_dev_hnd_g->p_ad->GetOverlappedResult(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(op_hnd), &nb, TRUE);
+		*/
     }
     MPIU_ERR_CHKANDJUMP2(FAILED(hr),
         mpi_errno, MPI_ERR_OTHER, "**nd_connect", "**nd_connect %s %d",
@@ -2130,11 +2228,39 @@
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_ND_SM_BLOCK_OP_HANDLER);
     hnd = CONTAINING_RECORD(ov, MPID_Nem_nd_block_op_hnd_, ex_ov);
 
+	MPIU_Assert(MPID_NEM_ND_CONN_HND_IS_VALID(hnd->conn_hnd));
+	/* Handle manual event completion */
+	hnd->conn_hnd->npending_ops--;
+
     MPID_Nem_nd_block_op_finalize(&hnd);
 
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_ND_SM_BLOCK_OP_HANDLER);
     return mpi_errno;
 }
+
+#undef FUNCNAME
+#define FUNCNAME manual_event_handler
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+static int __cdecl manual_event_handler(MPIU_EXOVERLAPPED *ov)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPID_Nem_nd_block_op_hnd_t hnd;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_ND_SM_MANUAL_EVENT_HANDLER);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_ND_SM_MANUAL_EVENT_HANDLER);
+    hnd = CONTAINING_RECORD(ov, MPID_Nem_nd_block_op_hnd_, ex_ov);
+
+	MPIU_Assert(MPID_NEM_ND_CONN_HND_IS_VALID(hnd->conn_hnd));
+	/* Handle manual event completion */
+	hnd->conn_hnd->npending_ops--;
+
+	MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "[%d] manual events pending", hnd->conn_hnd->npending_ops);
+
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_ND_SM_MANUAL_EVENT_HANDLER);
+    return mpi_errno;
+}
+
 /* The caller is responsible for freeing the pg info buffer allocated by
  * this function
  */
@@ -2369,6 +2495,67 @@
     goto fn_exit;
 }
 
+/* Note: Blocking operations are costly since we wait for all
+ * pending ops, i.e., sends - since we track only sends.
+ * FIXME: Alternate method: Keep track of nd progress completions
+ * & use the current value of pending ops in conn to determine
+ * the number of operations to block on
+ */
+#undef FUNCNAME
+#define FUNCNAME MPID_Nem_nd_sm_block
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_Nem_nd_sm_block(MPID_Nem_nd_block_op_hnd_t op_hnd)
+{
+	int mpi_errno = MPI_SUCCESS;
+	BOOL status;
+	int npending_ops = 0;
+	MPID_Nem_nd_conn_hnd_t conn_hnd;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_ND_SM_BLOCK);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_ND_SM_BLOCK);
+
+	/* We need to check conn_hnd status even if block op becomes invalid */
+	conn_hnd = op_hnd->conn_hnd;
+
+	/* Currently only blocking on pending nd ops */
+	while(conn_hnd->npending_ops > 0){
+		HRESULT hr;
+		SIZE_T nb=0;
+
+		/* Wait for an event */
+        hr = MPID_Nem_nd_dev_hnd_g->p_cq->GetOverlappedResult(MPID_NEM_ND_BLOCK_OP_GET_OVERLAPPED_PTR(op_hnd), &nb, TRUE);
+	    MPIU_ERR_CHKANDJUMP(FAILED(hr), mpi_errno, MPI_ERR_OTHER, "**intern");
+
+		/* Process the completed event */
+        status = FALSE;
+        mpi_errno = MPID_Nem_nd_process_completions(MPID_Nem_nd_dev_hnd_g->p_cq, &status);
+        if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+		if(status == FALSE){
+			/* No event on CQ - We must be blocking on a manual event */
+			status = FALSE;
+			mpi_errno = MPIU_ExProcessCompletions(MPID_Nem_nd_exset_hnd_g, &status);
+			if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+			MPIU_Assert(status == TRUE);
+		}
+
+		if(conn_hnd->npending_ops > 0){
+			/* Re-initialize block op */
+			mpi_errno = MPID_Nem_nd_block_op_reinit(op_hnd);
+			if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+		}
+	}
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_ND_SM_BLOCK);
+    return mpi_errno;
+ fn_fail:
+    MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "failed, mpi_errno = %d", mpi_errno);
+    goto fn_exit;
+}
+
 #define FUNCNAME MPID_Nem_nd_sm_finalize
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_alloc.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_alloc.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_alloc.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -30,7 +30,8 @@
 	curr_req->next = next_req;
 	curr_req = next_req;
     }
-
+   curr_req->next = NULL;
+   
  fn_exit:
     return mpi_errno;
  fn_fail: ATTRIBUTE((unused))
@@ -97,6 +98,7 @@
 	   curr_req->next = next_req;
 	   curr_req = next_req;
        }       
+      curr_req->next = NULL;
    }
 
  fn_exit:

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_impl.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_impl.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_impl.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -51,6 +51,7 @@
 /* Any source management */
 void MPID_nem_newmad_anysource_posted(MPID_Request *rreq);
 int MPID_nem_newmad_anysource_matched(MPID_Request *rreq);
+int MPID_nem_newmad_anysource_iprobe(int tag, MPID_Comm *comm, int context_offset, int *flag, MPI_Status *status);
 
 /* Callbacks for events */
 void MPID_nem_newmad_get_adi_msg(nm_sr_event_t event, const nm_sr_event_info_t*info);

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_init.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_init.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_init.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -17,7 +17,8 @@
     MPID_nem_newmad_connect_to_root,
     MPID_nem_newmad_vc_init,
     MPID_nem_newmad_vc_destroy,
-    MPID_nem_newmad_vc_terminate
+    MPID_nem_newmad_vc_terminate,
+    MPID_nem_newmad_anysource_iprobe
 };
 
 static MPIDI_Comm_ops_t comm_ops = {
@@ -73,9 +74,6 @@
     if(ret != NM_ESUCCESS) {
 	fprintf(stdout,"nm_sr_init return err = %d\n", ret);
     }
-
-   ret = nm_sr_monitor(mpid_nem_newmad_session, NM_SR_EVENT_RECV_UNEXPECTED,&MPID_nem_newmad_get_adi_msg);
-   MPIU_Assert( ret == NM_ESUCCESS);
    
  fn_exit:
     return mpi_errno;
@@ -144,7 +142,7 @@
        fprintf(stdout,"===  Req netmod area is %4i | Nmad struct size is %4i   ===\n", 
 	       MPID_NEM_REQ_NETMOD_AREA_LEN, sizeof(MPID_nem_newmad_req_area));
        fprintf(stdout,"===========================================================\n");
-       MPIU_Abort();    
+       /* MPIU_Abort(); */
    }
 
    mpid_nem_newmad_myrank = pg_rank;
@@ -276,6 +274,8 @@
    mpi_errno = MPID_nem_newmad_get_from_bc (business_card, VC_FIELD(vc, hostname), VC_FIELD(vc, url));
    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
 
+   MPIU_Free(business_card);
+   
    ret = nm_session_connect(mpid_nem_newmad_session, &(VC_FIELD(vc,p_gate)), VC_FIELD(vc, url));
    if (ret != NM_ESUCCESS) fprintf(stdout,"nm_session_connect returned ret = %d\n", ret);
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_probe.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_probe.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_probe.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -92,3 +92,13 @@
  fn_fail:  ATTRIBUTE((unused))
     goto fn_exit;
 }
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newmad_anysource_iprobe
+#undef FCNAME
+#define FCNAME MPIU_QUOTE(FUNCNAME)
+int MPID_nem_newmad_anysource_iprobe(int tag, MPID_Comm *comm, int context_offset, int *flag, MPI_Status *status)
+{   
+    return MPID_nem_newmad_iprobe(NULL, MPI_ANY_SOURCE, tag, comm, context_offset, flag, status);
+}
+

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_send.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_send.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/newmad/newmad_send.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -22,7 +22,7 @@
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWMAD_ISENDCONTIG);    
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWMAD_ISENDCONTIG);    
 
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_PktGeneric_t));
     MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "newmad_iSendContig");
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
 
@@ -69,7 +69,7 @@
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWMAD_ISTARTCONTIGMSG);    
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWMAD_ISTARTCONTIGMSG);    
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_PktGeneric_t));
     MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "newmad_iSendContig");
     MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
 
@@ -119,34 +119,55 @@
 {
     int            mpi_errno = MPI_SUCCESS;
     nm_tag_t       match_info = 0;
+    struct iovec   newmad_iov[2];
+    int            num_iov = 1;
+    MPIDI_msg_sz_t last;
+
+    /*
     struct iovec  *newmad_iov = (struct iovec *)MPIU_Malloc(NMAD_IOV_MAX_DEPTH*sizeof(struct iovec));
-    int            num_iov = 1;
     MPIDI_msg_sz_t data_sz;
     int            dt_contig;
     MPI_Aint       dt_true_lb;
     MPID_Datatype *dt_ptr;
-    
+    */
+
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWMAD_SENDNONCONTIGMSG);    
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWMAD_SENDNONCONTIGMSG);    
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));   
+    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_PktGeneric_t));   
     MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "MPID_nem_newmad_iSendNoncontig");    
+    
+    NEM_NMAD_ADI_MATCH(match_info);
 
+    MPIU_Memcpy(&(sreq->dev.pending_pkt),(char *)header,sizeof(MPIDI_CH3_PktGeneric_t));
+    newmad_iov[0].iov_base = (char *)&(sreq->dev.pending_pkt);
+    newmad_iov[0].iov_len  = sizeof(MPIDI_CH3_PktGeneric_t);
+
+    MPIU_Assert(sreq->dev.segment_first == 0);
+    last = sreq->dev.segment_size;
+    if (last > 0)
+    {
+	sreq->dev.tmpbuf = MPIU_Malloc((size_t)sreq->dev.segment_size);
+	MPID_Segment_pack(sreq->dev.segment_ptr,sreq->dev.segment_first, &last,(char *)(sreq->dev.tmpbuf));
+	MPIU_Assert(last == sreq->dev.segment_size);
+	newmad_iov[1].iov_base = (char *)(sreq->dev.tmpbuf);
+	newmad_iov[1].iov_len = (uint32_t)last;
+	num_iov++;
+    }
+
+    REQ_FIELD(sreq,iov) = newmad_iov;        
+
+    /*
     MPIDI_Datatype_get_info(sreq->dev.user_count,sreq->dev.datatype, dt_contig, data_sz, dt_ptr,dt_true_lb);
-
     if(data_sz)
     {
 	struct iovec *mad_iov_ptr = &(newmad_iov[0]);
 	MPID_nem_newmad_process_sdtype(&sreq,sreq->dev.datatype,dt_ptr,sreq->dev.user_buf,
 				       sreq->dev.user_count,data_sz, &mad_iov_ptr,&num_iov,1);
     }
-    NEM_NMAD_ADI_MATCH(match_info);
 #ifdef DEBUG
     fprintf(stdout,"SendNonContig ========> Sending ADI msg  for req %p (match is %lx) \n",sreq,match_info);
 #endif
-    MPIU_Memcpy(&(sreq->dev.pending_pkt),(char *)header,sizeof(MPIDI_CH3_PktGeneric_t));
-    newmad_iov[0].iov_base = (char *)&(sreq->dev.pending_pkt);
-    newmad_iov[0].iov_len  = sizeof(MPIDI_CH3_PktGeneric_t);
-    REQ_FIELD(sreq,iov) = newmad_iov;        
+    */
 
     nm_sr_isend_iov_with_ref(mpid_nem_newmad_session, VC_FIELD(vc, p_gate), match_info, 
 			     newmad_iov, num_iov, &(REQ_FIELD(sreq,newmad_req)),(void*)sreq);    
@@ -175,7 +196,6 @@
     int            dt_contig;
     MPIDI_msg_sz_t data_sz;
     MPI_Aint       dt_true_lb;
-    int            index;
 
     MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWMAD_DIRECTSEND);    
     MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWMAD_DIRECTSEND);

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/none/none.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/none/none.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/none/none.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -63,5 +63,6 @@
     nm_connect_to_root,
     nm_vc_init,
     nm_vc_destroy,
-    nm_vc_terminate
+    nm_vc_terminate,
+    NULL /* anysource iprobe */
 };

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/psm/psm_init.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/psm/psm_init.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/psm/psm_init.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -18,7 +18,8 @@
     MPID_nem_psm_connect_to_root,
     MPID_nem_psm_vc_init,
     MPID_nem_psm_vc_destroy,
-    MPID_nem_psm_vc_terminate
+    MPID_nem_psm_vc_terminate,
+    NULL /* anysource iprobe */
 };
 
 #define MPIDI_CH3I_ENDPOINT_KEY "endpoint_id"

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/socksm.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/socksm.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/socksm.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -491,10 +491,7 @@
     }
     
     CHECK_EINTR (offset, writev(sc->fd, iov, iov_cnt));
-    if (offset == -1 && errno != EAGAIN) {
-        MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
-        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
-    }
+    MPIU_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
     MPIU_ERR_CHKANDJUMP1(offset != buf_size, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
 /*     FIXME log appropriate error */
 /*     FIXME-Z1  socket is just connected and we are sending a few bytes. So, there should not */
@@ -543,10 +540,7 @@
     buf_size = sizeof(hdr) + sizeof(port_info);
     
     CHECK_EINTR (offset, writev(sc->fd, iov, iov_cnt));
-    if (offset == -1 && errno != EAGAIN) {
-        MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
-        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
-    }
+    MPIU_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
     MPIU_ERR_CHKANDJUMP1(offset != buf_size, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
 /*     FIXME log appropriate error */
 /*     FIXME-Z1  socket is just connected and we are sending a few bytes. So, there should not */
@@ -591,7 +585,6 @@
         goto fn_exit;
     }
     if (nread == -1 && errno != EAGAIN) {
-        MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
         MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
     }
     MPIU_ERR_CHKANDJUMP1(nread != hdr_len, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));  /* FIXME-Z1 */
@@ -611,7 +604,6 @@
 	} 
 	CHECK_EINTR (nread, readv(sc->fd, iov, iov_cnt));
         if (nread == -1 && errno != EAGAIN) {
-            MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
             MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
         }
 	MPIU_ERR_CHKANDJUMP1(nread != hdr.datalen, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno)); /* FIXME-Z1 */
@@ -674,7 +666,6 @@
 
         CHECK_EINTR (nread, readv(sc->fd, iov, iov_cnt));
         if (nread == -1 && errno != EAGAIN) {
-            MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
             MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
         }
         MPIU_ERR_CHKANDJUMP1(nread != hdr.datalen, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno)); /* FIXME-Z1 */
@@ -723,10 +714,7 @@
     pkt.datalen = 0;
 
     CHECK_EINTR (offset, write(fd, &pkt, pkt_len));
-    if (offset == -1 && errno != EAGAIN) {
-        MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
-        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
-    }
+    MPIU_ERR_CHKANDJUMP1(offset == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno));
     MPIU_ERR_CHKANDJUMP1(offset != pkt_len, mpi_errno, MPI_ERR_OTHER, "**write", "**write %s", MPIU_Strerror(errno)); /* FIXME-Z1 */
  fn_exit:
     return mpi_errno;
@@ -754,10 +742,7 @@
     MPIDI_FUNC_ENTER(MPID_STATE_RECV_CMD_PKT);
 
     CHECK_EINTR (nread, read(fd, &pkt, pkt_len));
-    if (nread == -1 && errno != EAGAIN) {
-        MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
-        MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
-    }
+    MPIU_ERR_CHKANDJUMP1(nread == -1 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read", "**read %s", MPIU_Strerror(errno));
     MPIU_ERR_CHKANDJUMP2(nread != pkt_len, mpi_errno, MPI_ERR_OTHER, "**read", "**read %d %s", nread, MPIU_Strerror(errno)); /* FIXME-Z1 */
     MPIU_Assert(pkt.datalen == 0);
     MPIU_Assert(pkt.pkt_type == MPIDI_NEM_TCP_SOCKSM_PKT_ID_ACK ||
@@ -871,13 +856,8 @@
         sock_addr = &(vc_tcp->sock_id);
 
         CHECK_EINTR(sc->fd, socket(AF_INET, SOCK_STREAM, 0));
-        if (sc->fd == -1) {
-            if (errno == ENOBUFS || errno == ENOMEM) {
-                MPIDU_Ftb_publish(MPIDU_FTB_EV_RESOURCES, "socket");
-            }
-            MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
-            MPIU_ERR_SETANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", MPIU_Strerror(errno), errno);
-        }
+        MPIU_ERR_CHKANDJUMP2(sc->fd == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", MPIU_Strerror(errno), errno);
+
         plfd->fd = sc->fd;
 	MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "sc->fd=%d, plfd->events=%d, plfd->revents=%d, vc=%p, sc=%p", sc->fd, plfd->events, plfd->revents, vc, sc));
         mpi_errno = MPID_nem_tcp_set_sockopts(sc->fd);
@@ -886,10 +866,7 @@
         MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "connecting to 0x%08X:%d", sock_addr->sin_addr.s_addr, sock_addr->sin_port));
         rc = connect(sc->fd, (SA*)sock_addr, sizeof(*sock_addr));
         /* connect should not be called with CHECK_EINTR macro */
-        if (rc < 0 && errno != EINPROGRESS) {
-            MPIDU_FTB_COMMERR(rc == ENETUNREACH ? MPIDU_FTB_EV_UNREACHABLE : MPIDU_FTB_EV_COMMUNICATION, vc);
-            MPIU_ERR_SETANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**sock_connect", "**sock_connect %d %s", errno, MPIU_Strerror(errno));
-        }
+        MPIU_ERR_CHKANDJUMP2(rc < 0 && errno != EINPROGRESS, mpi_errno, MPI_ERR_OTHER, "**sock_connect", "**sock_connect %d %s", errno, MPIU_Strerror(errno));
         
         if (rc == 0) {
             CHANGE_STATE(sc, CONN_STATE_TC_C_CNTD);
@@ -1022,13 +999,8 @@
         goto fn_exit;
 
     CHECK_EINTR(rc, close(sc->fd));
-    if (rc == -1 && errno != EAGAIN && errno != EBADF) {
-        if (sc_vc)
-            MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, sc_vc);
-        else
-            MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
+    if (rc == -1 && errno != EAGAIN && errno != EBADF)
         MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**close", "**close %s", MPIU_Strerror(errno));
-    }
 
     mpi_errno = cleanup_and_free_sc_plfd(sc);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -1611,7 +1583,6 @@
             if (bytes_recvd == -1 && errno == EAGAIN) /* handle this fast */
                 goto fn_exit;
             
-            MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, sc_vc);
             if (bytes_recvd == 0) {
                 MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
             } else {
@@ -1673,7 +1644,7 @@
 fn_fail: /* comm related failures jump here */
     {
         int cleanup_errno = MPI_SUCCESS;
-        MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, sc_vc);
+
         cleanup_errno = MPID_nem_tcp_cleanup_on_error(sc_vc); /* QUIESCENT */
         if (cleanup_errno) {
             MPIU_ERR_SET(cleanup_errno, MPI_ERR_OTHER, "**tcp_cleanup_fail");
@@ -1822,7 +1793,6 @@
 
     CHECK_EINTR(n, poll(MPID_nem_tcp_plfd_tbl, num_polled, 0));
     if (n == -1) {
-        MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
         MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**poll", "**poll %s", MPIU_Strerror(errno));
     }
     /* MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "some sc fd poll event")); */
@@ -1852,7 +1822,6 @@
 #ifdef HAVE_ERROR_CHECKING
                     int pg_rank = it_sc->vc->pg_rank; /* vc goes away on cleanup */
 #endif
-                    MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, it_sc->vc);
                     cleanup_errno = MPID_nem_tcp_cleanup_on_error(it_sc->vc);
                     if (cleanup_errno) {
                         MPIU_ERR_SET(cleanup_errno, MPI_ERR_OTHER, "**tcp_cleanup_fail");
@@ -1860,7 +1829,6 @@
                     }
                     MPIU_ERR_SET2(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d %s", pg_rank, err_str);
                 } else {
-                    MPIDU_Ftb_publish(MPIDU_FTB_EV_COMMUNICATION, "");
                     cleanup_errno = close_cleanup_and_free_sc_plfd(it_sc);
                     if (cleanup_errno) {
                         MPIU_ERR_SET(cleanup_errno, MPI_ERR_OTHER, "**tcp_cleanup_fail");
@@ -1937,8 +1905,7 @@
                 continue;
             else if (errno == EWOULDBLOCK)
                 break; /*  no connection in the listen queue. get out of here.(N1) */
-            if (errno == ENOBUFS || errno == ENOMEM)
-                MPIDU_Ftb_publish(MPIDU_FTB_EV_RESOURCES, "sock_accept");
+
             MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**sock_accept", "**sock_accept %s", MPIU_Strerror(errno));
         }
         else {

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_impl.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_impl.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_impl.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -187,18 +187,6 @@
 #define MPIDI_CH3I_HOST_DESCRIPTION_KEY "description"
 #define MPIDI_CH3I_IFNAME_KEY "ifname"
 
-/* convenience macro for publishing FTB communication error events */
-#define MPIDU_FTB_COMMERR(event_name, vc) do {                                                          \
-        int ret_ = -1;                                                                                  \
-        char bc_[1024] = ""; /* FIXME: How do we find the max length of a bc? */                             \
-        char ifname_[1024] = "";                                                                        \
-                                                                                                        \
-        if (vc && vc->pg)  /* pg can be null for temp VCs (dynamic processes) */                        \
-            ret_ = vc->pg->getConnInfo((vc)->pg_rank, bc_, sizeof(bc_), (vc)->pg);                      \
-        if (!ret_)                                                                                      \
-            ret_ = MPIU_Str_get_string_arg(bc_, MPIDI_CH3I_IFNAME_KEY, ifname_, sizeof(ifname_));       \
-        MPIDU_Ftb_publish((event_name), ifname_);                                                       \
-    } while(0)
 
 
 /* tcp-local packet types */

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_init.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_init.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_init.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -31,7 +31,8 @@
     MPID_nem_tcp_connect_to_root,
     MPID_nem_tcp_vc_init,
     MPID_nem_tcp_vc_destroy,
-    MPID_nem_tcp_vc_terminate
+    MPID_nem_tcp_vc_terminate,
+    NULL /* anysource iprobe */
 };
 
 /* in case there are no packet types defined (e.g., they're ifdef'ed out) make sure the array is not zero length */
@@ -372,7 +373,8 @@
 
     mpi_errno = MPIDI_GetTagFromPort(business_card, &new_vc->port_name_tag);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    MPID_nem_tcp_connect(new_vc);
+    mpi_errno = MPID_nem_tcp_connect(new_vc);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_send.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_send.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/tcp/tcp_send.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -99,7 +99,7 @@
         CHECK_EINTR(offset, writev(vc_tcp->sc->fd, iov, sreq->dev.iov_count));
         if (offset == 0) {
             int cleanup_errno = MPI_SUCCESS;
-            MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
             MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
             MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
             cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc);
@@ -115,7 +115,7 @@
                 break;
             } else {
                 int cleanup_errno = MPI_SUCCESS;
-                MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
                 MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror (errno));
                 MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                 cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc);
@@ -265,7 +265,7 @@
                 CHECK_EINTR(offset, writev(sc->fd, iov, 2));
                 if (offset == 0) {
                     int cleanup_errno = MPI_SUCCESS;
-                    MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
                     MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                     cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc);
@@ -278,7 +278,7 @@
                         offset = 0;
                     else {
                         int cleanup_errno = MPI_SUCCESS;
-                        MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
                         MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror (errno));
                         MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                         cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc);
@@ -405,7 +405,7 @@
             CHECK_EINTR(offset, writev(sc->fd, iov, 2));
             if (offset == 0) {
                 int cleanup_errno = MPI_SUCCESS;
-                MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
                 MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
                 MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                 cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc);
@@ -418,7 +418,7 @@
                     offset = 0;
                 else {
                     int cleanup_errno = MPI_SUCCESS;
-                    MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
                     MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror (errno));
                     MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                     cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc); /* ignoring return code */
@@ -540,7 +540,7 @@
                 CHECK_EINTR(offset, writev(sc->fd, iov, 2));
                 if (offset == 0) {
                     int cleanup_errno = MPI_SUCCESS;
-                    MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
                     MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                     cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc);
@@ -553,7 +553,7 @@
                         offset = 0;
                     else {
                         int cleanup_errno = MPI_SUCCESS;
-                        MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
                         MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror (errno));
                         MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                         cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc);
@@ -698,7 +698,7 @@
                 CHECK_EINTR(offset, writev(vc_tcp->sc->fd, iov, iov_n));
                 if (offset == 0) {
                     int cleanup_errno = MPI_SUCCESS;
-                    MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
                     MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
                     MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                     cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc);
@@ -711,7 +711,7 @@
                         offset = 0;
                     else {
                         int cleanup_errno = MPI_SUCCESS;
-                        MPIDU_FTB_COMMERR(MPIDU_FTB_EV_COMMUNICATION, vc);
+
                         MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**writev", "**writev %s", MPIU_Strerror (errno));
                         MPIU_ERR_SET1(mpi_errno, MPI_ERR_OTHER, "**comm_fail", "**comm_fail %d", vc->pg_rank);
                         cleanup_errno = MPID_nem_tcp_cleanup_on_error(vc);


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5050
/mpich2/branches/dev/ckpt2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5057-6537
/mpich2/branches/dev/ftb/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5661-5730
/mpich2/branches/dev/lapi/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5817
/mpich2/branches/dev/win_rrvm/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:6416,6428
/mpich2/branches/dev/wintcp_async_progress/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5406
   + /mpich2/branches/dev/ckpt/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5050
/mpich2/branches/dev/ckpt2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5057-6537
/mpich2/branches/dev/ftb/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5661-5730
/mpich2/branches/dev/lapi/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5817
/mpich2/branches/dev/win_rrvm/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:6416,6428
/mpich2/branches/dev/wintcp_async_progress/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:5406
/mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp/socksm.c:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_network.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_network.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/nemesis/src/mpid_nem_network.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -32,6 +32,9 @@
         /* netmod not specified, using the default */
         MPID_nem_netmod_func = MPID_nem_netmod_funcs[0];
         MPID_nem_netmod_id = 0;
+#ifdef ENABLE_COMM_OVERRIDES
+        MPIDI_Anysource_iprobe_fn = MPID_nem_netmod_func->anysource_iprobe;
+#endif
         goto fn_exit;
     }
 
@@ -41,6 +44,9 @@
         {
             MPID_nem_netmod_func = MPID_nem_netmod_funcs[i];
             MPID_nem_netmod_id = i;
+#ifdef ENABLE_COMM_OVERRIDES
+            MPIDI_Anysource_iprobe_fn = MPID_nem_netmod_func->anysource_iprobe;
+#endif
             goto fn_exit;
         }
     }

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/src/ch3_finalize.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/src/ch3_finalize.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/src/ch3_finalize.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -16,6 +16,10 @@
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_FINALIZE);
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_FINALIZE);
+
+    mpi_errno = MPIDI_CH3I_Progress_finalize();
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    
     mpi_errno = MPID_nem_finalize();
     if (mpi_errno) MPIU_ERR_POP (mpi_errno);
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/src/ch3_progress.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/src/ch3_progress.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/channels/nemesis/src/ch3_progress.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -798,10 +798,17 @@
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
 int MPIDI_CH3I_Progress_finalize(void)
 {
+    qn_ent_t *ent;
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
 
+    while(qn_head) {
+        ent = qn_head->next;
+        MPIU_Free(qn_head);
+        qn_head = ent;
+    }
+
     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_PROGRESS_FINALIZE);
     return MPI_SUCCESS;
 }

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidftb.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidftb.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidftb.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -8,20 +8,26 @@
 #define MPICH2_FTB_H_INCLUDED
 
 /* FTB events we can throw */
-#define MPIDU_FTB_EV_OTHER         "OTHER"
-#define MPIDU_FTB_EV_RESOURCES     "RESOURCES"
-#define MPIDU_FTB_EV_UNREACHABLE   "UNREACHABLE"
-#define MPIDU_FTB_EV_COMMUNICATION "COMUNICATION"
-#define MPIDU_FTB_EV_ABORT         "ABORT"
+#define MPIDU_FTB_EV_OTHER         "FTB_MPICH_OTHER"
+#define MPIDU_FTB_EV_RESOURCES     "FTB_MPICH_RESOURCES"
+#define MPIDU_FTB_EV_UNREACHABLE   "FTB_MPI_PROCS_UNREACHABLE"
+#define MPIDU_FTB_EV_COMMUNICATION "FTB_MPI_PROCS_COMM_ERROR"
+#define MPIDU_FTB_EV_ABORT         "FTB_MPI_PROCS_ABORTED"
 
 #ifdef ENABLE_FTB
+struct MPIDI_VC;
+
 /* prototypes */
 int MPIDU_Ftb_init(void);
 void MPIDU_Ftb_publish(const char *event_name, const char *event_payload);
+void MPIDU_Ftb_publish_vc(const char *event_name, struct MPIDI_VC *vc);
+void MPIDU_Ftb_publish_me(const char *event_name);
 void MPIDU_Ftb_finalize(void);
 #else /* ENABLE_FTB */
 #define MPIDU_Ftb_init() (MPI_SUCCESS)
 #define MPIDU_Ftb_publish(event_name, event_payload) do {} while(0)
+#define MPIDU_Ftb_publish_vc(event_name, vc) do {} while(0)
+#define MPIDU_Ftb_publish_me(event_name) do {} while(0)
 #define MPIDU_Ftb_finalize() do {} while(0)
 #endif /* ENABLE_FTB */
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidimpl.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidimpl.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidimpl.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -684,6 +684,9 @@
 		  int *flag, MPI_Status *status);
    
 } MPIDI_Comm_ops_t;
+
+extern int (*MPIDI_Anysource_iprobe_fn)(int tag, MPID_Comm * comm, int context_offset, int *flag,
+                                        MPI_Status * status);
 #endif
 
 typedef struct MPIDI_VC
@@ -1091,10 +1094,16 @@
 #define MPIDI_RMAFNS_VERSION 1
 int MPIDI_CH3_RMAFnsInit( MPIDI_RMAFns * );
 
+/* FIXME: These are specific to the RMA code and should be in the RMA 
+   header file. */
 #define MPIDI_RMA_PUT 23
 #define MPIDI_RMA_GET 24
 #define MPIDI_RMA_ACCUMULATE 25
 #define MPIDI_RMA_LOCK 26
+
+/* Special case RMA operations */
+#define MPIDI_RMA_ACC_CONTIG 27
+
 #define MPIDI_RMA_DATATYPE_BASIC 50
 #define MPIDI_RMA_DATATYPE_DERIVED 51
 
@@ -1701,6 +1710,8 @@
 			      MPIDI_msg_sz_t *, MPID_Request ** );
 int MPIDI_CH3_PktHandler_Accumulate( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, 
 				     MPIDI_msg_sz_t *, MPID_Request ** );
+int MPIDI_CH3_PktHandler_Accumulate_Immed( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, 
+				     MPIDI_msg_sz_t *, MPID_Request ** );
 int MPIDI_CH3_PktHandler_Get( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, 
 			      MPIDI_msg_sz_t *, MPID_Request ** );
 int MPIDI_CH3_PktHandler_GetResp( MPIDI_VC_t *, MPIDI_CH3_Pkt_t *, 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidpkt.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidpkt.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidpkt.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -10,8 +10,12 @@
 /* Enable the use of data within the message packet for small messages */
 #define USE_EAGER_SHORT
 #define MPIDI_EAGER_SHORT_INTS 4
+/* FIXME: This appears to assume that sizeof(int) == 4 (or at least >= 4) */
 #define MPIDI_EAGER_SHORT_SIZE 16
 
+/* This is the number of ints that can be carried within an RMA packet */
+#define MPIDI_RMA_IMMED_INTS 1
+
 /*
  * MPIDI_CH3_Pkt_type_t
  *
@@ -44,6 +48,8 @@
     MPIDI_CH3_PKT_LOCK_GET_UNLOCK, /* optimization for single gets */
     MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK, /* optimization for single accumulates */
                                      /* RMA Packets end here */
+    MPIDI_CH3_PKT_ACCUM_IMMED,     /* optimization for short accumulate */
+    /* FIXME: Add PUT, GET_IMMED packet types */
     MPIDI_CH3_PKT_FLOW_CNTL_UPDATE,  /* FIXME: Unused */
     MPIDI_CH3_PKT_CLOSE,
     MPIDI_CH3_PKT_END_CH3
@@ -193,6 +199,26 @@
 }
 MPIDI_CH3_Pkt_accum_t;
 
+typedef struct MPIDI_CH3_Pkt_accum_immed
+{
+    MPIDI_CH3_Pkt_type_t type;
+    void *addr;
+    int count;
+    /* FIXME: Compress datatype/op into a single word (immediate mode) */
+    MPI_Datatype datatype;
+    MPI_Op op;
+    /* FIXME: do we need these (use a regular accum packet if we do?) */
+    MPI_Win target_win_handle; /* Used in the last RMA operation in each
+                               * epoch for decrementing rma op counter in
+                               * active target rma and for unlocking window 
+                               * in passive target rma. Otherwise set to NULL*/
+    MPI_Win source_win_handle; /* Used in the last RMA operation in an
+                               * epoch in the case of passive target rma
+                               * with shared locks. Otherwise set to NULL*/
+    int data[MPIDI_RMA_IMMED_INTS];
+}
+MPIDI_CH3_Pkt_accum_immed_t;
+
 typedef struct MPIDI_CH3_Pkt_lock
 {
     MPIDI_CH3_Pkt_type_t type;
@@ -276,6 +302,7 @@
     MPIDI_CH3_Pkt_get_t get;
     MPIDI_CH3_Pkt_get_resp_t get_resp;
     MPIDI_CH3_Pkt_accum_t accum;
+    MPIDI_CH3_Pkt_accum_immed_t accum_immed;
     MPIDI_CH3_Pkt_lock_t lock;
     MPIDI_CH3_Pkt_lock_granted_t lock_granted;
     MPIDI_CH3_Pkt_pt_rma_done_t pt_rma_done;    

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidpre.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidpre.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidpre.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -170,7 +170,9 @@
     int *disp_units;      /* array of displacement units of all windows */\
     MPI_Win *all_win_handles;    /* array of handles to the window objects\
                                           of all processes */            \
-    struct MPIDI_RMA_ops *rma_ops_list; /* list of outstanding RMA requests */  \
+    struct MPIDI_RMA_ops *rma_ops_list_head; /* list of outstanding \
+                                                RMA requests */ \
+    struct MPIDI_RMA_ops *rma_ops_list_tail; \
     volatile int lock_granted;  /* flag to indicate whether lock has     \
                                    been granted to this process (as source) for         \
                                    passive target rma */                 \

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidrma.h
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidrma.h	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/include/mpidrma.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -28,6 +28,11 @@
 /* for keeping track of RMA ops, which will be executed at the next sync call */
 typedef struct MPIDI_RMA_ops {
     struct MPIDI_RMA_ops *next;  /* pointer to next element in list */
+    /* FIXME: It would be better to setup the packet that will be sent, at 
+       least in most cases (if, as a result of the sync/ops/sync sequence,
+       a different packet type is needed, it can be extracted from the 
+       information otherwise stored). */
+    /* FIXME: Use enum for RMA op type? */
     int type;  /* MPIDI_RMA_PUT, MPID_REQUEST_GET,
 		  MPIDI_RMA_ACCUMULATE, MPIDI_RMA_LOCK */
     void *origin_addr;
@@ -39,6 +44,10 @@
     MPI_Datatype target_datatype;
     MPI_Op op;  /* for accumulate */
     int lock_type;  /* for win_lock */
+    /* Used to complete operations */
+    struct MPID_Request *request;
+    MPIDI_RMA_dtype_info dtype_info;
+    void *dataloop;
 } MPIDI_RMA_ops;
 
 typedef struct MPIDI_PT_single_op {
@@ -59,5 +68,4 @@
     MPIDI_VC_t * vc;
     struct MPIDI_PT_single_op *pt_single_op;  /* to store info for lock-put-unlock optimization */
 } MPIDI_Win_lock_queue;
-
 #endif

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_connection.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_connection.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_connection.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -74,6 +74,7 @@
                     
  		    MPIU_DBG_MSG(CH3_DISCONNECT,TYPICAL, "Connection closed prematurely.");
 
+                    MPIDU_Ftb_publish_vc(MPIDU_FTB_EV_UNREACHABLE, vc);
                     MPIDI_CHANGE_VC_STATE(vc, MORIBUND);
 
                     break;
@@ -97,6 +98,7 @@
 
  		    MPIU_DBG_MSG_D(CH3_DISCONNECT,TYPICAL, "Connection closed prematurely during close protocol.  "
                                    "Outstanding close operations = %d", MPIDI_Outstanding_close_ops);
+                    MPIDU_Ftb_publish_vc(MPIDU_FTB_EV_UNREACHABLE, vc);
                     MPIDI_CHANGE_VC_STATE(vc, MORIBUND);
                     
 		    /* MT: this is not thread safe */

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_recv_pkt.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_recv_pkt.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_recv_pkt.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -585,6 +585,8 @@
 	MPIDI_CH3_PktHandler_LockAccumUnlock;
     pktArray[MPIDI_CH3_PKT_LOCK_GET_UNLOCK] = 
 	MPIDI_CH3_PktHandler_LockGetUnlock;
+    pktArray[MPIDI_CH3_PKT_ACCUM_IMMED] = 
+	MPIDI_CH3_PktHandler_Accumulate_Immed;
     /* End of default RMA operations */
 
  fn_fail:

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_recv_req.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_recv_req.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_handle_recv_req.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -630,7 +630,7 @@
     if (HANDLE_GET_KIND(rreq->dev.op) == HANDLE_KIND_BUILTIN)
     {
         /* get the function by indexing into the op table */
-        uop = MPIR_Op_table[(rreq->dev.op)%16 - 1];
+        uop = MPIR_Op_table[((rreq->dev.op)&0xf) - 1];
     }
     else
     {
@@ -956,7 +956,7 @@
     if (HANDLE_GET_KIND(single_op->op) == HANDLE_KIND_BUILTIN)
     {
         /* get the function by indexing into the op table */
-        uop = MPIR_Op_table[(single_op->op)%16 - 1];
+        uop = MPIR_Op_table[((single_op->op)&0xf) - 1];
     }
     else
     {

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_port.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_port.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_port.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -335,29 +335,22 @@
     int mpi_errno=MPI_SUCCESS;
     int j, i, rank, recv_ints[3], send_ints[3], context_id;
     int remote_comm_size=0;
-    MPID_Comm *tmp_comm = NULL, *intercomm;
+    MPID_Comm *tmp_comm = NULL;
     MPIDI_VC_t *new_vc = NULL;
     int sendtag=100, recvtag=100, n_remote_pgs;
     int n_local_pgs=1, local_comm_size;
     pg_translation *local_translation = NULL, *remote_translation = NULL;
     pg_node *pg_list = NULL;
     MPIDI_PG_t **remote_pg = NULL;
+    MPIR_Context_id_t recvcontext_id = MPIR_INVALID_CONTEXT_ID;
     MPIU_CHKLMEM_DECL(3);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMM_CONNECT);
 
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_COMM_CONNECT);
 
-    /* Create the new intercommunicator here. We need to send the
-       context id to the other side. */
-    /* FIXME: If we fail to connect, someone needs to free this newcomm */
-    mpi_errno = MPIR_Comm_create(newcomm);
-    if (mpi_errno) {
-	MPIU_ERR_POP(mpi_errno);
-    }
-    mpi_errno = MPIR_Get_contextid( comm_ptr, &(*newcomm)->recvcontext_id );
+    /* Get the context ID here because we need to send it to the remote side */
+    mpi_errno = MPIR_Get_contextid( comm_ptr, &recvcontext_id );
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    /* FIXME why is this commented out? */
-    /* (*newcomm)->context_id = (*newcomm)->recvcontext_id; */
 
     rank = comm_ptr->rank;
     local_comm_size = comm_ptr->local_size;
@@ -369,7 +362,7 @@
 	mpi_errno = MPIDI_Create_inter_root_communicator_connect(
 	    port_name, &tmp_comm, &new_vc);
 	if (mpi_errno != MPI_SUCCESS) {
-	    MPIU_ERR_POP(mpi_errno);
+	    MPIU_ERR_POP_LABEL(mpi_errno, no_port);
 	}
 
 	/* Make an array to translate local ranks to process group index 
@@ -384,25 +377,27 @@
 	   group id, size and all its KVS values */
 	mpi_errno = ExtractLocalPGInfo( comm_ptr, local_translation, 
 					&pg_list, &n_local_pgs );
+        MPIU_ERR_CHKINTERNAL(mpi_errno, mpi_errno, "Can't extract local PG info.");
 
-
 	/* Send the remote root: n_local_pgs, local_comm_size,
            Recv from the remote root: n_remote_pgs, remote_comm_size,
-           context_id for newcomm */
+           recvcontext_id for newcomm */
 
         send_ints[0] = n_local_pgs;
         send_ints[1] = local_comm_size;
-        send_ints[2] = (*newcomm)->recvcontext_id;
+        send_ints[2] = recvcontext_id;
 
 	MPIU_DBG_MSG_FMT(CH3_CONNECT,VERBOSE,(MPIU_DBG_FDEST,
-		  "sending two ints, %d and %d, and receiving 3 ints", 
-                  send_ints[0], send_ints[1]));
+		  "sending 3 ints, %d, %d and %d, and receiving 3 ints", 
+                  send_ints[0], send_ints[1], send_ints[2]));
         mpi_errno = MPIC_Sendrecv(send_ints, 3, MPI_INT, 0,
                                   sendtag++, recv_ints, 3, MPI_INT,
                                   0, recvtag++, tmp_comm->handle,
                                   MPI_STATUS_IGNORE);
         if (mpi_errno != MPI_SUCCESS) {
-	    MPIU_ERR_POP(mpi_errno);
+            /* this is a no_port error because we may fail to connect
+               on the send if the port name is invalid */
+	    MPIU_ERR_POP_LABEL(mpi_errno, no_port);
 	}
     }
 
@@ -413,6 +408,9 @@
 	MPIU_ERR_POP(mpi_errno);
     }
 
+    /* check if root was unable to connect to the port */
+    MPIU_ERR_CHKANDJUMP1(recv_ints[0] == -1, mpi_errno, MPI_ERR_PORT, "**portexist", "**portexist %s", port_name);
+    
     n_remote_pgs     = recv_ints[0];
     remote_comm_size = recv_ints[1];
     context_id	     = recv_ints[2];
@@ -476,12 +474,15 @@
     }
 #endif
 
-    intercomm                 = *newcomm;
-    intercomm->context_id     = context_id;
-    intercomm->is_low_group   = 1;
+    mpi_errno = MPIR_Comm_create(newcomm);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
 
+    (*newcomm)->context_id     = context_id;
+    (*newcomm)->recvcontext_id = recvcontext_id;
+    (*newcomm)->is_low_group   = 1;
+
     mpi_errno = SetupNewIntercomm( comm_ptr, remote_comm_size, 
-				   remote_translation, remote_pg, intercomm );
+				   remote_translation, remote_pg, *newcomm );
     if (mpi_errno != MPI_SUCCESS) {
 	MPIU_ERR_POP(mpi_errno);
     }
@@ -519,7 +520,37 @@
     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_COMM_CONNECT);
     return mpi_errno;
  fn_fail:
-    goto fn_exit;
+    {
+        int mpi_errno2 = MPI_SUCCESS;
+        if (new_vc) {
+            mpi_errno2 = MPIDI_CH3_VC_Destroy(new_vc);
+            if (mpi_errno2) MPIU_ERR_SET(mpi_errno2, MPI_ERR_OTHER, "**fail");
+        }
+
+        if (recvcontext_id != MPIR_INVALID_CONTEXT_ID)
+            MPIR_Free_contextid(recvcontext_id);
+        
+        if (mpi_errno2) MPIU_ERR_ADD(mpi_errno, mpi_errno2);
+
+        goto fn_exit;
+    }
+ no_port:
+    {
+        int mpi_errno2 = MPI_SUCCESS;
+
+       /* broadcast error notification to other processes */
+        MPIU_Assert(rank == root);
+        recv_ints[0] = -1;
+        recv_ints[1] = -1;
+        recv_ints[2] = -1;
+        MPIU_ERR_SET1(mpi_errno, MPI_ERR_PORT, "**portexist", "**portexist %s", port_name);
+
+        /* notify other processes to return an error */
+        MPIU_DBG_MSG(CH3_CONNECT,VERBOSE,"broadcasting 3 ints: error case");
+        mpi_errno2 = MPIR_Bcast_intra(recv_ints, 3, MPI_INT, root, comm_ptr);
+        if (mpi_errno2) MPIU_ERR_ADD(mpi_errno, mpi_errno2);
+        goto fn_fail;
+    }
 }
 
 /*

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_rma_ops.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_rma_ops.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_rma_ops.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -7,6 +7,18 @@
 #include "mpidi_ch3_impl.h"
 #include "mpidrma.h"
 
+static int enableShortACC=1;
+
+MPIU_THREADSAFE_INIT_DECL(initRMAoptions);
+#ifdef USE_MPIU_INSTR
+MPIU_INSTR_DURATION_DECL(wincreate_allgather);
+MPIU_INSTR_DURATION_DECL(winfree_rs);
+MPIU_INSTR_DURATION_DECL(winfree_complete);
+MPIU_INSTR_DURATION_DECL(rmaqueue_alloc);
+extern void MPIDI_CH3_RMA_InitInstr(void);
+#endif
+extern void MPIDI_CH3_RMA_SetAccImmed( int );
+
 #define MPIDI_PASSIVE_TARGET_DONE_TAG  348297
 #define MPIDI_PASSIVE_TARGET_RMA_TAG 563924
 
@@ -18,7 +30,7 @@
 int MPIDI_Win_create(void *base, MPI_Aint size, int disp_unit, MPID_Info *info,
 		     MPID_Comm *comm_ptr, MPID_Win **win_ptr )
 {
-    int mpi_errno=MPI_SUCCESS, i, comm_size, rank;
+    int mpi_errno=MPI_SUCCESS, i, k, comm_size, rank;
     MPI_Aint *tmp_buf;
     MPID_Comm *win_comm_ptr;
     MPIU_CHKPMEM_DECL(4);
@@ -30,6 +42,26 @@
     /* FIXME: There should be no unreferenced args */
     MPIU_UNREFERENCED_ARG(info);
 
+    if(initRMAoptions) {
+	int rc;
+	MPIU_THREADSAFE_INIT_BLOCK_BEGIN(initRMAoptions);
+	/* Default is to enable the use of the immediate accumulate feature */
+	if (!MPL_env2bool( "MPICH_RMA_ACC_IMMED", &rc ))
+	    rc = 1;
+	MPIDI_CH3_RMA_SetAccImmed(rc);
+#ifdef USE_MPIU_INSTR
+    /* Define all instrumentation handles used in the CH3 RMA code here */
+	MPIU_INSTR_DURATION_INIT(wincreate_allgather,0,"WIN_CREATE:Allgather");
+	MPIU_INSTR_DURATION_INIT(winfree_rs,0,"WIN_FREE:ReduceScatterBlock");
+	MPIU_INSTR_DURATION_INIT(winfree_complete,0,"WIN_FREE:Complete");
+	MPIU_INSTR_DURATION_INIT(rmaqueue_alloc,0,"Allocate RMA Queue element");
+	MPIDI_CH3_RMA_InitInstr();
+
+#endif    
+	MPIU_THREADSAFE_INIT_CLEAR(initRMAoptions);
+	MPIU_THREADSAFE_INIT_BLOCK_END(initRMAoptions);
+    }
+
     comm_size = comm_ptr->local_size;
     rank = comm_ptr->rank;
     
@@ -46,7 +78,8 @@
     (*win_ptr)->start_group_ptr = NULL; 
     (*win_ptr)->start_assert = 0; 
     (*win_ptr)->attributes = NULL;
-    (*win_ptr)->rma_ops_list = NULL;
+    (*win_ptr)->rma_ops_list_head = NULL;
+    (*win_ptr)->rma_ops_list_tail = NULL;
     (*win_ptr)->lock_granted = 0;
     (*win_ptr)->current_lock_type = MPID_LOCK_NONE;
     (*win_ptr)->shared_lock_ref_cnt = 0;
@@ -56,8 +89,10 @@
     
     mpi_errno = MPIR_Comm_dup_impl(comm_ptr, &win_comm_ptr);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    (*win_ptr)->comm = win_comm_ptr->handle;
-    
+    (*win_ptr)->comm_ptr   = win_comm_ptr;
+    (*win_ptr)->myrank = rank;
+
+    MPIU_INSTR_DURATION_START(wincreate_allgather);
     /* allocate memory for the base addresses, disp_units, and
        completion counters of all processes */ 
     MPIU_CHKPMEM_MALLOC((*win_ptr)->base_addrs, void **,
@@ -82,20 +117,22 @@
 			mpi_errno, "tmp_buf");
     
     /* FIXME: This needs to be fixed for heterogeneous systems */
-    tmp_buf[3*rank] = MPIU_PtrToAint(base);
+    tmp_buf[3*rank]   = MPIU_PtrToAint(base);
     tmp_buf[3*rank+1] = (MPI_Aint) disp_unit;
     tmp_buf[3*rank+2] = (MPI_Aint) (*win_ptr)->handle;
     
     mpi_errno = MPIR_Allgather_impl(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                                     tmp_buf, 3 * sizeof(MPI_Aint), MPI_BYTE,
                                     comm_ptr);
+    MPIU_INSTR_DURATION_END(wincreate_allgather);
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
-    
+
+    k = 0;
     for (i=0; i<comm_size; i++)
     {
-	(*win_ptr)->base_addrs[i] = MPIU_AintToPtr(tmp_buf[3*i]);
-	(*win_ptr)->disp_units[i] = (int) tmp_buf[3*i+1];
-	(*win_ptr)->all_win_handles[i] = (MPI_Win) tmp_buf[3*i+2];
+	(*win_ptr)->base_addrs[i] = MPIU_AintToPtr(tmp_buf[k++]);
+	(*win_ptr)->disp_units[i] = (int) tmp_buf[k++];
+	(*win_ptr)->all_win_handles[i] = (MPI_Win) tmp_buf[k++];
     }
         
  fn_exit:
@@ -126,18 +163,20 @@
         
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FREE);
         
-    MPID_Comm_get_ptr( (*win_ptr)->comm, comm_ptr );
-
+    comm_ptr = (*win_ptr)->comm_ptr;
+    MPIU_INSTR_DURATION_START(winfree_rs);
     mpi_errno = MPIR_Reduce_scatter_block_impl((*win_ptr)->pt_rma_puts_accs, 
                                                &total_pt_rma_puts_accs, 1, 
                                                MPI_INT, MPI_SUM, comm_ptr);
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
+    MPIU_INSTR_DURATION_END(winfree_rs);
 
     if (total_pt_rma_puts_accs != (*win_ptr)->my_pt_rma_puts_accs)
     {
 	MPID_Progress_state progress_state;
             
 	/* poke the progress engine until the two are equal */
+	MPIU_INSTR_DURATION_START(winfree_complete);
 	MPID_Progress_start(&progress_state);
 	while (total_pt_rma_puts_accs != (*win_ptr)->my_pt_rma_puts_accs)
 	{
@@ -151,6 +190,7 @@
 	    /* --END ERROR HANDLING-- */
 	}
 	MPID_Progress_end(&progress_state);
+	MPIU_INSTR_DURATION_END(winfree_complete);
     }
 
     
@@ -187,11 +227,10 @@
 {
     int mpi_errno = MPI_SUCCESS;
     int dt_contig, rank, predefined;
-    MPIDI_RMA_ops *curr_ptr, *prev_ptr, *new_ptr;
+    MPIDI_RMA_ops *new_ptr;
     MPID_Datatype *dtp;
     MPI_Aint dt_true_lb;
     MPIDI_msg_sz_t data_sz;
-    MPID_Comm *win_comm_ptr;
     MPIU_CHKPMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_PUT);
         
@@ -205,13 +244,7 @@
 	goto fn_exit;
     }
 
-    /* FIXME: It makes sense to save the rank (and size) of the
-       communicator in the window structure to speed up these operations,
-       or to save a pointer to the communicator structure, rather than
-       just the handle 
-    */
-    MPID_Comm_get_ptr(win_ptr->comm, win_comm_ptr);
-    rank = MPIR_Comm_rank(win_comm_ptr);
+    rank = win_ptr->myrank;
     
     /* If the put is a local operation, do it here */
     if (target_rank == rank)
@@ -223,22 +256,18 @@
     else
     {
 	/* queue it up */
-	curr_ptr = win_ptr->rma_ops_list;
-	prev_ptr = curr_ptr;
-	while (curr_ptr != NULL)
-	{
-	    prev_ptr = curr_ptr;
-	    curr_ptr = curr_ptr->next;
-	}
-
-	/* FIXME: Where does this memory get freed? */
+	/* FIXME: For short operations, should we use a (per-thread) pool? */
+	MPIU_INSTR_DURATION_START(rmaqueue_alloc);
 	MPIU_CHKPMEM_MALLOC(new_ptr, MPIDI_RMA_ops *, sizeof(MPIDI_RMA_ops), 
 			    mpi_errno, "RMA operation entry");
-	if (prev_ptr != NULL)
-	    prev_ptr->next = new_ptr;
-	else 
-	    win_ptr->rma_ops_list = new_ptr;
-	
+	MPIU_INSTR_DURATION_END(rmaqueue_alloc);
+	if (win_ptr->rma_ops_list_tail) 
+	    win_ptr->rma_ops_list_tail->next = new_ptr;
+	else
+	    win_ptr->rma_ops_list_head = new_ptr;
+	win_ptr->rma_ops_list_tail = new_ptr;
+
+	/* FIXME: For contig and very short operations, use a streamlined op */
 	new_ptr->next = NULL;  
 	new_ptr->type = MPIDI_RMA_PUT;
 	new_ptr->origin_addr = origin_addr;
@@ -290,9 +319,8 @@
     MPIDI_msg_sz_t data_sz;
     int dt_contig, rank, predefined;
     MPI_Aint dt_true_lb;
-    MPIDI_RMA_ops *curr_ptr, *prev_ptr, *new_ptr;
+    MPIDI_RMA_ops *new_ptr;
     MPID_Datatype *dtp;
-    MPID_Comm *win_comm_ptr;
     MPIU_CHKPMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET);
         
@@ -306,10 +334,7 @@
 	goto fn_exit;
     }
 
-    /* FIXME: It makes sense to save the rank (and size) of the
-       communicator in the window structure to speed up these operations */
-    MPID_Comm_get_ptr(win_ptr->comm, win_comm_ptr);
-    rank = MPIR_Comm_rank(win_comm_ptr);
+    rank = win_ptr->myrank;
     
     /* If the get is a local operation, do it here */
     if (target_rank == rank)
@@ -323,25 +348,17 @@
     else
     {
 	/* queue it up */
-	curr_ptr = win_ptr->rma_ops_list;
-	prev_ptr = curr_ptr;
-	while (curr_ptr != NULL)
-	{
-	    prev_ptr = curr_ptr;
-	    curr_ptr = curr_ptr->next;
-	}
-	
+	MPIU_INSTR_DURATION_START(rmaqueue_alloc);
 	MPIU_CHKPMEM_MALLOC(new_ptr, MPIDI_RMA_ops *, sizeof(MPIDI_RMA_ops), 
 			    mpi_errno, "RMA operation entry");
-	if (prev_ptr != NULL)
-	{
-	    prev_ptr->next = new_ptr;
-	}
+	MPIU_INSTR_DURATION_END(rmaqueue_alloc);
+	if (win_ptr->rma_ops_list_tail) 
+	    win_ptr->rma_ops_list_tail->next = new_ptr;
 	else
-	{
-	    win_ptr->rma_ops_list = new_ptr;
-	}
+	    win_ptr->rma_ops_list_head = new_ptr;
+	win_ptr->rma_ops_list_tail = new_ptr;
             
+	/* FIXME: For contig and very short operations, use a streamlined op */
 	new_ptr->next = NULL;  
 	new_ptr->type = MPIDI_RMA_GET;
 	new_ptr->origin_addr = origin_addr;
@@ -394,9 +411,8 @@
     MPIDI_msg_sz_t data_sz;
     int dt_contig, rank, origin_predefined, target_predefined;
     MPI_Aint dt_true_lb;
-    MPIDI_RMA_ops *curr_ptr, *prev_ptr, *new_ptr;
+    MPIDI_RMA_ops *new_ptr;
     MPID_Datatype *dtp;
-    MPID_Comm *win_comm_ptr;
     MPIU_CHKLMEM_DECL(2);
     MPIU_CHKPMEM_DECL(1);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_ACCUMULATE);
@@ -410,18 +426,13 @@
     {
 	goto fn_exit;
     }
+
+    rank = win_ptr->myrank;
     
-    /* FIXME: It makes sense to save the rank (and size) of the
-       communicator in the window structure to speed up these operations,
-       or to save a pointer to the communicator structure, rather than
-       just the handle 
-    */
-    MPID_Comm_get_ptr(win_ptr->comm, win_comm_ptr);
-    rank = MPIR_Comm_rank(win_comm_ptr);
-    
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, origin_predefined);
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, target_predefined);
 
+    /* Do the target_rank != rank case first (most likely branch?) */
     if (target_rank == rank)
     {
 	MPI_User_function *uop;
@@ -440,7 +451,7 @@
 			     "**opnotpredefined %d", op );
 	
 	/* get the function by indexing into the op table */
-	uop = MPIR_Op_table[(op)%16 - 1];
+	uop = MPIR_Op_table[((op)&0xf) - 1];
 	
 	if (origin_predefined && target_predefined)
 	{    
@@ -524,25 +535,32 @@
     else
     {
 	/* queue it up */
-	curr_ptr = win_ptr->rma_ops_list;
-	prev_ptr = curr_ptr;
-	while (curr_ptr != NULL)
-	{
-	    prev_ptr = curr_ptr;
-	    curr_ptr = curr_ptr->next;
-	}
-	
+	MPIU_INSTR_DURATION_START(rmaqueue_alloc);
 	MPIU_CHKPMEM_MALLOC(new_ptr, MPIDI_RMA_ops *, sizeof(MPIDI_RMA_ops), 
 			    mpi_errno, "RMA operation entry");
-	if (prev_ptr != NULL)
-	{
-	    prev_ptr->next = new_ptr;
-	}
+	MPIU_INSTR_DURATION_END(rmaqueue_alloc);
+	if (win_ptr->rma_ops_list_tail) 
+	    win_ptr->rma_ops_list_tail->next = new_ptr;
 	else
-	{
-	    win_ptr->rma_ops_list = new_ptr;
+	    win_ptr->rma_ops_list_head = new_ptr;
+	win_ptr->rma_ops_list_tail = new_ptr;
+
+	/* If predefined and contiguous, use a simplified element */
+	if (origin_predefined && target_predefined && enableShortACC) {
+	    new_ptr->next = NULL;
+	    new_ptr->type = MPIDI_RMA_ACC_CONTIG;
+	    /* Only the information needed for the contig/predefined acc */
+	    new_ptr->origin_addr = origin_addr;
+	    new_ptr->origin_count = origin_count;
+	    new_ptr->origin_datatype = origin_datatype;
+	    new_ptr->target_rank = target_rank;
+	    new_ptr->target_disp = target_disp;
+	    new_ptr->target_count = target_count;
+	    new_ptr->target_datatype = target_datatype;
+	    new_ptr->op = op;
+	    goto fn_exit;
 	}
-        
+
 	new_ptr->next = NULL;  
 	new_ptr->type = MPIDI_RMA_ACCUMULATE;
 	new_ptr->origin_addr = origin_addr;

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_rma_sync.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_rma_sync.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/ch3u_rma_sync.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -7,6 +7,63 @@
 #include "mpidimpl.h"
 #include "mpidrma.h"
 
+static int EnableImmedAcc = 1;
+void MPIDI_CH3_RMA_SetAccImmed( int flag )
+{
+    EnableImmedAcc = flag;
+}
+
+#ifdef USE_MPIU_INSTR
+MPIU_INSTR_DURATION_DECL(winfence_clearlock);
+MPIU_INSTR_DURATION_DECL(winfence_rs);
+MPIU_INSTR_DURATION_DECL(winfence_issue);
+MPIU_INSTR_DURATION_DECL(winfence_complete);
+MPIU_INSTR_DURATION_DECL(winfence_wait);
+MPIU_INSTR_DURATION_DECL(winfence_block);
+MPIU_INSTR_DURATION_DECL(winpost_clearlock);
+MPIU_INSTR_DURATION_DECL(winpost_sendsync);
+MPIU_INSTR_DURATION_DECL(winstart_clearlock);
+MPIU_INSTR_DURATION_DECL(wincomplete_issue);
+MPIU_INSTR_DURATION_DECL(wincomplete_complete);
+MPIU_INSTR_DURATION_DECL(wincomplete_recvsync);
+MPIU_INSTR_DURATION_DECL(winwait_wait);
+MPIU_INSTR_DURATION_DECL(winlock_getlocallock);
+MPIU_INSTR_DURATION_DECL(winunlock_getlock);
+MPIU_INSTR_DURATION_DECL(winunlock_issue);
+MPIU_INSTR_DURATION_DECL(winunlock_complete);
+MPIU_INSTR_DURATION_DECL(lockqueue_alloc);
+MPIU_INSTR_DURATION_DECL(rmapkt_acc);
+MPIU_INSTR_DURATION_DECL(rmapkt_acc_predef);
+MPIU_INSTR_DURATION_DECL(rmapkt_acc_immed);
+MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_alloc);
+void MPIDI_CH3_RMA_InitInstr(void);
+
+void MPIDI_CH3_RMA_InitInstr(void)
+{
+    MPIU_INSTR_DURATION_INIT(lockqueue_alloc,0,"Allocate Lock Queue element");
+    MPIU_INSTR_DURATION_INIT(winfence_clearlock,1,"WIN_FENCE:Clear prior lock");
+    MPIU_INSTR_DURATION_INIT(winfence_rs,0,"WIN_FENCE:ReduceScatterBlock");
+    MPIU_INSTR_DURATION_INIT(winfence_issue,2,"WIN_FENCE:Issue RMA ops");
+    MPIU_INSTR_DURATION_INIT(winfence_complete,1,"WIN_FENCE:Complete RMA ops");
+    MPIU_INSTR_DURATION_INIT(winfence_wait,1,"WIN_FENCE:Wait for ops from other processes");
+    MPIU_INSTR_DURATION_INIT(winfence_block,0,"WIN_FENCE:Wait for any progress");
+    MPIU_INSTR_DURATION_INIT(winpost_clearlock,1,"WIN_POST:Clear prior lock");
+    MPIU_INSTR_DURATION_INIT(winpost_sendsync,1,"WIN_POST:Senc sync messages");
+    MPIU_INSTR_DURATION_INIT(winstart_clearlock,1,"WIN_START:Clear prior lock");
+    MPIU_INSTR_DURATION_INIT(wincomplete_recvsync,1,"WIN_COMPLETE:Recv sync messages");
+    MPIU_INSTR_DURATION_INIT(wincomplete_issue,2,"WIN_COMPLETE:Issue RMA ops");
+    MPIU_INSTR_DURATION_INIT(wincomplete_complete,1,"WIN_COMPLETE:Complete RMA ops");
+    MPIU_INSTR_DURATION_INIT(winwait_wait,1,"WIN_WAIT:Wait for ops from other processes");
+    MPIU_INSTR_DURATION_INIT(winlock_getlocallock,0,"WIN_LOCK:Get local lock");
+    MPIU_INSTR_DURATION_INIT(winunlock_issue,2,"WIN_UNLOCK:Issue RMA ops");
+    MPIU_INSTR_DURATION_INIT(winunlock_complete,1,"WIN_UNLOCK:Complete RMA ops");
+    MPIU_INSTR_DURATION_INIT(winunlock_getlock,0,"WIN_UNLOCK:Acquire lock");
+    MPIU_INSTR_DURATION_INIT(rmapkt_acc,0,"RMA:PKTHANDLER for Accumulate");
+    MPIU_INSTR_DURATION_INIT(rmapkt_acc_predef,0,"RMA:PKTHANDLER for Accumulate: predef dtype");
+    MPIU_INSTR_DURATION_INIT(rmapkt_acc_immed,0,"RMA:PKTHANDLER for Accum immed");
+}
+#endif
+
 /*
  * These routines provide a default implementation of the MPI RMA operations
  * in terms of the low-level, two-sided channel operations.  A channel
@@ -31,6 +88,11 @@
 				   MPI_Win target_win_handle, 
 				   MPIDI_RMA_dtype_info * dtype_info, 
 				   void ** dataloop, MPID_Request ** request); 
+static int MPIDI_CH3I_Send_contig_acc_msg(MPIDI_RMA_ops * rma_op, 
+					  MPID_Win * win_ptr, 
+					  MPI_Win source_win_handle, 
+					  MPI_Win target_win_handle, 
+					  MPID_Request ** request);
 static int MPIDI_CH3I_Do_passive_target_rma(MPID_Win *win_ptr, 
 					    int *wait_for_rma_done_pkt);
 static int MPIDI_CH3I_Send_lock_put_or_acc(MPID_Win *win_ptr);
@@ -48,16 +110,13 @@
 int MPIDI_Win_fence(int assert, MPID_Win *win_ptr)
 {
     int mpi_errno = MPI_SUCCESS;
-    int comm_size, done;
+    int comm_size;
     int *rma_target_proc, *nops_to_proc, i, total_op_count, *curr_ops_cnt;
-    MPIDI_RMA_ops *curr_ptr, *next_ptr;
+    MPIDI_RMA_ops *curr_ptr;
     MPID_Comm *comm_ptr;
-    MPID_Request **requests=NULL; /* array of requests */
     MPI_Win source_win_handle, target_win_handle;
-    MPIDI_RMA_dtype_info *dtype_infos=NULL;
-    void **dataloops=NULL;    /* to store dataloops for each datatype */
     MPID_Progress_state progress_state;
-    MPIU_CHKLMEM_DECL(6);
+    MPIU_CHKLMEM_DECL(3);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_WIN_FENCE);
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_FENCE);
@@ -71,6 +130,7 @@
      * have completed and the lock is released. */
     if (win_ptr->current_lock_type != MPID_LOCK_NONE)
     {
+	MPIU_INSTR_DURATION_START(winfence_clearlock);
 	MPID_Progress_start(&progress_state);
 	while (win_ptr->current_lock_type != MPID_LOCK_NONE)
 	{
@@ -82,9 +142,10 @@
 		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
 	    }
 	    /* --END ERROR HANDLING-- */
-
+	    MPIU_INSTR_DURATION_INCR(winfence_clearlock,0,1);
 	}
 	MPID_Progress_end(&progress_state);
+	MPIU_INSTR_DURATION_END(winfence_clearlock);
     }
 
     /* Note that the NOPRECEDE and NOSUCCEED must be specified by all processes
@@ -108,10 +169,10 @@
     }
     else
     {
+	MPIDI_RMA_ops **prevNextPtr, *tmpptr;
+	MPIU_INSTR_DURATION_START(winfence_rs);
 	/* This is the second or later fence. Do all the preceding RMA ops. */
-	
-	MPID_Comm_get_ptr( win_ptr->comm, comm_ptr );
-	
+	comm_ptr = win_ptr->comm_ptr;
 	/* First inform every process whether it is a target of RMA
 	   ops from this process */
 	comm_size = comm_ptr->local_size;
@@ -131,7 +192,7 @@
 	/* set rma_target_proc[i] to 1 if rank i is a target of RMA
 	   ops from this process */
 	total_op_count = 0;
-	curr_ptr = win_ptr->rma_ops_list;
+	curr_ptr = win_ptr->rma_ops_list_head;
 	while (curr_ptr != NULL)
 	{
 	    total_op_count++;
@@ -143,22 +204,8 @@
 	MPIU_CHKLMEM_MALLOC(curr_ops_cnt, int *, comm_size*sizeof(int),
 			    mpi_errno, "curr_ops_cnt");
 	for (i=0; i<comm_size; i++) curr_ops_cnt[i] = 0;
-	
-	if (total_op_count != 0)
-	{
-	    MPIU_CHKLMEM_MALLOC(requests, MPID_Request **, 
-				total_op_count*sizeof(MPID_Request*),
-				mpi_errno, "requests");
-	    MPIU_CHKLMEM_MALLOC(dtype_infos, MPIDI_RMA_dtype_info *, 
-				total_op_count*sizeof(MPIDI_RMA_dtype_info),
-				mpi_errno, "dtype_infos");
-	    MPIU_CHKLMEM_MALLOC(dataloops, void **, 
-				total_op_count*sizeof(void*),
-				mpi_errno, "dataloops");
-	    for (i=0; i<total_op_count; i++) dataloops[i] = NULL;
-	}
-	
-	/* do a reduce_scatter_block (with MPI_SUM) on rma_target_proc. As a result,
+	/* do a reduce_scatter_block (with MPI_SUM) on rma_target_proc. 
+	   As a result,
 	   each process knows how many other processes will be doing
 	   RMA ops on its window */  
             
@@ -167,6 +214,7 @@
             
 	mpi_errno = MPIR_Reduce_scatter_block_impl(MPI_IN_PLACE, rma_target_proc, 1,
                                                    MPI_INT, MPI_SUM, comm_ptr);
+	MPIU_INSTR_DURATION_END(winfence_rs);
 	/* result is stored in rma_target_proc[0] */
 	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
@@ -175,9 +223,13 @@
 	   procs have the address and could decrement it. */
 	win_ptr->my_counter = win_ptr->my_counter - comm_size + 
 	    rma_target_proc[0];  
-            
+
+	MPIU_INSTR_DURATION_START(winfence_issue);
+	MPIU_INSTR_DURATION_INCR(winfence_issue,0,total_op_count);
+	MPIU_INSTR_DURATION_MAX(winfence_issue,1,total_op_count);
 	i = 0;
-	curr_ptr = win_ptr->rma_ops_list;
+	curr_ptr = win_ptr->rma_ops_list_head;
+	prevNextPtr = &win_ptr->rma_ops_list_head;
 	while (curr_ptr != NULL)
 	{
 	    /* The completion counter at the target is decremented only on 
@@ -191,22 +243,28 @@
 		source_win_handle = MPI_WIN_NULL;
 	    
 	    target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
-	    
+
+	    curr_ptr->dataloop = 0;
 	    switch (curr_ptr->type)
 	    {
 	    case (MPIDI_RMA_PUT):
 	    case (MPIDI_RMA_ACCUMULATE):
 		mpi_errno = MPIDI_CH3I_Send_rma_msg(curr_ptr, win_ptr,
 					source_win_handle, target_win_handle, 
-					&dtype_infos[i],
-					&dataloops[i], &requests[i]);
+					&curr_ptr->dtype_info,
+					&curr_ptr->dataloop, &curr_ptr->request);
 		if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 		break;
+	    case MPIDI_RMA_ACC_CONTIG:
+		mpi_errno = MPIDI_CH3I_Send_contig_acc_msg(curr_ptr, win_ptr,
+				   source_win_handle, target_win_handle, 
+				   &curr_ptr->request );
+		break;
 	    case (MPIDI_RMA_GET):
 		mpi_errno = MPIDI_CH3I_Recv_rma_msg(curr_ptr, win_ptr,
 					source_win_handle, target_win_handle, 
-					&dtype_infos[i], 
-					&dataloops[i], &requests[i]);
+					&curr_ptr->dtype_info, 
+					&curr_ptr->dataloop, &curr_ptr->request);
 		if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 		break;
 	    default:
@@ -214,87 +272,119 @@
 	    }
 	    i++;
 	    curr_ops_cnt[curr_ptr->target_rank]++;
-	    curr_ptr = curr_ptr->next;
+	    /* If the request is null, we can remove it immediately */
+	    if (!curr_ptr->request) {
+		if (curr_ptr->dataloop != NULL) {
+		    MPIU_Free(curr_ptr->dataloop); /* allocated in send_rma_msg or 
+						      recv_rma_msg */
+		}
+		tmpptr       = curr_ptr->next;
+		*prevNextPtr = tmpptr;
+		MPIU_Free( curr_ptr );
+		curr_ptr     = tmpptr;
+	    }
+	    else  {
+		curr_ptr    = curr_ptr->next;
+		prevNextPtr = &curr_ptr->next;
+		/* FIXME: We could at least occasionally try to wait
+		   on completion of the pending send requests rather than
+		   focus on filling the queues.  */
+	    }
 	}
-	
-            
+	MPIU_INSTR_DURATION_END(winfence_issue);
+
+	/* We replaced a loop over an array of requests with a list of the
+	   incomplete requests.  The reason for doing
+	   that is long lists - processing the entire list until
+	   all are done potentially takes n^2 time.  In
+	   testing with test/mpi/perf/manyrma.c, the number of iterations
+	   within the "while (total_op_count)" loop was O(total_op_count).
+	   
+	   Another alternative is to create a more compressed list (storing
+	   only the necessary information), reducing the number of cache lines
+	   needed while looping through the requests.
+	*/
 	if (total_op_count)
 	{ 
-	    done = 1;
+	    int ntimes = 0;
+	    MPIU_INSTR_DURATION_START(winfence_complete);
 	    MPID_Progress_start(&progress_state);
-	    while (total_op_count)
-	    {
-		for (i=0; i<total_op_count; i++)
-		{
-		    if (requests[i] != NULL)
-		    {
-			if (!MPID_Request_is_complete(requests[i]))
-			{
-			    done = 0;
-			    break;
-			}
-			else
-			{
-			    mpi_errno = requests[i]->status.MPI_ERROR;
+	    /* Process all operations until they are complete */
+	    while (win_ptr->rma_ops_list_head) {
+		int loopcount = 0;
+		prevNextPtr = &win_ptr->rma_ops_list_head;
+		ntimes++;
+		curr_ptr = win_ptr->rma_ops_list_head;
+		do {
+		    if (MPID_Request_is_complete(curr_ptr->request)) {
+			/* Once we find a complete request, we complete
+			 as many as possible until we find an incomplete
+			 or null request */
+			do {
+			    mpi_errno = curr_ptr->request->status.MPI_ERROR;
 			    /* --BEGIN ERROR HANDLING-- */
-			    if (mpi_errno != MPI_SUCCESS)
-			    {
+			    if (mpi_errno != MPI_SUCCESS) {
 				MPID_Progress_end(&progress_state);
 				MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMAmessage");
 			    }
 			    /* --END ERROR HANDLING-- */
-			    /* if origin datatype was a derived
-			       datatype, it will get freed when the
-			       request gets freed. */ 
-			    MPID_Request_release(requests[i]);
-			    requests[i] = NULL;
+			    MPID_Request_release(curr_ptr->request);
+			    if (curr_ptr->dataloop != NULL) {
+				MPIU_Free(curr_ptr->dataloop); /* allocated in send_rma_msg or 
+								  recv_rma_msg */
+			    }
+			    /* We can remove and free this rma op element */
+			    tmpptr       = curr_ptr->next;
+			    *prevNextPtr = tmpptr;
+			    MPIU_Free( curr_ptr );
+			    curr_ptr     = tmpptr;
 			}
+			while (curr_ptr &&
+			       MPID_Request_is_complete(curr_ptr->request));
+			/* Once a request completes, we wait for another
+			   operation to arrive rather than check the
+			   rest of the requests.  */
+			break;
 		    }
+		    else {
+			/* In many cases, if the list of pending requests
+			   is long, there's no point in checking the entire
+			   list */
+			if (loopcount++ > 4) /* FIXME: threshold as parameter */
+			    break;  /* wait for an event */
+			prevNextPtr = &curr_ptr->next;
+			curr_ptr    = curr_ptr->next;
+		    }
+		} while (curr_ptr);
+         
+		/* Wait for something to arrive*/
+		/* In some tests, this hung unless the test ensured that 
+		   there was an incomplete request. */
+		curr_ptr = win_ptr->rma_ops_list_head;
+		if (curr_ptr && !MPID_Request_is_complete(curr_ptr->request) ) {
+		    MPIU_INSTR_DURATION_START(winfence_block);
+		    mpi_errno = MPID_Progress_wait(&progress_state);
+		    /* --BEGIN ERROR HANDLING-- */
+		    if (mpi_errno != MPI_SUCCESS) {
+			MPID_Progress_end(&progress_state);
+			MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
+		    }
+		    /* --END ERROR HANDLING-- */
+		    MPIU_INSTR_DURATION_END(winfence_block);
 		}
-                    
-		if (done)
-		{
-		    break;
-		}
-                    
-		mpi_errno = MPID_Progress_wait(&progress_state);
-		/* --BEGIN ERROR HANDLING-- */
-		if (mpi_errno != MPI_SUCCESS) {
-		    MPID_Progress_end(&progress_state);
-		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
-		}
-		/* --END ERROR HANDLING-- */
-		
-		done = 1;
-	    } 
+	    } /* While list of rma operation is non-empty */
 	    MPID_Progress_end(&progress_state);
+	    MPIU_INSTR_DURATION_INCR(winfence_complete,0,ntimes);
+	    MPIU_INSTR_DURATION_END(winfence_complete);
 	}
             
-	if (total_op_count != 0)
-	{
-	    for (i=0; i<total_op_count; i++)
-	    {
-		if (dataloops[i] != NULL)
-		{
-		    MPIU_Free(dataloops[i]); /* allocated in send_rma_msg or 
-						recv_rma_msg */
-		}
-	    }
-	}
+	win_ptr->rma_ops_list_head = NULL;
+	win_ptr->rma_ops_list_tail = NULL;
 	
-	/* free MPIDI_RMA_ops_list */
-	curr_ptr = win_ptr->rma_ops_list;
-	while (curr_ptr != NULL)
-	{
-	    next_ptr = curr_ptr->next;
-	    MPIU_Free(curr_ptr);
-	    curr_ptr = next_ptr;
-	}
-	win_ptr->rma_ops_list = NULL;
-	
 	/* wait for all operations from other processes to finish */
 	if (win_ptr->my_counter)
 	{
+	    MPIU_INSTR_DURATION_START(winfence_wait);
 	    MPID_Progress_start(&progress_state);
 	    while (win_ptr->my_counter)
 	    {
@@ -305,8 +395,10 @@
 		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
 		}
 		/* --END ERROR HANDLING-- */
+		MPIU_INSTR_DURATION_INCR(winfence_wait,0,1);
 	    }
 	    MPID_Progress_end(&progress_state);
+	    MPIU_INSTR_DURATION_END(winfence_wait);
 	} 
 	
 	if (assert & MPI_MODE_NOSUCCEED)
@@ -470,7 +562,7 @@
           fflush(stdout);
     */
 
-    MPID_Comm_get_ptr(win_ptr->comm, comm_ptr);
+    comm_ptr = win_ptr->comm_ptr;
     MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
 
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(rma_op->origin_datatype, predefined);
@@ -641,8 +733,120 @@
     /* --END ERROR HANDLING-- */
 }
 
+/* Use this for contiguous accumulate operations.  Sends rma_op to its target
+ * rank, either inline in an ACCUM_IMMED packet (small payloads) or as an
+ * ACCUMULATE header followed by the origin buffer.  Returns an MPI error code. */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3I_Send_contig_acc_msg
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+static int MPIDI_CH3I_Send_contig_acc_msg(MPIDI_RMA_ops *rma_op, /* op to send: origin buffer/count/type, target rank/disp/count/type, op */
+					  MPID_Win *win_ptr, /* window: per-rank base_addrs and disp_units, plus comm_ptr */
+					  MPI_Win source_win_handle, /* stored in the packet header */
+					  MPI_Win target_win_handle, /* stored in the packet header */
+					  MPID_Request **request) /* out: set to NULL first, then passed to iStartMsg/iStartMsgv */
+{
+    MPIDI_CH3_Pkt_t upkt;
+    MPIDI_CH3_Pkt_accum_t *accum_pkt = &upkt.accum;
+    MPID_IOV iov[MPID_IOV_LIMIT];
+    int mpi_errno=MPI_SUCCESS;
+    int origin_type_size, iovcnt; 
+    MPIDI_VC_t * vc;
+    MPID_Comm *comm_ptr;
+    int len; /* payload size in bytes: origin_count * origin_type_size */
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_CONTIG_ACC_MSG);
+    MPIDI_STATE_DECL(MPID_STATE_MEMCPY);
 
+    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_CONTIG_ACC_MSG);
 
+    *request = NULL;
+
+    MPID_Datatype_get_size_macro(rma_op->origin_datatype, origin_type_size);
+    /* FIXME: Make this size check efficient and match the packet type */
+    len = rma_op->origin_count * origin_type_size;
+    if (EnableImmedAcc && len <= MPIDI_RMA_IMMED_INTS*sizeof(int)) {
+	MPIDI_CH3_Pkt_accum_immed_t * accumi_pkt = &upkt.accum_immed;
+	void *dest = accumi_pkt->data, *src = rma_op->origin_addr;
+	/* Immediate path: the header fields and the payload share one packet */
+	MPIDI_Pkt_init(accumi_pkt, MPIDI_CH3_PKT_ACCUM_IMMED);
+	accumi_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
+	    win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+	accumi_pkt->count = rma_op->target_count;
+	accumi_pkt->datatype = rma_op->target_datatype;
+	accumi_pkt->op = rma_op->op;
+	accumi_pkt->target_win_handle = target_win_handle;
+	accumi_pkt->source_win_handle = source_win_handle;
+	/* Copy the payload: one typed store for 1/2/4/8 bytes, MPIU_Memcpy otherwise */
+	switch (len) {
+	case 1: *(uint8_t *)dest  = *(uint8_t *)src;  break;
+	case 2: *(uint16_t *)dest = *(uint16_t *)src; break;
+	case 4: *(uint32_t *)dest = *(uint32_t *)src; break;
+	case 8: *(uint64_t *)dest = *(uint64_t *)src; break;
+	default:
+	    MPIU_Memcpy( accumi_pkt->data, (void *)rma_op->origin_addr, len );
+	}
+	comm_ptr = win_ptr->comm_ptr;
+	MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
+	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
+	mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, accumi_pkt, sizeof(*accumi_pkt), request));
+	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
+	MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
+	goto fn_exit; /* the general path below is skipped */
+    }
+    /* General path: ACCUMULATE header in iov[0], origin data in iov[1] */
+    MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_ACCUMULATE);
+    accum_pkt->addr = (char *) win_ptr->base_addrs[rma_op->target_rank] +
+	win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+    accum_pkt->count = rma_op->target_count;
+    accum_pkt->datatype = rma_op->target_datatype;
+    accum_pkt->dataloop_size = 0;
+    accum_pkt->op = rma_op->op;
+    accum_pkt->target_win_handle = target_win_handle;
+    accum_pkt->source_win_handle = source_win_handle;
+    
+    iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) accum_pkt;
+    iov[0].MPID_IOV_LEN = sizeof(*accum_pkt);
+
+    /*    printf("send pkt: type %d, addr %d, count %d, base %d\n", rma_pkt->type,
+          rma_pkt->addr, rma_pkt->count, win_ptr->base_addrs[rma_op->target_rank]);
+          fflush(stdout);
+    */
+
+    comm_ptr = win_ptr->comm_ptr;
+    MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
+
+
+    /* basic datatype on target */
+    /* basic datatype on origin, so the origin buffer itself is the second iov entry */
+    /* FIXME: This is still very heavyweight for a small message operation,
+       such as a single word update */
+    /* One possibility is to use iStartMsg with a buffer that is just large 
+       enough, though note that nemesis has an optimization for this */
+    iov[1].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rma_op->origin_addr;
+    iov[1].MPID_IOV_LEN = rma_op->origin_count * origin_type_size;
+    iovcnt = 2;
+    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
+    mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsgv(vc, iov, iovcnt, request));
+    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
+    MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
+
+ fn_exit:
+    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_CONTIG_ACC_MSG);
+    return mpi_errno;
+    /* --BEGIN ERROR HANDLING-- */
+ fn_fail:
+    if (*request)
+    {
+        MPIU_Object_set_ref(*request, 0); /* zero the reference count before destroying */
+        MPIDI_CH3_Request_destroy(*request);
+    }
+    *request = NULL; /* the caller never sees a request on the failure path */
+    goto fn_exit;
+    /* --END ERROR HANDLING-- */
+}
+
+
+
 #undef FUNCNAME
 #define FUNCNAME MPIDI_CH3I_Recv_rma_msg
 #undef FCNAME
@@ -708,7 +912,7 @@
     fflush(stdout);
 */
 	    
-    MPID_Comm_get_ptr(win_ptr->comm, comm_ptr);
+    comm_ptr = win_ptr->comm_ptr;
     MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
 
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(rma_op->target_datatype, predefined);
@@ -790,8 +994,6 @@
 }
 
 
-
-
 #undef FUNCNAME
 #define FUNCNAME MPIDI_Win_post
 #undef FCNAME
@@ -824,6 +1026,7 @@
     {
 	MPID_Progress_state progress_state;
 	
+	MPIU_INSTR_DURATION_START(winpost_clearlock);
 	/* poke the progress engine */
 	MPID_Progress_start(&progress_state);
 	while (win_ptr->current_lock_type != MPID_LOCK_NONE)
@@ -835,8 +1038,10 @@
 		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
 	    }
 	    /* --END ERROR HANDLING-- */
+	    MPIU_INSTR_DURATION_INCR(winpost_clearlock,0,1);
 	}
 	MPID_Progress_end(&progress_state);
+	MPIU_INSTR_DURATION_END(winpost_clearlock);
     }
         
     post_grp_size = post_grp_ptr->size;
@@ -848,12 +1053,14 @@
     {
         MPI_Request *req;
         MPI_Status *status;
+
+	MPIU_INSTR_DURATION_START(winpost_sendsync);
  
 	/* NOCHECK not specified. We need to notify the source
 	   processes that Post has been called. */  
 	
 	/* We need to translate the ranks of the processes in
-	   post_group to ranks in win_ptr->comm, so that we
+	   post_group to ranks in win_ptr->comm_ptr, so that we
 	   can do communication */
             
 	MPIU_CHKLMEM_MALLOC(ranks_in_post_grp, int *, 
@@ -868,7 +1075,7 @@
 	    ranks_in_post_grp[i] = i;
 	}
         
-        MPID_Comm_get_ptr( win_ptr->comm, win_comm_ptr );
+	win_comm_ptr = win_ptr->comm_ptr;
 
         mpi_errno = MPIR_Comm_group_impl(win_comm_ptr, &win_grp_ptr);
 	if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -877,15 +1084,20 @@
 	MPIR_Group_translate_ranks_impl(post_grp_ptr, post_grp_size, ranks_in_post_grp,
                                         win_grp_ptr, ranks_in_win_grp);
 	
-        rank = MPIR_Comm_rank(win_comm_ptr);
+        rank = win_ptr->myrank;
 	
 	MPIU_CHKLMEM_MALLOC(req, MPI_Request *, post_grp_size * sizeof(MPI_Request), mpi_errno, "req");
         MPIU_CHKLMEM_MALLOC(status, MPI_Status *, post_grp_size*sizeof(MPI_Status), mpi_errno, "status");
 
 	/* Send a 0-byte message to the source processes */
+	MPIU_INSTR_DURATION_INCR(winpost_sendsync,0,post_grp_size);
 	for (i = 0; i < post_grp_size; i++) {
 	    dst = ranks_in_win_grp[i];
-	    
+
+	    /* FIXME: Short messages like this shouldn't normally need a 
+	       request - this should consider using the ch3 call to send
+	       a short message and return a request only if the message is
+	       not delivered. */
 	    if (dst != rank) {
                 MPID_Request *req_ptr;
 		mpi_errno = MPID_Isend(&i, 0, MPI_INT, dst, SYNC_POST_TAG, win_comm_ptr,
@@ -912,6 +1124,7 @@
 
         mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
 	if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+	MPIU_INSTR_DURATION_END(winpost_sendsync);
     }
 
  fn_exit:
@@ -954,6 +1167,7 @@
     {
 	MPID_Progress_state progress_state;
 	
+	MPIU_INSTR_DURATION_START(winstart_clearlock);
 	/* poke the progress engine */
 	MPID_Progress_start(&progress_state);
 	while (win_ptr->current_lock_type != MPID_LOCK_NONE)
@@ -965,8 +1179,10 @@
 		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
 	    }
 	    /* --END ERROR HANDLING-- */
+	    MPIU_INSTR_DURATION_INCR(winstart_clearlock,0,1);
 	}
 	MPID_Progress_end(&progress_state);
+	MPIU_INSTR_DURATION_END(winstart_clearlock);
     }
     
     win_ptr->start_group_ptr = group_ptr;
@@ -989,12 +1205,9 @@
     int mpi_errno = MPI_SUCCESS;
     int comm_size, *nops_to_proc, src, new_total_op_count;
     int i, j, dst, done, total_op_count, *curr_ops_cnt;
-    MPIDI_RMA_ops *curr_ptr, *next_ptr;
+    MPIDI_RMA_ops *curr_ptr, *tmpptr, **prevNextPtr;
     MPID_Comm *comm_ptr;
-    MPID_Request **requests; /* array of requests */
     MPI_Win source_win_handle, target_win_handle;
-    MPIDI_RMA_dtype_info *dtype_infos=NULL;
-    void **dataloops=NULL;    /* to store dataloops for each datatype */
     MPID_Group *win_grp_ptr;
     int start_grp_size, *ranks_in_start_grp, *ranks_in_win_grp, rank;
     MPIU_CHKLMEM_DECL(9);
@@ -1002,14 +1215,15 @@
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_WIN_COMPLETE);
 
-    MPID_Comm_get_ptr( win_ptr->comm, comm_ptr );
+    comm_ptr = win_ptr->comm_ptr;
     comm_size = comm_ptr->local_size;
         
     /* Translate the ranks of the processes in
-       start_group to ranks in win_ptr->comm */
+       start_group to ranks in win_ptr->comm_ptr */
     
     start_grp_size = win_ptr->start_group_ptr->size;
-        
+
+    MPIU_INSTR_DURATION_START(wincomplete_recvsync);
     MPIU_CHKLMEM_MALLOC(ranks_in_start_grp, int *, start_grp_size*sizeof(int), 
 			mpi_errno, "ranks_in_start_grp");
         
@@ -1024,10 +1238,11 @@
     mpi_errno = MPIR_Comm_group_impl(comm_ptr, &win_grp_ptr);
     if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 
-    MPIR_Group_translate_ranks_impl(win_ptr->start_group_ptr, start_grp_size, ranks_in_start_grp,
+    MPIR_Group_translate_ranks_impl(win_ptr->start_group_ptr, start_grp_size, 
+				    ranks_in_start_grp,
                                     win_grp_ptr, ranks_in_win_grp);
         
-    rank = MPIR_Comm_rank(comm_ptr);
+    rank = win_ptr->myrank;
 
     /* If MPI_MODE_NOCHECK was not specified, we need to check if
        Win_post was called on the target processes. Wait for a 0-byte sync
@@ -1036,14 +1251,19 @@
     {
         MPI_Request *req;
         MPI_Status *status;
-        
+
         MPIU_CHKLMEM_MALLOC(req, MPI_Request *, start_grp_size*sizeof(MPI_Request), mpi_errno, "req");
         MPIU_CHKLMEM_MALLOC(status, MPI_Status *, start_grp_size*sizeof(MPI_Status), mpi_errno, "status");
 
+	MPIU_INSTR_DURATION_INCR(wincomplete_recvsync,0,start_grp_size);
 	for (i = 0; i < start_grp_size; i++) {
 	    src = ranks_in_win_grp[i];
 	    if (src != rank) {
                 MPID_Request *req_ptr;
+		/* FIXME: This is a heavyweight way to process these sync 
+		   messages - this should be handled with a special packet
+		   type and callback function.
+		*/
                 mpi_errno = MPID_Irecv(NULL, 0, MPI_INT, src, SYNC_POST_TAG,
                                        comm_ptr, MPID_CONTEXT_INTRA_PT2PT, &req_ptr);
 		if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -1067,28 +1287,31 @@
         }
         /* --END ERROR HANDLING-- */
     }
-        
+    MPIU_INSTR_DURATION_END(wincomplete_recvsync);
+
     /* keep track of no. of ops to each proc. Needed for knowing
        whether or not to decrement the completion counter. The
        completion counter is decremented only on the last
        operation. */
-        
+
+    MPIU_INSTR_DURATION_START(wincomplete_issue);
+
     MPIU_CHKLMEM_MALLOC(nops_to_proc, int *, comm_size*sizeof(int), 
 			mpi_errno, "nops_to_proc");
     for (i=0; i<comm_size; i++) nops_to_proc[i] = 0;
 
     total_op_count = 0;
-    curr_ptr = win_ptr->rma_ops_list;
+    curr_ptr = win_ptr->rma_ops_list_head;
     while (curr_ptr != NULL)
     {
 	nops_to_proc[curr_ptr->target_rank]++;
 	total_op_count++;
 	curr_ptr = curr_ptr->next;
     }
-    
-    MPIU_CHKLMEM_MALLOC(requests, MPID_Request **, 
-			(total_op_count+start_grp_size) * sizeof(MPID_Request*),
-			mpi_errno, "requests");
+
+    MPIU_INSTR_DURATION_INCR(wincomplete_issue,0,total_op_count);
+    MPIU_INSTR_DURATION_MAX(wincomplete_issue,1,total_op_count);
+
     /* We allocate a few extra requests because if there are no RMA
        ops to a target process, we need to send a 0-byte message just
        to decrement the completion counter. */
@@ -1096,19 +1319,10 @@
     MPIU_CHKLMEM_MALLOC(curr_ops_cnt, int *, comm_size*sizeof(int),
 			mpi_errno, "curr_ops_cnt");
     for (i=0; i<comm_size; i++) curr_ops_cnt[i] = 0;
-    
-    if (total_op_count != 0)
-    {
-	MPIU_CHKLMEM_MALLOC(dtype_infos, MPIDI_RMA_dtype_info *, 
-			    total_op_count*sizeof(MPIDI_RMA_dtype_info),
-			    mpi_errno, "dtype_infos");
-	MPIU_CHKLMEM_MALLOC(dataloops, void **, total_op_count*sizeof(void*),
-			    mpi_errno, "dataloops");
-	for (i=0; i<total_op_count; i++) dataloops[i] = NULL;
-    }
         
     i = 0;
-    curr_ptr = win_ptr->rma_ops_list;
+    prevNextPtr = &win_ptr->rma_ops_list_head;
+    curr_ptr = win_ptr->rma_ops_list_head;
     while (curr_ptr != NULL)
     {
 	/* The completion counter at the target is decremented only on 
@@ -1122,22 +1336,28 @@
 	    source_win_handle = MPI_WIN_NULL;
 	
 	target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
-	
+
+	curr_ptr->dataloop = 0;
 	switch (curr_ptr->type)
 	{
 	case (MPIDI_RMA_PUT):
 	case (MPIDI_RMA_ACCUMULATE):
 	    mpi_errno = MPIDI_CH3I_Send_rma_msg(curr_ptr, win_ptr,
 				source_win_handle, target_win_handle, 
-				&dtype_infos[i],
-				&dataloops[i], &requests[i]); 
+				&curr_ptr->dtype_info,
+				&curr_ptr->dataloop, &curr_ptr->request); 
 	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 	    break;
+	case MPIDI_RMA_ACC_CONTIG:
+	    mpi_errno = MPIDI_CH3I_Send_contig_acc_msg(curr_ptr, win_ptr,
+				       source_win_handle, target_win_handle, 
+				       &curr_ptr->request );
+	    break;
 	case (MPIDI_RMA_GET):
 	    mpi_errno = MPIDI_CH3I_Recv_rma_msg(curr_ptr, win_ptr,
 				source_win_handle, target_win_handle, 
-				&dtype_infos[i], 
-				&dataloops[i], &requests[i]);
+				&curr_ptr->dtype_info, 
+				&curr_ptr->dataloop, &curr_ptr->request);
 	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
 	    break;
 	default:
@@ -1145,8 +1365,23 @@
 	}
 	i++;
 	curr_ops_cnt[curr_ptr->target_rank]++;
-	curr_ptr = curr_ptr->next;
+	/* If the request is null, we can remove it immediately */
+	if (!curr_ptr->request) {
+	    if (curr_ptr->dataloop != NULL) {
+		MPIU_Free(curr_ptr->dataloop); /* allocated in send_rma_msg or 
+						  recv_rma_msg */
+	    }
+	    tmpptr       = curr_ptr->next;
+	    *prevNextPtr = tmpptr;
+	    MPIU_Free( curr_ptr );
+	    curr_ptr     = tmpptr;
+	}
+	else  {
+	    curr_ptr    = curr_ptr->next;
+	    prevNextPtr = &curr_ptr->next;
+	}
     }
+    MPIU_INSTR_DURATION_END(wincomplete_issue);
         
     /* If the start_group included some processes that did not end up
        becoming targets of  RMA operations from this process, we need
@@ -1167,6 +1402,7 @@
 	    MPIDI_CH3_Pkt_t upkt;
 	    MPIDI_CH3_Pkt_put_t *put_pkt = &upkt.put;
 	    MPIDI_VC_t * vc;
+	    MPID_Request *request;
 	    
 	    MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
 	    put_pkt->addr = NULL;
@@ -1180,80 +1416,111 @@
 	    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
 	    mpi_errno = MPIU_CALL(MPIDI_CH3,iStartMsg(vc, put_pkt,
 						      sizeof(*put_pkt),
-						      &requests[j]));
+						      &request));
 	    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
 	    if (mpi_errno != MPI_SUCCESS) {
 		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg" );
 	    }
+	    /* In the unlikely event that a request is returned (the message
+	       is not sent yet), add it to the list of pending operations */
+	    if (request) {
+		/* It's hard to use the automatic allocator here, as those 
+		   macros are optimized for a known maximum number of items. */
+		MPIDI_RMA_ops *new_ptr;
+		new_ptr = (MPIDI_RMA_ops *)MPIU_Malloc(sizeof(MPIDI_RMA_ops) );
+		/* --BEGIN ERROR HANDLING-- */
+		if (!new_ptr) {
+		    MPIU_CHKMEM_SETERR(mpi_errno,sizeof(MPIDI_RMA_ops),
+					"RMA operation entry");
+		    goto fn_fail;
+		}
+		/* --END ERROR HANDLING-- */
+		if (win_ptr->rma_ops_list_tail) 
+		    win_ptr->rma_ops_list_tail->next = new_ptr;
+		else
+		    win_ptr->rma_ops_list_head = new_ptr;
+		win_ptr->rma_ops_list_tail = new_ptr;
+		new_ptr->next     = NULL;
+		new_ptr->request  = request;
+		new_ptr->dataloop = 0;
+	    }
 	    j++;
 	    new_total_op_count++;
 	}
     }
-        
+
     if (new_total_op_count)
     {
 	MPID_Progress_state progress_state;
 	
 	done = 1;
+	MPIU_INSTR_DURATION_START(wincomplete_complete);
 	MPID_Progress_start(&progress_state);
-	while (new_total_op_count)
-	{
-	    for (i=0; i<new_total_op_count; i++)
-	    {
-		if (requests[i] != NULL)
-		{
-		    if (!MPID_Request_is_complete(requests[i]))
-		    {
-			done = 0;
-			break;
-		    }
-		    else
-		    {
-			mpi_errno = requests[i]->status.MPI_ERROR;
+	while (win_ptr->rma_ops_list_head) {
+	    prevNextPtr = &win_ptr->rma_ops_list_head;
+	    curr_ptr    = win_ptr->rma_ops_list_head;
+	    do {
+		if (MPID_Request_is_complete(curr_ptr->request)) {
+		    /* Once we find a complete request, we complete
+		       as many as possible until we find an incomplete
+		       or null request */
+		    do {
+			mpi_errno = curr_ptr->request->status.MPI_ERROR;
 			/* --BEGIN ERROR HANDLING-- */
-			if (mpi_errno != MPI_SUCCESS)
-			{
+			if (mpi_errno != MPI_SUCCESS) {
 			    MPID_Progress_end(&progress_state);
-			    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMArequest");
+			    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMAmessage");
 			}
 			/* --END ERROR HANDLING-- */
-			MPID_Request_release(requests[i]);
-			requests[i] = NULL;
+			MPID_Request_release(curr_ptr->request);
+			if (curr_ptr->dataloop != NULL) {
+			    MPIU_Free(curr_ptr->dataloop); /* allocated in send_rma_msg or 
+							      recv_rma_msg */
+			}
+			/* We can remove and free this rma op element */
+			tmpptr       = curr_ptr->next;
+			*prevNextPtr = tmpptr;
+			MPIU_Free( curr_ptr );
+			curr_ptr     = tmpptr;
 		    }
+		    while (curr_ptr &&
+			   MPID_Request_is_complete(curr_ptr->request));
+		    /* Once a request completes, we wait for another
+		       operation to arrive rather than check the
+		       rest of the requests.  */
+		    break;
 		}
+		else {
+		    prevNextPtr = &curr_ptr->next;
+		    curr_ptr    = curr_ptr->next;
+		    break;
+		}
+	    } while (curr_ptr);
+
+	    /* Wait for something to arrive */
+	    /* In some tests, the progress wait hung unless we first checked
+	       that there was an incomplete request. */
+	    curr_ptr = win_ptr->rma_ops_list_head;
+	    if (curr_ptr && !MPID_Request_is_complete(curr_ptr->request) ) {
+		MPIU_INSTR_DURATION_START(winfence_block);
+		mpi_errno = MPID_Progress_wait(&progress_state);
+		/* --BEGIN ERROR HANDLING-- */
+		if (mpi_errno != MPI_SUCCESS) {
+		    MPID_Progress_end(&progress_state);
+		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
+		}
+		/* --END ERROR HANDLING-- */
+		MPIU_INSTR_DURATION_END(winfence_block);
 	    }
-                
-	    if (done)
-	    {
-		break;
-	    }
+	} /* While list of rma operation is non-empty */
 	    
-	    mpi_errno = MPID_Progress_wait(&progress_state);
-	    done = 1;
-	} 
+	    
 	MPID_Progress_end(&progress_state);
     }
-        
-    if (total_op_count != 0)
-    {
-	for (i=0; i<total_op_count; i++)
-	{
-	    if (dataloops[i] != NULL)
-	    {
-		MPIU_Free(dataloops[i]);
-	    }
-	}
-    }
-        
-    /* free MPIDI_RMA_ops_list */
-    curr_ptr = win_ptr->rma_ops_list;
-    while (curr_ptr != NULL)
-    {
-	next_ptr = curr_ptr->next;
-	MPIU_Free(curr_ptr);
-	curr_ptr = next_ptr;
-    }
-    win_ptr->rma_ops_list = NULL;
+
+    MPIU_Assert( !win_ptr->rma_ops_list_head );
+    win_ptr->rma_ops_list_head = NULL;
+    win_ptr->rma_ops_list_tail = NULL;
     
     mpi_errno = MPIR_Group_free_impl(win_grp_ptr);
     if (mpi_errno) MPIU_ERR_POP(mpi_errno);
@@ -1291,6 +1558,7 @@
     {
 	MPID_Progress_state progress_state;
 	
+	MPIU_INSTR_DURATION_START(winwait_wait);
 	MPID_Progress_start(&progress_state);
 	while (win_ptr->my_counter)
 	{
@@ -1303,8 +1571,10 @@
 		return mpi_errno;
 	    }
 	    /* --END ERROR HANDLING-- */
+	    MPIU_INSTR_DURATION_INCR(winwait_wait,0,1)
 	}
 	MPID_Progress_end(&progress_state);
+	MPIU_INSTR_DURATION_END(winwait_wait);
     } 
 
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_WIN_WAIT);
@@ -1362,9 +1632,9 @@
 
     if (dest == MPI_PROC_NULL) goto fn_exit;
         
-    MPID_Comm_get_ptr( win_ptr->comm, comm_ptr );
+    comm_ptr = win_ptr->comm_ptr;
     
-    if (dest == comm_ptr->rank) {
+    if (dest == win_ptr->myrank) {
 	/* The target is this process itself. We must block until the lock
 	 * is acquired. */
             
@@ -1373,6 +1643,7 @@
 	{
 	    MPID_Progress_state progress_state;
 	    
+	    MPIU_INSTR_DURATION_START(winlock_getlocallock);
 	    MPID_Progress_start(&progress_state);
 	    while (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) 
 	    {
@@ -1385,6 +1656,7 @@
 		/* --END ERROR HANDLING-- */
 	    }
 	    MPID_Progress_end(&progress_state);
+	    MPIU_INSTR_DURATION_END(winlock_getlocallock);
 	}
 	/* local lock acquired. local puts, gets, accumulates will be done 
 	   directly without queueing. */
@@ -1392,11 +1664,13 @@
     
     else {
 	/* target is some other process. add the lock request to rma_ops_list */
-            
+	MPIU_INSTR_DURATION_START(rmaqueue_alloc);
 	MPIU_CHKPMEM_MALLOC(new_ptr, MPIDI_RMA_ops *, sizeof(MPIDI_RMA_ops), 
 			    mpi_errno, "RMA operation entry");
+	MPIU_INSTR_DURATION_END(rmaqueue_alloc);
             
-	win_ptr->rma_ops_list = new_ptr;
+	win_ptr->rma_ops_list_head = new_ptr;
+	win_ptr->rma_ops_list_tail = new_ptr;
         
 	new_ptr->next = NULL;  
 	new_ptr->type = MPIDI_RMA_LOCK;
@@ -1434,9 +1708,9 @@
 
     if (dest == MPI_PROC_NULL) goto fn_exit;
         
-    MPID_Comm_get_ptr( win_ptr->comm, comm_ptr );
+    comm_ptr = win_ptr->comm_ptr;
         
-    if (dest == comm_ptr->rank) {
+    if (dest == win_ptr->myrank) {
 	/* local lock. release the lock on the window, grant the next one
 	 * in the queue, and return. */
 	mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);
@@ -1445,7 +1719,7 @@
 	goto fn_exit;
     }
         
-    rma_op = win_ptr->rma_ops_list;
+    rma_op = win_ptr->rma_ops_list_head;
     
     /* win_lock was not called. return error */
     if ( (rma_op == NULL) || (rma_op->type != MPIDI_RMA_LOCK) ) { 
@@ -1461,7 +1735,8 @@
     if (rma_op->next == NULL) {
 	/* only win_lock called, no put/get/acc. Do nothing and return. */
 	MPIU_Free(rma_op);
-	win_ptr->rma_ops_list = NULL;
+	win_ptr->rma_ops_list_head = NULL;
+	win_ptr->rma_ops_list_tail = NULL;
 	goto fn_exit;
     }
         
@@ -1502,7 +1777,7 @@
     if (single_op_opt == 0) {
 	
 	/* Send a lock packet over to the target. wait for the lock_granted
-	 * reply. then do all the RMA ops. */ 
+	 * reply. Then do all the RMA ops. */ 
 	
 	MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
 	lock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
@@ -1535,6 +1810,7 @@
 	{
 	    MPID_Progress_state progress_state;
 	    
+	    MPIU_INSTR_DURATION_START(winunlock_getlock);
 	    MPID_Progress_start(&progress_state);
 	    while (win_ptr->lock_granted == 0)
 	    {
@@ -1547,6 +1823,7 @@
 		/* --END ERROR HANDLING-- */
 	    }
 	    MPID_Progress_end(&progress_state);
+	    MPIU_INSTR_DURATION_END(winunlock_getlock);
 	}
 	
 	/* Now do all the RMA operations */
@@ -1603,18 +1880,15 @@
 					    int *wait_for_rma_done_pkt)
 {
     int mpi_errno = MPI_SUCCESS, done, i, nops;
-    MPIDI_RMA_ops *curr_ptr, *next_ptr, **curr_ptr_ptr, *tmp_ptr;
+    MPIDI_RMA_ops *curr_ptr;
     MPID_Comm *comm_ptr;
-    MPID_Request **requests=NULL; /* array of requests */
-    MPIDI_RMA_dtype_info *dtype_infos=NULL;
-    void **dataloops=NULL;    /* to store dataloops for each datatype */
+    MPIDI_RMA_ops **prevNextPtr, *tmpptr;
     MPI_Win source_win_handle, target_win_handle;
-    MPIU_CHKLMEM_DECL(3);
     MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_DO_PASSIVE_TARGET_RMA);
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_DO_PASSIVE_TARGET_RMA);
 
-    if (win_ptr->rma_ops_list->lock_type == MPI_LOCK_EXCLUSIVE) {
+    if (win_ptr->rma_ops_list_head->lock_type == MPI_LOCK_EXCLUSIVE) {
         /* exclusive lock. no need to wait for rma done pkt at the end */
         *wait_for_rma_done_pkt = 0;
     }
@@ -1623,179 +1897,200 @@
            to the end of the list and do it last, in which case an rma done 
            pkt is not needed. If there is no get, rma done pkt is needed */
 
-        /* First check whether the last operation is a get. Skip the first op, 
-           which is a lock. */
-
-        curr_ptr = win_ptr->rma_ops_list->next;
-        while (curr_ptr->next != NULL) 
-            curr_ptr = curr_ptr->next;
-    
-        if (curr_ptr->type == MPIDI_RMA_GET) {
+        if (win_ptr->rma_ops_list_tail->type == MPIDI_RMA_GET) {
             /* last operation is a get. no need to wait for rma done pkt */
             *wait_for_rma_done_pkt = 0;
         }
         else {
             /* go through the list and move the first get operation 
-               (if there is one) to the end */
+               (if there is one) to the end.  Note that the first
+	       operation must be a lock, so we can skip it */
             
-            curr_ptr = win_ptr->rma_ops_list->next;
-            curr_ptr_ptr = &(win_ptr->rma_ops_list->next);
+            curr_ptr = win_ptr->rma_ops_list_head->next;
+            prevNextPtr = &(win_ptr->rma_ops_list_head->next);
             
             *wait_for_rma_done_pkt = 1;
             
             while (curr_ptr != NULL) {
                 if (curr_ptr->type == MPIDI_RMA_GET) {
+		    /* Found a GET, move it to the end */
                     *wait_for_rma_done_pkt = 0;
-                    *curr_ptr_ptr = curr_ptr->next;
-                    tmp_ptr = curr_ptr;
-                    while (curr_ptr->next != NULL)
-                        curr_ptr = curr_ptr->next;
-                    curr_ptr->next = tmp_ptr;
-                    tmp_ptr->next = NULL;
+		    win_ptr->rma_ops_list_tail->next = curr_ptr;
+		    *prevNextPtr = curr_ptr->next;
+		    curr_ptr->next = NULL;
+		    win_ptr->rma_ops_list_tail = curr_ptr;
                     break;
                 }
                 else {
-                    curr_ptr_ptr = &(curr_ptr->next);
-                    curr_ptr = curr_ptr->next;
+                    prevNextPtr = &(curr_ptr->next);
+                    curr_ptr    = curr_ptr->next;
                 }
             }
         }
     }
 
-    MPID_Comm_get_ptr( win_ptr->comm, comm_ptr );
+    comm_ptr = win_ptr->comm_ptr;
 
     /* Ignore the first op in the list because it is a win_lock and do
        the rest */
 
-    curr_ptr = win_ptr->rma_ops_list->next;
+    /* 
+       The list begins with a lock entry (there is no matching unlock
+       entry at the tail); the lock is not processed here, so skip it.
+    */
+
+    curr_ptr = win_ptr->rma_ops_list_head->next;
     nops = 0;
     while (curr_ptr != NULL) {
         nops++;
         curr_ptr = curr_ptr->next;
     }
+    
+    MPIU_INSTR_DURATION_START(winunlock_issue);
+    i = 0;
 
-    MPIU_CHKLMEM_MALLOC(requests, MPID_Request **, nops*sizeof(MPID_Request*),
-			mpi_errno, "requests");
-    MPIU_CHKLMEM_MALLOC(dtype_infos, MPIDI_RMA_dtype_info *, 
-			nops*sizeof(MPIDI_RMA_dtype_info),
-			mpi_errno, "dtype_infos");
-    MPIU_CHKLMEM_MALLOC(dataloops, void **, nops*sizeof(void*),
-			mpi_errno, "dataloops");
+    /* Remove the lock entry */
+    curr_ptr = win_ptr->rma_ops_list_head;
+    tmpptr       = curr_ptr->next;
+    win_ptr->rma_ops_list_head = tmpptr;
+    MPIU_Free( curr_ptr );
 
-    for (i=0; i<nops; i++)
-    {
-        dataloops[i] = NULL;
-    }
-    
-    i = 0;
-    curr_ptr = win_ptr->rma_ops_list->next;
+    prevNextPtr = &win_ptr->rma_ops_list_head;
+    curr_ptr    = win_ptr->rma_ops_list_head;
     target_win_handle = win_ptr->all_win_handles[curr_ptr->target_rank];
     while (curr_ptr != NULL)
     {
         /* To indicate the last RMA operation, we pass the
            source_win_handle only on the last operation. Otherwise, 
            we pass MPI_WIN_NULL. */
-        if (i == nops - 1)
+	/* Could also be curr_ptr->next == NULL */
+        if (/*i == nops - 1*/!curr_ptr->next)
             source_win_handle = win_ptr->handle;
         else 
             source_win_handle = MPI_WIN_NULL;
         
+	curr_ptr->dataloop = 0;
         switch (curr_ptr->type)
         {
         case (MPIDI_RMA_PUT):  /* same as accumulate */
         case (MPIDI_RMA_ACCUMULATE):
             win_ptr->pt_rma_puts_accs[curr_ptr->target_rank]++;
             mpi_errno = MPIDI_CH3I_Send_rma_msg(curr_ptr, win_ptr,
-                         source_win_handle, target_win_handle, &dtype_infos[i],
-                                                &dataloops[i], &requests[i]);
+				source_win_handle, target_win_handle, 
+				&curr_ptr->dtype_info,
+                                &curr_ptr->dataloop, &curr_ptr->request);
 	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
             break;
+	case MPIDI_RMA_ACC_CONTIG:
+            win_ptr->pt_rma_puts_accs[curr_ptr->target_rank]++;
+	    mpi_errno = MPIDI_CH3I_Send_contig_acc_msg(curr_ptr, win_ptr,
+				       source_win_handle, target_win_handle, 
+				       &curr_ptr->request );
+	    break;
         case (MPIDI_RMA_GET):
             mpi_errno = MPIDI_CH3I_Recv_rma_msg(curr_ptr, win_ptr,
-                         source_win_handle, target_win_handle, &dtype_infos[i],
-                                                &dataloops[i], &requests[i]);
+                         source_win_handle, target_win_handle, 
+				&curr_ptr->dtype_info,
+                                &curr_ptr->dataloop, &curr_ptr->request);
 	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
             break;
         default:
 	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winInvalidOp");
         }
         i++;
-        curr_ptr = curr_ptr->next;
+	/* If the request is null, we can remove it immediately */
+	if (!curr_ptr->request) {
+	    if (curr_ptr->dataloop != NULL) {
+		MPIU_Free(curr_ptr->dataloop); /* allocated in send_rma_msg or 
+						  recv_rma_msg */
+	    }
+	    tmpptr       = curr_ptr->next;
+	    *prevNextPtr = tmpptr;
+	    MPIU_Free( curr_ptr );
+	    curr_ptr     = tmpptr;
+	}
+	else  {
+	    curr_ptr    = curr_ptr->next;
+	    prevNextPtr = &curr_ptr->next;
+	}
     }
+    MPIU_INSTR_DURATION_END(winunlock_issue);
     
     if (nops)
     {
 	MPID_Progress_state progress_state;
 	
 	done = 1;
+	MPIU_INSTR_DURATION_START(winunlock_complete);
 	MPID_Progress_start(&progress_state);
-	while (nops)
-	{
-	    for (i=0; i<nops; i++)
-	    {
-		if (requests[i] != NULL)
-		{
-		    if (!MPID_Request_is_complete(requests[i]))
-		    {
-			done = 0;
-			break;
-		    }
-		    else
-		    {
-			mpi_errno = requests[i]->status.MPI_ERROR;
+	while (win_ptr->rma_ops_list_head) {
+	    prevNextPtr = &win_ptr->rma_ops_list_head;
+	    curr_ptr    = win_ptr->rma_ops_list_head;
+	    do {
+		if (MPID_Request_is_complete(curr_ptr->request)) {
+		    /* Once we find a complete request, we complete
+		       as many as possible until we find an incomplete
+		       or null request */
+		    do {
+			mpi_errno = curr_ptr->request->status.MPI_ERROR;
 			/* --BEGIN ERROR HANDLING-- */
-			if (mpi_errno != MPI_SUCCESS)
-			{
+			if (mpi_errno != MPI_SUCCESS) {
 			    MPID_Progress_end(&progress_state);
 			    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winRMAmessage");
 			}
 			/* --END ERROR HANDLING-- */
-			/* if origin datatype was a derived
-			   datatype, it will get freed when the
-			   request gets freed. */ 
-			MPID_Request_release(requests[i]);
-			requests[i] = NULL;
+			MPID_Request_release(curr_ptr->request);
+			if (curr_ptr->dataloop != NULL) {
+			    MPIU_Free(curr_ptr->dataloop); /* allocated in send_rma_msg or 
+							      recv_rma_msg */
+			}
+			/* We can remove and free this rma op element */
+			tmpptr       = curr_ptr->next;
+			*prevNextPtr = tmpptr;
+			MPIU_Free( curr_ptr );
+			curr_ptr     = tmpptr;
 		    }
+		    while (curr_ptr &&
+			   MPID_Request_is_complete(curr_ptr->request));
+		    /* Once a request completes, we wait for another
+		       operation to arrive rather than check the
+		       rest of the requests.  */
+		    break;
 		}
+		else {
+		    prevNextPtr = &curr_ptr->next;
+		    curr_ptr    = curr_ptr->next;
+		    break;
+		}
+	    } while (curr_ptr);
+	    
+	    /* Wait for something to arrive */
+	    /* In some tests, the progress wait hung unless we first checked
+	       that there was an incomplete request. */
+	    curr_ptr = win_ptr->rma_ops_list_head;
+	    if (curr_ptr && !MPID_Request_is_complete(curr_ptr->request) ) {
+		MPIU_INSTR_DURATION_START(winfence_block);
+		mpi_errno = MPID_Progress_wait(&progress_state);
+		/* --BEGIN ERROR HANDLING-- */
+		if (mpi_errno != MPI_SUCCESS) {
+		    MPID_Progress_end(&progress_state);
+		    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
+		}
+		/* --END ERROR HANDLING-- */
+		MPIU_INSTR_DURATION_END(winfence_block);
 	    }
-	
-	    if (done) 
-	    {
-		break;
-	    }
-	
-	    mpi_errno = MPID_Progress_wait(&progress_state);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (mpi_errno != MPI_SUCCESS) {
-		MPID_Progress_end(&progress_state);
-		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
-	    }
-	    /* --END ERROR HANDLING-- */
-	    done = 1;
-	}
+	} /* While list of rma operation is non-empty */
+	    
 	MPID_Progress_end(&progress_state);
     } 
     
-    for (i=0; i<nops; i++)
-    {
-        if (dataloops[i] != NULL)
-        {
-            MPIU_Free(dataloops[i]);
-        }
-    }
-    
-    /* free MPIDI_RMA_ops_list */
-    curr_ptr = win_ptr->rma_ops_list;
-    while (curr_ptr != NULL)
-    {
-        next_ptr = curr_ptr->next;
-        MPIU_Free(curr_ptr);
-        curr_ptr = next_ptr;
-    }
-    win_ptr->rma_ops_list = NULL;
 
+    MPIU_Assert( !win_ptr->rma_ops_list_head );
+
+    win_ptr->rma_ops_list_head = NULL;
+    win_ptr->rma_ops_list_tail = NULL;
+
  fn_exit:
-    MPIU_CHKLMEM_FREEALL();
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_DO_PASSIVE_TARGET_RMA);
     return mpi_errno;
     /* --BEGIN ERROR HANDLING-- */
@@ -1829,9 +2124,9 @@
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_PUT_OR_ACC);
 
-    lock_type = win_ptr->rma_ops_list->lock_type;
+    lock_type = win_ptr->rma_ops_list_head->lock_type;
 
-    rma_op = win_ptr->rma_ops_list->next;
+    rma_op = win_ptr->rma_ops_list_head->next;
 
     win_ptr->pt_rma_puts_accs[rma_op->target_rank]++;
 
@@ -1871,8 +2166,30 @@
         iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) lock_accum_unlock_pkt;
         iov[0].MPID_IOV_LEN = sizeof(*lock_accum_unlock_pkt);
     }
+    else if (rma_op->type == MPIDI_RMA_ACC_CONTIG) {
+        MPIDI_Pkt_init(lock_accum_unlock_pkt, MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK);
+        lock_accum_unlock_pkt->target_win_handle = 
+            win_ptr->all_win_handles[rma_op->target_rank];
+        lock_accum_unlock_pkt->source_win_handle = win_ptr->handle;
+        lock_accum_unlock_pkt->lock_type = lock_type;
 
-    MPID_Comm_get_ptr(win_ptr->comm, comm_ptr);
+        lock_accum_unlock_pkt->addr = 
+            (char *) win_ptr->base_addrs[rma_op->target_rank] +
+            win_ptr->disp_units[rma_op->target_rank] * rma_op->target_disp;
+        
+        lock_accum_unlock_pkt->count = rma_op->target_count;
+        lock_accum_unlock_pkt->datatype = rma_op->target_datatype;
+        lock_accum_unlock_pkt->op = rma_op->op;
+
+        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST) lock_accum_unlock_pkt;
+        iov[0].MPID_IOV_LEN = sizeof(*lock_accum_unlock_pkt);
+    }
+    else {
+	printf( "expected short accumulate...\n" );
+	/* */
+    }
+
+    comm_ptr = win_ptr->comm_ptr;
     MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
 
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(rma_op->origin_datatype, predefined);
@@ -1974,9 +2291,10 @@
     }
 
     /* free MPIDI_RMA_ops_list */
-    MPIU_Free(win_ptr->rma_ops_list->next);
-    MPIU_Free(win_ptr->rma_ops_list);
-    win_ptr->rma_ops_list = NULL;
+    MPIU_Free(win_ptr->rma_ops_list_head->next);
+    MPIU_Free(win_ptr->rma_ops_list_head);
+    win_ptr->rma_ops_list_head = NULL;
+    win_ptr->rma_ops_list_tail = NULL;
 
  fn_fail:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_PUT_OR_ACC);
@@ -2004,9 +2322,9 @@
 
     MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GET);
 
-    lock_type = win_ptr->rma_ops_list->lock_type;
+    lock_type = win_ptr->rma_ops_list_head->lock_type;
 
-    rma_op = win_ptr->rma_ops_list->next;
+    rma_op = win_ptr->rma_ops_list_head->next;
 
     /* create a request, store the origin buf, cnt, datatype in it,
        and pass a handle to it in the get packet. When the get
@@ -2048,7 +2366,7 @@
     lock_get_unlock_pkt->datatype = rma_op->target_datatype;
     lock_get_unlock_pkt->request_handle = rreq->handle;
 
-    MPID_Comm_get_ptr(win_ptr->comm, comm_ptr);
+    comm_ptr = win_ptr->comm_ptr;
     MPIDI_Comm_get_vc_set_active(comm_ptr, rma_op->target_rank, &vc);
 
     MPIU_THREAD_CS_ENTER(CH3COMM,vc);
@@ -2095,10 +2413,12 @@
     MPID_Request_release(rreq);
 
     /* free MPIDI_RMA_ops_list */
-    MPIU_Free(win_ptr->rma_ops_list->next);
-    MPIU_Free(win_ptr->rma_ops_list);
-    win_ptr->rma_ops_list = NULL;
+    MPIU_Free(win_ptr->rma_ops_list_head->next);
+    MPIU_Free(win_ptr->rma_ops_list_head);
+    win_ptr->rma_ops_list_head = NULL;
+    win_ptr->rma_ops_list_tail = NULL;
 
+
  fn_fail:
     MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GET);
     return mpi_errno;
@@ -2239,7 +2559,6 @@
         {
             mpi_errno = MPIDI_CH3_ReqHandler_PutAccumRespComplete(vc, req, &complete);
             if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-            
             if (complete)
             {
                 *rreqp = NULL;
@@ -2467,7 +2786,8 @@
     MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
     
     MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received accumulate pkt");
-    
+
+    MPIU_INSTR_DURATION_START(rmapkt_acc);
     data_len = *buflen - sizeof(MPIDI_CH3_Pkt_t);
     data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);
     
@@ -2484,6 +2804,7 @@
     MPIDI_CH3I_DATATYPE_IS_PREDEFINED(accum_pkt->datatype, predefined);
     if (predefined)
     {
+	MPIU_INSTR_DURATION_START(rmapkt_acc_predef);
 	MPIDI_Request_set_type(req, MPIDI_REQUEST_TYPE_ACCUM_RESP);
 	req->dev.datatype = accum_pkt->datatype;
 
@@ -2530,9 +2851,11 @@
                 if (complete)
                 {
                     *rreqp = NULL;
+		    MPIU_INSTR_DURATION_END(rmapkt_acc_predef);
                     goto fn_exit;
                 }
             }
+	    MPIU_INSTR_DURATION_END(rmapkt_acc_predef);
 	}
     }
     else
@@ -2590,6 +2913,7 @@
     }
 
  fn_exit:
+    MPIU_INSTR_DURATION_END(rmapkt_acc);
     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE);
     return mpi_errno;
  fn_fail:
@@ -2597,7 +2921,110 @@
 
 }
 
+/* Special accumulate for short data items entirely within the packet: the data in accum_pkt->data is combined into accum_pkt->addr right here and no request is handed back (*rreqp is set to NULL). Returns mpi_errno. */
 #undef FUNCNAME
+#define FUNCNAME MPIDI_CH3_PktHandler_Accumulate_Immed
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPIDI_CH3_PktHandler_Accumulate_Immed( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
+					   MPIDI_msg_sz_t *buflen, 
+					   MPID_Request **rreqp )
+{
+    MPIDI_CH3_Pkt_accum_immed_t * accum_pkt = &pkt->accum_immed;
+    MPID_Win *win_ptr;
+    MPI_Aint extent;
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE_IMMED);
+    
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE_IMMED);
+
+    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"received accumulate immedidate pkt");
+
+    MPIU_INSTR_DURATION_START(rmapkt_acc_immed);
+
+    /* Report the number of bytes processed in this function: only the packet */
+    /* itself, because data_len == 0 (the payload is all within the packet) */
+    *buflen = sizeof(MPIDI_CH3_Pkt_t);
+    *rreqp  = NULL;
+
+    MPID_Datatype_get_extent_macro(accum_pkt->datatype, extent); 
+    
+    /* size == 0 should never happen; if it does, nothing is applied and the window bookkeeping in the else branch is skipped as well */
+    if (accum_pkt->count == 0 || extent == 0) {
+	;
+    }
+    else {
+	/* Data is already present in the packet (accum_pkt->data) */
+	if (accum_pkt->op == MPI_REPLACE) {
+	    /* no datatypes required: MPI_REPLACE is done as a copy of count * extent bytes */
+	    int len = accum_pkt->count * extent;
+	    /* FIXME: use immediate copy because this is short. NOTE(review): len narrows an MPI_Aint product to int */
+	    MPIUI_Memcpy( accum_pkt->addr, accum_pkt->data, len );
+	}
+	else {
+	    if (HANDLE_GET_KIND(accum_pkt->op) == HANDLE_KIND_BUILTIN) {
+		MPI_User_function *uop;
+		/* get the function by indexing into the op table: low 4 bits of the op handle, minus one */
+		uop = MPIR_Op_table[((accum_pkt->op)&0xf) - 1];
+		(*uop)(accum_pkt->data, accum_pkt->addr,
+		       &(accum_pkt->count), &(accum_pkt->datatype));
+	    }
+	    else {  /* ops that are not builtin are reported as MPI_ERR_OP; presumably the macro jumps to fn_fail */
+		MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OP, "**opnotpredefined",
+				     "**opnotpredefined %d", accum_pkt->op );
+	    }
+	}
+
+	/* There are additional steps to take if this is a passive 
+	   target RMA or the last operation from the source */
+	
+	/* Here is the code executed in PutAccumRespComplete after the
+	   accumulation operation */
+	MPID_Win_get_ptr(accum_pkt->target_win_handle, win_ptr);
+	
+	/* if passive target RMA (a lock is currently held on the window), increment counter */
+	if (win_ptr->current_lock_type != MPID_LOCK_NONE)
+	    win_ptr->my_pt_rma_puts_accs++;
+	
+	if (accum_pkt->source_win_handle != MPI_WIN_NULL) {
+	    /* Last RMA operation from source. If active
+	       target RMA, decrement window counter. If
+	       passive target RMA, release lock on window and
+	       grant next lock in the lock queue if there is
+	       any. If it's a shared lock we also need to send an
+	       ack to the source; the lock-put-unlock test is
+	       hard-wired to false here (0 == 1). */ 
+	    if (win_ptr->current_lock_type == MPID_LOCK_NONE) {
+		/* FIXME: MT: this has to be done atomically */
+		win_ptr->my_counter -= 1;
+		MPIDI_CH3_Progress_signal_completion();
+	    }
+	    else {
+		if ((win_ptr->current_lock_type == MPI_LOCK_SHARED) ||
+		    (/*rreq->dev.single_op_opt*/ 0 == 1)) {
+		    mpi_errno = MPIDI_CH3I_Send_pt_rma_done_pkt(vc, 
+					accum_pkt->source_win_handle);
+		    if (mpi_errno) {
+			    MPIU_ERR_POP(mpi_errno);
+		    }
+		}
+		mpi_errno = MPIDI_CH3I_Release_lock(win_ptr);  /* result is returned to the caller through fn_exit */
+	    }
+	}
+
+	goto fn_exit;
+    }
+
+ fn_exit:
+    MPIU_INSTR_DURATION_END(rmapkt_acc_immed);
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_PKTHANDLER_ACCUMULATE_IMMED);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+
+}
+
+#undef FUNCNAME
 #define FUNCNAME MPIDI_CH3_PktHandler_Lock
 #undef FCNAME
 #define FCNAME MPIDI_QUOTE(FUNCNAME)
@@ -2631,6 +3058,9 @@
 	
 	/* FIXME: MT: This may need to be done atomically. */
 	
+	/* FIXME: Since we need to add to the tail of the list,
+	   we should maintain a tail pointer rather than traversing the 
+	   list each time to find the tail. */
 	curr_ptr = (MPIDI_Win_lock_queue *) win_ptr->lock_queue;
 	prev_ptr = curr_ptr;
 	while (curr_ptr != NULL)
@@ -2639,7 +3069,9 @@
 	    curr_ptr = curr_ptr->next;
 	}
 	
+	MPIU_INSTR_DURATION_START(lockqueue_alloc);
 	new_ptr = (MPIDI_Win_lock_queue *) MPIU_Malloc(sizeof(MPIDI_Win_lock_queue));
+	MPIU_INSTR_DURATION_END(lockqueue_alloc);
 	if (!new_ptr) {
 	    MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**nomem","**nomem %s",
 				 "MPIDI_Win_lock_queue");
@@ -2713,7 +3145,9 @@
 	/* queue the information */
 	MPIDI_Win_lock_queue *curr_ptr, *prev_ptr, *new_ptr;
 	
+	MPIU_INSTR_DURATION_START(lockqueue_alloc);
 	new_ptr = (MPIDI_Win_lock_queue *) MPIU_Malloc(sizeof(MPIDI_Win_lock_queue));
+	MPIU_INSTR_DURATION_END(lockqueue_alloc);
 	if (!new_ptr) {
 	    MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**nomem","**nomem %s",
 				 "MPIDI_Win_lock_queue");
@@ -2874,7 +3308,7 @@
 	MPIDI_Win_lock_queue *curr_ptr, *prev_ptr, *new_ptr;
 	
 	/* FIXME: MT: This may need to be done atomically. */
-	
+
 	curr_ptr = (MPIDI_Win_lock_queue *) win_ptr->lock_queue;
 	prev_ptr = curr_ptr;
 	while (curr_ptr != NULL)
@@ -2883,7 +3317,9 @@
 	    curr_ptr = curr_ptr->next;
 	}
 	
+	MPIU_INSTR_DURATION_START(lockqueue_alloc);
 	new_ptr = (MPIDI_Win_lock_queue *) MPIU_Malloc(sizeof(MPIDI_Win_lock_queue));
+	MPIU_INSTR_DURATION_END(lockqueue_alloc);
 	if (!new_ptr) {
 	    MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**nomem","**nomem %s",
 				 "MPIDI_Win_lock_queue");
@@ -2961,7 +3397,9 @@
     
     /* queue the information */
     
+    MPIU_INSTR_DURATION_START(lockqueue_alloc);
     new_ptr = (MPIDI_Win_lock_queue *) MPIU_Malloc(sizeof(MPIDI_Win_lock_queue));
+    MPIU_INSTR_DURATION_END(lockqueue_alloc);
     if (!new_ptr) {
 	MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**nomem","**nomem %s",
 			     "MPIDI_Win_lock_queue");
@@ -3219,6 +3657,19 @@
     /*MPIU_DBG_PRINTF((" win_ptr ...... 0x%08X\n", pkt->accum.win_ptr));*/
     return MPI_SUCCESS;
 }
+int MPIDI_CH3_PktPrint_Accum_Immed( FILE *fp, MPIDI_CH3_Pkt_t *pkt )
+{   /* Debug dump of the fields of pkt->accum_immed; always returns MPI_SUCCESS. */
+    MPIU_DBG_PRINTF((" type ......... MPIDI_CH3_PKT_ACCUM_IMMED\n"));
+    MPIU_DBG_PRINTF((" addr ......... %p\n", pkt->accum_immed.addr));
+    MPIU_DBG_PRINTF((" count ........ %d\n", pkt->accum_immed.count));
+    MPIU_DBG_PRINTF((" datatype ..... 0x%08X\n", pkt->accum_immed.datatype));
+    MPIU_DBG_PRINTF((" op ........... 0x%08X\n", pkt->accum_immed.op));
+    MPIU_DBG_PRINTF((" target ....... 0x%08X\n", pkt->accum_immed.target_win_handle));
+    MPIU_DBG_PRINTF((" source ....... 0x%08X\n", pkt->accum_immed.source_win_handle));
+    /*MPIU_DBG_PRINTF((" win_ptr ...... 0x%08X\n", pkt->accum.win_ptr));*/
+    fflush(stdout);  /* fp is not referenced in this function; stdout is flushed explicitly */
+    return MPI_SUCCESS;
+}
 int MPIDI_CH3_PktPrint_Lock( FILE *fp, MPIDI_CH3_Pkt_t *pkt )
 {
     MPIU_DBG_PRINTF((" type ......... MPIDI_CH3_PKT_LOCK\n"));

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_abort.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_abort.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_abort.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -80,9 +80,9 @@
 	}
     }
     
-    MPIDU_Ftb_publish(MPIDU_FTB_EV_ABORT, error_msg);
+    MPIDU_Ftb_publish_me(MPIDU_FTB_EV_ABORT);
     MPIDU_Ftb_finalize();
-
+    
 #ifdef HAVE_DEBUGGER_SUPPORT
     MPIR_DebuggerSetAborting( error_msg );
 #endif

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_iprobe.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_iprobe.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_iprobe.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -6,6 +6,9 @@
 
 #include "mpidimpl.h"
 
+/* Optional hook, NULL by default; when set (and ENABLE_COMM_OVERRIDES is defined) MPID_Iprobe calls it for MPI_ANY_SOURCE probes and expects an MPI error code back, with the match result in *flag. */
+int (*MPIDI_Anysource_iprobe_fn)(int tag, MPID_Comm * comm, int context_offset, int *flag,
+                                 MPI_Status * status) = NULL;
 #undef FUNCNAME
 #define FUNCNAME MPID_Iprobe
 #undef FCNAME
@@ -29,6 +32,52 @@
 	goto fn_exit;
     }
 
+#ifdef ENABLE_COMM_OVERRIDES
+    if (MPIDI_Anysource_iprobe_fn) {
+        if (source == MPI_ANY_SOURCE) {
+            /* if it's anysource, check shm, then check the network.
+               If still not found, call progress, and check again. */
+
+            /* check shm */
+            MPIU_THREAD_CS_ENTER(MSGQUEUE,);
+            found = MPIDI_CH3U_Recvq_FU(source, tag, context, status);
+            MPIU_THREAD_CS_EXIT(MSGQUEUE,);
+            if (!found) {
+                /* not found, check network */
+                mpi_errno = MPIDI_Anysource_iprobe_fn(tag, comm, context_offset, &found, status);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                if (!found) {
+                    /* still not found, make some progress */
+                    mpi_errno = MPIDI_CH3_Progress_poke();
+                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                    /* check shm again */
+                    MPIU_THREAD_CS_ENTER(MSGQUEUE,);
+                    found = MPIDI_CH3U_Recvq_FU(source, tag, context, status);
+                    MPIU_THREAD_CS_EXIT(MSGQUEUE,);
+                    if (!found) {
+                        /* check network again */
+                        mpi_errno = MPIDI_Anysource_iprobe_fn(tag, comm, context_offset, &found, status);
+                        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                    }
+                }
+            }
+            *flag = found;
+            goto fn_exit;
+        } else {
+            /* it's not anysource, check if the netmod has overridden iprobe; the guard must test the same pointer that is called */
+            MPIDI_VC_t * vc;
+            MPIDI_Comm_get_vc_set_active(comm, source, &vc);
+            if (vc->comm_ops && vc->comm_ops->iprobe) {
+                mpi_errno = vc->comm_ops->iprobe(vc, source, tag, comm, context_offset, &found, status);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                *flag = found;
+                goto fn_exit;
+            }
+            /* no iprobe override for this vc: fall-through to shm case */
+        }
+    }
+#endif
+    
     /* FIXME: The routine CH3U_Recvq_FU is used only by the probe functions;
        it should atomically return the flag and status rather than create 
        a request.  Note that in some cases it will be possible to 
@@ -55,4 +104,6 @@
  fn_exit:    
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_IPROBE);
     return mpi_errno;
+ fn_fail:
+    goto fn_exit;
 }

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_probe.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_probe.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/src/mpid_probe.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -26,6 +26,55 @@
 	goto fn_exit;
     }
 
+#ifdef ENABLE_COMM_OVERRIDES
+    if (MPIDI_Anysource_iprobe_fn) {
+        if (source == MPI_ANY_SOURCE) {
+            /* if it's anysource, loop while checking the shm recv
+               queue and iprobing the netmod, then do a progress
+               test to make some progress. */
+            do {
+                int found;
+                
+                MPIU_THREAD_CS_ENTER(MSGQUEUE,);
+                found = MPIDI_CH3U_Recvq_FU(source, tag, context, status);
+                MPIU_THREAD_CS_EXIT(MSGQUEUE,);
+                if (found) break;
+
+                mpi_errno = MPIDI_Anysource_iprobe_fn(tag, comm, context_offset, &found, status);
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                if (found) break;
+
+                MPIU_THREAD_CS_YIELD(ALLFUNC,);
+                
+                mpi_errno = MPIDI_CH3_Progress_test();
+                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            } while (1);
+            goto fn_exit;
+        } else {
+            /* it's not anysource, see if this is for the netmod */
+            MPIDI_VC_t * vc;
+            MPIDI_Comm_get_vc_set_active(comm, source, &vc);
+            
+            if (vc->comm_ops && vc->comm_ops->iprobe) {
+                /* netmod has overridden iprobe: poll it until a match shows up, then leave; a match must not drop into the shm probe loop below */
+                do {
+                    int found;
+                    
+                    mpi_errno = vc->comm_ops->iprobe(vc, source, tag, comm, context_offset, &found,
+                                                     status);
+                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                    if (found) goto fn_exit;
+                    
+                    MPIU_THREAD_CS_YIELD(ALLFUNC,);
+                    
+                    mpi_errno = MPIDI_CH3_Progress_test();
+                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+                } while (1);
+            }
+            /* no iprobe override for this vc: fall-through to shm case */
+        }
+    }
+#endif
     MPIDI_CH3_Progress_start(&progress_state);
     do
     {
@@ -44,4 +93,6 @@
  fn_exit:
     MPIDI_FUNC_EXIT(MPID_STATE_MPID_PROBE);
     return mpi_errno;
+ fn_fail:
+    goto fn_exit;
 }

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/util/ftb/ftb.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/util/ftb/ftb.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/ch3/util/ftb/ftb.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -93,6 +93,36 @@
     return;
 }
 
+/* Publish event_name with a payload that identifies the process group id and rank of the given vc. */
+#undef FUNCNAME
+#define FUNCNAME MPIDU_Ftb_publish_vc
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+void MPIDU_Ftb_publish_vc(const char *event_name, struct MPIDI_VC *vc)
+{
+    char id_payload[FTB_MAX_PAYLOAD_DATA] = "";
+
+    if (vc && vc->pg) { /* pg can be null for temp VCs (dynamic processes); the payload then stays empty */
+        MPIU_Snprintf(id_payload, sizeof(id_payload), "[id: {%s:{%d}}]", (char*)vc->pg->id, vc->pg_rank);
+    }
+    MPIDU_Ftb_publish(event_name, id_payload);
+}
+
+/* Publish event_name with a payload that identifies this process by its own process group id and rank. */
+#undef FUNCNAME
+#define FUNCNAME MPIDU_Ftb_publish_me
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+void MPIDU_Ftb_publish_me(const char *event_name)
+{
+    char id_payload[FTB_MAX_PAYLOAD_DATA] = "";
+
+    MPIU_Snprintf(id_payload, sizeof(id_payload), "[id: {%s:{%d}}]",
+                  (char *)MPIDI_Process.my_pg->id, MPIDI_Process.my_pg_rank);
+    MPIDU_Ftb_publish(event_name, id_payload);
+}
+
+
 /* MPIDU_Ftb_finalize has no return code for the same reasons that
    MPIDU_Ftb_publish doesn't. */
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/mpid/dcmfd/src/pt2pt/mpidi_irecv.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/mpid/dcmfd/src/pt2pt/mpidi_irecv.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/mpid/dcmfd/src/pt2pt/mpidi_irecv.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -33,11 +33,12 @@
   if (rreq == NULL)
     {
       *request = rreq;
+      /* MPI_ERR_NO_MEM is specific to MPI_ALLOC_MEM */
       mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                        MPIR_ERR_FATAL,
                                        func,
                                        __LINE__,
-                                       MPI_ERR_NO_MEM,
+                                       MPI_ERR_OTHER,
                                        "**nomem",
                                        0);
       return mpi_errno;


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/mpl/src/mplstr.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt2/src/mpl/src/string/mplstr.c:5182,5196,5198
/mpich2/branches/dev/ftb/src/mpl/src/mplstr.c:5661-5730
/mpich2/branches/dev/lapi/src/mpl/src/mplstr.c:5817
/mpich2/branches/release/mpich2-1.1.1/src/mpl/src/string/mplstr.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpl/src/string/mplstr.c:5406
   + /mpich2/branches/dev/ckpt2/src/mpl/src/string/mplstr.c:5182,5196,5198
/mpich2/branches/dev/ftb/src/mpl/src/mplstr.c:5661-5730
/mpich2/branches/dev/lapi/src/mpl/src/mplstr.c:5817
/mpich2/branches/release/mpich2-1.1.1/src/mpl/src/string/mplstr.c:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/mpl/src/string/mplstr.c:5406
/mpich2/trunk/src/mpl/src/mplstr.c:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra:5406
/mpich2/trunk/src/pm/hydra:7355-7359*,7366-7367*,7371-7402*,7406-7409*,7411-7416*


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/Makefile.am
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/Makefile.am:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/Makefile.am:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/Makefile.am:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/Makefile.am:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/Makefile.am:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/Makefile.am:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/Makefile.am:5406
/mpich2/trunk/src/pm/hydra/Makefile.am:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/README
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/README:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/README:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/README:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/README:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/README:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/README:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/README:5406
/mpich2/trunk/src/pm/hydra/README:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/autogen.sh
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/autogen.sh:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/autogen.sh:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/autogen.sh:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/autogen.sh:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/autogen.sh:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/autogen.sh:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/autogen.sh:5406
/mpich2/trunk/src/pm/hydra/autogen.sh:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Modified: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/configure.in
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/configure.in	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/configure.in	2010-11-06 15:02:44 UTC (rev 7418)
@@ -604,6 +604,24 @@
 
 
 #########################################################################
+# FTB integration
+#########################################################################
+# FTB requires pthreads support; add pthreads to LIBS before trying to
+# find FTB
+PAC_PUSH_FLAG([LIBS])
+PAC_PREPEND_FLAG([-lpthread],[LIBS])
+PAC_SET_HEADER_LIB_PATH(ftb)
+PAC_POP_FLAG([LIBS])
+PAC_CHECK_HEADER_LIB(ftb, libftb.h, ftb, FTB_Connect, have_ftb=yes, have_ftb=no)
+if test "$have_ftb" = "yes" ; then
+   # FIXME: We need to test to make sure FTB works correctly
+   PAC_APPEND_FLAG([-lpthread],[LIBS])
+   AC_DEFINE(ENABLE_FTB,1,[Define if FTB is enabled])
+fi
+AM_CONDITIONAL([hydra_have_ftb], [test "${have_ftb}" = "yes"])
+
+
+#########################################################################
 # Signals and friends
 #########################################################################
 AC_CHECK_FUNCS(strsignal)


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/configure.in
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/configure.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/configure.in:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/configure.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/configure.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/configure.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/configure.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/configure.in:5406
/mpich2/trunk/src/pm/hydra/configure.in:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/examples
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/examples:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/examples:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/examples:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/examples:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/examples:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/examples:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/examples:5406
/mpich2/trunk/src/pm/hydra/examples:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/hydra-doxygen.cfg.in
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/hydra-doxygen.cfg.in:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/hydra-doxygen.cfg.in:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/hydra-doxygen.cfg.in:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/hydra-doxygen.cfg.in:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/hydra-doxygen.cfg.in:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/hydra-doxygen.cfg.in:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/hydra-doxygen.cfg.in:5406
/mpich2/trunk/src/pm/hydra/hydra-doxygen.cfg.in:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/include
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/include:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/include:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/include:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/include:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/include:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/include:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/include:5406
/mpich2/trunk/src/pm/hydra/include:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/mpich2prereq
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/mpich2prereq:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/mpich2prereq:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/mpich2prereq:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/mpich2prereq:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/mpich2prereq:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/mpich2prereq:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/mpich2prereq:5406
/mpich2/trunk/src/pm/hydra/mpich2prereq:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/pm:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/pm:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/pm:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/pm:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/pm:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/pm:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/pm:5406
/mpich2/trunk/src/pm/hydra/pm:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Modified: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -9,6 +9,7 @@
 #include "pmip.h"
 #include "demux.h"
 #include "bind.h"
+#include "hydt_ftb.h"
 
 struct HYD_pmcd_pmip HYD_pmcd_pmip;
 
@@ -190,6 +191,9 @@
     status = HYDT_dmx_init(&HYD_pmcd_pmip.user_global.demux);
     HYDU_ERR_POP(status, "unable to initialize the demux engine\n");
 
+    status = HYDT_ftb_init();
+    HYDU_ERR_POP(status, "unable to initialize FTB\n");
+
     /* See if HYDRA_CONTROL_FD is set before trying to connect upstream */
     ret = MPL_env2int("HYDRA_CONTROL_FD", &HYD_pmcd_pmip.upstream.control);
     if (ret < 0) {
@@ -282,6 +286,9 @@
     status = HYDT_dmx_finalize();
     HYDU_ERR_POP(status, "error returned from demux finalize\n");
 
+    status = HYDT_ftb_finalize();
+    HYDU_ERR_POP(status, "unable to finalize FTB\n");
+
     /* cleanup the params structure */
     cleanup_params();
 

Modified: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip_cb.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip_cb.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip_cb.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -11,6 +11,7 @@
 #include "ckpoint.h"
 #include "demux.h"
 #include "bind.h"
+#include "hydt_ftb.h"
 
 struct HYD_pmcd_pmip HYD_pmcd_pmip;
 struct HYD_pmcd_pmip_pmi_handle *HYD_pmcd_pmip_pmi_handle = { 0 };
@@ -257,6 +258,7 @@
     struct HYD_pmcd_pmi_hdr hdr;
     enum HYD_pmcd_pmi_cmd cmd;
     struct HYD_pmcd_pmip_pmi_handle *h;
+    char ftb_event_payload[HYDT_FTB_MAX_PAYLOAD_DATA];
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -280,6 +282,12 @@
          * applications, this is harder to identify, so we just let
          * the user cleanup the processes on a failure. */
         if (using_pmi_port || HYD_pmcd_pmip.downstream.pmi_fd_active[i]) {
+            MPL_snprintf(ftb_event_payload, HYDT_FTB_MAX_PAYLOAD_DATA,
+                         "pgid:%d rank:%d",
+                         HYD_pmcd_pmip.local.pgid, HYD_pmcd_pmip.downstream.pmi_rank[i]);
+            status = HYDT_ftb_publish("FTB_MPI_PROCS_DEAD", ftb_event_payload);
+            HYDU_ERR_POP(status, "FTB publish failed\n");
+
             if (HYD_pmcd_pmip.user_global.auto_cleanup) {
                 HYD_pmcd_pmip_killjob();
 
@@ -456,6 +464,7 @@
     int *pmi_ranks;
     int sent, closed, pmi_fds[2] = { HYD_FD_UNSET, HYD_FD_UNSET };
     struct HYDT_bind_cpuset_t cpuset;
+    char ftb_event_payload[HYDT_FTB_MAX_PAYLOAD_DATA];
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -526,6 +535,10 @@
         HYDU_ERR_POP(status, "unable to create env\n");
 
         /* Restart the proxy.  Specify stdin fd only if pmi_rank 0 is in this proxy. */
+        MPL_snprintf(ftb_event_payload, HYDT_FTB_MAX_PAYLOAD_DATA, "pgid:%d ranks:%d-%d",
+                     HYD_pmcd_pmip.local.pgid, HYD_pmcd_pmip.downstream.pmi_rank[0],
+                     HYD_pmcd_pmip.downstream.pmi_rank
+                     [HYD_pmcd_pmip.local.proxy_process_count-1]);
         status = HYDT_ckpoint_restart(HYD_pmcd_pmip.local.pgid, HYD_pmcd_pmip.local.id,
                                       env, HYD_pmcd_pmip.local.proxy_process_count,
                                       pmi_ranks,
@@ -535,7 +548,11 @@
                                       HYD_pmcd_pmip.downstream.out,
                                       HYD_pmcd_pmip.downstream.err,
                                       HYD_pmcd_pmip.downstream.pid);
-        HYDU_ERR_POP(status, "checkpoint restart failure\n");
+        if (status)
+            status = HYDT_ftb_publish("FTB_MPI_PROCS_RESTART_FAIL", ftb_event_payload);
+        else
+            status = HYDT_ftb_publish("FTB_MPI_PROCS_RESTARTED", ftb_event_payload);
+        HYDU_ERR_POP(status, "checkpoint restart FTB publishing failure\n");
         goto fn_spawn_complete;
     }
 
@@ -873,6 +890,7 @@
 {
     int cmd_len, closed;
     enum HYD_pmcd_pmi_cmd cmd;
+    char ftb_event_payload[HYDT_FTB_MAX_PAYLOAD_DATA];
     HYD_status status = HYD_SUCCESS;
 
     HYDU_FUNC_ENTER();
@@ -899,7 +917,18 @@
     }
     else if (cmd == CKPOINT) {
         HYD_pmcd_pmi_proxy_dump(status, STDOUT_FILENO, "requesting checkpoint\n");
+
+        MPL_snprintf(ftb_event_payload, HYDT_FTB_MAX_PAYLOAD_DATA, "pgid:%d ranks:%d-%d",
+                     HYD_pmcd_pmip.local.pgid, HYD_pmcd_pmip.downstream.pmi_rank[0],
+                     HYD_pmcd_pmip.downstream.pmi_rank
+                     [HYD_pmcd_pmip.local.proxy_process_count-1]);
         status = HYDT_ckpoint_suspend(HYD_pmcd_pmip.local.pgid, HYD_pmcd_pmip.local.id);
+        if (status)
+            status = HYDT_ftb_publish("FTB_MPI_PROCS_CKPT_FAIL", ftb_event_payload);
+        else
+            status = HYDT_ftb_publish("FTB_MPI_PROCS_CKPTED", ftb_event_payload);
+        HYDU_ERR_POP(status, "FTB publishing failure\n");
+
         HYDU_ERR_POP(status, "checkpoint suspend failed\n");
         HYD_pmcd_pmi_proxy_dump(status, STDOUT_FILENO, "checkpoint completed\n");
     }

Modified: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmip_pmi_v1.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -8,6 +8,7 @@
 #include "pmip.h"
 #include "bsci.h"
 #include "demux.h"
+#include "hydt_ftb.h"
 
 static HYD_status send_cmd_upstream(const char *start, int fd, char *args[])
 {

Modified: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmiserv_utils.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmiserv_utils.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/pm/pmiserv/pmiserv_utils.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -149,12 +149,13 @@
     node_id = 0;
     for (block = blocklist_head; block; block = block->next) {
         tmp[i++] = HYDU_strdup("(");
-        tmp[i++] = HYDU_int_to_str(node_id++);
+        tmp[i++] = HYDU_int_to_str(node_id);
         tmp[i++] = HYDU_strdup(",");
         tmp[i++] = HYDU_int_to_str(block->num_blocks);
         tmp[i++] = HYDU_strdup(",");
         tmp[i++] = HYDU_int_to_str(block->block_size);
         tmp[i++] = HYDU_strdup(")");
+        node_id += (block->num_blocks * block->block_size);
         if (block->next)
             tmp[i++] = HYDU_strdup(",");
         HYDU_STRLIST_CONSOLIDATE(tmp, i, status);


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/tools:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/tools:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/tools:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/tools:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/tools:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/tools:5406
/mpich2/trunk/src/pm/hydra/tools:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Modified: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/Makefile.mk
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/Makefile.mk	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/Makefile.mk	2010-11-06 15:02:44 UTC (rev 7418)
@@ -13,3 +13,4 @@
 include tools/rmk/Makefile.mk
 include tools/debugger/Makefile.mk
 include tools/nameserver/Makefile.mk
+include tools/ftb/Makefile.mk


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5817
   + /mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/slurm/slurm_query_proxy_id.c:5817
/mpich2/trunk/src/pm/hydra/tools/bootstrap/external/slurm_query_proxy_id.c:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5817
   + /mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:5817
/mpich2/trunk/src/pm/hydra/tools/bootstrap/src/bsci_query_proxy_id.c:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5817
   + /mpich2/branches/dev/ftb/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:5817
/mpich2/trunk/src/pm/hydra/tools/bootstrap/utils/bscu_query_proxy_id.c:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Deleted: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/Makefile.mk
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/ftb/Makefile.mk	2010-10-29 00:22:32 UTC (rev 7396)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/Makefile.mk	2010-11-06 15:02:44 UTC (rev 7418)
@@ -1,11 +0,0 @@
-# -*- Mode: Makefile; -*-
-#
-# (C) 2008 by Argonne National Laboratory.
-#     See COPYRIGHT in top-level directory.
-#
-
-AM_CPPFLAGS += -I$(top_srcdir)/tools/ftb
-
-if hydra_have_ftb
-libhydra_la_SOURCES += $(top_srcdir)/tools/ftb/hydt_ftb.c
-endif

Copied: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/Makefile.mk (from rev 7396, mpich2/trunk/src/pm/hydra/tools/ftb/Makefile.mk)
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/Makefile.mk	                        (rev 0)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/Makefile.mk	2010-11-06 15:02:44 UTC (rev 7418)
@@ -0,0 +1,11 @@
+# -*- Mode: Makefile; -*-
+#
+# (C) 2008 by Argonne National Laboratory.
+#     See COPYRIGHT in top-level directory.
+#
+
+AM_CPPFLAGS += -I$(top_srcdir)/tools/ftb
+
+if hydra_have_ftb
+libhydra_la_SOURCES += $(top_srcdir)/tools/ftb/hydt_ftb.c
+endif

Deleted: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.c
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/ftb/hydt_ftb.c	2010-10-29 00:22:32 UTC (rev 7396)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -1,83 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2008 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include "hydra_utils.h"
-#include "hydt_ftb.h"
-
-static FTB_client_handle_t ch;
-
-static FTB_event_info_t event_info[] = {
-    { "FTB_MPI_CKPOINTED",          "info" },
-    { "FTB_MPI_CKPOINT_FAILED",     "error" }
-};
-
-HYD_status HYDT_ftb_init(void)
-{
-    int ret;
-    FTB_client_t ci;
-    HYD_status status = HYD_SUCCESS;
-
-    MPL_strncpy(ci.event_space, "ftb.mpi.hydra", sizeof(ci.event_space));
-    MPL_strncpy(ci.client_name, "hydra " HYDRA_VERSION, sizeof(ci.client_name));
-    MPL_strncpy(ci.client_subscription_style, "FTB_SUBSCRIPTION_NONE",
-                sizeof(ci.client_subscription_style));
-    ci.client_polling_queue_len = -1;
-
-    ret = FTB_Connect(&ci, &ch);
-    if (ret)
-        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "ftb connect\n");
-
-    ret = FTB_Declare_publishable_events(ch, NULL, event_info,
-                                         sizeof(event_info) / sizeof(event_info[0]));
-    if (ret)
-        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "ftb declare publishable\n");
-
-  fn_exit:
-    HYDU_FUNC_EXIT();
-    return status;
-
-  fn_fail:
-    goto fn_exit;
-}
-
-HYD_status HYDT_ftb_finalize(void)
-{
-    int ret;
-    HYD_status status = HYD_SUCCESS;
-
-    ret = FTB_Disconnect(ch);
-    if (ret)
-        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "ftb disconnect\n");
-
-  fn_exit:
-    HYDU_FUNC_EXIT();
-    return status;
-
-  fn_fail:
-    goto fn_exit;
-}
-
-HYD_status HYDT_ftb_publish(const char *event_name, const char *event_payload)
-{
-    FTB_event_properties_t event_prop;
-    FTB_event_handle_t event_handle;
-    int ret;
-    HYD_status status = HYD_SUCCESS;
-
-    event_prop.event_type = 1;
-    MPL_strncpy(event_prop.event_payload, event_payload, sizeof(event_prop.event_payload));
-
-    ret = FTB_Publish(ch, event_name, &event_prop, &event_handle);
-    if (ret)
-        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "ftb publish\n");
-
-  fn_exit:
-    HYDU_FUNC_EXIT();
-    return status;
-
-  fn_fail:
-    goto fn_exit;
-}

Copied: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.c (from rev 7396, mpich2/trunk/src/pm/hydra/tools/ftb/hydt_ftb.c)
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.c	                        (rev 0)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -0,0 +1,86 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2008 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "hydra_utils.h"
+#include "hydt_ftb.h"
+
+static FTB_client_handle_t ch;
+
+static FTB_event_info_t event_info[] = {   /* events this client declares as publishable */
+    { "FTB_MPI_PROCS_RESTARTED",     "info" },
+    { "FTB_MPI_PROCS_RESTART_FAIL",  "error" },
+    { "FTB_MPI_PROCS_CKPTED",        "info" },
+    { "FTB_MPI_PROCS_CKPT_FAIL",     "error" },   /* same spelling the checkpoint path publishes */
+    { "FTB_MPI_PROCS_DEAD",          "error" }
+};
+
+HYD_status HYDT_ftb_init(void)
+{
+    /* Connect to the FTB and declare every event in event_info as publishable. */
+    FTB_client_t ci = { 0 };    /* zeroed so fields not assigned below are not stack garbage */
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    MPL_strncpy(ci.event_space, "ftb.mpi.hydra", sizeof(ci.event_space));
+    MPL_strncpy(ci.client_name, "hydra " HYDRA_VERSION, sizeof(ci.client_name));
+    MPL_strncpy(ci.client_subscription_style, "FTB_SUBSCRIPTION_NONE",
+                sizeof(ci.client_subscription_style));
+    ci.client_polling_queue_len = -1;
+
+    if (FTB_Connect(&ci, &ch))  /* on success ch holds the handle used by the other routines */
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "ftb connect\n");
+
+    if (FTB_Declare_publishable_events(ch, NULL, event_info,
+                                       sizeof(event_info) / sizeof(event_info[0])))
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "ftb declare publishable\n");
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+HYD_status HYDT_ftb_finalize(void)
+{
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    if (FTB_Disconnect(ch))     /* ch is the handle filled in by HYDT_ftb_init() */
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "ftb disconnect\n");
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+HYD_status HYDT_ftb_publish(const char *event_name, const char *event_payload)
+{
+    FTB_event_properties_t event_prop;  /* type + payload handed to FTB_Publish() */
+    FTB_event_handle_t event_handle;    /* filled in by FTB_Publish(); not used afterwards */
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    event_prop.event_type = 1;  /* NOTE(review): meaning of type 1 is defined by libftb -- confirm */
+    MPL_strncpy(event_prop.event_payload, event_payload, sizeof(event_prop.event_payload));
+
+    if (FTB_Publish(ch, event_name, &event_prop, &event_handle))    /* nonzero means failure */
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "ftb publish\n");
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}

Deleted: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.h
===================================================================
--- mpich2/trunk/src/pm/hydra/tools/ftb/hydt_ftb.h	2010-10-29 00:22:32 UTC (rev 7396)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -1,40 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2008 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef HYDT_FTB_H_INCLUDED
-#define HYDT_FTB_H_INCLUDED
-
-#if defined ENABLE_FTB
-#include <libftb.h>
-
-#define HYDT_FTB_MAX_PAYLOAD_DATA FTB_MAX_PAYLOAD_DATA
-
-HYD_status HYDT_ftb_init(void);
-HYD_status HYDT_ftb_finalize(void);
-HYD_status HYDT_ftb_publish(const char *event_name, const char *event_payload);
-
-#else
-
-#define HYDT_FTB_MAX_PAYLOAD_DATA (1024)
-
-static HYD_status HYDT_ftb_init(void)
-{
-    return HYD_SUCCESS;
-}
-
-static HYD_status HYDT_ftb_finalize(void)
-{
-    return HYD_SUCCESS;
-}
-
-static HYD_status HYDT_ftb_publish(const char *event_name, const char *event_payload)
-{
-    return HYD_SUCCESS;
-}
-
-#endif /* ENABLE_FTB */
-
-#endif /* HYDT_FTB_H_INCLUDED */

Copied: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.h (from rev 7396, mpich2/trunk/src/pm/hydra/tools/ftb/hydt_ftb.h)
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.h	                        (rev 0)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/tools/ftb/hydt_ftb.h	2010-11-06 15:02:44 UTC (rev 7418)
@@ -0,0 +1,40 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2008 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#ifndef HYDT_FTB_H_INCLUDED
+#define HYDT_FTB_H_INCLUDED
+/* Hydra's FTB entry points; callers use the same three names whether or not ENABLE_FTB is defined. */
+#if defined ENABLE_FTB  /* FTB build: prototypes only, the definitions are in hydt_ftb.c */
+#include <libftb.h>
+
+#define HYDT_FTB_MAX_PAYLOAD_DATA FTB_MAX_PAYLOAD_DATA  /* payload buffer size comes from libftb */
+/* Each routine returns HYD_SUCCESS, or an error status when the underlying FTB call fails. */
+HYD_status HYDT_ftb_init(void);
+HYD_status HYDT_ftb_finalize(void);
+HYD_status HYDT_ftb_publish(const char *event_name, const char *event_payload);
+
+#else   /* !ENABLE_FTB */
+
+#define HYDT_FTB_MAX_PAYLOAD_DATA (1024)    /* stand-in size so callers can still declare payload buffers */
+/* Stubs for builds without FTB.  Being static, every file that includes this header gets its own copy. */
+static HYD_status HYDT_ftb_init(void)
+{
+    return HYD_SUCCESS;     /* nothing to set up */
+}
+
+static HYD_status HYDT_ftb_finalize(void)
+{
+    return HYD_SUCCESS;     /* nothing to tear down */
+}
+
+static HYD_status HYDT_ftb_publish(const char *event_name, const char *event_payload)
+{
+    return HYD_SUCCESS;     /* both arguments are ignored; the event goes nowhere */
+}
+
+#endif /* ENABLE_FTB */
+
+#endif /* HYDT_FTB_H_INCLUDED */


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/ui
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/ui:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/ui:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/ui:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/ui:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/ui:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/ui:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/ui:5406
/mpich2/trunk/src/pm/hydra/ui:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Modified: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/ui/mpich/mpiexec.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/ui/mpich/mpiexec.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/ui/mpich/mpiexec.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -10,6 +10,7 @@
 #include "rmki.h"
 #include "pmci.h"
 #include "bsci.h"
+#include "hydt_ftb.h"
 #include "demux.h"
 #include "uiu.h"
 
@@ -192,6 +193,9 @@
                             HYD_handle.user_global.enablex, HYD_handle.user_global.debug);
     HYDU_ERR_POP(status, "unable to initialize the bootstrap server\n");
 
+    status = HYDT_ftb_init();
+    HYDU_ERR_POP(status, "unable to initialize FTB\n");
+
     if (HYD_handle.node_list == NULL) {
         /* Node list is not created yet. The user might not have
          * provided the host file. Query the RMK. */
@@ -327,6 +331,9 @@
     status = HYD_pmci_finalize();
     HYDU_ERR_POP(status, "process manager error on finalize\n");
 
+    status = HYDT_ftb_finalize();
+    HYDU_ERR_POP(status, "error finalizing FTB\n");
+
 #if defined ENABLE_PROFILING
     if (HYD_handle.enable_profiling) {
         HYDU_dump_noprefix(stdout, "\n");


Property changes on: mpich2/branches/release/mpich2-1.3.x/src/pm/hydra/utils
___________________________________________________________________
Added: svn:mergeinfo
   + /mpich2/branches/dev/ckpt/src/pm/hydra/utils:5050
/mpich2/branches/dev/ckpt2/src/pm/hydra/utils:5057-6537
/mpich2/branches/dev/ftb/src/pm/hydra/utils:5661-5730
/mpich2/branches/dev/lapi/src/pm/hydra/utils:5817
/mpich2/branches/dev/wintcp_async_progress/src/pm/hydra/utils:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/src/pm/hydra/utils:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/src/pm/hydra/utils:5406
/mpich2/trunk/src/pm/hydra/utils:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416

Modified: mpich2/branches/release/mpich2-1.3.x/src/util/createshlib.in
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/util/createshlib.in	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/util/createshlib.in	2010-11-06 15:02:44 UTC (rev 7418)
@@ -95,12 +95,12 @@
        nextarg=$arg
        ;;
        -echo) set -x ;;
-       -dryrun) Show=echo ;;
+       -dryrun|--dryrun) Show=echo ;;
        # -cc and -clink allow us to override the commands used to 
        # build the shared library
-       -cc=*) CC_SHL=$option ;;
-       -clink=*) C_LINK_SHL=$option ;;
-       -libtype=*) libtype=$option ;;
+       -cc=*|--cc=*) CC_SHL=$option ;;
+       -clink=*|--clink=*) C_LINK_SHL=$option ;;
+       -libtype=*|--libtype=*) libtype=$option ;;
        -l*|-L*)
        dependentLibs="$dependentLibs $arg"
        ;;

Modified: mpich2/branches/release/mpich2-1.3.x/src/util/instrm/Makefile.sm
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/util/instrm/Makefile.sm	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/util/instrm/Makefile.sm	2010-11-06 15:02:44 UTC (rev 7418)
@@ -1,2 +1,2 @@
-lib${MPILIBNAME}_a_SOURCES = states.c
+lib${MPILIBNAME}_a_SOURCES = states.c instr.c
 INCLUDES = -I../../include -I${top_srcdir}/src/include

Copied: mpich2/branches/release/mpich2-1.3.x/src/util/instrm/instr.c (from rev 7416, mpich2/trunk/src/util/instrm/instr.c)
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/util/instrm/instr.c	                        (rev 0)
+++ mpich2/branches/release/mpich2-1.3.x/src/util/instrm/instr.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -0,0 +1,132 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "mpiimpl.h"
+
+#ifdef USE_MPIU_INSTR
+
+static int MPIU_INSTR_Printf( FILE *fp );
+static int MPIU_INSTR_Finalize( void *p );
+
+/* */
+/*
+ * Basic but general support for instrumentation hooks in MPICH2
+ *
+ * Most actions are handled by MPIU_INSTR_xxx macros, both to keep overhead
+ * as low as possible and to allow instrumentation to be selected at compile time.
+ */
+static struct MPIU_INSTR_Generic_t *instrHead = 0, *instrTail = 0;
+
+int MPIU_INSTR_AddHandle( void *handlePtr )
+{
+    /* Append the handle to the list of instrumentation objects; returns 0 */
+    struct MPIU_INSTR_Generic_t *gPtr = (struct MPIU_INSTR_Generic_t *)handlePtr;
+
+    /* Note that Addhandle must be within a thread-safe initialization */
+    if (!instrHead) {
+        /* First handle: register the finalize hook.  Make sure that this
+           call back occurs early (before MPID_Finalize) */
+        MPIR_Add_finalize( MPIU_INSTR_Finalize, stdout, MPIR_FINALIZE_CALLBACK_PRIO + 2 );
+        instrHead       = gPtr;
+    }
+    else {
+        instrTail->next = gPtr;
+    }
+    instrTail       = gPtr;
+
+    /* The function is declared int, so it must not fall off the end */
+    return 0;
+}
+
+#define MAX_INSTR_BUF 1024
+static int MPIU_INSTR_Printf( FILE *fp )
+{
+    /* Write one line to fp for every registered handle whose count is
+     * nonzero, then flush fp.  Always returns 0. */
+    char line[MAX_INSTR_BUF];
+    struct MPIU_INSTR_Generic_t *cur;
+
+    for (cur = instrHead; cur; cur = cur->next) {
+        /* Handles that never recorded an event are left out */
+        if (!cur->count)
+            continue;
+
+        if (cur->toStr) {
+            /* The handle supplies its own formatter */
+            (*cur->toStr)( line, sizeof(line), cur );
+        }
+        else {
+            /* No formatter: use the description.  A missing description
+             * should not happen; it is printed as an empty line. */
+            MPIU_Strncpy( line, cur->desc ? cur->desc : "", sizeof(line) );
+        }
+
+        fputs( line, fp );
+        fputc( '\n', fp );
+    }
+    fflush( fp );
+    return 0;
+}
+
+static int MPIU_INSTR_Finalize( void *p )
+{
+    /* Finalize callback; the callback argument p is not used here */
+    struct MPIU_INSTR_Generic_t *cur;
+    int wantOutput;
+
+    /* FIXME: This should at least issue the writes in process order */
+    /* MPICH_INSTR_AT_FINALIZE controls whether output is generated; a 0
+     * return from MPL_env2bool is treated as "do not print" */
+    if (!MPL_env2bool( "MPICH_INSTR_AT_FINALIZE", &wantOutput ))
+        wantOutput = 0;
+    if (wantOutput)
+        MPIU_INSTR_Printf( stdout );
+
+    /* Release the description strings and clear the pointers */
+    for (cur = instrHead; cur; cur = cur->next) {
+        if (!cur->desc)
+            continue;
+        MPIU_Free( (char *)cur->desc );
+        cur->desc = 0;
+    }
+
+    return 0;
+}
+
+/*
+ * Standard print routines for the instrumentation objects
+ */
+
+/* 
+ * Print a duration, which may have extra integer fields.  Those fields
+ * are printed as integers, in order, separated by tabs.
+ */
+int MPIU_INSTR_ToStr_Duration_Count( char *buf, size_t maxBuf, void *ptr )
+{
+    struct MPIU_INSTR_Duration_count_t *dur = (struct MPIU_INSTR_Duration_count_t *)ptr;
+    double elapsed;
+
+    MPID_Wtime_todouble( &dur->ttime, &elapsed );
+    snprintf( buf, maxBuf, "%-40s:\t%d\t%e", dur->desc, dur->count, elapsed );
+    if (dur->nitems) {
+        /* Append each extra integer after a tab, moving the write position
+         * forward and shrinking the remaining space by what was written */
+        int   used = strlen(buf);
+        char *tail = buf + used;
+        int   k    = 0;
+        maxBuf -= used;
+        while (k < dur->nitems) {
+            snprintf( tail, maxBuf, "\t%d", dur->data[k++] );
+            used    = strlen(tail);
+            maxBuf -= used;
+            tail   += used;
+        }
+    }
+    return 0;
+}
+#else
+/* No routines required if instrumentation is not selected */
+#endif

Modified: mpich2/branches/release/mpich2-1.3.x/src/util/param/params.yml
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/src/util/param/params.yml	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/src/util/param/params.yml	2010-11-06 15:02:44 UTC (rev 7418)
@@ -60,6 +60,115 @@
         algorithms. Setting it to 0 causes all irecvs/isends to be
         posted at once.
 
+    - category    : collective
+      name        : REDSCAT_COMMUTATIVE_LONG_MSG_SIZE
+      type        : int
+      default     : 524288
+      description : >-
+        the long message algorithm will be used if the operation is commutative
+        and the send buffer size is >= this value (in bytes)
+
+    - category    : collective
+      name        : BCAST_MIN_PROCS
+      type        : int
+      default     : 8
+      description : >-
+        the minimum number of processes in a communicator to use a non-binomial
+        broadcast algorithm
+
+    - category    : collective
+      name        : BCAST_SHORT_MSG_SIZE
+      type        : int
+      default     : 12288
+      description : >-
+        the short message algorithm will be used if the send buffer size is <
+        this value (in bytes)
+
+    - category    : collective
+      name        : BCAST_LONG_MSG_SIZE
+      type        : int
+      default     : 524288
+      description : >-
+        the long message algorithm will be used if the send buffer size is >=
+        this value (in bytes)
+
+    - category    : collective
+      name        : ALLGATHER_SHORT_MSG_SIZE
+      type        : int
+      default     : 81920
+      description : >-
+        For MPI_Allgather and MPI_Allgatherv, the short message algorithm will
+        be used if the send buffer size is < this value (in bytes).
+
+    - category    : collective
+      name        : ALLGATHER_LONG_MSG_SIZE
+      type        : int
+      default     : 524288
+      description : >-
+        For MPI_Allgather and MPI_Allgatherv, the long message algorithm will be
+        used if the send buffer size is >= this value (in bytes)
+
+    - category    : collective
+      name        : REDUCE_SHORT_MSG_SIZE
+      type        : int
+      default     : 2048
+      description : >-
+        the short message algorithm will be used if the send buffer size is <=
+        this value (in bytes)
+
+    - category    : collective
+      name        : ALLREDUCE_SHORT_MSG_SIZE
+      type        : int
+      default     : 2048
+      description : >-
+        the short message algorithm will be used if the send buffer size is <=
+        this value (in bytes)
+
+    - category    : collective
+      name        : GATHER_VSMALL_MSG_SIZE
+      type        : int
+      default     : 1024
+      description : >-
+        use a temporary buffer for intracommunicator MPI_Gather if the send
+        buffer size is < this value (in bytes)
+
+    - category    : collective
+      name        : GATHER_INTER_SHORT_MSG_SIZE
+      type        : int
+      default     : 2048
+      description : >-
+        use the short message algorithm for intercommunicator MPI_Gather if the
+        send buffer size is < this value (in bytes)
+
+    - category    : collective
+      name        : GATHERV_INTER_SSEND_MIN_PROCS
+      # backwards compatibility
+      abs-alt-env :
+                    - MPICH2_GATHERV_MIN_PROCS
+      type        : int
+      default     : 32
+      description : >-
+        Use Ssend (synchronous send) for intercommunicator MPI_Gatherv if the
+        "group B" size is >= this value.  Specifying "-1" always avoids using
+        Ssend.  For backwards compatibility, specifying "0" uses the default
+        value.
+
+    - category    : collective
+      name        : SCATTER_INTER_SHORT_MSG_SIZE
+      type        : int
+      default     : 2048
+      description : >-
+        use the short message algorithm for intercommunicator MPI_Scatter if the
+        send buffer size is < this value (in bytes)
+
+    - category    : collective
+      name        : ALLGATHERV_PIPELINE_MSG_SIZE
+      type        : int
+      default     : 32768
+      description : >-
+        The smallest message size that will be used for the pipelined, large-message,
+        ring algorithm in the MPI_Allgatherv implementation.
+
     ##############################################################
     # intranode communication parameters
     - category    : intranode

Modified: mpich2/branches/release/mpich2-1.3.x/test/mpi/errors/spawn/testlist
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/test/mpi/errors/spawn/testlist	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/test/mpi/errors/spawn/testlist	2010-11-06 15:02:44 UTC (rev 7418)
@@ -1,3 +1,3 @@
-badport 1
+badport 2
 unpub 1
 lookup_name 1

Modified: mpich2/branches/release/mpich2-1.3.x/test/mpi/group/Makefile.sm
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/test/mpi/group/Makefile.sm	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/test/mpi/group/Makefile.sm	2010-11-06 15:02:44 UTC (rev 7418)
@@ -13,6 +13,8 @@
 gtranks_LDADD = ../util/mtest.o
 groupnullincl_SOURCES = groupnullincl.c
 groupnullincl_LDADD = ../util/mtest.o
+gtranksperf_SOURCES = gtranksperf.c
+gtranksperf_LDADD = ../util/mtest.o
 
 ../util/mtest.o:
 	(cd ../util && $(MAKE) mtest.o)

Modified: mpich2/branches/release/mpich2-1.3.x/test/mpi/group/gtranks.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/test/mpi/group/gtranks.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/test/mpi/group/gtranks.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -83,6 +83,9 @@
 	errs++;
     }
 
+    MPI_Group_free(&gself);
+    MPI_Group_free(&gworld);
+
     MTest_Finalize( errs );
     MPI_Finalize();
 

Copied: mpich2/branches/release/mpich2-1.3.x/test/mpi/group/gtranksperf.c (from rev 7358, mpich2/trunk/test/mpi/group/gtranksperf.c)
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/test/mpi/group/gtranksperf.c	                        (rev 0)
+++ mpich2/branches/release/mpich2-1.3.x/test/mpi/group/gtranksperf.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -0,0 +1,138 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2010 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "mpi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpitest.h"
+
+#include <math.h> /* for fabs(3) */
+#ifdef HAVE_UNISTD_H
+#include <unistd.h> /* for sleep(3) */
+#endif
+
+/* Measure and compare the relative performance of MPI_Group_translate_ranks
+ * with small and large group2 sizes but a constant number of ranks.  This
+ * serves as a performance sanity check for the Scalasca use case where we
+ * translate to MPI_COMM_WORLD ranks.  The performance should only depend on the
+ * number of ranks passed, not the size of either group (especially group2).
+ *
+ * This test is probably only meaningful for large-ish process counts, so we may
+ * not be able to run this test by default in the nightlies. */
+
+/* number of iterations used for timing */
+#define NUM_LOOPS (1000000)
+
+/* Test driver: first checks MPI_Group_translate_ranks for correctness by
+ * translating every rank of a rank-reversed group into the world group and
+ * into the MPI_COMM_SELF group, then has rank 0 time NUM_LOOPS translations
+ * against each target group and counts an error if the timings diverge too
+ * much.  Always returns 0; the error count is handed to MTest_Finalize. */
+int main( int argc, char *argv[] )
+{
+    int errs = 0;
+    int *ranks;
+    int *ranksout;
+    MPI_Group gworld, grev, gself;
+    MPI_Comm  comm;
+    MPI_Comm  commrev;
+    int rank, size, i;
+    double start, end, time1, time2;
+
+    MTest_Init( &argc, &argv );
+
+    comm = MPI_COMM_WORLD;
+
+    MPI_Comm_size( comm, &size );
+    MPI_Comm_rank( comm, &rank );
+
+    ranks    = malloc(size*sizeof(int));
+    ranksout = malloc(size*sizeof(int));
+    if (!ranks || !ranksout) {
+        fprintf(stderr, "out of memory\n");
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    /* generate a comm with the rank order reversed */
+    MPI_Comm_split(comm, 0, (size-rank-1), &commrev);
+    MPI_Comm_group(commrev, &grev);
+    MPI_Comm_group(MPI_COMM_SELF, &gself);
+    MPI_Comm_group(comm, &gworld);
+
+    /* sanity check correctness first */
+    for (i=0; i < size; i++) {
+        ranks[i] = i;
+        ranksout[i] = -1;
+    }
+    /* rank i of the reversed group must map to world rank (size-i-1) */
+    MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
+    for (i=0; i < size; i++) {
+        if (ranksout[i] != (size-i-1)) {
+            /* report the value that was actually compared against */
+            if (rank == 0)
+                printf("%d: (gworld) expected ranksout[%d]=%d, got %d\n", rank, i, (size-i-1), ranksout[i]);
+            ++errs;
+        }
+    }
+    /* only this process's own reversed rank exists in gself (as rank 0);
+     * every other rank must translate to MPI_UNDEFINED */
+    MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
+    for (i=0; i < size; i++) {
+        int expected = (i == (size-rank-1) ? 0 : MPI_UNDEFINED);
+        if (ranksout[i] != expected) {
+            if (rank == 0)
+                printf("%d: (gself) expected ranksout[%d]=%d, got %d\n", rank, i, expected, ranksout[i]);
+            ++errs;
+        }
+    }
+
+    /* now compare relative performance */
+
+    /* we need lots of procs to get a group large enough to have meaningful
+     * numbers.  On most testing machines this means that we're oversubscribing
+     * cores in a big way, which might perturb the timing results.  So we make
+     * sure everyone started up and then everyone but rank 0 goes to sleep to
+     * let rank 0 do all the timings. */
+    MPI_Barrier(comm);
+
+    if (rank != 0) {
+        sleep(10);
+    }
+    else /* rank==0 */ {
+        sleep(1); /* try to avoid timing while everyone else is making syscalls */
+
+        MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout); /*throwaway iter*/
+        start = MPI_Wtime();
+        for (i = 0; i < NUM_LOOPS; ++i) {
+            MPI_Group_translate_ranks(grev, size, ranks, gworld, ranksout);
+        }
+        end = MPI_Wtime();
+        time1 = end - start;
+
+        MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout); /*throwaway iter*/
+        start = MPI_Wtime();
+        for (i = 0; i < NUM_LOOPS; ++i) {
+            MPI_Group_translate_ranks(grev, size, ranks, gself, ranksout);
+        }
+        end = MPI_Wtime();
+        time2 = end - start;
+
+        /* complain if the "gworld" time differs from the "gself" time by
+         * more than 2x the "gself" time (for time1 > time2 that means
+         * time1 > 3*time2) */
+        if (fabs(time1 - time2) > (2.00 * time2)) {
+            printf("too much difference in MPI_Group_translate_ranks performance:\n");
+            printf("time1=%f time2=%f\n", time1, time2);
+            printf("(fabs(time1-time2)/time2)=%f\n", (fabs(time1-time2)/time2));
+            /* with non-negative timings the test above cannot succeed when
+             * time1 < time2, so this only fires if the timer misbehaves */
+            if (time1 < time2) {
+                printf("also, (time1<time2) is surprising...\n");
+            }
+            ++errs;
+        }
+    }
+
+    free(ranks);
+    free(ranksout);
+
+    MPI_Group_free(&grev);
+    MPI_Group_free(&gself);
+    MPI_Group_free(&gworld);
+
+    MPI_Comm_free(&commrev);
+
+    MTest_Finalize(errs);
+    MPI_Finalize();
+
+    return 0;
+}

Modified: mpich2/branches/release/mpich2-1.3.x/test/mpi/group/testlist
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/test/mpi/group/testlist	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/test/mpi/group/testlist	2010-11-06 15:02:44 UTC (rev 7418)
@@ -2,3 +2,9 @@
 grouptest 8
 grouptest2 4
 groupnullincl 4
+
+gtranks 4
+
+# this may be too many processes for some systems, but the test needs a large-ish
+# number of processes to yield an effective performance check
+gtranksperf 20

Modified: mpich2/branches/release/mpich2-1.3.x/test/mpi/perf/manyrma.c
===================================================================
--- mpich2/branches/release/mpich2-1.3.x/test/mpi/perf/manyrma.c	2010-11-06 14:41:25 UTC (rev 7417)
+++ mpich2/branches/release/mpich2-1.3.x/test/mpi/perf/manyrma.c	2010-11-06 15:02:44 UTC (rev 7418)
@@ -1,31 +1,100 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2010 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
 /* This test measures the performance of many rma operations to a single 
    target process.
-   It uses a number of operations (put, get, or accumulate) to different
-   locations in the target window */
+   It uses a number of operations (put or accumulate) to different
+   locations in the target window.
+   This is one of the ways that RMA may be used, and is used in the 
+   reference implementation of the graph500 benchmark.
+*/
 #include "mpi.h"
 #include <stdio.h>
 #include <stdlib.h>
 
 #define MAX_COUNT 65536
+#define MAX_RMA_SIZE 16
 #define MAX_RUNS 10
 
+typedef enum { SYNC_NONE=0, 
+	       SYNC_ALL=-1, SYNC_FENCE=1, SYNC_LOCK=2, SYNC_PSCW=4 } sync_t;
+typedef enum { RMA_NONE=0, RMA_ALL=-1, RMA_PUT=1, RMA_ACC=2, RMA_GET=4 } rma_t;
+/* Note GET not yet implemented.
+   Both enums are used as bit masks: main() tests the choices below with
+   '&' to decide which synchronization/operation combinations to run.
+   The *_ALL value (-1) is the default and matches every mask test; the
+   first matching command-line option resets the choice to *_NONE and
+   then ORs in the requested bit. */
+sync_t syncChoice = SYNC_ALL;
+rma_t rmaChoice = RMA_ALL;
+
+typedef struct {
+    double startOp, endOp, endSync; /* MPI_Wtime values: before the RMA calls, after them, after the closing sync call */
+} timing;
+
 static int verbose = 1;
 
+void PrintResults( int cnt, timing t[] );
+void RunAccFence( MPI_Win win, int destRank, int cnt, int sz, timing t[] );
+void RunAccLock( MPI_Win win, int destRank, int cnt, int sz, timing t[] );
+void RunPutFence( MPI_Win win, int destRank, int cnt, int sz, timing t[] );
+void RunPutLock( MPI_Win win, int destRank, int cnt, int sz, timing t[] );
+void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz, 
+		 MPI_Group exposureGroup, MPI_Group accessGroup, timing t[] );
+void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz, 
+		 MPI_Group exposureGroup, MPI_Group accessGroup, timing t[] );
+
 int main( int argc, char *argv[] )
 {
-    int arraysize, i, k, cnt, maxCount, *arraybuffer;
+    int arraysize, i, k, cnt, sz, maxCount, *arraybuffer;
     int one = 1;
-    int wrank, wsize, destRank;
+    int wrank, wsize, destRank, srcRank;
     MPI_Win win;
+    MPI_Group wgroup, accessGroup, exposureGroup;
     double t1[MAX_RUNS], t2[MAX_RUNS], t3[MAX_RUNS];
+    timing t[MAX_RUNS];
 
     MPI_Init( &argc, &argv );
+
+    for (i=1; i<argc; i++) {
+	if (strcmp( argv[i], "-put" ) == 0) {
+	    if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
+	    rmaChoice  |= RMA_PUT;
+	}
+	else if (strcmp( argv[i], "-acc" ) == 0) {
+	    if (rmaChoice == RMA_ALL) rmaChoice = RMA_NONE;
+	    rmaChoice  |= RMA_ACC;
+	}
+	else if (strcmp( argv[i], "-fence" ) == 0) {
+	    if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
+	    syncChoice |= SYNC_FENCE;
+	}
+	else if (strcmp( argv[i], "-lock" ) == 0) {
+	    if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
+	    syncChoice |= SYNC_LOCK;
+	}
+	else if (strcmp( argv[i], "-pscw" ) == 0) {
+	    if (syncChoice == SYNC_ALL) syncChoice = SYNC_NONE;
+	    syncChoice |= SYNC_PSCW;
+	}
+	else {
+	    fprintf( stderr, "Unrecognized argument %s\n", argv[i] );
+	    MPI_Abort( MPI_COMM_WORLD, 1 );
+	}
+    }
     
     MPI_Comm_rank( MPI_COMM_WORLD, &wrank );
     MPI_Comm_size( MPI_COMM_WORLD, &wsize );
     destRank = wrank + 1;
     while (destRank >= wsize) destRank = destRank - wsize;
-    arraysize = MAX_COUNT;
+    srcRank = wrank - 1;
+    if (srcRank < 0) srcRank += wsize;
+
+    /* Create groups for PSCW */
+    MPI_Comm_group( MPI_COMM_WORLD, &wgroup );
+    MPI_Group_incl( wgroup, 1, &destRank, &accessGroup );
+    MPI_Group_incl( wgroup, 1, &srcRank, &exposureGroup );
+    MPI_Group_free( &wgroup );
+
+    arraysize = MAX_RMA_SIZE * MAX_COUNT;
     arraybuffer = (int*)malloc( arraysize * sizeof(int) );
     if (!arraybuffer) {
 	fprintf( stderr, "Unable to allocate %d words\n", arraysize );
@@ -35,44 +104,263 @@
     MPI_Win_create( arraybuffer, arraysize*sizeof(int), (int)sizeof(int),
 		    MPI_INFO_NULL, MPI_COMM_WORLD, &win );
 
+    /* FIXME: we need a test on performance consistency.
+       The test needs to have both a relative growth limit and
+       an absolute limit.
+    */
+
     maxCount = MAX_COUNT;
-    maxCount = 33000;
-    cnt = 1;
-    while (cnt <= maxCount) {
-	for (k=0; k<MAX_RUNS; k++) {
-	    MPI_Win_fence( 0, win );
-	    MPI_Barrier( MPI_COMM_WORLD );
-	    t1[k] = MPI_Wtime();
-	    for (i=0; i<cnt; i++) {
-		MPI_Accumulate( &one, 1, MPI_INT, destRank, 
-				i, 1, MPI_INT, MPI_SUM, win );
+
+    if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_ACC)) {
+	for (sz=1; sz<=MAX_RMA_SIZE; sz = sz + sz) {
+	    if (wrank == 0) 
+		printf( "Accumulate with fence, %d elements\n", sz );
+	    cnt = 1;
+	    while (cnt <= maxCount) {
+		RunAccFence( win, destRank, cnt, sz, t );
+		if (wrank == 0) {
+		    PrintResults( cnt, t );
+		}
+		cnt = 2 * cnt;
 	    }
-	    t2[k] = MPI_Wtime();
-	    MPI_Win_fence( 0, win );
-	    t3[k] = MPI_Wtime();
 	}
-	if (wrank == 0) {
-	    double d1=0, d2=0;
-	    for (k=0; k<MAX_RUNS; k++) {
-		d1 += t2[k] - t1[k];
-		d2 += t3[k] - t2[k];
+    }
+
+    if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_ACC)) {
+	for (sz=1; sz<=MAX_RMA_SIZE; sz = sz + sz) {
+	    if (wrank == 0) 
+		printf( "Accumulate with lock, %d elements\n", sz );
+	    cnt = 1;
+	    while (cnt <= maxCount) {
+		RunAccLock( win, destRank, cnt, sz, t );
+		if (wrank == 0) {
+		    PrintResults( cnt, t );
+		}
+		cnt = 2 * cnt;
 	    }
-	    if (verbose) {
-		printf( "%d\t%e\t%e\t%e\t%e\n", cnt, 
-			d1 / MAX_RUNS, d2 / MAX_RUNS, 
-			d1 / (MAX_RUNS * cnt), d2 / (MAX_RUNS * cnt) );
+	}
+    }
+
+    if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_PUT)) {
+	for (sz=1; sz<=MAX_RMA_SIZE; sz = sz + sz) {
+	    if (wrank == 0) 
+		printf( "Put with fence, %d elements\n", sz );
+	    cnt = 1;
+	    while (cnt <= maxCount) {
+		RunPutFence( win, destRank, cnt, sz, t );
+		if (wrank == 0) {
+		    PrintResults( cnt, t );
+		}
+		cnt = 2 * cnt;
 	    }
-	    /* FIXME: we need a test on performance consistency.
-	       The test needs to have both a relative growth limit and
-	       an absolute limit.
-	     */
 	}
-	
-	cnt = 2 * cnt;
     }
 
+    if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_PUT)) {
+	for (sz=1; sz<=MAX_RMA_SIZE; sz = sz + sz) {
+	    if (wrank == 0) 
+		printf( "Put with lock, %d elements\n", sz );
+	    cnt = 1;
+	    while (cnt <= maxCount) {
+		RunPutLock( win, destRank, cnt, sz, t );
+		if (wrank == 0) {
+		    PrintResults( cnt, t );
+		}
+		cnt = 2 * cnt;
+	    }
+	}
+    }
+
+    if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_PUT)) {
+	for (sz=1; sz<=MAX_RMA_SIZE; sz = sz + sz) {
+	    if (wrank == 0) 
+		printf( "Put with pscw, %d elements\n", sz );
+	    cnt = 1;
+	    while (cnt <= maxCount) {
+		RunPutPSCW( win, destRank, cnt, sz, 
+			    exposureGroup, accessGroup, t );
+		if (wrank == 0) {
+		    PrintResults( cnt, t );
+		}
+		cnt = 2 * cnt;
+	    }
+	}
+    }
+
+    if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_ACC)) {
+	for (sz=1; sz<=MAX_RMA_SIZE; sz = sz + sz) {
+	    if (wrank == 0) 
+		printf( "Accumulate with pscw, %d elements\n", sz );
+	    cnt = 1;
+	    while (cnt <= maxCount) {
+		RunAccPSCW( win, destRank, cnt, sz, 
+			    exposureGroup, accessGroup, t );
+		if (wrank == 0) {
+		    PrintResults( cnt, t );
+		}
+		cnt = 2 * cnt;
+	    }
+	}
+    }
+
     MPI_Win_free( &win );
+
+    MPI_Group_free( &accessGroup );
+    MPI_Group_free( &exposureGroup );
     
     MPI_Finalize();
     return 0;
 }
+
+
+/* Time cnt MPI_Accumulate (MPI_SUM) calls of sz ints each to destRank,
+   bracketed by MPI_Win_fence calls, repeated MAX_RUNS times.
+   win      - window the accumulates target
+   destRank - target rank
+   cnt      - number of accumulate calls per run
+   sz       - number of ints per call; call i uses target displacement i*sz
+   t        - output, at least MAX_RUNS entries; t[k] receives MPI_Wtime
+              before the calls, after the calls, and after the closing
+              fence of run k */
+void RunAccFence( MPI_Win win, int destRank, int cnt, int sz, timing t[] )
+{
+    int k, i, j;
+    int *origin;
+
+    /* Each call reads sz ints from the origin buffer, so a single int is
+       only big enough when sz == 1; use a buffer holding sz ones instead.
+       (At least one element is requested so that sz == 0 cannot be
+       mistaken for an allocation failure.) */
+    origin = (int *)malloc( (sz > 0 ? sz : 1) * sizeof(int) );
+    if (!origin) {
+	fprintf( stderr, "Unable to allocate %d words\n", sz );
+	MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+    for (i=0; i<sz; i++) origin[i] = 1;
+
+    for (k=0; k<MAX_RUNS; k++) {
+	MPI_Win_fence( 0, win );
+	MPI_Barrier( MPI_COMM_WORLD );
+	j = 0;
+	t[k].startOp = MPI_Wtime();
+	for (i=0; i<cnt; i++) {
+	    MPI_Accumulate( origin, sz, MPI_INT, destRank, 
+			    j, sz, MPI_INT, MPI_SUM, win );
+	    j += sz;
+	}
+	t[k].endOp = MPI_Wtime();
+	MPI_Win_fence( 0, win );
+	t[k].endSync = MPI_Wtime();
+    }
+
+    /* Every run was closed by a fence, so the buffer is no longer in use */
+    free( origin );
+}
+
+/* Time cnt MPI_Accumulate (MPI_SUM) calls of sz ints each to destRank
+   inside an MPI_Win_lock(MPI_LOCK_SHARED)/MPI_Win_unlock pair, repeated
+   MAX_RUNS times.
+   win      - window the accumulates target
+   destRank - target rank (also the rank that is locked)
+   cnt      - number of accumulate calls per run
+   sz       - number of ints per call; call i uses target displacement i*sz
+   t        - output, at least MAX_RUNS entries; t[k] receives MPI_Wtime
+              before the calls, after the calls, and after the unlock of
+              run k */
+void RunAccLock( MPI_Win win, int destRank, int cnt, int sz, timing t[] )
+{
+    int k, i, j;
+    int *origin;
+
+    /* Each call reads sz ints from the origin buffer, so a single int is
+       only big enough when sz == 1; use a buffer holding sz ones instead.
+       (At least one element is requested so that sz == 0 cannot be
+       mistaken for an allocation failure.) */
+    origin = (int *)malloc( (sz > 0 ? sz : 1) * sizeof(int) );
+    if (!origin) {
+	fprintf( stderr, "Unable to allocate %d words\n", sz );
+	MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+    for (i=0; i<sz; i++) origin[i] = 1;
+
+    for (k=0; k<MAX_RUNS; k++) {
+	MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
+	MPI_Barrier( MPI_COMM_WORLD );
+	j = 0;
+	t[k].startOp = MPI_Wtime();
+	for (i=0; i<cnt; i++) {
+	    MPI_Accumulate( origin, sz, MPI_INT, destRank, 
+			    j, sz, MPI_INT, MPI_SUM, win );
+	    j += sz;
+	}
+	t[k].endOp = MPI_Wtime();
+	MPI_Win_unlock( destRank, win );
+	t[k].endSync = MPI_Wtime();
+    }
+
+    /* Every run was closed by an unlock, so the buffer is no longer in use */
+    free( origin );
+}
+
+/* Time cnt MPI_Put calls of sz ints each to destRank, bracketed by
+   MPI_Win_fence calls, repeated MAX_RUNS times.
+   win      - window the puts target
+   destRank - target rank
+   cnt      - number of put calls per run
+   sz       - number of ints per call; call i uses target displacement i*sz
+   t        - output, at least MAX_RUNS entries; t[k] receives MPI_Wtime
+              before the calls, after the calls, and after the closing
+              fence of run k */
+void RunPutFence( MPI_Win win, int destRank, int cnt, int sz, timing t[] )
+{
+    int k, i, j;
+    int *origin;
+
+    /* Each call reads sz ints from the origin buffer, so a single int is
+       only big enough when sz == 1; use a buffer holding sz ones instead.
+       (At least one element is requested so that sz == 0 cannot be
+       mistaken for an allocation failure.) */
+    origin = (int *)malloc( (sz > 0 ? sz : 1) * sizeof(int) );
+    if (!origin) {
+	fprintf( stderr, "Unable to allocate %d words\n", sz );
+	MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+    for (i=0; i<sz; i++) origin[i] = 1;
+
+    for (k=0; k<MAX_RUNS; k++) {
+	MPI_Win_fence( 0, win );
+	MPI_Barrier( MPI_COMM_WORLD );
+	j = 0;
+	t[k].startOp = MPI_Wtime();
+	for (i=0; i<cnt; i++) {
+	    MPI_Put( origin, sz, MPI_INT, destRank, 
+			    j, sz, MPI_INT, win );
+	    j += sz;
+	}
+	t[k].endOp = MPI_Wtime();
+	MPI_Win_fence( 0, win );
+	t[k].endSync = MPI_Wtime();
+    }
+
+    /* Every run was closed by a fence, so the buffer is no longer in use */
+    free( origin );
+}
+
+/* Time cnt MPI_Put calls of sz ints each to destRank inside an
+   MPI_Win_lock(MPI_LOCK_SHARED)/MPI_Win_unlock pair, repeated MAX_RUNS
+   times.
+   win      - window the puts target
+   destRank - target rank (also the rank that is locked)
+   cnt      - number of put calls per run
+   sz       - number of ints per call; call i uses target displacement i*sz
+   t        - output, at least MAX_RUNS entries; t[k] receives MPI_Wtime
+              before the calls, after the calls, and after the unlock of
+              run k */
+void RunPutLock( MPI_Win win, int destRank, int cnt, int sz, timing t[] )
+{
+    int k, i, j;
+    int *origin;
+
+    /* Each call reads sz ints from the origin buffer, so a single int is
+       only big enough when sz == 1; use a buffer holding sz ones instead.
+       (At least one element is requested so that sz == 0 cannot be
+       mistaken for an allocation failure.) */
+    origin = (int *)malloc( (sz > 0 ? sz : 1) * sizeof(int) );
+    if (!origin) {
+	fprintf( stderr, "Unable to allocate %d words\n", sz );
+	MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+    for (i=0; i<sz; i++) origin[i] = 1;
+
+    for (k=0; k<MAX_RUNS; k++) {
+	MPI_Win_lock( MPI_LOCK_SHARED, destRank, 0, win );
+	MPI_Barrier( MPI_COMM_WORLD );
+	j = 0;
+	t[k].startOp = MPI_Wtime();
+	for (i=0; i<cnt; i++) {
+	    MPI_Put( origin, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
+	    j += sz;
+	}
+	t[k].endOp = MPI_Wtime();
+	MPI_Win_unlock( destRank, win );
+	t[k].endSync = MPI_Wtime();
+    }
+
+    /* Every run was closed by an unlock, so the buffer is no longer in use */
+    free( origin );
+}
+
+/* Time cnt MPI_Put calls of sz ints each to destRank using
+   post/start/complete/wait synchronization, repeated MAX_RUNS times.
+   win           - window the puts target
+   destRank      - target rank
+   cnt           - number of put calls per run
+   sz            - number of ints per call; call i uses target
+                   displacement i*sz
+   exposureGroup - group passed to MPI_Win_post
+   accessGroup   - group passed to MPI_Win_start
+   t             - output, at least MAX_RUNS entries; t[k] receives
+                   MPI_Wtime before the calls, after the calls, and after
+                   MPI_Win_complete + MPI_Win_wait of run k */
+void RunPutPSCW( MPI_Win win, int destRank, int cnt, int sz, 
+		 MPI_Group exposureGroup, MPI_Group accessGroup, timing t[] )
+{
+    int k, i, j;
+    int *origin;
+
+    /* Each call reads sz ints from the origin buffer, so a single int is
+       only big enough when sz == 1; use a buffer holding sz ones instead.
+       (At least one element is requested so that sz == 0 cannot be
+       mistaken for an allocation failure.) */
+    origin = (int *)malloc( (sz > 0 ? sz : 1) * sizeof(int) );
+    if (!origin) {
+	fprintf( stderr, "Unable to allocate %d words\n", sz );
+	MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+    for (i=0; i<sz; i++) origin[i] = 1;
+
+    for (k=0; k<MAX_RUNS; k++) {
+	MPI_Win_post( exposureGroup, 0, win );
+	MPI_Win_start( accessGroup, 0, win );
+	MPI_Barrier( MPI_COMM_WORLD );
+	j = 0;
+	t[k].startOp = MPI_Wtime();
+	for (i=0; i<cnt; i++) {
+	    MPI_Put( origin, sz, MPI_INT, destRank, j, sz, MPI_INT, win );
+	    j += sz;
+	}
+	t[k].endOp = MPI_Wtime();
+	MPI_Win_complete( win );
+	MPI_Win_wait( win );
+	t[k].endSync = MPI_Wtime();
+    }
+
+    /* Every access epoch was closed by MPI_Win_complete, so the buffer is
+       no longer in use */
+    free( origin );
+}
+
+/* Time cnt MPI_Accumulate (MPI_SUM) calls of sz ints each to destRank
+   using post/start/complete/wait synchronization, repeated MAX_RUNS times.
+   win           - window the accumulates target
+   destRank      - target rank
+   cnt           - number of accumulate calls per run
+   sz            - number of ints per call; call i uses target
+                   displacement i*sz
+   exposureGroup - group passed to MPI_Win_post
+   accessGroup   - group passed to MPI_Win_start
+   t             - output, at least MAX_RUNS entries; t[k] receives
+                   MPI_Wtime before the calls, after the calls, and after
+                   MPI_Win_complete + MPI_Win_wait of run k */
+void RunAccPSCW( MPI_Win win, int destRank, int cnt, int sz, 
+		 MPI_Group exposureGroup, MPI_Group accessGroup, timing t[] )
+{
+    int k, i, j;
+    int *origin;
+
+    /* Each call reads sz ints from the origin buffer, so a single int is
+       only big enough when sz == 1; use a buffer holding sz ones instead.
+       (At least one element is requested so that sz == 0 cannot be
+       mistaken for an allocation failure.) */
+    origin = (int *)malloc( (sz > 0 ? sz : 1) * sizeof(int) );
+    if (!origin) {
+	fprintf( stderr, "Unable to allocate %d words\n", sz );
+	MPI_Abort( MPI_COMM_WORLD, 1 );
+    }
+    for (i=0; i<sz; i++) origin[i] = 1;
+
+    for (k=0; k<MAX_RUNS; k++) {
+	MPI_Win_post( exposureGroup, 0, win );
+	MPI_Win_start( accessGroup, 0, win );
+	MPI_Barrier( MPI_COMM_WORLD );
+	j = 0;
+	t[k].startOp = MPI_Wtime();
+	for (i=0; i<cnt; i++) {
+	    MPI_Accumulate( origin, sz, MPI_INT, destRank, 
+			    j, sz, MPI_INT, MPI_SUM, win );
+	    j += sz;
+	}
+	t[k].endOp = MPI_Wtime();
+	MPI_Win_complete( win );
+	MPI_Win_wait( win );
+	t[k].endSync = MPI_Wtime();
+    }
+
+    /* Every access epoch was closed by MPI_Win_complete, so the buffer is
+       no longer in use */
+    free( origin );
+}
+
+/* Print one tab-separated result line: cnt, the smallest operation time
+   and the smallest synchronization time over the MAX_RUNS entries of t,
+   those two times divided by cnt, and cnt divided by the smallest
+   synchronization time (0 when that time is not positive).
+   Nothing is printed unless verbose is nonzero. */
+void PrintResults( int cnt, timing t[] )
+{
+    int    run;
+    long   rate = 0;
+    double bestOp = 1e10, bestSync = 1e10;
+
+    if (!verbose) return;
+
+    /* Report minimum times because they are more stable than averages */
+    for (run=0; run<MAX_RUNS; run++) {
+	double opTime   = t[run].endOp - t[run].startOp;
+	double syncTime = t[run].endSync - t[run].endOp;
+	if (opTime < bestOp)     bestOp = opTime;
+	if (syncTime < bestSync) bestSync = syncTime;
+    }
+
+    if (bestSync > 0) rate = (long)(cnt) / bestSync;
+    printf( "%d\t%e\t%e\t%e\t%e\t%ld\n", cnt, 
+	    bestOp, bestSync, bestOp / cnt, bestSync / cnt, rate );
+}


Property changes on: mpich2/branches/release/mpich2-1.3.x/winconfigure.wsf
___________________________________________________________________
Modified: svn:mergeinfo
   - /mpich2/branches/dev/ckpt/winconfigure.wsf:5050
/mpich2/branches/dev/ckpt2/winconfigure.wsf:5057-6537
/mpich2/branches/dev/ftb/winconfigure.wsf:5661-5730
/mpich2/branches/dev/lapi/winconfigure.wsf:5817
/mpich2/branches/dev/win_rrvm/winconfigure.wsf:6404,6407-6408,6420,6422-6423
/mpich2/branches/dev/wintcp_async_progress/winconfigure.wsf:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/winconfigure.wsf:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/winconfigure.wsf:5406
   + /mpich2/branches/dev/ckpt/winconfigure.wsf:5050
/mpich2/branches/dev/ckpt2/winconfigure.wsf:5057-6537
/mpich2/branches/dev/ftb/winconfigure.wsf:5661-5730
/mpich2/branches/dev/lapi/winconfigure.wsf:5817
/mpich2/branches/dev/win_rrvm/winconfigure.wsf:6404,6407-6408,6420,6422-6423
/mpich2/branches/dev/wintcp_async_progress/winconfigure.wsf:5008-5009,5123,5555-5559,5561-5564,5566-5567,5570,5577-5581,5613-5616,5619
/mpich2/branches/release/mpich2-1.1.1/winconfigure.wsf:5022,5032,5110,5113,5140-5141
/mpich2/branches/release/mpich2-1.2/winconfigure.wsf:5406
/mpich2/trunk/winconfigure.wsf:7355-7359,7366-7367,7371-7402,7406-7409,7411-7416



More information about the mpich2-commits mailing list