[mpich2-commits] r4037 - mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module

jayesh at mcs.anl.gov jayesh at mcs.anl.gov
Thu Mar 12 15:33:40 CDT 2009


Author: jayesh
Date: 2009-03-12 15:33:40 -0500 (Thu, 12 Mar 2009)
New Revision: 4037

Added:
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_finalize.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_impl.h
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_init.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_lmt.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_poll.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_queue.h
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_send.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_utility.c
Removed:
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_finalize.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_impl.h
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_init.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_lmt.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_poll.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_queue.h
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_send.c
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_utility.c
Modified:
   mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/socksm.c
Log:
This change breaks the MPICH2 Windows compilation - renaming files to follow the convention used in other channels - removing 'module' from the file names

Modified: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/socksm.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/socksm.c	2009-03-12 20:06:40 UTC (rev 4036)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/socksm.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -6,7 +6,7 @@
 
 #define SOCKSM_H_DEFGLOBALS_
 
-#include "wintcp_module_impl.h"
+#include "wintcp_impl.h"
 #include "socksm.h"
 
 /* FIXME trace/log all the state transitions */

Copied: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_finalize.c (from rev 4035, mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_finalize.c)
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_finalize.c	                        (rev 0)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_finalize.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -0,0 +1,54 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "wintcp_impl.h"
+
+extern sockconn_t MPID_nem_newtcp_module_g_lstn_sc;
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_finalize
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/*
+ * MPID_nem_newtcp_module_finalize - shut down the network module.
+ *
+ * Finalizes the send, poll and state-machine parts of the module (in that
+ * order), closes the listening socket if its descriptor is valid, and then
+ * finalizes the socket wrapper layer.
+ *
+ * Returns MPI_SUCCESS, or the error reported by the first step that fails;
+ * the remaining steps are not attempted after a failure.
+ */
+int MPID_nem_newtcp_module_finalize(void)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    /* FIXME: Shouldn't the order of finalize() be the reverse order of init() ?
+     * i.e., *poll_finalize(); *send_finalize(); *sm_finalize();
+     */
+    mpi_errno = MPID_nem_newtcp_module_send_finalize();
+    if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+
+    mpi_errno = MPID_nem_newtcp_module_poll_finalize();
+    if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+
+    mpi_errno = MPID_nem_newtcp_module_sm_finalize();
+    if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+
+    /* Only close the listener if it was actually opened */
+    if(MPIU_SOCKW_Sockfd_is_valid(MPID_nem_newtcp_module_g_lstn_sc.fd))
+    {
+        MPIU_OSW_RETRYON_INTR((mpi_errno != MPI_SUCCESS), (mpi_errno = MPIU_SOCKW_Sock_close(MPID_nem_newtcp_module_g_lstn_sc.fd)));
+        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+    }
+
+    mpi_errno = MPIU_SOCKW_Finalize();
+    if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_ckpt_shutdown
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/*
+ * MPID_nem_newtcp_module_ckpt_shutdown - checkpoint shutdown hook.
+ *
+ * Currently identical to a full finalize: it simply forwards to
+ * MPID_nem_newtcp_module_finalize() and returns its result.
+ */
+int MPID_nem_newtcp_module_ckpt_shutdown(void)
+{
+    return MPID_nem_newtcp_module_finalize();
+}
+

Copied: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_impl.h (from rev 4035, mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_impl.h)
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_impl.h	                        (rev 0)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_impl.h	2009-03-12 20:33:40 UTC (rev 4037)
@@ -0,0 +1,163 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#ifndef NEWTCP_MODULE_IMPL_H
+#define NEWTCP_MODULE_IMPL_H
+
+#include "mpid_nem_impl.h"
+/* #include "newtcp_module.h" */
+#ifdef HAVE_SYS_TYPES_H
+    #include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_SOCKET_H
+    #include <sys/socket.h>
+#endif
+#ifdef HAVE_ERRNO_H
+    #include <errno.h>
+#endif
+#ifdef HAVE_NETINET_IN_H
+    #include <netinet/in.h>
+#endif
+#ifdef HAVE_NETINET_TCP_H
+    #include <netinet/tcp.h>
+#endif
+#ifdef HAVE_NETDB_H
+    #include <netdb.h>
+#endif
+#include "socksm.h"
+
+/* globals */
+/* The three queue pointers below are defined in wintcp_init.c and are set up
+   by MPID_nem_newtcp_module_init(). */
+extern MPID_nem_queue_ptr_t MPID_nem_newtcp_module_free_queue;
+extern MPID_nem_queue_ptr_t MPID_nem_process_recv_queue;
+extern MPID_nem_queue_ptr_t MPID_nem_process_free_queue;
+/* NOTE(review): init/finalize use MPID_nem_newtcp_module_g_lstn_sc.fd as the
+   listener descriptor; confirm this variable is still defined and needed. */
+extern int MPID_nem_newtcp_module_listen_fd;
+
+/* Values stored in the channel-private VC state; vc_init starts every VC in
+   the DISCONNECTED state. */
+#define MPID_NEM_NEWTCP_MODULE_VC_STATE_DISCONNECTED 0
+#define MPID_NEM_NEWTCP_MODULE_VC_STATE_CONNECTED 1
+
+/* Receive buffer and size limits.  NOTE(review): the definition of the buffer
+   and the users of these sizes live in other files; presumably the *BUF_SZ
+   values are socket buffer sizes in bytes -- confirm against set_sockopts. */
+extern char *MPID_nem_newtcp_module_recv_buf;
+#define MPID_NEM_NEWTCP_MODULE_RECV_MAX_PKT_LEN 1024
+#define MPID_NEM_NEWTCP_MODULE_RCVBUF_SZ    (128*1024)
+#define MPID_NEM_NEWTCP_MODULE_SNDBUF_SZ    (128*1024)
+
+/* The vc provides a generic buffer in which network modules can store
+   private fields.  This removes all dependencies from the VC structure
+   on the network module, facilitating dynamic module loading.
+   MPID_nem_newtcp_module_init() asserts that this structure fits into
+   MPID_NEM_VC_NETMOD_AREA_LEN bytes. */
+typedef struct 
+{
+    /* Peer address: vc_init zeroes it and sets the family to AF_INET;
+       connect_to_root fills in port and address from the business card */
+    struct sockaddr_in sock_id;
+    /* Socket connection assigned to this VC (see ASSIGN_SC_TO_VC); NULL
+       after vc_init.
+       NOTE(review): the tag is spelled "new_tcp" here but "newtcp" everywhere
+       else -- confirm it matches the struct tag declared in socksm.h */
+    struct MPID_nem_new_tcp_module_sockconn *sc;
+    /* Queue of requests for this VC, linked from head to tail; empty
+       (both NULL) after vc_init */
+    struct
+    {
+        struct MPID_Request *head;
+        struct MPID_Request *tail;
+    } send_queue;
+    /* this is a count of how many sc objects refer to this vc */
+    int sc_ref_count;
+} MPID_nem_newtcp_module_vc_area;
+
+/* accessor macro to private fields in VC: interprets vc->channel_private as
+   an MPIDI_CH3I_VC, views its netmod_area.padding bytes as an
+   MPID_nem_newtcp_module_vc_area and selects `field'.  The result is an
+   lvalue, so it can be assigned to (as ASSIGN_SC_TO_VC does). */
+#define VC_FIELD(vc, field) (((MPID_nem_newtcp_module_vc_area *)((MPIDI_CH3I_VC *)(vc)->channel_private)->netmod_area.padding)->field)
+
+/* Record sc_ as the socket connection of vc_.  Wrapped in do/while(0) so it
+   behaves as a single statement. */
+#define ASSIGN_SC_TO_VC(vc_, sc_) do {      \
+        VC_FIELD((vc_), sc) = (sc_);        \
+    } while (0)
+
+/* functions */
+/* Network module entry points: these ten functions are the ones installed,
+   in this order, in the MPIDI_nem_newtcp_module_funcs table defined in
+   wintcp_init.c. */
+int MPID_nem_newtcp_module_init (MPID_nem_queue_ptr_t proc_recv_queue, 
+                                 MPID_nem_queue_ptr_t proc_free_queue, 
+                                 MPID_nem_cell_ptr_t proc_elements,   int num_proc_elements,
+                                 MPID_nem_cell_ptr_t module_elements, int num_module_elements, 
+                                 MPID_nem_queue_ptr_t *module_free_queue, int ckpt_restart,
+                                 MPIDI_PG_t *pg_p, int pg_rank,
+                                 char **bc_val_p, int *val_max_sz_p);
+int MPID_nem_newtcp_module_finalize (void);
+int MPID_nem_newtcp_module_ckpt_shutdown (void);
+int MPID_nem_newtcp_module_poll (MPID_nem_poll_dir_t in_or_out);
+int MPID_nem_newtcp_module_send (MPIDI_VC_t *vc, MPID_nem_cell_ptr_t cell, int datalen);
+int MPID_nem_newtcp_module_get_business_card (int my_rank, char **bc_val_p, int *val_max_sz_p);
+int MPID_nem_newtcp_module_connect_to_root (const char *business_card, MPIDI_VC_t *new_vc);
+int MPID_nem_newtcp_module_vc_init (MPIDI_VC_t *vc);
+int MPID_nem_newtcp_module_vc_destroy(MPIDI_VC_t *vc);
+int MPID_nem_newtcp_module_vc_terminate (MPIDI_VC_t *vc);
+
+/* completion counter is atomically decremented when operation completes */
+int MPID_nem_newtcp_module_get (void *target_p, void *source_p, int source_node, int len, int *completion_ctr);
+int MPID_nem_newtcp_module_put (void *target_p, int target_node, void *source_p, int len, int *completion_ctr);
+
+/* Helpers shared between the source files of this module (init/finalize of
+   the send, poll and state-machine parts, socket set-up, connection
+   management).
+   NOTE(review): MPID_nem_newtcp_module_get_addr_port_from_bc() is defined in
+   wintcp_init.c and called there before its definition, but it is not
+   declared in this header -- confirm a prototype is visible elsewhere
+   (e.g. in socksm.h). */
+int MPID_nem_newtcp_module_send_init (void);
+int MPID_nem_newtcp_module_send_queued (MPIDI_VC_t *vc);
+int MPID_nem_newtcp_module_poll_init (void);
+int MPID_nem_newtcp_module_connect (struct MPIDI_VC *const vc);
+int MPID_nem_newtcp_module_conn_wr_enable (struct MPIDI_VC *const vc);
+int MPID_nem_newtcp_module_conn_wr_disable (struct MPIDI_VC *const vc);
+int MPID_nem_newtcp_module_connpoll (void);
+int MPID_nem_newtcp_module_sm_init (void);
+int MPID_nem_newtcp_module_sm_finalize (void);
+int MPID_nem_newtcp_module_set_sockopts (int fd);
+MPID_NEM_NEWTCP_MODULE_SOCK_STATUS_t MPID_nem_newtcp_module_check_sock_status(MPIU_SOCKW_Waitset_sock_hnd_t fd_ws_hnd);
+int MPID_nem_newtcp_module_poll_finalize (void);
+int MPID_nem_newtcp_module_send_finalize (void);
+int MPID_nem_newtcp_module_bind (int sockfd);
+int MPID_nem_newtcp_module_recv_handler (MPIU_SOCKW_Waitset_sock_hnd_t fd_ws_hnd, sockconn_t *sc);
+int MPID_nem_newtcp_module_conn_est (MPIDI_VC_t *vc);
+int MPID_nem_newtcp_module_get_conninfo (struct MPIDI_VC *vc, struct sockaddr_in *addr, char **pg_id, int *pg_rank);
+int MPID_nem_newtcp_module_get_vc_from_conninfo (char *pg_id, int pg_rank, struct MPIDI_VC **vc);
+int MPID_nem_newtcp_module_is_sock_connected(int fd);
+int MPID_nem_newtcp_module_disconnect (struct MPIDI_VC *const vc);
+int MPID_nem_newtcp_module_cleanup (struct MPIDI_VC *const vc);
+int MPID_nem_newtcp_module_state_listening_handler(MPIU_SOCKW_Waitset_sock_hnd_t fd_ws_hnd, sockconn_t *const l_sc);
+
+/* Send routines that MPID_nem_newtcp_module_vc_init() installs into each VC */
+int MPID_nem_newtcp_iSendContig(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz);
+int MPID_nem_newtcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz,
+                                    MPID_Request **sreq_ptr);
+int MPID_nem_newtcp_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *header, MPIDI_msg_sz_t hdr_sz);
+
+/* Macros */
+
+/* system call wrapper -- This retries the syscall each time it is interrupted.
+   Example usage:  instead of writing "ret = write(fd, buf, len);"
+   use: "CHECK_EINTR(ret, write(fd, buf, len));"
+ Caution:
+ (1) Some system calls have value-result parameters. Those system calls
+ should not be used within the CHECK_EINTR macro, or should be used with CARE.
+ For example, the last parameter of accept (addrlen) is a value-result one. So,
+ even if the system call is interrupted, addrlen must be re-initialized to the
+ appropriate value before calling it again.
+
+ (2) connect should not be called within a loop. If the connect is interrupted
+ after the TCP handshake has been initiated, calling connect again will only
+ fail. Instead, select/poll should be called to check the status of the socket.
+ It is unclear what happens if a connect is interrupted even before the system
+ call tries to initiate the TCP handshake; no book or manual seems to explain
+ this scenario.
+*/
+/* CHECK_EINTR is now deprecated. Use MPIU_OSW_RETRYON_INTR() (the macro used
+   in wintcp_finalize.c) instead. */
+#define CHECK_EINTR(var, func) do {             \
+        (var) = (func);                         \
+    } while ((var) == -1 && errno == EINTR)
+
+/* Send queue macros: thin aliases for the GENERIC_Q_* macros (defined
+   elsewhere) that fix the link member name to `next' */
+#define Q_EMPTY(q) GENERIC_Q_EMPTY (q)
+#define Q_HEAD(q) GENERIC_Q_HEAD (q)
+#define Q_ENQUEUE_EMPTY(qp, ep) GENERIC_Q_ENQUEUE_EMPTY (qp, ep, next)
+#define Q_ENQUEUE(qp, ep) GENERIC_Q_ENQUEUE (qp, ep, next)
+#define Q_ENQUEUE_EMPTY_MULTIPLE(qp, ep0, ep1) GENERIC_Q_ENQUEUE_EMPTY_MULTIPLE (qp, ep0, ep1, next)
+#define Q_ENQUEUE_MULTIPLE(qp, ep0, ep1) GENERIC_Q_ENQUEUE_MULTIPLE (qp, ep0, ep1, next)
+#define Q_DEQUEUE(qp, ep) GENERIC_Q_DEQUEUE (qp, ep, next)
+#define Q_REMOVE_ELEMENTS(qp, ep0, ep1) GENERIC_Q_REMOVE_ELEMENTS (qp, ep0, ep1, next)
+
+/* VC list macros: aliases for the GENERIC_L_* macros */
+#define VC_L_EMPTY(q) GENERIC_L_EMPTY (q)
+#define VC_L_HEAD(q) GENERIC_L_HEAD (q)
+
+/* stack macros: aliases for the GENERIC_S_* macros, again linking elements
+   through their `next' member */
+#define S_EMPTY(s) GENERIC_S_EMPTY (s)
+#define S_TOP(s) GENERIC_S_TOP (s)
+#define S_PUSH(sp, ep) GENERIC_S_PUSH (sp, ep, next)
+#define S_PUSH_MULTIPLE(sp, ep0, ep1) GENERIC_S_PUSH_MULTIPLE (sp, ep0, ep1, next)
+#define S_POP(sp, ep) GENERIC_S_POP (sp, ep, next)
+
+#endif /* NEWTCP_MODULE_IMPL_H */

Copied: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_init.c (from rev 4035, mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_init.c)
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_init.c	                        (rev 0)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_init.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -0,0 +1,699 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "wintcp_impl.h"
+#ifdef HAVE_SYS_TYPES_H
+    #include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_SOCKET_H
+    #include <sys/socket.h>
+#endif
+#ifdef HAVE_ARPA_INET_H
+    #include <arpa/inet.h>
+#endif
+
+/*S
+  MPIDU_Sock_ifaddr_t - Structure to hold an Internet address.
+
++ len - Length of the address.  4 for IPv4, 16 for IPv6.  A value of 0 is
+        used in this file to mean "no address found".
+. type - Address family of the address (e.g., AF_INET).  Only AF_INET
+         addresses are published in the business card.
+- ifaddr - Address bytes (as bytes, not characters)
+
+S*/
+typedef struct MPIDU_Sock_ifaddr_t {
+    int len, type;
+    unsigned char ifaddr[16];
+} MPIDU_Sock_ifaddr_t;
+
+
+/* Queue pointers published through wintcp_impl.h; all three are filled in by
+   MPID_nem_newtcp_module_init() */
+MPID_nem_queue_ptr_t MPID_nem_newtcp_module_free_queue = 0;
+MPID_nem_queue_ptr_t MPID_nem_process_recv_queue = 0;
+MPID_nem_queue_ptr_t MPID_nem_process_free_queue = 0;
+/* Listening socket connection, defined in another file; its fd is opened in
+   MPID_nem_newtcp_module_init() */
+extern sockconn_t MPID_nem_newtcp_module_g_lstn_sc;
+extern pollfd_t g_lstn_plfd;
+
+/* Storage behind MPID_nem_newtcp_module_free_queue */
+static MPID_nem_queue_t _free_queue;
+
+/* Set to non-zero to make GetSockInterfaceAddr() print which environment
+   variable supplied the interface name */
+static int dbg_ifname = 0;
+
+/* NOTE(review): no function with this exact name is defined in the visible
+   part of this file; the function defined below is called
+   MPID_nem_newtcp_module_get_addr_port_from_bc -- this prototype looks stale,
+   confirm and remove or rename it. */
+static int get_addr_port_from_bc (const char *business_card, struct in_addr *addr, in_port_t *port);
+static int GetIPInterface( MPIDU_Sock_ifaddr_t *, int * );
+
+/* Function table through which this network module is reached.  The
+   initializer is positional, so the order of the entries must match the
+   member order of MPID_nem_netmod_funcs_t (declared elsewhere). */
+MPID_nem_netmod_funcs_t MPIDI_nem_newtcp_module_funcs = {
+    MPID_nem_newtcp_module_init,
+    MPID_nem_newtcp_module_finalize,
+    MPID_nem_newtcp_module_ckpt_shutdown,
+    MPID_nem_newtcp_module_poll,
+    MPID_nem_newtcp_module_send,
+    MPID_nem_newtcp_module_get_business_card,
+    MPID_nem_newtcp_module_connect_to_root,
+    MPID_nem_newtcp_module_vc_init,
+    MPID_nem_newtcp_module_vc_destroy,
+    MPID_nem_newtcp_module_vc_terminate
+};
+
+/* Keys of the business card entries written by
+   MPID_nem_newtcp_module_get_business_card(): the listener port, the host
+   description (interface/host name) and the dotted-decimal IPv4 address */
+#define MPIDI_CH3I_PORT_KEY "port"
+#define MPIDI_CH3I_HOST_DESCRIPTION_KEY "description"
+#define MPIDI_CH3I_IFNAME_KEY "ifname"
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_init
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/*
+ * MPID_nem_newtcp_module_init - bring up the TCP network module.
+ *
+ * Opens, configures, binds and starts listening on the module's listener
+ * socket, appends this process' contact information to the business card
+ * (*bc_val_p / *val_max_sz_p), records the process receive and free queues,
+ * builds the module's own free queue from module_elements and returns it in
+ * *module_free_queue, and finally initializes the state-machine, send and
+ * poll parts of the module.
+ *
+ * proc_elements, num_proc_elements, ckpt_restart and pg_p are accepted for
+ * interface compatibility but are not used here.
+ *
+ * Returns MPI_SUCCESS or the error of the first step that fails.
+ */
+int MPID_nem_newtcp_module_init (MPID_nem_queue_ptr_t proc_recv_queue, MPID_nem_queue_ptr_t proc_free_queue,
+                                 MPID_nem_cell_ptr_t proc_elements, int num_proc_elements, MPID_nem_cell_ptr_t module_elements,
+                                 int num_module_elements, MPID_nem_queue_ptr_t *module_free_queue,
+                                 int ckpt_restart, MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int i;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_INIT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_INIT);
+
+    /* first make sure that our private fields in the vc fit into the area provided  */
+    MPIU_Assert(sizeof(MPID_nem_newtcp_module_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);
+
+    /* set up listener socket */
+    mpi_errno = MPIU_SOCKW_Init();
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    mpi_errno = MPIU_SOCKW_Sock_open(AF_INET, SOCK_STREAM, IPPROTO_TCP, &(MPID_nem_newtcp_module_g_lstn_sc.fd));
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    mpi_errno = MPID_nem_newtcp_module_set_sockopts(MPID_nem_newtcp_module_g_lstn_sc.fd);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    mpi_errno = MPID_nem_newtcp_module_bind (MPID_nem_newtcp_module_g_lstn_sc.fd);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    mpi_errno = MPIU_SOCKW_Listen(MPID_nem_newtcp_module_g_lstn_sc.fd, SOMAXCONN);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    MPID_nem_newtcp_module_g_lstn_sc.state.lstate = LISTEN_STATE_LISTENING;
+    MPID_nem_newtcp_module_g_lstn_sc.handler = MPID_nem_newtcp_module_state_listening_handler;
+
+    /* create business card (needs the bound listener to report its port) */
+    mpi_errno = MPID_nem_newtcp_module_get_business_card (pg_rank, bc_val_p, val_max_sz_p);
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    /* save references to queues */
+    MPID_nem_process_recv_queue = proc_recv_queue;
+    MPID_nem_process_free_queue = proc_free_queue;
+
+    MPID_nem_newtcp_module_free_queue = &_free_queue;
+
+    /* set up network module queues */
+    MPID_nem_queue_init (MPID_nem_newtcp_module_free_queue);
+
+    for (i = 0; i < num_module_elements; ++i)
+    {
+        MPID_nem_queue_enqueue (MPID_nem_newtcp_module_free_queue, &module_elements[i]);
+    }
+
+    *module_free_queue = MPID_nem_newtcp_module_free_queue;
+
+    /* FIXME: What happens on an error?  The failure path below does not
+       release anything acquired above (e.g. the listener socket). */
+
+    mpi_errno = MPID_nem_newtcp_module_sm_init();
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    mpi_errno = MPID_nem_newtcp_module_send_init();
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    mpi_errno = MPID_nem_newtcp_module_poll_init();
+    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_INIT);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Get a description of the network interface to use for socket communication
+ *
+ * Here are the steps.  This order of checks is used to provide the
+ * user control over the choice of interface and to avoid, where possible,
+ * the use of non-scalable services, such as centralized name servers.
+ *
+ * MPICH_INTERFACE_HOSTNAME
+ * MPICH_INTERFACE_HOSTNAME_R%d
+ * a single (non-localhost) available IP address, if possible
+ * gethostbyname(gethostname())
+ *
+ * We return the following items:
+ *
+ *    ifname - name of the interface.  This may or may not be the same
+ *             as the name returned by gethostname  (in Unix)
+ *    ifaddr - This structure includes the interface IP address (as bytes),
+ *             and the type (e.g., AF_INET or AF_INET6).  Only
+ *             ipv4 (AF_INET) is used so far.  ifaddr->len is 0 when no
+ *             address could be determined.
+ *
+ * The return value is MPI_SUCCESS unless the processor name had to be
+ * used and could not be obtained; failing to resolve an address is not an
+ * error (the caller simply gets ifaddr->len == 0).
+ */
+
+static int GetSockInterfaceAddr(int myRank, char *ifname, int maxIfname,
+                                MPIDU_Sock_ifaddr_t *ifaddr)
+{
+    char *ifname_string;
+    int mpi_errno = MPI_SUCCESS;
+    int ifaddrFound = 0;
+
+    /* Set "not found" for ifaddr */
+    ifaddr->len = 0;
+    ifaddr->type = -1;
+
+    /* Check for the name supplied through an environment variable */
+    ifname_string = getenv("MPICH_INTERFACE_HOSTNAME");
+    if (!ifname_string) {
+        /* See if there is a per-process name for the interfaces (e.g.,
+           the process manager only delivers the same values for the
+           environment to each process) */
+        char namebuf[1024];
+        MPIU_Snprintf( namebuf, sizeof(namebuf),
+                       "MPICH_INTERFACE_HOSTNAME_R%d", myRank );
+        ifname_string = getenv( namebuf );
+        if (dbg_ifname && ifname_string) {
+            fprintf( stdout, "Found interface name %s from %s\n",
+                     ifname_string, namebuf );
+            fflush( stdout );
+        }
+    }
+    else if (dbg_ifname) {
+        fprintf( stdout,
+                 "Found interface name %s from MPICH_INTERFACE_HOSTNAME\n",
+                 ifname_string );
+        fflush( stdout );
+    }
+
+    if (!ifname_string) {
+        int len;
+
+        /* If we have nothing, then use the host name.  Without a name the
+           buffer would be used uninitialized below, so give up here. */
+        mpi_errno = MPID_Get_processor_name(ifname, maxIfname, &len );
+        if (mpi_errno != MPI_SUCCESS) {
+            return mpi_errno;
+        }
+        ifname_string = ifname;
+
+        /* If we didn't find a specific name, then try to get an IP address
+           directly from the available interfaces, if that is supported on
+           this platform.  This is best effort: on failure we drop into the
+           next step that uses the ifname */
+        if (GetIPInterface( ifaddr, &ifaddrFound ) != 0) {
+            ifaddrFound = 0;
+            ifaddr->len = 0;
+        }
+    }
+    else {
+        /* Copy this name into the output name */
+        MPIU_Strncpy( ifname, ifname_string, maxIfname );
+    }
+
+    /* If we don't have an IP address, try to get it from the name */
+    if (!ifaddrFound) {
+        struct hostent *info;
+        info = gethostbyname( ifname_string );
+        if (info && info->h_addr_list && info->h_addr_list[0]) {
+            if (info->h_length < 0 ||
+                (size_t)info->h_length > sizeof(ifaddr->ifaddr)) {
+                /* If the address won't fit in the field, reset to
+                   no address */
+                ifaddr->len = 0;
+                ifaddr->type = -1;
+            }
+            else {
+                /* Use the primary address */
+                ifaddr->len  = info->h_length;
+                ifaddr->type = info->h_addrtype;
+                memcpy( ifaddr->ifaddr, info->h_addr_list[0],
+                        (size_t)ifaddr->len );
+            }
+        }
+    }
+
+    return MPI_SUCCESS;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_get_business_card
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/*
+ * MPID_nem_newtcp_module_get_business_card - append this process' contact
+ * information to the business card string.
+ *
+ * Adds, in this order: the host description, the port of the listener
+ * socket (as stored in sin_port) and, when an IPv4 address is known, that
+ * address in dotted-decimal form.  *bc_val_p and *val_max_sz_p are advanced
+ * by the MPIU_Str_add_* routines.
+ *
+ * Returns MPI_SUCCESS, or an MPI_ERR_OTHER error when the card is too small
+ * ("**buscard_len"), an entry cannot be added ("**buscard") or getsockname
+ * fails.
+ */
+int MPID_nem_newtcp_module_get_business_card (int my_rank, char **bc_val_p, int *val_max_sz_p)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int rc;
+    char host_desc[MAX_HOST_DESCRIPTION_LEN];
+    char dotted_quad[256];
+    const unsigned char *octet;
+    MPIDU_Sock_ifaddr_t if_addr;
+    struct sockaddr_in lstn_addr;
+    socklen_t lstn_addr_len;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_GET_BUSINESS_CARD);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_GET_BUSINESS_CARD);
+
+    mpi_errno = GetSockInterfaceAddr(my_rank, host_desc, sizeof(host_desc), &if_addr);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+    /* 1. host description */
+    mpi_errno = MPIU_Str_add_string_arg(bc_val_p, val_max_sz_p, MPIDI_CH3I_HOST_DESCRIPTION_KEY, host_desc);
+    if (mpi_errno == MPIU_STR_NOMEM)
+    {
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard_len");
+    }
+    if (mpi_errno != MPIU_STR_SUCCESS)
+    {
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
+    }
+
+    /* 2. port the listener socket is bound to */
+    lstn_addr_len = sizeof(lstn_addr);
+    rc = getsockname (MPID_nem_newtcp_module_g_lstn_sc.fd, (struct sockaddr *)&lstn_addr, &lstn_addr_len);
+    MPIU_ERR_CHKANDJUMP1 (rc == -1, mpi_errno, MPI_ERR_OTHER, "**getsockname", "**getsockname %s", strerror (errno));
+
+    mpi_errno = MPIU_Str_add_int_arg (bc_val_p, val_max_sz_p, MPIDI_CH3I_PORT_KEY, lstn_addr.sin_port);
+    if (mpi_errno == MPIU_STR_NOMEM)
+    {
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard_len");
+    }
+    if (mpi_errno != MPIU_STR_SUCCESS)
+    {
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
+    }
+
+    /* 3. IPv4 address, only when one was found */
+    if (if_addr.len <= 0 || if_addr.type != AF_INET)
+    {
+        goto fn_exit;
+    }
+
+    octet = (const unsigned char *)(if_addr.ifaddr);
+    MPIU_Snprintf( dotted_quad, sizeof(dotted_quad), "%u.%u.%u.%u", octet[0], octet[1], octet[2], octet[3] );
+    MPIU_DBG_MSG_S(CH3_CONNECT,VERBOSE,"ifname = %s",dotted_quad );
+
+    mpi_errno = MPIU_Str_add_string_arg(bc_val_p, val_max_sz_p, MPIDI_CH3I_IFNAME_KEY, dotted_quad);
+    if (mpi_errno == MPIU_STR_NOMEM)
+    {
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard_len");
+    }
+    if (mpi_errno != MPIU_STR_SUCCESS)
+    {
+        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
+    }
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_GET_BUSINESS_CARD);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_connect_to_root
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/*
+ * MPID_nem_newtcp_module_connect_to_root - start a connection to the process
+ * described by business_card, using the already allocated VC new_vc.
+ *
+ * The peer's address and port are parsed from the business card into the
+ * VC's private sock_id, the port name tag is extracted into
+ * new_vc->port_name_tag, and the connection is then initiated.
+ *
+ * Returns MPI_SUCCESS, or the error of the first step that fails (including
+ * a failure to initiate the connection).
+ */
+int MPID_nem_newtcp_module_connect_to_root (const char *business_card, MPIDI_VC_t *new_vc)
+{
+    int mpi_errno = MPI_SUCCESS;
+    struct in_addr addr;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONNECT_TO_ROOT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONNECT_TO_ROOT);
+
+    /* vc is already allocated before reaching this point */
+
+    mpi_errno = MPID_nem_newtcp_module_get_addr_port_from_bc(business_card, &addr, &(VC_FIELD(new_vc, sock_id).sin_port));
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    /* addr is only meaningful once parsing has succeeded */
+    VC_FIELD(new_vc, sock_id).sin_addr.s_addr = addr.s_addr;
+
+    mpi_errno = MPIDI_GetTagFromPort(business_card, &new_vc->port_name_tag);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+    mpi_errno = MPID_nem_newtcp_module_connect(new_vc);
+    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONNECT_TO_ROOT);
+    return mpi_errno;
+
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_vc_init
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/*
+ * MPID_nem_newtcp_module_vc_init - initialize the module's part of a VC.
+ *
+ * Marks the VC as disconnected, installs this module's send routines,
+ * clears the peer address (family AF_INET) and resets the module-private
+ * fields: no socket connection, reference count 0, empty send queue.
+ *
+ * Always returns MPI_SUCCESS.
+ */
+int MPID_nem_newtcp_module_vc_init (MPIDI_VC_t *vc)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_VC_INIT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_VC_INIT);
+
+    vc_ch->state = MPID_NEM_NEWTCP_MODULE_VC_STATE_DISCONNECTED;
+
+    /* send routines used for this VC */
+    vc->sendNoncontig_fn      = MPID_nem_newtcp_SendNoncontig;
+    vc_ch->iStartContigMsg    = MPID_nem_newtcp_iStartContigMsg;
+    vc_ch->iSendContig        = MPID_nem_newtcp_iSendContig;
+
+    /* peer address is filled in later; only the family is known here */
+    memset(&VC_FIELD(vc, sock_id), 0, sizeof(VC_FIELD(vc, sock_id)));
+    VC_FIELD(vc, sock_id).sin_family = AF_INET;
+
+    vc_ch->next = NULL;
+    vc_ch->prev = NULL;
+    VC_FIELD(vc, sc) = NULL;
+    VC_FIELD(vc, sc_ref_count) = 0;
+    VC_FIELD(vc, send_queue).head = VC_FIELD(vc, send_queue).tail = NULL;
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_VC_INIT);
+    return mpi_errno;
+ fn_fail:
+    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_vc_destroy
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/*
+ * MPID_nem_newtcp_module_vc_destroy - hook called when a VC is destroyed.
+ *
+ * Placeholder: no per-VC resources are released here at present, so the
+ * function does nothing and reports success.
+ */
+int MPID_nem_newtcp_module_vc_destroy(MPIDI_VC_t *vc)
+{
+    /* free any resources associated with this VC here */
+    return MPI_SUCCESS;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_get_addr_port_from_bc
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/*
+ * MPID_nem_newtcp_module_get_addr_port_from_bc - extract the peer's port and
+ * IPv4 address from a business card.
+ *
+ *   business_card - card produced by MPID_nem_newtcp_module_get_business_card
+ *   addr          - receives the address parsed from the "ifname" entry
+ *   port          - receives the value of the "port" entry (stored exactly
+ *                   as it was published, i.e. as taken from sin_port)
+ *
+ * Returns MPI_SUCCESS, or an error if either entry is missing, the port does
+ * not fit in an in_port_t, or the address string cannot be converted.
+ */
+int MPID_nem_newtcp_module_get_addr_port_from_bc (const char *business_card, struct in_addr *addr, in_port_t *port)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ret;
+    int port_int = 0;
+    char ifname[256];
+    MPIDI_STATE_DECL(MPID_STATE_NEWTCP_MODULE_GET_ADDR_PORT_FROM_BC);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_NEWTCP_MODULE_GET_ADDR_PORT_FROM_BC);
+
+    /* Parse into a real int: in_port_t is narrower than int, so letting the
+       parser write through (int *)port would overrun the caller's field */
+    ret = MPIU_Str_get_int_arg (business_card, MPIDI_CH3I_PORT_KEY, &port_int);
+    MPIU_ERR_CHKANDJUMP (ret != MPIU_STR_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**argstr_missingport");
+    MPIU_ERR_CHKANDJUMP (port_int < 0 || port_int > 0xffff, mpi_errno, MPI_ERR_OTHER, "**argstr_missingport");
+    *port = (in_port_t)port_int;
+
+    ret = MPIU_Str_get_string_arg(business_card, MPIDI_CH3I_IFNAME_KEY, ifname, sizeof(ifname));
+    MPIU_ERR_CHKANDJUMP (ret != MPIU_STR_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**argstr_missingifname");
+
+    /* The socket wrapper is used instead of inet_pton() for the conversion */
+    mpi_errno = MPIU_SOCKW_Inet_addr(ifname, &(addr->s_addr));
+    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_NEWTCP_MODULE_GET_ADDR_PORT_FROM_BC);
+    return mpi_errno;
+ fn_fail:
+    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
+    goto fn_exit;
+}
+
+/* MPID_nem_newtcp_module_bind -- if MPICH_PORT_RANGE is set, this
+   binds the socket to an available port number in the range.
+   Otherwise, it binds it to any addr and any port (the range defaults
+   to 0..0).  A negative lower bound, or a lower bound above the upper
+   bound, is rejected with "**badportrange".  Returns MPI_SUCCESS or
+   the error from the range check / the bind itself. */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_bind
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_module_bind (int sockfd)
+{
+    int mpi_errno = MPI_SUCCESS;
+    struct sockaddr_in sin;
+    int low_port = 0;
+    int high_port = 0;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_BIND);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_BIND);
+
+    /* low_port/high_port keep their 0 defaults when the variable is unset */
+    MPIU_GetEnvRange( "MPICH_PORT_RANGE", &low_port, &high_port );
+    MPIU_ERR_CHKANDJUMP (low_port < 0 || low_port > high_port, mpi_errno, MPI_ERR_OTHER, "**badportrange");
+
+    memset((void *)&sin, 0, sizeof(sin));
+    sin.sin_family      = AF_INET;
+    sin.sin_addr.s_addr = htonl(INADDR_ANY);
+
+    mpi_errno = MPIU_SOCKW_Bind_port_range(sockfd, &sin, low_port, high_port);
+    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_BIND);
+    return mpi_errno;
+ fn_fail:
+    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_vc_terminate
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/*
+ * MPID_nem_newtcp_module_vc_terminate - terminate a VC by handing it to
+ * MPID_nem_newtcp_module_cleanup() and returning that routine's result.
+ */
+int MPID_nem_newtcp_module_vc_terminate (MPIDI_VC_t *vc)
+{
+    int mpi_errno;
+    MPIDI_STATE_DECL(MPID_NEM_NEWTCP_MODULE_VC_TERMINATE);
+
+    MPIDI_FUNC_ENTER(MPID_NEM_NEWTCP_MODULE_VC_TERMINATE);
+
+    mpi_errno = MPID_nem_newtcp_module_cleanup(vc);
+    if (mpi_errno != MPI_SUCCESS)
+    {
+        MPIU_ERR_POP(mpi_errno);
+    }
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_NEM_NEWTCP_MODULE_VC_TERMINATE);
+    return mpi_errno;
+ fn_fail:
+    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
+    goto fn_exit;
+}
+
+
+/* These includes are here because they're used just for getting the interface
+ *   names
+ */
+
+
+#include <sys/types.h>
+
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_NET_IF_H
+#include <net/if.h>
+#endif
+#ifdef HAVE_SYS_SOCKIO_H
+/* Needed for SIOCGIFCONF */
+#include <sys/sockio.h>
+#endif
+
+#if defined(SIOCGIFCONF) && defined(HAVE_STRUCT_IFCONF)
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+/* We can only access the interfaces if we have a number of features.
+   Test for these, otherwise define this routine to return false in the
+   "found" variable */
+
+#define NUM_IFREQS 10
+
+/*
+ * GetIPInterface - look for a single usable IPv4 interface address.
+ *
+ * The interface list is obtained with ioctl(SIOCGIFCONF) and scanned for
+ * AF_INET entries.
+ *
+ *   ifaddr - (out) receives the selected address; written only when
+ *            *found is set to 1
+ *   found  - (out) 1 if exactly one non-localhost IPv4 address was seen, or
+ *            if none was seen but localhost (127.0.0.1) was; 0 otherwise
+ *            (including the case of several non-localhost addresses).
+ *            Left untouched when the routine fails.
+ *
+ * Returns 0 on success and 1 on failure (socket creation, allocation or
+ * ioctl error; a message is printed to stderr).  The query socket and the
+ * interface buffer are released on every path.
+ */
+static int GetIPInterface( MPIDU_Sock_ifaddr_t *ifaddr, int *found )
+{
+    char *buf_ptr = NULL, *ptr;
+    int buf_len, buf_len_prev;
+    int fd;
+    int result = 0;
+    MPIDU_Sock_ifaddr_t myifaddr;
+    int nfound = 0, foundLocalhost = 0;
+    /* We predefine the LSB and MSB localhost addresses */
+    unsigned int localhost = 0x0100007f;
+#ifdef WORDS_BIGENDIAN
+    unsigned int MSBlocalhost = 0x7f000001;
+#endif
+
+    fd = socket(AF_INET, SOCK_DGRAM, 0);
+    if (fd < 0) {
+        fprintf( stderr, "Unable to open an AF_INET socket\n" );
+        return 1;
+    }
+
+    /* Use MSB localhost if necessary */
+#ifdef WORDS_BIGENDIAN
+    localhost = MSBlocalhost;
+#endif
+
+    /*
+     * Obtain the interface information from the operating system
+     *
+     * Note: much of this code is borrowed from W. Richard Stevens' book
+     * entitled "UNIX Network Programming", Volume 1, Second Edition.  See
+     * section 16.6 for details.
+     *
+     * The buffer is grown by NUM_IFREQS entries per attempt until two
+     * consecutive successful calls report the same length, i.e. the
+     * result was not limited by the buffer size.
+     */
+    buf_len = NUM_IFREQS * sizeof(struct ifreq);
+    buf_len_prev = 0;
+
+    for(;;)
+    {
+        struct ifconf ifconf;
+        int rc;
+
+        buf_ptr = (char *) MPIU_Malloc(buf_len);
+        if (buf_ptr == NULL) {
+            fprintf( stderr, "Unable to allocate %d bytes\n", buf_len );
+            result = 1;
+            goto fn_exit;
+        }
+
+        ifconf.ifc_buf = buf_ptr;
+        ifconf.ifc_len = buf_len;
+
+        rc = ioctl(fd, SIOCGIFCONF, &ifconf);
+        if (rc < 0) {
+            /* EINVAL on the very first attempts is taken to mean "buffer
+               too small"; anything else is a real error */
+            if (errno != EINVAL || buf_len_prev != 0) {
+                /* fprintf may modify errno, so keep a copy for perror */
+                int saved_errno = errno;
+                fprintf( stderr, "Error from ioctl = %d\n", saved_errno );
+                errno = saved_errno;
+                perror(" Error is: ");
+                result = 1;
+                goto fn_exit;
+            }
+        }
+        else {
+            if (ifconf.ifc_len == buf_len_prev) {
+                buf_len = ifconf.ifc_len;
+                break;
+            }
+
+            buf_len_prev = ifconf.ifc_len;
+        }
+
+        MPIU_Free(buf_ptr);
+        buf_ptr = NULL;
+        buf_len += NUM_IFREQS * sizeof(struct ifreq);
+    }
+
+    /*
+     * Now that we've got the interface information, we need to run through
+     * the interfaces and check out the ip addresses.  If we find a
+     * unique, non-localhost (127.0.0.1) address, return that, otherwise
+     * return nothing.
+     */
+    ptr = buf_ptr;
+
+    while(ptr < buf_ptr + buf_len) {
+        struct ifreq *ifreq;
+
+        ifreq = (struct ifreq *) ptr;
+
+        if (dbg_ifname) {
+            fprintf( stdout, "%10s\t", ifreq->ifr_name );
+        }
+
+        if (ifreq->ifr_addr.sa_family == AF_INET) {
+            struct in_addr addr;
+
+            addr = ((struct sockaddr_in *) &(ifreq->ifr_addr))->sin_addr;
+            if (dbg_ifname) {
+                fprintf( stdout, "IPv4 address = %08x (%s)\n", addr.s_addr,
+                         inet_ntoa( addr ) );
+            }
+
+            if (addr.s_addr == localhost && dbg_ifname) {
+                fprintf( stdout, "Found local host\n" );
+            }
+            /* Save localhost if we find it.  Let any new interface
+               overwrite localhost.  However, if we find more than
+               one non-localhost interface, then we'll choose none for the
+               interfaces */
+            if (addr.s_addr == localhost) {
+                foundLocalhost = 1;
+                if (nfound == 0) {
+                    myifaddr.type = AF_INET;
+                    myifaddr.len  = 4;
+                    memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
+                }
+            }
+            else {
+                nfound++;
+                myifaddr.type = AF_INET;
+                myifaddr.len  = 4;
+                memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
+            }
+        }
+        else {
+            if (dbg_ifname) {
+                fprintf( stdout, "\n" );
+            }
+        }
+
+        /*
+         *  Increment pointer to the next ifreq; some adjustment may be
+         *  required if the address is an IPv6 address
+         */
+        /* This is needed for Mac OS X */
+#ifdef _SIZEOF_ADDR_IFREQ
+        ptr += _SIZEOF_ADDR_IFREQ(*ifreq);
+#else
+        ptr += sizeof(struct ifreq);
+
+#       if defined(AF_INET6)
+        {
+            if (ifreq->ifr_addr.sa_family == AF_INET6)
+            {
+                ptr += sizeof(struct sockaddr_in6) - sizeof(struct sockaddr);
+            }
+        }
+#       endif
+#endif
+    }
+
+    /* If we found a unique address, use that */
+    if (nfound == 1 || (nfound == 0 && foundLocalhost == 1)) {
+        *ifaddr = myifaddr;
+        *found  = 1;
+    }
+    else {
+        *found  = 0;
+    }
+
+ fn_exit:
+    /* Common cleanup for the success and failure paths */
+    if (buf_ptr != NULL) {
+        MPIU_Free(buf_ptr);
+    }
+    close(fd);
+
+    return result;
+}
+
+#else /* things needed to find the interfaces */
+
+/* In this case, just return false for interfaces found */
+/* Fallback used when SIOCGIFCONF / struct ifconf are not available:
+ * no interface lookup is attempted.
+ *
+ *   ifaddr - not written by this version
+ *   found  - (out) always set to 0
+ *
+ * Always returns 0.
+ */
+static int GetIPInterface( MPIDU_Sock_ifaddr_t *ifaddr, int *found )
+{
+    *found = 0;
+    return 0;
+}
+#endif

Copied: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_lmt.c (from rev 4035, mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_lmt.c)
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_lmt.c	                        (rev 0)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_lmt.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -0,0 +1,565 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "wintcp_impl.h"
+extern int h_errno;
+
+static int create_s_cookie (int data_sz, char **cookie, int *len);
+static int read_s_cookie (MPID_IOV cookie, int *data_sz);
+static int create_r_cookie (char *hostname, int port, int data_sz, char **cookie, int *len);
+static int read_r_cookie (MPID_IOV cookie, char **hostname, int *port, int *data_sz);
+static void free_cookie (void *c);
+static int set_sockopts (int fd);
+
+//#define TESTING_CHUNKING
+#ifdef TESTING_CHUNKING
+#define CHUNK 6299651//(32*1024)
+#endif
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_tcp_module_lmt_pre_send
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* Sender side, first step of a large-message transfer: build the send
+ * cookie and hand it back through *cookie.
+ *
+ *   vc     - connection the message goes over; the allocated cookie is
+ *            remembered in its channel-private net.tcp.lmt_cookie field
+ *   req    - send request; user_count and datatype determine the size
+ *   cookie - (out) IOV describing the cookie buffer and its length
+ *
+ * Returns MPI_SUCCESS, or the error from create_s_cookie().
+ */
+int MPID_nem_tcp_module_lmt_pre_send (MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV *cookie)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int len;
+    MPIDI_msg_sz_t data_sz;
+    int dt_contig;
+    MPI_Aint dt_true_lb;
+    MPID_Datatype * dt_ptr;
+    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_SEND);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_SEND);
+
+    /* Only data_sz is used below; the other dt_* locals exist because
+       they are passed to MPIDI_Datatype_get_info */
+    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+
+    /* NOTE(review): create_s_cookie takes an int, so data_sz is narrowed
+       here if MPIDI_msg_sz_t is wider than int - confirm this is intended */
+    mpi_errno = create_s_cookie (data_sz, &vc_ch->net.tcp.lmt_cookie, &len);
+    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
+
+    /* The cookie buffer stays owned by the VC; lmt_start_send releases it
+       with free_cookie() */
+    cookie->MPID_IOV_BUF = vc_ch->net.tcp.lmt_cookie;
+    cookie->MPID_IOV_LEN = len;
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_SEND);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_tcp_module_lmt_pre_recv
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* Receiver side, first step of a large-message transfer.
+ *
+ * Reads the sender's cookie (the sender's message size, stored in
+ * net.tcp.lmt_s_len), and, if no LMT connection exists yet for this VC,
+ * creates a listening TCP socket on an ephemeral port.  It then builds the
+ * receive cookie (host name, port, receive size) that is returned to the
+ * sender.
+ *
+ *   vc       - connection the message arrives on
+ *   req      - receive request; user_count and datatype give the size
+ *   s_cookie - cookie received from the sender
+ *   r_cookie - (out) IOV describing the receive cookie; the buffer is kept
+ *              in net.tcp.lmt_cookie
+ *   send_cts - (out) always set to 1 on success
+ *
+ * When net.tcp.lmt_connected is already set no socket is created and the
+ * cookie carries port 0 (saddr is zero-filled).
+ *
+ * Returns MPI_SUCCESS or an MPI error code.
+ */
+int MPID_nem_tcp_module_lmt_pre_recv (MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV s_cookie, MPID_IOV *r_cookie, int *send_cts)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ret;
+    unsigned int len;
+    int cookie_len;
+    struct sockaddr_in saddr;
+    MPIDI_msg_sz_t data_sz;
+    int dt_contig;
+    MPI_Aint dt_true_lb;
+    MPID_Datatype * dt_ptr;
+    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_RECV);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_RECV);
+
+    mpi_errno = read_s_cookie (s_cookie, &vc_ch->net.tcp.lmt_s_len);
+    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
+
+    /* memset takes (dest, value, count); the arguments were previously
+       swapped, which left saddr uninitialized */
+    memset (&saddr, 0, sizeof(saddr));
+
+    if (!vc_ch->net.tcp.lmt_connected)
+    {
+        vc_ch->net.tcp.lmt_desc = socket (AF_INET, SOCK_STREAM, 0);
+        MPIU_ERR_CHKANDJUMP2 (vc_ch->net.tcp.lmt_desc == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", strerror (errno), errno);
+
+        //        ret = fcntl (vc_ch->net.tcp.lmt_desc, F_SETFL, O_NONBLOCK);
+        //        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+
+        /* Port 0: let the system choose a free port */
+        saddr.sin_family      = AF_INET;
+        saddr.sin_addr.s_addr = htonl (INADDR_ANY);
+        saddr.sin_port        = htons (0);
+
+        ret = bind (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, sizeof (saddr));
+        MPIU_ERR_CHKANDJUMP3 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock|poll|bind", "**sock|poll|bind %d %d %s", ntohs (saddr.sin_port), errno, strerror (errno));
+
+        /* Find out which port was actually assigned */
+        len = sizeof (saddr);
+        ret = getsockname (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, &len);
+        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+
+        /* The result of set_sockopts is not checked here */
+        set_sockopts (vc_ch->net.tcp.lmt_desc);
+
+        ret = listen (vc_ch->net.tcp.lmt_desc, SOMAXCONN);
+        /* Arguments must follow the "%s %d" order of the format */
+        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**listen", "**listen %s %d", strerror (errno), errno);
+    }
+
+    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+
+    /* create_r_cookie expects an int *, so use a dedicated int rather than
+       the unsigned socket-length variable */
+    mpi_errno = create_r_cookie (MPID_nem_hostname, ntohs (saddr.sin_port), data_sz, &vc_ch->net.tcp.lmt_cookie, &cookie_len);
+    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
+
+    r_cookie->MPID_IOV_BUF = vc_ch->net.tcp.lmt_cookie;
+    r_cookie->MPID_IOV_LEN = cookie_len;
+
+    *send_cts = 1;
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_RECV);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_tcp_module_lmt_start_send
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* Sender side, data phase of a large-message transfer.
+ *
+ * Reads the receiver's cookie (host, port, receive size), frees the send
+ * cookie kept in net.tcp.lmt_cookie, connects to the receiver if no LMT
+ * connection exists yet, and then writes the whole message with blocking
+ * writev() calls before completing the request.
+ *
+ *   vc       - connection the message goes over
+ *   req      - send request describing the user buffer
+ *   r_cookie - cookie received from the receiver
+ *
+ * If the receiver's buffer is smaller than the message, only that many
+ * bytes are sent and a truncation error is recorded in req->status.
+ *
+ * Returns MPI_SUCCESS or an MPI error code.
+ */
+int MPID_nem_tcp_module_lmt_start_send (MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV r_cookie)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ret;
+    MPIDI_msg_sz_t data_sz;
+    int dt_contig;
+    MPI_Aint dt_true_lb;
+    MPID_Datatype * dt_ptr;
+    MPIDI_msg_sz_t last;
+    int nb;
+    int s_len = 0;
+    int r_len;
+    int r_port;
+    char *r_hostname;
+    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);
+
+    mpi_errno = read_r_cookie (r_cookie, &r_hostname, &r_port, &r_len);
+    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
+
+    free_cookie (vc_ch->net.tcp.lmt_cookie);
+
+    if (!vc_ch->net.tcp.lmt_connected)
+    {
+        struct sockaddr_in saddr;
+        struct hostent *hp;
+
+        vc_ch->net.tcp.lmt_desc = socket (AF_INET, SOCK_STREAM, 0);
+        MPIU_ERR_CHKANDJUMP2 (vc_ch->net.tcp.lmt_desc == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", strerror (errno), errno);
+
+        //        ret = fcntl (vc_ch->net.tcp.lmt_desc, F_SETFL, O_NONBLOCK);
+        //        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+
+        hp = gethostbyname (r_hostname);
+        MPIU_ERR_CHKANDJUMP2 (hp == NULL, mpi_errno, MPI_ERR_OTHER, "**gethostbyname", "**gethostbyname %s %d", hstrerror (h_errno), h_errno);
+
+        /* memset takes (dest, value, count); the arguments were previously
+           swapped, which left the unused parts of saddr uninitialized */
+        memset (&saddr, 0, sizeof(saddr));
+        saddr.sin_family = AF_INET;
+        saddr.sin_port   = htons (r_port);
+        MPID_NEM_MEMCPY (&saddr.sin_addr, hp->h_addr, hp->h_length);
+
+        /* The result of set_sockopts is not checked here */
+        set_sockopts (vc_ch->net.tcp.lmt_desc);
+
+        ret = connect (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, sizeof(saddr));
+        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+
+        vc_ch->net.tcp.lmt_connected = 1;
+    }
+
+    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+
+    if (r_len < data_sz)
+    {
+        /* message will be truncated */
+        s_len = data_sz;
+        data_sz = r_len;
+ 	req->status.MPI_ERROR = MPIU_ERR_SET2 (mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", s_len, r_len);
+    }
+
+    MPID_Segment_init (req->dev.user_buf, req->dev.user_count, req->dev.datatype, &req->dev.segment, 0);
+    req->dev.segment_first = 0;
+    req->dev.segment_size = data_sz;
+    req->dev.iov_offset = 0;
+
+    /* Each pass packs as much of [segment_first, data_sz) as fits in the
+       IOV array, sends it completely, and then moves segment_first up to
+       where the packing stopped. */
+    do
+    {
+        int iov_offset;
+        int left_to_send;
+
+        /* 'last' and 'iov_count' are in/out arguments of the pack call, so
+           they have to be re-armed on every pass; otherwise a second pass
+           would pack and send the same range again, without end */
+        last = data_sz;
+        req->dev.iov_count = MPID_IOV_LIMIT;
+        MPID_Segment_pack_vector (&req->dev.segment, req->dev.segment_first, &last, req->dev.iov, &req->dev.iov_count);
+
+        left_to_send = last - req->dev.segment_first;
+        iov_offset = 0;
+
+#ifdef TESTING_CHUNKING
+        {
+            char *buf = req->dev.iov[0].MPID_IOV_BUF;
+            int l;
+            while (left_to_send)
+            {
+                if (left_to_send > CHUNK)
+                    l = CHUNK;
+                else
+                    l = left_to_send;
+
+                do
+                    nb = write (vc_ch->net.tcp.lmt_desc, buf, l);
+                while (nb == -1 && errno == EINTR);
+                MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
+
+                left_to_send -= nb;
+                buf += nb;
+            }
+
+            MPIDI_CH3U_Request_complete (req);
+            goto fn_exit;
+        }
+#endif
+
+        do
+            nb = writev (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
+        while (nb == -1 && errno == EINTR);
+        MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
+
+        left_to_send -= nb;
+        while (left_to_send)
+        { /* send rest of iov */
+            while (nb >= req->dev.iov[iov_offset].MPID_IOV_LEN)
+            { /* update iov to reflect sent bytes */
+                nb -= req->dev.iov[iov_offset].MPID_IOV_LEN;
+                ++iov_offset;
+            }
+            /* first entry that was only partly written: skip the part
+               already on the wire */
+            req->dev.iov[iov_offset].MPID_IOV_BUF = (char *)req->dev.iov[iov_offset].MPID_IOV_BUF + nb;
+            req->dev.iov[iov_offset].MPID_IOV_LEN -= nb;
+
+            do
+                nb = writev (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
+            while (nb == -1 && errno == EINTR);
+            MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
+            left_to_send -= nb;
+        }
+
+        /* everything up to 'last' has been sent; continue from there */
+        req->dev.segment_first = last;
+    }
+    while (last < data_sz);
+
+    MPIDI_CH3U_Request_complete (req);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_tcp_module_lmt_start_recv
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* Receiver side, data phase of a large-message transfer.
+ *
+ * Frees the receive cookie kept in net.tcp.lmt_cookie, accepts the
+ * sender's connection if no LMT connection exists yet (closing the
+ * listening socket afterwards), and then reads the whole message with
+ * blocking readv() calls before completing the request.
+ *
+ *   vc  - connection the message arrives on
+ *   req - receive request describing the user buffer
+ *
+ * The amount received is the smaller of the receive buffer size and the
+ * sender's size (net.tcp.lmt_s_len); if the sender's message is larger, a
+ * truncation error is recorded in req->status.  A read returning 0 (peer
+ * closed the connection) is reported as an error.
+ *
+ * Returns MPI_SUCCESS or an MPI error code.
+ */
+int MPID_nem_tcp_module_lmt_start_recv (MPIDI_VC_t *vc, MPID_Request *req)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int ret;
+    MPIDI_msg_sz_t data_sz;
+    int dt_contig;
+    MPI_Aint dt_true_lb;
+    MPID_Datatype * dt_ptr;
+    MPIDI_msg_sz_t last;
+    int nb;
+    int r_len;
+    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);
+
+    free_cookie (vc_ch->net.tcp.lmt_cookie);
+
+    if (!vc_ch->net.tcp.lmt_connected)
+    {
+        int len;
+        struct sockaddr_in saddr;
+        int connfd;
+
+        len = sizeof (saddr);
+        connfd = accept (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, &len);
+        MPIU_ERR_CHKANDJUMP2 (connfd == -1, mpi_errno, MPI_ERR_OTHER, "**sock|poll|accept", "**sock|poll|accept %d %s", errno, strerror (errno));
+
+        /* close listen fd */
+        do
+            ret = close (vc_ch->net.tcp.lmt_desc);
+        while (ret == -1 && errno == EINTR);
+        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**closesocket", "**closesocket %s %d", strerror (errno), errno);
+
+        /* set lmt_desc to new connected fd */
+        vc_ch->net.tcp.lmt_desc = connfd;
+        vc_ch->net.tcp.lmt_connected = 1;
+
+        //        ret = fcntl (vc_ch->net.tcp.lmt_desc, F_SETFL, O_NONBLOCK);
+        //        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+    }
+
+    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
+
+    if (data_sz > vc_ch->net.tcp.lmt_s_len)
+    {
+        /* sender has less data than the buffer can hold */
+        data_sz = vc_ch->net.tcp.lmt_s_len;
+    }
+    else if (data_sz < vc_ch->net.tcp.lmt_s_len)
+    {
+        /* message will be truncated */
+        r_len = data_sz;
+ 	req->status.MPI_ERROR = MPIU_ERR_SET2 (mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", vc_ch->net.tcp.lmt_s_len, r_len);
+    }
+
+    MPID_Segment_init (req->dev.user_buf, req->dev.user_count, req->dev.datatype, &req->dev.segment, 0);
+    req->dev.segment_first = 0;
+    req->dev.segment_size = data_sz;
+    req->dev.iov_offset = 0;
+
+    /* Each pass maps as much of [segment_first, data_sz) as fits in the
+       IOV array, fills it completely, and then moves segment_first up to
+       where the mapping stopped. */
+    do
+    {
+        int iov_offset;
+        int left_to_recv;
+
+        /* 'last' and 'iov_count' are in/out arguments of the unpack call,
+           so they have to be re-armed on every pass; otherwise a second
+           pass would map and fill the same range again, without end */
+        last = data_sz;
+        req->dev.iov_count = MPID_IOV_LIMIT;
+        MPID_Segment_unpack_vector (&req->dev.segment, req->dev.segment_first, &last, req->dev.iov, &req->dev.iov_count);
+
+        left_to_recv = last - req->dev.segment_first;
+        iov_offset = 0;
+
+#ifdef TESTING_CHUNKING
+        {
+            char *buf = req->dev.iov[0].MPID_IOV_BUF;
+            int l;
+            while (left_to_recv)
+            {
+                if (left_to_recv > CHUNK)
+                    l = CHUNK;
+                else
+                    l = left_to_recv;
+
+                do
+                    nb = read (vc_ch->net.tcp.lmt_desc, buf, l);
+                while (nb == -1 && errno == EINTR);
+                MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
+
+                left_to_recv -= nb;
+                buf += nb;
+            }
+            MPIDI_CH3U_Request_complete (req);
+            goto fn_exit;
+        }
+#endif
+
+        do
+            nb = readv (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
+        while (nb == -1 && errno == EINTR);
+        MPIU_ERR_CHKANDJUMP2 (nb == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+        MPIU_ERR_CHKANDJUMP (nb == 0, mpi_errno, MPI_ERR_OTHER, "**fail");
+
+        left_to_recv -= nb;
+        while (left_to_recv)
+        { /* recv rest of iov */
+            while (nb >= req->dev.iov[iov_offset].MPID_IOV_LEN)
+            { /* update iov to reflect received bytes */
+                nb -= req->dev.iov[iov_offset].MPID_IOV_LEN;
+                ++iov_offset;
+            }
+            /* first entry that was only partly filled: skip the part
+               already received */
+            req->dev.iov[iov_offset].MPID_IOV_BUF = (char *)req->dev.iov[iov_offset].MPID_IOV_BUF + nb;
+            req->dev.iov[iov_offset].MPID_IOV_LEN -= nb;
+
+            do
+                nb = readv (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
+            while (nb == -1 && errno == EINTR);
+            MPIU_ERR_CHKANDJUMP2 (nb == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+            MPIU_ERR_CHKANDJUMP (nb == 0, mpi_errno, MPI_ERR_OTHER, "**fail");
+            left_to_recv -= nb;
+        }
+
+        /* everything up to 'last' has been received; continue from there */
+        req->dev.segment_first = last;
+    }
+    while (last < data_sz);
+
+    MPIDI_CH3U_Request_complete (req);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_tcp_module_lmt_post_send
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* Sender-side hook called after the data phase.  This implementation has
+ * nothing to do: neither vc nor req is used and MPI_SUCCESS is always
+ * returned. */
+int MPID_nem_tcp_module_lmt_post_send (MPIDI_VC_t *vc, MPID_Request *req)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_SEND);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_SEND);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_SEND);
+    return mpi_errno;
+ fn_fail:
+    /* never reached at present; kept so the function has the same
+       exit/fail layout as the other routines in this file */
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_tcp_module_lmt_post_recv
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* Receiver-side hook called after the data phase.  This implementation has
+ * nothing to do: neither vc nor req is used and MPI_SUCCESS is always
+ * returned. */
+int MPID_nem_tcp_module_lmt_post_recv (MPIDI_VC_t *vc, MPID_Request *req)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_RECV);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_RECV);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_RECV);
+    return mpi_errno;
+ fn_fail:
+    /* never reached at present; kept so the function has the same
+       exit/fail layout as the other routines in this file */
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME create_s_cookie
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* create_s_cookie - allocate the sender's cookie, which consists of the
+ * message size stored as a single int.
+ *
+ *   data_sz - message size to store
+ *   cookie  - (out) newly allocated buffer; release with free_cookie()
+ *   len     - (out) size of the buffer in bytes
+ *
+ * Returns MPI_SUCCESS, or an MPI_ERR_OTHER code if allocation fails (in
+ * which case the out parameters are not written).
+ */
+static int create_s_cookie (int data_sz, char **cookie, int *len)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int *payload = MPIU_Malloc (sizeof (int));
+
+    MPIU_ERR_CHKANDJUMP (payload == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem");
+
+    payload[0] = data_sz;
+
+    *len = sizeof (int);
+    *cookie = (char *)payload;
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME read_s_cookie
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* read_s_cookie - extract the message size from a sender's cookie.
+ *
+ *   cookie  - IOV describing the received cookie; it must be exactly the
+ *             size of one int, as produced by create_s_cookie()
+ *   data_sz - (out) the size stored in the cookie
+ *
+ * Returns MPI_SUCCESS, or an MPI_ERR_OTHER code if the cookie has the wrong
+ * length (*data_sz is then left unchanged).
+ */
+static int read_s_cookie (MPID_IOV cookie, int *data_sz)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    /* Compare against the size of the int payload, not the size of the
+       pointer: with sizeof (data_sz) every valid cookie was rejected on
+       platforms where pointers are wider than int */
+    MPIU_ERR_CHKANDJUMP (cookie.MPID_IOV_LEN != sizeof (*data_sz), mpi_errno, MPI_ERR_OTHER, "**fail");
+
+    *data_sz = *(int *)cookie.MPID_IOV_BUF;
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* Layout of the receiver's cookie.  The structure is over-allocated by
+ * create_r_cookie() so that hostname can hold the complete NUL-terminated
+ * host name; the declared size of 1 only reserves room for the
+ * terminator, and the length arithmetic in create_r_cookie() /
+ * read_r_cookie() depends on it. */
+typedef struct r_cookie
+{
+    int port;           /* port passed to create_r_cookie() */
+    int data_sz;        /* data size passed to create_r_cookie() */
+    char hostname[1];   /* start of the variable-length host name */
+} r_cookie_t;
+
+#undef FUNCNAME
+#define FUNCNAME create_r_cookie
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* create_r_cookie - allocate and fill the receiver's cookie.
+ *
+ *   hostname - host name to store; at most MAX_HOSTNAME_LEN characters of
+ *              it are considered
+ *   port     - port number to store
+ *   data_sz  - data size to store
+ *   cookie   - (out) newly allocated r_cookie_t; release with free_cookie()
+ *   len      - (out) size of the allocation in bytes
+ *
+ * Returns MPI_SUCCESS, or an MPI_ERR_OTHER code if allocation fails (in
+ * which case the out parameters are not written).
+ */
+static int create_r_cookie (char *hostname, int port, int data_sz, char **cookie, int *len)
+{
+    int mpi_errno = MPI_SUCCESS;
+    r_cookie_t *rc;
+    /* bytes needed for the host name, terminating NUL included */
+    int name_bytes = strnlen (hostname, MAX_HOSTNAME_LEN) + 1;
+    /* r_cookie_t already contains one byte of hostname[], hence the -1 */
+    int total_bytes = sizeof (r_cookie_t) - 1 + name_bytes;
+
+    rc = MPIU_Malloc (total_bytes);
+    MPIU_ERR_CHKANDJUMP (rc == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem");
+
+    MPIU_Strncpy (rc->hostname, hostname, name_bytes);
+    rc->data_sz = data_sz;
+    rc->port = port;
+
+    *len = total_bytes;
+    *cookie = (char *)rc;
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* read_r_cookie - extracts hostname, port and data size from a recv cookie
+   data pointed to by hostname is valid only as long as the packet containing the cookie is valid
+ */
+#undef FUNCNAME
+#define FUNCNAME read_r_cookie
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* read_r_cookie - extract host name, port and data size from a receiver's
+ * cookie.
+ *
+ *   cookie   - IOV describing the received cookie
+ *   hostname - (out) points INTO the cookie buffer; valid only as long as
+ *              that buffer is
+ *   port     - (out) port stored in the cookie
+ *   data_sz  - (out) data size stored in the cookie
+ *
+ * Returns MPI_SUCCESS, or an MPI_ERR_OTHER code if the cookie is shorter
+ * than an r_cookie_t or its host name is not NUL-terminated within the
+ * cookie (the out parameters are then left unchanged).
+ */
+static int read_r_cookie (MPID_IOV cookie, char **hostname, int *port, int *data_sz)
+{
+    int mpi_errno = MPI_SUCCESS;
+    r_cookie_t *c;
+    size_t name_max;
+
+    MPIU_ERR_CHKANDJUMP (cookie.MPID_IOV_LEN < sizeof (r_cookie_t), mpi_errno, MPI_ERR_OTHER, "**fail");
+
+    c = (r_cookie_t *)cookie.MPID_IOV_BUF;
+
+    /* The host name is later used as a C string (gethostbyname), so make
+       sure a terminator exists inside the bytes that belong to the cookie.
+       name_max is at least 1 because of the length check above. */
+    name_max = (size_t)cookie.MPID_IOV_LEN - (size_t)(c->hostname - (char *)c);
+    MPIU_ERR_CHKANDJUMP (memchr (c->hostname, '\0', name_max) == NULL, mpi_errno, MPI_ERR_OTHER, "**fail");
+
+    *hostname = c->hostname;
+    *port = c->port;
+    *data_sz = c->data_sz;
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* free_cookie - release a cookie buffer by passing it to MPIU_Free().
+ * Used in this file on the buffers stored in net.tcp.lmt_cookie. */
+static void free_cookie (void *c)
+{
+    MPIU_Free (c);
+}
+
+#undef FUNCNAME
+#define FUNCNAME set_sockopts
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* set_sockopts - apply the socket options used for LMT connections.
+ *
+ * Sets TCP_NODELAY to 0 and both the receive and send buffer sizes to
+ * 128 KiB.
+ *
+ *   fd - socket descriptor to configure
+ *
+ * Returns MPI_SUCCESS, or an MPI_ERR_OTHER code for the first setsockopt()
+ * call that fails.
+ */
+static int set_sockopts (int fd)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int option;
+    int ret;
+
+    /* NOTE(review): a value of 0 clears TCP_NODELAY (Nagle's algorithm
+       stays enabled) - confirm that this is intended and not meant to be 1 */
+    option = 0;
+    ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(int));
+    MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+
+    /* The result of each call must be stored in ret; previously the two
+       checks below re-tested the result of the TCP_NODELAY call */
+    option = 128*1024;
+    ret = setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(int));
+    MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+    ret = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(int));
+    MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}

Deleted: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_finalize.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_finalize.c	2009-03-12 20:06:40 UTC (rev 4036)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_finalize.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -1,54 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2006 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include "wintcp_module_impl.h"
-
-extern sockconn_t MPID_nem_newtcp_module_g_lstn_sc;
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_tcp_module_finalize
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_finalize()
-{
-    int mpi_errno = MPI_SUCCESS;
-    
-    /* FIXME: Why don't we have a finalize for sm - MPID_nem_newtcp_module_finalize_sm() - ? */
-    /* FIXME: Shouldn't the order of finalize() be the reverse order of init() ? 
-     * i.e., *finalize_sm(); *poll_finalize(); *send_finalize();
-     */
-    mpi_errno = MPID_nem_newtcp_module_send_finalize();
-    if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-
-    mpi_errno = MPID_nem_newtcp_module_poll_finalize();
-    if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-    
-    mpi_errno =  MPID_nem_newtcp_module_sm_finalize();
-    if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-     
-    if(MPIU_SOCKW_Sockfd_is_valid(MPID_nem_newtcp_module_g_lstn_sc.fd))
-    {
-        MPIU_OSW_RETRYON_INTR((mpi_errno != MPI_SUCCESS), (mpi_errno = MPIU_SOCKW_Sock_close(MPID_nem_newtcp_module_g_lstn_sc.fd)));
-        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-    }
-    mpi_errno = MPIU_SOCKW_Finalize();
-    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-        
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_tcp_module_ckpt_shutdown
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_ckpt_shutdown()
-{
-    return MPID_nem_newtcp_module_finalize();
-}
-

Deleted: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_impl.h
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_impl.h	2009-03-12 20:06:40 UTC (rev 4036)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_impl.h	2009-03-12 20:33:40 UTC (rev 4037)
@@ -1,163 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2006 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef NEWTCP_MODULE_IMPL_H
-#define NEWTCP_MODULE_IMPL_H
-
-#include "mpid_nem_impl.h"
-/* #include "newtcp_module.h" */
-#ifdef HAVE_SYS_TYPES_H
-    #include <sys/types.h>
-#endif
-#ifdef HAVE_SYS_SOCKET_H
-    #include <sys/socket.h>
-#endif
-#ifdef HAVE_ERRNO_H
-    #include <errno.h>
-#endif
-#ifdef HAVE_NETINET_IN_H
-    #include <netinet/in.h>
-#endif
-#ifdef HAVE_NETINET_TCP_H
-    #include <netinet/tcp.h>
-#endif
-#ifdef HAVE_NETDB_H
-    #include <netdb.h>
-#endif
-#include "socksm.h"
-
-/* globals */
-extern MPID_nem_queue_ptr_t MPID_nem_newtcp_module_free_queue;
-extern MPID_nem_queue_ptr_t MPID_nem_process_recv_queue;
-extern MPID_nem_queue_ptr_t MPID_nem_process_free_queue;
-extern int MPID_nem_newtcp_module_listen_fd;
-
-#define MPID_NEM_NEWTCP_MODULE_VC_STATE_DISCONNECTED 0
-#define MPID_NEM_NEWTCP_MODULE_VC_STATE_CONNECTED 1
-
-extern char *MPID_nem_newtcp_module_recv_buf;
-#define MPID_NEM_NEWTCP_MODULE_RECV_MAX_PKT_LEN 1024
-#define MPID_NEM_NEWTCP_MODULE_RCVBUF_SZ    (128*1024)
-#define MPID_NEM_NEWTCP_MODULE_SNDBUF_SZ    (128*1024)
-
-/* The vc provides a generic buffer in which network modules can store
-   private fields This removes all dependencies from the VC struction
-   on the network module, facilitating dynamic module loading. */
-typedef struct 
-{
-    struct sockaddr_in sock_id;
-    struct MPID_nem_new_tcp_module_sockconn *sc;
-    struct
-    {
-        struct MPID_Request *head;
-        struct MPID_Request *tail;
-    } send_queue;
-    /* this is a count of how many sc objects refer to this vc */
-    int sc_ref_count;
-} MPID_nem_newtcp_module_vc_area;
-
-/* accessor macro to private fields in VC */
-#define VC_FIELD(vc, field) (((MPID_nem_newtcp_module_vc_area *)((MPIDI_CH3I_VC *)(vc)->channel_private)->netmod_area.padding)->field)
-
-#define ASSIGN_SC_TO_VC(vc_, sc_) do {      \
-        VC_FIELD((vc_), sc) = (sc_);        \
-    } while (0)
-
-/* functions */
-int MPID_nem_newtcp_module_init (MPID_nem_queue_ptr_t proc_recv_queue, 
-                                 MPID_nem_queue_ptr_t proc_free_queue, 
-                                 MPID_nem_cell_ptr_t proc_elements,   int num_proc_elements,
-                                 MPID_nem_cell_ptr_t module_elements, int num_module_elements, 
-                                 MPID_nem_queue_ptr_t *module_free_queue, int ckpt_restart,
-                                 MPIDI_PG_t *pg_p, int pg_rank,
-                                 char **bc_val_p, int *val_max_sz_p);
-int MPID_nem_newtcp_module_finalize (void);
-int MPID_nem_newtcp_module_ckpt_shutdown (void);
-int MPID_nem_newtcp_module_poll (MPID_nem_poll_dir_t in_or_out);
-int MPID_nem_newtcp_module_send (MPIDI_VC_t *vc, MPID_nem_cell_ptr_t cell, int datalen);
-int MPID_nem_newtcp_module_get_business_card (int my_rank, char **bc_val_p, int *val_max_sz_p);
-int MPID_nem_newtcp_module_connect_to_root (const char *business_card, MPIDI_VC_t *new_vc);
-int MPID_nem_newtcp_module_vc_init (MPIDI_VC_t *vc);
-int MPID_nem_newtcp_module_vc_destroy(MPIDI_VC_t *vc);
-int MPID_nem_newtcp_module_vc_terminate (MPIDI_VC_t *vc);
-
-/* completion counter is atomically decremented when operation completes */
-int MPID_nem_newtcp_module_get (void *target_p, void *source_p, int source_node, int len, int *completion_ctr);
-int MPID_nem_newtcp_module_put (void *target_p, int target_node, void *source_p, int len, int *completion_ctr);
-
-int MPID_nem_newtcp_module_send_init (void);
-int MPID_nem_newtcp_module_send_queued (MPIDI_VC_t *vc);
-int MPID_nem_newtcp_module_poll_init (void);
-int MPID_nem_newtcp_module_connect (struct MPIDI_VC *const vc);
-int MPID_nem_newtcp_module_conn_wr_enable (struct MPIDI_VC *const vc);
-int MPID_nem_newtcp_module_conn_wr_disable (struct MPIDI_VC *const vc);
-int MPID_nem_newtcp_module_connpoll (void);
-int MPID_nem_newtcp_module_sm_init (void);
-int MPID_nem_newtcp_module_sm_finalize (void);
-int MPID_nem_newtcp_module_set_sockopts (int fd);
-MPID_NEM_NEWTCP_MODULE_SOCK_STATUS_t MPID_nem_newtcp_module_check_sock_status(MPIU_SOCKW_Waitset_sock_hnd_t fd_ws_hnd);
-int MPID_nem_newtcp_module_poll_finalize (void);
-int MPID_nem_newtcp_module_send_finalize (void);
-int MPID_nem_newtcp_module_bind (int sockfd);
-int MPID_nem_newtcp_module_recv_handler (MPIU_SOCKW_Waitset_sock_hnd_t fd_ws_hnd, sockconn_t *sc);
-int MPID_nem_newtcp_module_conn_est (MPIDI_VC_t *vc);
-int MPID_nem_newtcp_module_get_conninfo (struct MPIDI_VC *vc, struct sockaddr_in *addr, char **pg_id, int *pg_rank);
-int MPID_nem_newtcp_module_get_vc_from_conninfo (char *pg_id, int pg_rank, struct MPIDI_VC **vc);
-int MPID_nem_newtcp_module_is_sock_connected(int fd);
-int MPID_nem_newtcp_module_disconnect (struct MPIDI_VC *const vc);
-int MPID_nem_newtcp_module_cleanup (struct MPIDI_VC *const vc);
-int MPID_nem_newtcp_module_state_listening_handler(MPIU_SOCKW_Waitset_sock_hnd_t fd_ws_hnd, sockconn_t *const l_sc);
-
-int MPID_nem_newtcp_iSendContig(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz);
-int MPID_nem_newtcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz,
-                                    MPID_Request **sreq_ptr);
-int MPID_nem_newtcp_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *header, MPIDI_msg_sz_t hdr_sz);
-
-/* Macros */
-
-/* system call wrapper -- This retries the syscall each time it is interrupted.  
-   Example usage:  instead of writing "ret = write(fd, buf, len);" 
-   use: "CHECK_EINTR(ret, write(fd, buf, len)); 
- Caution:
- (1) Some of the system calls have value-result parameters. Those system calls
- should not be used within CHECK_EINTR macro or should be used with CARE.
- For eg. accept, the last parameter (addrlen) is a value-result one. So, even if the
- system call is interrupted, addrlen should be initialized to appropriate value before
- calling it again.
-
- (2) connect should not be called within a loop. In case, the connect is interrupted after
- the TCP handshake is initiated, calling connect again will only fail. So, select/poll
- should be called to check the status of the socket.
- I don't know what will happen, if a connect is interrupted even before the system call
- tries to initiate TCP handshake. No book/manual doesn't seem to explain this scenario.
-*/
-/* CHECK_EINTR is now deprecated. Use MPIU_OSW_RETRYON_EINTR() instead*/
-#define CHECK_EINTR(var, func) do {             \
-        (var) = (func);                         \
-    } while ((var) == -1 && errno == EINTR)
-
-/* Send queue macros */
-#define Q_EMPTY(q) GENERIC_Q_EMPTY (q)
-#define Q_HEAD(q) GENERIC_Q_HEAD (q)
-#define Q_ENQUEUE_EMPTY(qp, ep) GENERIC_Q_ENQUEUE_EMPTY (qp, ep, next)
-#define Q_ENQUEUE(qp, ep) GENERIC_Q_ENQUEUE (qp, ep, next)
-#define Q_ENQUEUE_EMPTY_MULTIPLE(qp, ep0, ep1) GENERIC_Q_ENQUEUE_EMPTY_MULTIPLE (qp, ep0, ep1, next)
-#define Q_ENQUEUE_MULTIPLE(qp, ep0, ep1) GENERIC_Q_ENQUEUE_MULTIPLE (qp, ep0, ep1, next)
-#define Q_DEQUEUE(qp, ep) GENERIC_Q_DEQUEUE (qp, ep, next)
-#define Q_REMOVE_ELEMENTS(qp, ep0, ep1) GENERIC_Q_REMOVE_ELEMENTS (qp, ep0, ep1, next)
-
-/* VC list macros */
-#define VC_L_EMPTY(q) GENERIC_L_EMPTY (q)
-#define VC_L_HEAD(q) GENERIC_L_HEAD (q)
-
-/* stack macros */
-#define S_EMPTY(s) GENERIC_S_EMPTY (s)
-#define S_TOP(s) GENERIC_S_TOP (s)
-#define S_PUSH(sp, ep) GENERIC_S_PUSH (sp, ep, next)
-#define S_PUSH_MULTIPLE(sp, ep0, ep1) GENERIC_S_PUSH_MULTIPLE (sp, ep0, ep1, next)
-#define S_POP(sp, ep) GENERIC_S_POP (sp, ep, next)
-
-#endif /* NEWTCP_MODULE_IMPL_H */

Deleted: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_init.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_init.c	2009-03-12 20:06:40 UTC (rev 4036)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_init.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -1,699 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2006 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include "wintcp_module_impl.h"
-#ifdef HAVE_SYS_TYPES_H
-    #include <sys/types.h>
-#endif
-#ifdef HAVE_SYS_SOCKET_H
-    #include <sys/socket.h>
-#endif
-#ifdef HAVE_ARPA_INET_H
-    #include <arpa/inet.h>
-#endif
-
-/*S
-  MPIDU_Sock_ifaddr_t - Structure to hold an Internet address.
-
-+ len - Length of the address.  4 for IPv4, 16 for IPv6.
-. type - Address family of the stored address (e.g., AF_INET).  The routines
-         in this file set it from hostent.h_addrtype or to AF_INET, and use
-         -1 when an address had to be discarded.
-- ifaddr - Address bytes (as bytes, not characters)
-
-S*/
-typedef struct MPIDU_Sock_ifaddr_t {
-    int len, type;
-    unsigned char ifaddr[16];
-} MPIDU_Sock_ifaddr_t;
-
-
-/* Free-cell queue owned by this module.  MPID_nem_newtcp_module_init points
-   it at _free_queue and fills it with the module's cells. */
-MPID_nem_queue_ptr_t MPID_nem_newtcp_module_free_queue = 0;
-/* Process receive/free queues; saved from the arguments of
-   MPID_nem_newtcp_module_init. */
-MPID_nem_queue_ptr_t MPID_nem_process_recv_queue = 0;
-MPID_nem_queue_ptr_t MPID_nem_process_free_queue = 0;
-/* Listener state shared with the rest of the module; defined elsewhere. */
-extern sockconn_t MPID_nem_newtcp_module_g_lstn_sc;
-extern pollfd_t g_lstn_plfd;
-
-/* Backing storage for MPID_nem_newtcp_module_free_queue. */
-static MPID_nem_queue_t _free_queue;
-
-/* Set to non-zero to print interface-discovery diagnostics on stdout. */
-static int dbg_ifname = 0;
-
-/* The former "static int get_addr_port_from_bc(...)" prototype was removed:
-   no such static function is defined or called in this file (the real
-   routine is the external MPID_nem_newtcp_module_get_addr_port_from_bc). */
-static int GetIPInterface( MPIDU_Sock_ifaddr_t *, int * );
-
-/* Entry-point table for this network module.  The initializer is positional,
-   so the order of the entries must match the member order of
-   MPID_nem_netmod_funcs_t (declared elsewhere -- verify when editing). */
-MPID_nem_netmod_funcs_t MPIDI_nem_newtcp_module_funcs = {
-    MPID_nem_newtcp_module_init,
-    MPID_nem_newtcp_module_finalize,
-    MPID_nem_newtcp_module_ckpt_shutdown,
-    MPID_nem_newtcp_module_poll,
-    MPID_nem_newtcp_module_send,
-    MPID_nem_newtcp_module_get_business_card,
-    MPID_nem_newtcp_module_connect_to_root,
-    MPID_nem_newtcp_module_vc_init,
-    MPID_nem_newtcp_module_vc_destroy,
-    MPID_nem_newtcp_module_vc_terminate
-};
-
-/* Keys under which this file stores values in, and reads values from, the
-   business card string: the listener port, the host description (interface
-   name), and the dotted-quad IPv4 address of the interface. */
-#define MPIDI_CH3I_PORT_KEY "port"
-#define MPIDI_CH3I_HOST_DESCRIPTION_KEY "description"
-#define MPIDI_CH3I_IFNAME_KEY "ifname"
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_init
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/*
- * MPID_nem_newtcp_module_init - bring up the TCP network module.
- *
- * Opens, configures, binds and starts listening on the module's listener
- * socket, appends this process's contact information to the business card
- * (*bc_val_p / *val_max_sz_p), records the process queues, builds the
- * module's free queue from module_elements[0..num_module_elements-1],
- * returns that queue through *module_free_queue, and finally initializes
- * the state-machine, send and poll sub-modules.
- *
- * proc_elements, num_proc_elements, ckpt_restart and pg_p are accepted but
- * not used by this routine.
- *
- * Returns MPI_SUCCESS, or the MPI error code of the first step that failed.
- * Nothing set up by earlier steps is undone on failure.
- */
-int MPID_nem_newtcp_module_init (MPID_nem_queue_ptr_t proc_recv_queue, MPID_nem_queue_ptr_t proc_free_queue,
-                                 MPID_nem_cell_ptr_t proc_elements, int num_proc_elements, MPID_nem_cell_ptr_t module_elements,
-                                 int num_module_elements, MPID_nem_queue_ptr_t *module_free_queue,
-                                 int ckpt_restart, MPIDI_PG_t *pg_p, int pg_rank, char **bc_val_p, int *val_max_sz_p)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int i;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_INIT);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_INIT);
-
-    /* first make sure that our private fields in the vc fit into the area provided  */
-    MPIU_Assert(sizeof(MPID_nem_newtcp_module_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);
-
-    /* set up listener socket */
-    mpi_errno = MPIU_SOCKW_Init();
-    if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-
-    mpi_errno = MPIU_SOCKW_Sock_open(AF_INET, SOCK_STREAM, IPPROTO_TCP, &(MPID_nem_newtcp_module_g_lstn_sc.fd));
-    if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-
-    mpi_errno = MPID_nem_newtcp_module_set_sockopts(MPID_nem_newtcp_module_g_lstn_sc.fd);
-    if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-
-    mpi_errno = MPID_nem_newtcp_module_bind (MPID_nem_newtcp_module_g_lstn_sc.fd);
-    if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-
-    mpi_errno = MPIU_SOCKW_Listen(MPID_nem_newtcp_module_g_lstn_sc.fd, SOMAXCONN);
-    if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-
-    MPID_nem_newtcp_module_g_lstn_sc.state.lstate = LISTEN_STATE_LISTENING;
-    MPID_nem_newtcp_module_g_lstn_sc.handler = MPID_nem_newtcp_module_state_listening_handler;
-
-    /* create business card; needs the bound listener to report its port */
-    mpi_errno = MPID_nem_newtcp_module_get_business_card (pg_rank, bc_val_p, val_max_sz_p);
-    if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-
-    /* save references to queues */
-    MPID_nem_process_recv_queue = proc_recv_queue;
-    MPID_nem_process_free_queue = proc_free_queue;
-
-    MPID_nem_newtcp_module_free_queue = &_free_queue;
-
-    /* set up network module queues */
-    MPID_nem_queue_init (MPID_nem_newtcp_module_free_queue);
-
-    for (i = 0; i < num_module_elements; ++i)
-    {
-        MPID_nem_queue_enqueue (MPID_nem_newtcp_module_free_queue, &module_elements[i]);
-    }
-
-    *module_free_queue = MPID_nem_newtcp_module_free_queue;
-
-    /* FIXME: What happens on an error?  The listener socket and the queues
-       set up above are left as they are. */
-
-    mpi_errno = MPID_nem_newtcp_module_sm_init();
-    if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-    mpi_errno = MPID_nem_newtcp_module_send_init();
-    if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-    mpi_errno = MPID_nem_newtcp_module_poll_init();
-    if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_INIT);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-/*
- * Get a description of the network interface to use for socket communication
- *
- * Here are the steps.  This order of checks is used to give the
- * user control over the choice of interface and to avoid, where possible,
- * the use of non-scalable services, such as centralized name servers.
- *
- * MPICH_INTERFACE_HOSTNAME
- * MPICH_INTERFACE_HOSTNAME_R%d
- * a single (non-localhost) available IP address, if possible
- * gethostbyname(gethostname())
- *
- * We return the following items:
- *
- *    ifname - name of the interface.  This may or may not be the same
- *             as the name returned by gethostname  (in Unix)
- *    ifaddr - This structure includes the interface IP address (as bytes),
- *             and the type (e.g., AF_INET or AF_INET6).  Only 
- *             ipv4 (AF_INET) is used so far.
- */
-
-/*
- * GetSockInterfaceAddr - choose the interface name and, if possible, its
- * address for this process.
- *
- *   myRank    - rank used to form the MPICH_INTERFACE_HOSTNAME_R%d variable
- *   ifname    - output buffer receiving the interface name
- *   maxIfname - size of ifname in bytes
- *   ifaddr    - output; len is 0 (and type is -1) when no address was found
- *
- * Returns MPI_SUCCESS, or the error code from MPID_Get_processor_name when
- * the host name is needed and cannot be obtained.  Failing to find an
- * address is not an error.
- */
-static int GetSockInterfaceAddr(int myRank, char *ifname, int maxIfname,
-                                MPIDU_Sock_ifaddr_t *ifaddr)
-{
-    char *ifname_string;
-    int mpi_errno = MPI_SUCCESS;
-    int ifaddrFound = 0;
-
-    /* Set "not found" for ifaddr */
-    ifaddr->len = 0;
-    ifaddr->type = -1;
-
-    /* Check for the name supplied through an environment variable */
-    ifname_string = getenv("MPICH_INTERFACE_HOSTNAME");
-    if (!ifname_string) {
-        /* See if there is a per-process name for the interfaces (e.g.,
-           the process manager only delivers the same values for the
-           environment to each process) */
-        char namebuf[1024];
-        MPIU_Snprintf( namebuf, sizeof(namebuf),
-                       "MPICH_INTERFACE_HOSTNAME_R%d", myRank );
-        ifname_string = getenv( namebuf );
-        if (dbg_ifname && ifname_string) {
-            fprintf( stdout, "Found interface name %s from %s\n",
-                     ifname_string, namebuf );
-            fflush( stdout );
-        }
-    }
-    else if (dbg_ifname) {
-        fprintf( stdout,
-                 "Found interface name %s from MPICH_INTERFACE_HOSTNAME\n",
-                 ifname_string );
-        fflush( stdout );
-    }
-
-    if (!ifname_string) {
-        int len;
-
-        /* If we have nothing, then use the host name.  Without it ifname is
-           not filled in, so report the failure instead of carrying on. */
-        mpi_errno = MPID_Get_processor_name(ifname, maxIfname, &len );
-        if (mpi_errno != MPI_SUCCESS) {
-            return mpi_errno;
-        }
-        ifname_string = ifname;
-
-        /* If we didn't find a specific name, then try to get an IP address
-           directly from the available interfaces, if that is supported on
-           this platform.  This is best effort: on failure GetIPInterface
-           leaves ifaddrFound at 0 and we drop into the next step, which
-           uses the ifname. */
-        (void) GetIPInterface( ifaddr, &ifaddrFound );
-    }
-    else {
-        /* Copy this name into the output name */
-        MPIU_Strncpy( ifname, ifname_string, maxIfname );
-    }
-
-    /* If we don't have an IP address, try to get it from the name */
-    if (!ifaddrFound) {
-        struct hostent *info = gethostbyname( ifname_string );
-
-        if (info && info->h_addr_list && info->h_addr_list[0]) {
-            if (info->h_length > 0 &&
-                info->h_length <= (int) sizeof(ifaddr->ifaddr)) {
-                /* Use the primary address */
-                ifaddr->len  = info->h_length;
-                ifaddr->type = info->h_addrtype;
-                memcpy( ifaddr->ifaddr, info->h_addr_list[0], ifaddr->len );
-            }
-            else {
-                /* If the address won't fit in the field, reset to
-                   no address */
-                ifaddr->len  = 0;
-                ifaddr->type = -1;
-            }
-        }
-    }
-
-    return MPI_SUCCESS;
-}
-
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_get_business_card
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/*
- * MPID_nem_newtcp_module_get_business_card - append this process's TCP
- * contact information to a business card.
- *
- * Adds, in order: the host description (interface name), the listener's
- * port exactly as getsockname reports it in sin_port, and, when an IPv4
- * address is known, that address in dotted-quad form.
- *
- *   my_rank       - rank used when looking up the interface
- *   bc_val_p      - in/out write position in the business card buffer
- *   val_max_sz_p  - in/out remaining space in that buffer
- *
- * Returns MPI_SUCCESS or an MPI error code.
- */
-int MPID_nem_newtcp_module_get_business_card (int my_rank, char **bc_val_p, int *val_max_sz_p)
-{
-    int mpi_errno = MPI_SUCCESS;
-    /* MPIU_Str_* results are kept apart from mpi_errno so that a string
-       utility code is never passed on as if it were an MPI error code. */
-    int str_errno = MPIU_STR_SUCCESS;
-    MPIDU_Sock_ifaddr_t ifaddr;
-    char ifname[MAX_HOST_DESCRIPTION_LEN];
-    int ret;
-    struct sockaddr_in sock_id;
-    socklen_t len;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_GET_BUSINESS_CARD);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_GET_BUSINESS_CARD);
-
-    mpi_errno = GetSockInterfaceAddr(my_rank, ifname, sizeof(ifname), &ifaddr);
-    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
-    str_errno = MPIU_Str_add_string_arg(bc_val_p, val_max_sz_p, MPIDI_CH3I_HOST_DESCRIPTION_KEY, ifname);
-    if (str_errno != MPIU_STR_SUCCESS)
-    {
-        if (str_errno == MPIU_STR_NOMEM)
-        {
-            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard_len");
-        }
-        else
-        {
-            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
-        }
-    }
-
-    len = sizeof(sock_id);
-    ret = getsockname (MPID_nem_newtcp_module_g_lstn_sc.fd, (struct sockaddr *)&sock_id, &len);
-    MPIU_ERR_CHKANDJUMP1 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**getsockname", "**getsockname %s", strerror (errno));
-
-    /* The port is stored as found in sin_port (no byte-order conversion) */
-    str_errno = MPIU_Str_add_int_arg (bc_val_p, val_max_sz_p, MPIDI_CH3I_PORT_KEY, sock_id.sin_port);
-    if (str_errno != MPIU_STR_SUCCESS)
-    {
-        if (str_errno == MPIU_STR_NOMEM)
-        {
-            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard_len");
-        }
-        else
-        {
-            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
-        }
-    }
-
-    if (ifaddr.len > 0 && ifaddr.type == AF_INET)
-    {
-        /* Named differently from ifname above so that it no longer shadows it */
-        char ifaddr_str[256];
-        unsigned char *p = (unsigned char *)(ifaddr.ifaddr);
-
-        MPIU_Snprintf( ifaddr_str, sizeof(ifaddr_str), "%u.%u.%u.%u", p[0], p[1], p[2], p[3] );
-        MPIU_DBG_MSG_S(CH3_CONNECT,VERBOSE,"ifname = %s",ifaddr_str );
-        str_errno = MPIU_Str_add_string_arg(bc_val_p, val_max_sz_p, MPIDI_CH3I_IFNAME_KEY, ifaddr_str);
-        if (str_errno != MPIU_STR_SUCCESS)
-        {
-            if (str_errno == MPIU_STR_NOMEM)
-            {
-                MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard_len");
-            }
-            else
-            {
-                MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**buscard");
-            }
-        }
-    }
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_GET_BUSINESS_CARD);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_connect_to_root
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/*
- * MPID_nem_newtcp_module_connect_to_root - start a connection to the
- * process described by business_card, using the already-allocated new_vc.
- *
- * Fills the VC's socket address and port-name tag from the business card
- * and then calls MPID_nem_newtcp_module_connect.
- *
- * Returns MPI_SUCCESS or the MPI error code from parsing the card.
- */
-int MPID_nem_newtcp_module_connect_to_root (const char *business_card, MPIDI_VC_t *new_vc)
-{
-    int mpi_errno = MPI_SUCCESS;
-    struct in_addr addr;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONNECT_TO_ROOT);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONNECT_TO_ROOT);
-
-    /* vc is already allocated before reaching this point */
-
-    mpi_errno = MPID_nem_newtcp_module_get_addr_port_from_bc(business_card, &addr, &(VC_FIELD(new_vc, sock_id).sin_port));
-    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    /* Only copy the address once parsing has succeeded; on failure addr may
-       never have been written. */
-    VC_FIELD(new_vc, sock_id).sin_addr.s_addr = addr.s_addr;
-
-    mpi_errno = MPIDI_GetTagFromPort(business_card, &new_vc->port_name_tag);
-    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
-    /* NOTE(review): the result of the connect call is not examined here;
-       confirm whether it reports errors that should be propagated. */
-    MPID_nem_newtcp_module_connect(new_vc);
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONNECT_TO_ROOT);
-    return mpi_errno;
-
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_vc_init
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/*
- * MPID_nem_newtcp_module_vc_init - put a VC into its initial, disconnected
- * TCP state.
- *
- * Installs this module's send routines, zeroes the VC's socket address and
- * marks it AF_INET, and clears the list links, socket-connection pointer,
- * reference count and send queue.  Always returns MPI_SUCCESS.
- */
-int MPID_nem_newtcp_module_vc_init (MPIDI_VC_t *vc)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_VC_INIT);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_VC_INIT);
-
-    vc_ch->state = MPID_NEM_NEWTCP_MODULE_VC_STATE_DISCONNECTED;
-
-    /* send entry points provided by this module */
-    vc->sendNoncontig_fn      = MPID_nem_newtcp_SendNoncontig;
-    vc_ch->iStartContigMsg    = MPID_nem_newtcp_iStartContigMsg;
-    vc_ch->iSendContig        = MPID_nem_newtcp_iSendContig;
-
-    /* address and port are filled in later, once the peer is known */
-    memset(&VC_FIELD(vc, sock_id), 0, sizeof(VC_FIELD(vc, sock_id)));
-    VC_FIELD(vc, sock_id).sin_family = AF_INET;
-
-    vc_ch->next = NULL;
-    vc_ch->prev = NULL;
-    VC_FIELD(vc, sc) = NULL;
-    VC_FIELD(vc, sc_ref_count) = 0;
-    VC_FIELD(vc, send_queue).head = VC_FIELD(vc, send_queue).tail = NULL;
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_VC_INIT);
-    return mpi_errno;
- fn_fail:
-    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_vc_destroy
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/* Hook called when a VC is destroyed.  This module currently holds nothing
-   per VC that needs releasing here, so it simply reports success. */
-int MPID_nem_newtcp_module_vc_destroy(MPIDI_VC_t *vc)
-{
-    /* free any resources associated with this VC here */
-    (void) vc;
-
-    return MPI_SUCCESS;
-}
-
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_get_addr_port_from_bc
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/*
- * MPID_nem_newtcp_module_get_addr_port_from_bc - extract the peer's IPv4
- * address and port from a business card.
- *
- *   business_card - card text containing the "port" and "ifname" keys
- *   addr          - output; s_addr is set from the "ifname" dotted-quad string
- *   port          - output; set to the integer stored under "port"
- *
- * Returns MPI_SUCCESS, or an MPI error code when a key is missing or the
- * address string cannot be converted.
- */
-int MPID_nem_newtcp_module_get_addr_port_from_bc (const char *business_card, struct in_addr *addr, in_port_t *port)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int ret;
-    int port_int = 0;
-    char ifname[256];
-    MPIDI_STATE_DECL(MPID_STATE_NEWTCP_MODULE_GET_ADDR_PORT_FROM_BC);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_NEWTCP_MODULE_GET_ADDR_PORT_FROM_BC);
-
-    /* Read the port into a real int first.  Passing (int *)port made the
-       parser store sizeof(int) bytes through a pointer to the narrower
-       in_port_t, overrunning the caller's field. */
-    ret = MPIU_Str_get_int_arg (business_card, MPIDI_CH3I_PORT_KEY, &port_int);
-    MPIU_ERR_CHKANDJUMP (ret != MPIU_STR_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**argstr_missingport");
-    *port = (in_port_t) port_int;
-
-    ret = MPIU_Str_get_string_arg(business_card, MPIDI_CH3I_IFNAME_KEY, ifname, sizeof(ifname));
-    MPIU_ERR_CHKANDJUMP (ret != MPIU_STR_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**argstr_missingifname");
-
-    mpi_errno = MPIU_SOCKW_Inet_addr(ifname, &(addr->s_addr));
-    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_NEWTCP_MODULE_GET_ADDR_PORT_FROM_BC);
-    return mpi_errno;
- fn_fail:
-    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
-    goto fn_exit;
-}
-
-/* MPID_nem_newtcp_module_bind -- if MPICH_PORT_RANGE is set, this
-   binds the socket to an available port number in the range.
-   Otherwise, it binds it to any addr and any port.
-
-   sockfd is the socket to bind.  The address is always INADDR_ANY; the
-   port range defaults to 0..0 when the environment variable is absent.
-   Returns MPI_SUCCESS, or an MPI error code for an invalid range
-   ("**badportrange") or a failed bind. */
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_bind
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_bind (int sockfd)
-{
-    int mpi_errno = MPI_SUCCESS;
-    struct sockaddr_in sin;
-    int low_port = 0;
-    int high_port = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_BIND);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_BIND);
-
-    MPIU_GetEnvRange( "MPICH_PORT_RANGE", &low_port, &high_port );
-    MPIU_ERR_CHKANDJUMP (low_port < 0 || low_port > high_port, mpi_errno, MPI_ERR_OTHER, "**badportrange");
-
-    memset((void *)&sin, 0, sizeof(sin));
-    sin.sin_family      = AF_INET;
-    sin.sin_addr.s_addr = htonl(INADDR_ANY);
-
-    mpi_errno = MPIU_SOCKW_Bind_port_range(sockfd, &sin, low_port, high_port);
-    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_BIND);
-    return mpi_errno;
- fn_fail:
-    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_vc_terminate
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/* Terminate a VC by handing it to MPID_nem_newtcp_module_cleanup; the
-   result of that call is returned to the caller. */
-int MPID_nem_newtcp_module_vc_terminate (MPIDI_VC_t *vc)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_NEM_NEWTCP_MODULE_VC_TERMINATE);
-
-    MPIDI_FUNC_ENTER(MPID_NEM_NEWTCP_MODULE_VC_TERMINATE);
-
-    mpi_errno = MPID_nem_newtcp_module_cleanup(vc);
-    if (mpi_errno)
-    {
-        MPIU_ERR_POP(mpi_errno);
-    }
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_NEM_NEWTCP_MODULE_VC_TERMINATE);
-    return mpi_errno;
-
- fn_fail:
-    /* log the failure, then leave through the common exit path */
-    MPIU_DBG_MSG_FMT(NEM_SOCK_DET, VERBOSE, (MPIU_DBG_FDEST, "failure. mpi_errno = %d", mpi_errno));
-    goto fn_exit;
-}
-
-
-/* These includes are here because they're used just for getting the interface
- *   names
- */
-
-
-#include <sys/types.h>
-
-#ifdef HAVE_SYS_SOCKET_H
-#include <sys/socket.h>
-#endif
-#ifdef HAVE_NET_IF_H
-#include <net/if.h>
-#endif
-#ifdef HAVE_SYS_SOCKIO_H
-/* Needed for SIOCGIFCONF */
-#include <sys/sockio.h>
-#endif
-
-#if defined(SIOCGIFCONF) && defined(HAVE_STRUCT_IFCONF)
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-
-/* We can only access the interfaces if we have a number of features.
-   Test for these, otherwise define this routine to return false in the
-   "found" variable */
-
-#define NUM_IFREQS 10
-
-/*
- * GetIPInterface - look for a single usable IPv4 interface address.
- *
- * Queries the interface list with SIOCGIFCONF, growing the buffer by
- * NUM_IFREQS entries until two successive calls report the same length,
- * then scans the entries.
- *
- *   ifaddr - output; written only when *found is set to 1
- *   found  - output; 1 when exactly one non-localhost IPv4 address exists,
- *            or when only localhost exists; 0 otherwise
- *
- * Returns 0 on success.  Returns 1 (after printing a message on stderr)
- * when the socket, the buffer or the ioctl fails; in that case neither
- * output is written.  The socket and the buffer are released on every path.
- */
-static int GetIPInterface( MPIDU_Sock_ifaddr_t *ifaddr, int *found )
-{
-    char *buf_ptr = NULL, *ptr;
-    int buf_len, buf_len_prev;
-    int fd;
-    MPIDU_Sock_ifaddr_t myifaddr;
-    int nfound = 0, foundLocalhost = 0;
-    /* We predefine the LSB and MSB localhost addresses */
-    unsigned int localhost = 0x0100007f;
-#ifdef WORDS_BIGENDIAN
-    unsigned int MSBlocalhost = 0x7f000001;
-#endif
-
-    fd = socket(AF_INET, SOCK_DGRAM, 0);
-    if (fd < 0) {
-        fprintf( stderr, "Unable to open an AF_INET socket\n" );
-        return 1;
-    }
-
-    /* Use MSB localhost if necessary */
-#ifdef WORDS_BIGENDIAN
-    localhost = MSBlocalhost;
-#endif
-
-    /*
-     * Obtain the interface information from the operating system
-     *
-     * Note: much of this code is borrowed from W. Richard Stevens' book
-     * entitled "UNIX Network Programming", Volume 1, Second Edition.  See
-     * section 16.6 for details.
-     */
-    buf_len = NUM_IFREQS * sizeof(struct ifreq);
-    buf_len_prev = 0;
-
-    for(;;)
-    {
-        struct ifconf ifconf;
-        int rc;
-
-        buf_ptr = (char *) MPIU_Malloc(buf_len);
-        if (buf_ptr == NULL) {
-            fprintf( stderr, "Unable to allocate %d bytes\n", buf_len );
-            goto fn_fail;
-        }
-
-        ifconf.ifc_buf = buf_ptr;
-        ifconf.ifc_len = buf_len;
-
-        rc = ioctl(fd, SIOCGIFCONF, &ifconf);
-        if (rc < 0) {
-            /* EINVAL on the very first attempt is treated as "buffer too
-               small"; anything else is a real failure */
-            if (errno != EINVAL || buf_len_prev != 0) {
-                int saved_errno = errno;
-
-                fprintf( stderr, "Error from ioctl = %d\n", saved_errno );
-                /* fprintf may have changed errno; restore it for perror */
-                errno = saved_errno;
-                perror(" Error is: ");
-                goto fn_fail;
-            }
-        }
-        else {
-            /* Same length as last time: the list fit in the buffer */
-            if (ifconf.ifc_len == buf_len_prev) {
-                buf_len = ifconf.ifc_len;
-                break;
-            }
-
-            buf_len_prev = ifconf.ifc_len;
-        }
-
-        MPIU_Free(buf_ptr);
-        buf_ptr = NULL;
-        buf_len += NUM_IFREQS * sizeof(struct ifreq);
-    }
-
-    /*
-     * Now that we've got the interface information, we need to run through
-     * the interfaces and check out the ip addresses.  If we find a
-     * unique, non-local host (127.0.0.1) address, return that, otherwise
-     * return nothing.
-     */
-    ptr = buf_ptr;
-
-    while(ptr < buf_ptr + buf_len) {
-        struct ifreq *ifreq;
-
-        ifreq = (struct ifreq *) ptr;
-
-        if (dbg_ifname) {
-            fprintf( stdout, "%10s\t", ifreq->ifr_name );
-        }
-
-        if (ifreq->ifr_addr.sa_family == AF_INET) {
-            struct in_addr addr;
-
-            addr = ((struct sockaddr_in *) &(ifreq->ifr_addr))->sin_addr;
-            if (dbg_ifname) {
-                fprintf( stdout, "IPv4 address = %08x (%s)\n", addr.s_addr,
-                         inet_ntoa( addr ) );
-            }
-
-            if (addr.s_addr == localhost && dbg_ifname) {
-                fprintf( stdout, "Found local host\n" );
-            }
-            /* Save localhost if we find it.  Let any new interface
-               overwrite localhost.  However, if we find more than
-               one non-localhost interface, then we'll choose none for the
-               interfaces */
-            if (addr.s_addr == localhost) {
-                foundLocalhost = 1;
-                if (nfound == 0) {
-                    myifaddr.type = AF_INET;
-                    myifaddr.len  = 4;
-                    memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
-                }
-            }
-            else {
-                nfound++;
-                myifaddr.type = AF_INET;
-                myifaddr.len  = 4;
-                memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
-            }
-        }
-        else {
-            if (dbg_ifname) {
-                fprintf( stdout, "\n" );
-            }
-        }
-
-        /*
-         *  Increment pointer to the next ifreq; some adjustment may be
-         *  required if the address is an IPv6 address
-         */
-        /* This is needed for Mac OS X */
-#ifdef _SIZEOF_ADDR_IFREQ
-        ptr += _SIZEOF_ADDR_IFREQ(*ifreq);
-#else
-        ptr += sizeof(struct ifreq);
-
-#	if defined(AF_INET6)
-        {
-            if (ifreq->ifr_addr.sa_family == AF_INET6)
-            {
-                ptr += sizeof(struct sockaddr_in6) - sizeof(struct sockaddr);
-            }
-        }
-#	endif
-#endif
-    }
-
-    MPIU_Free(buf_ptr);
-    close(fd);
-
-    /* If we found a unique address, use that */
-    if (nfound == 1 || (nfound == 0 && foundLocalhost == 1)) {
-        *ifaddr = myifaddr;
-        *found  = 1;
-    }
-    else {
-        *found  = 0;
-    }
-
-    return 0;
-
- fn_fail:
-    /* Release whatever was acquired before the failure */
-    if (buf_ptr != NULL) {
-        MPIU_Free(buf_ptr);
-    }
-    close(fd);
-    return 1;
-}
-
-#else /* things needed to find the interfaces */
-
-/* Fallback used when the interface list cannot be queried on this
-   platform: never reports an address, always succeeds. */
-static int GetIPInterface( MPIDU_Sock_ifaddr_t *ifaddr, int *found )
-{
-    (void) ifaddr;              /* deliberately left untouched */
-
-    *found = 0;
-
-    return 0;
-}
-#endif

Deleted: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_lmt.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_lmt.c	2009-03-12 20:06:40 UTC (rev 4036)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_lmt.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -1,565 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2006 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include "wintcp_module_impl.h"
-/* Resolver error variable; read after a failed gethostbyname call. */
-extern int h_errno;
-
-/* File-local helpers for building, parsing and releasing the sender ("s")
-   and receiver ("r") cookies exchanged by the LMT routines, and for
-   configuring the LMT socket. */
-static int create_s_cookie (int data_sz, char **cookie, int *len);
-static int read_s_cookie (MPID_IOV cookie, int *data_sz);
-static int create_r_cookie (char *hostname, int port, int data_sz, char **cookie, int *len);
-static int read_r_cookie (MPID_IOV cookie, char **hostname, int *port, int *data_sz);
-static void free_cookie (void *c);
-static int set_sockopts (int fd);
-
-/* Uncomment TESTING_CHUNKING to make the send path write the first iov
-   entry in pieces of at most CHUNK bytes instead of using writev. */
-//#define TESTING_CHUNKING
-#ifdef TESTING_CHUNKING
-#define CHUNK 6299651//(32*1024)
-#endif
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_tcp_module_lmt_pre_send
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/* Sender side, first step of a large-message transfer: work out the size of
-   the request's data, build the sender cookie for it (kept in the VC), and
-   describe that cookie to the caller through *cookie.  Returns MPI_SUCCESS
-   or the error from create_s_cookie. */
-int MPID_nem_tcp_module_lmt_pre_send (MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV *cookie)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
-    MPID_Datatype *dtype;
-    MPI_Aint true_lb;
-    MPIDI_msg_sz_t nbytes;
-    int is_contig;
-    int cookie_len;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_SEND);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_SEND);
-
-    /* only the total size is used below */
-    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, is_contig, nbytes, dtype, true_lb);
-
-    mpi_errno = create_s_cookie (nbytes, &vc_ch->net.tcp.lmt_cookie, &cookie_len);
-    if (mpi_errno)
-    {
-        MPIU_ERR_POP (mpi_errno);
-    }
-
-    cookie->MPID_IOV_BUF = vc_ch->net.tcp.lmt_cookie;
-    cookie->MPID_IOV_LEN = cookie_len;
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_SEND);
-    return mpi_errno;
-
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_tcp_module_lmt_pre_recv
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/*
- * MPID_nem_tcp_module_lmt_pre_recv - receiver side, first step of a
- * large-message transfer.
- *
- * Parses the sender cookie into the VC, and, if no LMT connection exists
- * yet, creates a listening socket on an OS-chosen port.  It then builds the
- * receiver cookie (host name, port, receive size), stores it in the VC and
- * describes it through *r_cookie.  *send_cts is set to 1.
- *
- * When the LMT connection already exists no socket is created and the
- * cookie carries port 0 (the zero-initialized address).
- *
- * Returns MPI_SUCCESS or an MPI error code.
- */
-int MPID_nem_tcp_module_lmt_pre_recv (MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV s_cookie, MPID_IOV *r_cookie, int *send_cts)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int ret;
-    socklen_t addr_len;     /* in/out length for getsockname */
-    int cookie_len;         /* create_r_cookie reports the length as int */
-    struct sockaddr_in saddr;
-    MPIDI_msg_sz_t data_sz;
-    int dt_contig;
-    MPI_Aint dt_true_lb;
-    MPID_Datatype * dt_ptr;
-    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_RECV);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_RECV);
-
-    mpi_errno = read_s_cookie (s_cookie, &vc_ch->net.tcp.lmt_s_len);
-    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
-
-    /* memset takes (dest, value, count); the arguments used to be swapped,
-       which filled zero bytes and left saddr uninitialized */
-    memset (&saddr, 0, sizeof(saddr));
-
-    if (!vc_ch->net.tcp.lmt_connected)
-    {
-        vc_ch->net.tcp.lmt_desc = socket (AF_INET, SOCK_STREAM, 0);
-        MPIU_ERR_CHKANDJUMP2 (vc_ch->net.tcp.lmt_desc == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", strerror (errno), errno);
-
-        /* the socket is left in blocking mode (the O_NONBLOCK fcntl is disabled) */
-
-        saddr.sin_family      = AF_INET;
-        saddr.sin_addr.s_addr = htonl (INADDR_ANY);
-        saddr.sin_port        = htons (0);
-
-        ret = bind (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, sizeof (saddr));
-        MPIU_ERR_CHKANDJUMP3 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock|poll|bind", "**sock|poll|bind %d %d %s", ntohs (saddr.sin_port), errno, strerror (errno));
-
-        /* find out which port the OS picked */
-        addr_len = sizeof (saddr);
-        ret = getsockname (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, &addr_len);
-        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
-
-        set_sockopts (vc_ch->net.tcp.lmt_desc);
-
-        ret = listen (vc_ch->net.tcp.lmt_desc, SOMAXCONN);
-        /* arguments now follow the "%s %d" order of the format */
-        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**listen", "**listen %s %d", strerror (errno), errno);
-    }
-
-    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
-
-    mpi_errno = create_r_cookie (MPID_nem_hostname, ntohs (saddr.sin_port), data_sz, &vc_ch->net.tcp.lmt_cookie, &cookie_len);
-    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
-
-    r_cookie->MPID_IOV_BUF = vc_ch->net.tcp.lmt_cookie;
-    r_cookie->MPID_IOV_LEN = cookie_len;
-
-    *send_cts = 1;
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_PRE_RECV);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_tcp_module_lmt_start_send
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-/*
- * MPID_nem_tcp_module_lmt_start_send - sender side, data phase of a
- * large-message transfer.
- *
- * Parses the receiver cookie (host, port, receive size), releases the
- * cookie stored in the VC, connects to the receiver if no LMT connection
- * exists yet, and then writes the request's data to the socket, packing it
- * into iov batches and looping until every batch is fully written.  If the
- * receiver's size is smaller than the data, only that many bytes are sent
- * and req->status.MPI_ERROR is set to a truncation error.  The request is
- * completed once all data has been written.
- *
- * Returns MPI_SUCCESS or an MPI error code.
- */
-int MPID_nem_tcp_module_lmt_start_send (MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV r_cookie)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int ret;
-    MPIDI_msg_sz_t data_sz;
-    int dt_contig;
-    MPI_Aint dt_true_lb;
-    MPID_Datatype * dt_ptr;
-    MPIDI_msg_sz_t last;
-    int nb;
-    int s_len = 0;
-    int r_len;
-    int r_port;
-    char *r_hostname;
-    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);
-
-    mpi_errno = read_r_cookie (r_cookie, &r_hostname, &r_port, &r_len);
-    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
-
-    free_cookie (vc_ch->net.tcp.lmt_cookie);
-
-    if (!vc_ch->net.tcp.lmt_connected)
-    {
-        struct sockaddr_in saddr;
-        struct hostent *hp;
-
-        vc_ch->net.tcp.lmt_desc = socket (AF_INET, SOCK_STREAM, 0);
-        MPIU_ERR_CHKANDJUMP2 (vc_ch->net.tcp.lmt_desc == -1, mpi_errno, MPI_ERR_OTHER, "**sock_create", "**sock_create %s %d", strerror (errno), errno);
-
-        /* the socket is left in blocking mode (the O_NONBLOCK fcntl is disabled) */
-
-        hp = gethostbyname (r_hostname);
-        MPIU_ERR_CHKANDJUMP2 (hp == NULL, mpi_errno, MPI_ERR_OTHER, "**gethostbyname", "**gethostbyname %s %d", hstrerror (h_errno), h_errno);
-
-        /* memset takes (dest, value, count); the arguments used to be
-           swapped, which left the padding of saddr uninitialized */
-        memset (&saddr, 0, sizeof(saddr));
-        saddr.sin_family = AF_INET;
-        saddr.sin_port   = htons (r_port);
-        MPID_NEM_MEMCPY (&saddr.sin_addr, hp->h_addr, hp->h_length);
-
-        set_sockopts (vc_ch->net.tcp.lmt_desc);
-
-        ret = connect (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, sizeof(saddr));
-        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
-
-        vc_ch->net.tcp.lmt_connected = 1;
-    }
-
-    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
-
-    if (r_len < data_sz)
-    {
-        /* message will be truncated */
-        s_len = data_sz;
-        data_sz = r_len;
-        req->status.MPI_ERROR = MPIU_ERR_SET2 (mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", s_len, r_len);
-    }
-
-    MPID_Segment_init (req->dev.user_buf, req->dev.user_count, req->dev.datatype, &req->dev.segment, 0);
-    req->dev.segment_first = 0;
-    req->dev.segment_size = data_sz;
-    req->dev.iov_count = MPID_IOV_LIMIT;
-    req->dev.iov_offset = 0;
-    last = data_sz;
-
-    do
-    {
-        int iov_offset;
-        int left_to_send;
-        MPID_Segment_pack_vector (&req->dev.segment, req->dev.segment_first, &last, req->dev.iov, &req->dev.iov_count);
-
-        /* bytes described by the iov batch that was just packed */
-        left_to_send = last - req->dev.segment_first;
-        iov_offset = 0;
-
-#ifdef TESTING_CHUNKING
-        {
-            char *buf = req->dev.iov[0].MPID_IOV_BUF;
-            int l;
-            while (left_to_send)
-            {
-                if (left_to_send > CHUNK)
-                    l = CHUNK;
-                else
-                    l = left_to_send;
-
-                do
-                    nb = write (vc_ch->net.tcp.lmt_desc, buf, l);
-                while (nb == -1 && errno == EINTR);
-                MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
-
-                left_to_send -= nb;
-                buf += nb;
-            }
-
-            MPIDI_CH3U_Request_complete (req);
-            goto fn_exit;
-        }
-#endif
-
-        do
-            nb = writev (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
-        while (nb == -1 && errno == EINTR);
-        MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
-
-        left_to_send -= nb;
-        while (left_to_send)
-        { /* send rest of iov */
-            while (nb >= req->dev.iov[iov_offset].MPID_IOV_LEN)
-            { /* update iov to reflect sent bytes */
-                nb -= req->dev.iov[iov_offset].MPID_IOV_LEN;
-                ++iov_offset;
-            }
-            /* the current entry was only partly written; skip the sent part */
-            req->dev.iov[iov_offset].MPID_IOV_BUF = (char *)req->dev.iov[iov_offset].MPID_IOV_BUF + nb;
-            req->dev.iov[iov_offset].MPID_IOV_LEN -= nb;
-
-            do
-                nb = writev (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
-            while (nb == -1 && errno == EINTR);
-            MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
-            left_to_send -= nb;
-        }
-    }
-    while (last < data_sz);
-
-    MPIDI_CH3U_Request_complete (req);
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_SEND);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_tcp_module_lmt_start_recv
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_tcp_module_lmt_start_recv (MPIDI_VC_t *vc, MPID_Request *req)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int ret;
-    MPIDI_msg_sz_t data_sz;
-    int dt_contig;
-    MPI_Aint dt_true_lb;
-    MPID_Datatype * dt_ptr;
-    MPIDI_msg_sz_t last;
-    int nb;
-    int r_len;
-    MPIDI_CH3I_VC *vc_ch = (MPIDI_CH3I_VC *)vc->channel_private;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);
-
-    free_cookie (vc_ch->net.tcp.lmt_cookie);
-
-    if (!vc_ch->net.tcp.lmt_connected)
-    {
-        int len;
-        struct sockaddr_in saddr;
-        int connfd;
-
-        len = sizeof (saddr);
-        connfd = accept (vc_ch->net.tcp.lmt_desc, (struct sockaddr *)&saddr, &len);
-        MPIU_ERR_CHKANDJUMP2 (connfd == -1, mpi_errno, MPI_ERR_OTHER, "**sock|poll|accept", "**sock|poll|accept %d %s", errno, strerror (errno));
-
-        /* close listen fd */
-        do
-            ret = close (vc_ch->net.tcp.lmt_desc);
-        while (ret == -1 && errno == EINTR);
-        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**closesocket", "**closesocket %s %d", strerror (errno), errno);
-
-        /* set lmt_desc to new connected fd */
-        vc_ch->net.tcp.lmt_desc = connfd;
-        vc_ch->net.tcp.lmt_connected = 1;
-
-        //        ret = fcntl (vc_ch->net.tcp.lmt_desc, F_SETFL, O_NONBLOCK);
-        //        MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
-    }
-
-    MPIDI_Datatype_get_info (req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
-
-    if (data_sz > vc_ch->net.tcp.lmt_s_len)
-    {
-        data_sz = vc_ch->net.tcp.lmt_s_len;
-    }
-    else if (data_sz < vc_ch->net.tcp.lmt_s_len)
-    {
-        /* message will be truncated */
-        r_len = data_sz;
- 	req->status.MPI_ERROR = MPIU_ERR_SET2 (mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", vc_ch->net.tcp.lmt_s_len, r_len);
-    }
-
-    MPID_Segment_init (req->dev.user_buf, req->dev.user_count, req->dev.datatype, &req->dev.segment, 0);
-    req->dev.segment_first = 0;
-    req->dev.segment_size = data_sz;
-    req->dev.iov_count = MPID_IOV_LIMIT;
-    req->dev.iov_offset = 0;
-    last = data_sz;
-
-    do
-    {
-        int iov_offset;
-        int left_to_recv;
-
-        MPID_Segment_unpack_vector (&req->dev.segment, req->dev.segment_first, &last, req->dev.iov, &req->dev.iov_count);
-
-        left_to_recv = last - req->dev.segment_first;
-        iov_offset = 0;
-
-#ifdef TESTING_CHUNKING
-        {
-            char *buf = req->dev.iov[0].MPID_IOV_BUF;
-            int l;
-            while (left_to_recv)
-            {
-                if (left_to_recv > CHUNK)
-                    l = CHUNK;
-                else
-                    l = left_to_recv;
-
-                do
-                    nb = read (vc_ch->net.tcp.lmt_desc, buf, l);
-                while (nb == -1 && errno == EINTR);
-                MPIU_ERR_CHKANDJUMP (nb == -1, mpi_errno, MPI_ERR_OTHER, "**sock_writev");
-
-                left_to_recv -= nb;
-                buf += nb;
-            }
-            MPIDI_CH3U_Request_complete (req);
-            goto fn_exit;
-        }
-#endif
-
-        do
-            nb = readv (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
-        while (nb == -1 && errno == EINTR);
-        MPIU_ERR_CHKANDJUMP2 (nb == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
-        MPIU_ERR_CHKANDJUMP (nb == 0, mpi_errno, MPI_ERR_OTHER, "**fail");
-
-        left_to_recv -= nb;
-        while (left_to_recv)
-        { /* recv rest of iov */
-            while (nb >= req->dev.iov[iov_offset].MPID_IOV_LEN)
-            { /* update iov to reflect sent bytes */
-                nb -= req->dev.iov[iov_offset].MPID_IOV_LEN;
-                ++iov_offset;
-            }
-            req->dev.iov[iov_offset].MPID_IOV_BUF = (char *)req->dev.iov[iov_offset].MPID_IOV_BUF + nb;
-            req->dev.iov[iov_offset].MPID_IOV_LEN -= nb;
-
-            do
-                nb = readv (vc_ch->net.tcp.lmt_desc, &req->dev.iov[iov_offset], req->dev.iov_count - iov_offset);
-            while (nb == -1 && errno == EINTR);
-            MPIU_ERR_CHKANDJUMP2 (nb == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
-            MPIU_ERR_CHKANDJUMP (nb == 0, mpi_errno, MPI_ERR_OTHER, "**fail");
-            left_to_recv -= nb;
-        }
-    }
-    while (last < data_sz);
-
-    MPIDI_CH3U_Request_complete (req);
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_START_RECV);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_tcp_module_lmt_post_send
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_tcp_module_lmt_post_send (MPIDI_VC_t *vc, MPID_Request *req)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_SEND);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_SEND);
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_SEND);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_tcp_module_lmt_post_recv
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_tcp_module_lmt_post_recv (MPIDI_VC_t *vc, MPID_Request *req)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_RECV);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_RECV);
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_MODULE_LMT_POST_RECV);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME create_s_cookie
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int create_s_cookie (int data_sz, char **cookie, int *len)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int *int_cookie;
-
-    int_cookie = MPIU_Malloc (sizeof (data_sz));
-    MPIU_ERR_CHKANDJUMP (int_cookie == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem");
-    *int_cookie = data_sz;
-
-    *cookie = (char *)int_cookie;
-    *len = sizeof (data_sz);
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME read_s_cookie
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int read_s_cookie (MPID_IOV cookie, int *data_sz)
-{
-    int mpi_errno = MPI_SUCCESS;
-
-    MPIU_ERR_CHKANDJUMP (cookie.MPID_IOV_LEN != sizeof (data_sz), mpi_errno, MPI_ERR_OTHER, "**fail");
-
-    *data_sz = *(int *)cookie.MPID_IOV_BUF;
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-typedef struct r_cookie
-{
-    int port;
-    int data_sz;
-    char hostname[1];
-} r_cookie_t;
-
-#undef FUNCNAME
-#define FUNCNAME create_r_cookie
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int create_r_cookie (char *hostname, int port, int data_sz, char **cookie, int *len)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int hostname_len;
-    int cookie_len;
-    r_cookie_t *c;
-
-    hostname_len = strnlen (hostname, MAX_HOSTNAME_LEN) + 1;
-
-    cookie_len = sizeof (r_cookie_t) - 1 + hostname_len;
-
-    c = MPIU_Malloc (cookie_len);
-    MPIU_ERR_CHKANDJUMP (c == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem");
-
-    c->port = port;
-    c->data_sz = data_sz;
-    MPIU_Strncpy (c->hostname, hostname, hostname_len);
-
-    *cookie = (char *)c;
-    *len = sizeof (r_cookie_t) - 1 + hostname_len;
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-/* read_r_cookie - extracts hostname, port and data size from a recv cookie
-   data pointed to by hostname is valid only as long as the packet containing the cookie is valid
- */
-#undef FUNCNAME
-#define FUNCNAME read_r_cookie
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int read_r_cookie (MPID_IOV cookie, char **hostname, int *port, int *data_sz)
-{
-    int mpi_errno = MPI_SUCCESS;
-    r_cookie_t *c;
-
-    MPIU_ERR_CHKANDJUMP (cookie.MPID_IOV_LEN < sizeof (r_cookie_t), mpi_errno, MPI_ERR_OTHER, "**fail");
-
-    c = (r_cookie_t *)cookie.MPID_IOV_BUF;
-
-    *hostname = c->hostname;
-    *port = c->port;
-    *data_sz = c->data_sz;
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-static void free_cookie (void *c)
-{
-    MPIU_Free (c);
-}
-
-#undef FUNCNAME
-#define FUNCNAME set_sockopts
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-static int set_sockopts (int fd)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int option;
-    int ret;
-
-    option = 0;
-    ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(int));
-    MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
-
-    option = 128*1024;
-    setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(int));
-    MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
-    setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(int));
-    MPIU_ERR_CHKANDJUMP2 (ret == -1, mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", strerror (errno), errno);
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}

Deleted: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_poll.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_poll.c	2009-03-12 20:06:40 UTC (rev 4036)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_poll.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -1,160 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2006 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include "wintcp_module_impl.h"
-#ifdef HAVE_ERRNO_H
-	#include <errno.h>
-#endif
-
-char *MPID_nem_newtcp_module_recv_buf = NULL; /* avoid common symbol */
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_poll_init
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_poll_init()
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIU_CHKPMEM_DECL(1);
-
-    MPIU_CHKPMEM_MALLOC(MPID_nem_newtcp_module_recv_buf, char*, MPID_NEM_NEWTCP_MODULE_RECV_MAX_PKT_LEN, mpi_errno, "NewTCP temporary buffer");
-    MPIU_CHKPMEM_COMMIT();
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    MPIU_CHKPMEM_REAP();
-    goto fn_exit;
-}
-
-
-int MPID_nem_newtcp_module_poll_finalize()
-{
-    MPIU_Free(MPID_nem_newtcp_module_recv_buf);
-    return MPI_SUCCESS;
-}
-
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_recv_handler
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_recv_handler (struct pollfd *pfd, sockconn_t *sc)
-{
-    int mpi_errno = MPI_SUCCESS;
-    ssize_t bytes_recvd;
-
-    if (((MPIDI_CH3I_VC *)sc->vc->channel_private)->recv_active == NULL)
-    {
-        /* receive a new message */
-        MPIU_OSW_RETRYON_INTR((bytes_recvd < 0), 
-            (mpi_errno = MPIU_SOCKW_Read(sc->fd, MPID_nem_newtcp_module_recv_buf, MPID_NEM_NEWTCP_MODULE_RECV_MAX_PKT_LEN, &bytes_recvd)));
-        if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-        if(bytes_recvd < 0){
-            /* Handle this condn first/fast */
-            goto fn_exit;
-        }
-        else if(bytes_recvd == 0){
-            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
-        }
-        else{
-            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "New recv %d", bytes_recvd);
-            mpi_errno = MPID_nem_handle_pkt(sc->vc, MPID_nem_newtcp_module_recv_buf, bytes_recvd);
-            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-        }
-    }
-    else
-    {
-        /* there is a pending receive, receive it directly into the user buffer */
-        MPID_Request *rreq = ((MPIDI_CH3I_VC *)sc->vc->channel_private)->recv_active;
-        MPID_IOV *iov = &rreq->dev.iov[rreq->dev.iov_offset];
-        int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
-
-        MPIU_OSW_RETRYON_INTR((bytes_recvd < 0),
-            (mpi_errno = MPIU_SOCKW_Readv(sc->fd, iov, rreq->dev.iov_count, &bytes_recvd)));
-        if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-        if(bytes_recvd < 0){
-            /* Handle this condn first/fast */
-            goto fn_exit;
-        }
-        
-        if(bytes_recvd == 0){
-            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
-        }
-
-        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "Cont recv %d", bytes_recvd);
-
-        /* update the iov */
-        for (iov = &rreq->dev.iov[rreq->dev.iov_offset]; iov < &rreq->dev.iov[rreq->dev.iov_offset + rreq->dev.iov_count]; ++iov)
-        {
-            if (bytes_recvd < iov->MPID_IOV_LEN)
-            {
-                iov->MPID_IOV_BUF = (char *)iov->MPID_IOV_BUF + bytes_recvd;
-                iov->MPID_IOV_LEN -= bytes_recvd;
-                rreq->dev.iov_count = &rreq->dev.iov[rreq->dev.iov_offset + rreq->dev.iov_count] - iov;
-                rreq->dev.iov_offset = iov - rreq->dev.iov;
-                MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "bytes_recvd = %d", bytes_recvd);
-                MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "iov len = %d", iov->MPID_IOV_LEN);
-                MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "iov_offset = %d", rreq->dev.iov_offset);
-                goto fn_exit;
-            }
-            bytes_recvd -= iov->MPID_IOV_LEN;
-        }
-        
-        /* the whole iov has been received */
-
-        reqFn = rreq->dev.OnDataAvail;
-        if (!reqFn)
-        {
-            MPIU_Assert(MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_RESP);
-            MPIDI_CH3U_Request_complete(rreq);
-            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "...complete");
-            ((MPIDI_CH3I_VC *)sc->vc->channel_private)->recv_active = NULL;
-        }
-        else
-        {
-            int complete = 0;
-                
-            mpi_errno = reqFn(sc->vc, rreq, &complete);
-            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
-            if (complete)
-            {
-                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "...complete");
-                ((MPIDI_CH3I_VC *)sc->vc->channel_private)->recv_active = NULL;
-            }
-            else
-            {
-                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "...not complete");
-            }
-        }        
-    }
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_poll
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_poll (MPID_nem_poll_dir_t in_or_out)
-{
-    int mpi_errno = MPI_SUCCESS;
-
-    mpi_errno = MPID_nem_newtcp_module_connpoll();
-    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}

Deleted: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_queue.h
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_queue.h	2009-03-12 20:06:40 UTC (rev 4036)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_queue.h	2009-03-12 20:33:40 UTC (rev 4037)
@@ -1,134 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2006 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#ifndef TCP_MODULE_QUEUE_H
-#define TCP_MODULE_QUEUE_H
-
-/* Generic queue macros -- "next_field" should be set to the name of
-   the next pointer field in the element (e.g., "ch.newtcp_sendq_next") */
-
-#define PRINT_QUEUE(qp, next_field) do {        \
-    } while(0)       
-        
-
-#define GENERIC_Q_EMPTY(q) ((q).head == NULL)
-
-#define GENERIC_Q_HEAD(q) ((q).head)
-
-#define GENERIC_Q_ENQUEUE_EMPTY(qp, ep, next_field) do {        \
-        MPIU_Assert (GENERIC_Q_EMPTY (*(qp)));                  \
-        (qp)->head = (qp)->tail = ep;                           \
-        (ep)->next_field = NULL;                                \
-        PRINT_QUEUE (qp, next_field);                           \
-    } while (0)
-
-#define GENERIC_Q_ENQUEUE(qp, ep, next_field) do {              \
-        if (GENERIC_Q_EMPTY (*(qp)))                            \
-            GENERIC_Q_ENQUEUE_EMPTY (qp, ep, next_field);       \
-        else                                                    \
-        {                                                       \
-            (qp)->tail->next_field = (qp)->tail = ep;           \
-            (ep)->next_field = NULL;                            \
-        }                                                       \
-        PRINT_QUEUE (qp, next_field);                           \
-    } while (0)
-
-/* the _MULTIPLE routines assume that ep0 is the head and ep1 is the
-   tail of a linked list of elements.  The list is inserted on the end
-   of the queue. */
-#define GENERIC_Q_ENQUEUE_EMPTY_MULTIPLE(qp, ep0, ep1, next_field) do { \
-        MPIU_Assert (GENERIC_Q_EMPTY (*(qp)));                          \
-        (qp)->head = ep0;                                               \
-        (qp)->tail = ep1;                                               \
-        (ep1)->next_field = NULL;                                       \
-    } while (0)
-
-#define GENERIC_Q_ENQUEUE_MULTIPLE(qp, ep0, ep1, next_field) do {               \
-        if (GENERIC_Q_EMPTY (*(qp)))                                            \
-            GENERIC_Q_ENQUEUE_EMPTY_MULTIPLE (qp, ep0, ep1, next_field);        \
-        else                                                                    \
-        {                                                                       \
-            (qp)->tail->next_field = ep0;                                       \
-            (qp)->tail = ep1;                                                   \
-            (ep1)->next_field = NULL;                                           \
-        }                                                                       \
-    } while (0)
-
-
-#define GENERIC_Q_DEQUEUE(qp, epp, next_field) do {     \
-        MPIU_Assert (!GENERIC_Q_EMPTY (*(qp)));         \
-        *(epp) = (qp)->head;                            \
-        (qp)->head = (*(epp))->next_field;              \
-        if ((qp)->head == NULL)                         \
-            (qp)->tail = NULL;                          \
-    } while (0)
-
-/* remove the elements from the top of the queue starting with ep0 through ep1 */
-#define GENERIC_Q_REMOVE_ELEMENTS(qp, ep0, ep1, next_field) do {        \
-        MPIU_Assert (GENERIC_Q_HEAD (*(qp)) == (ep0));                  \
-        (qp)->head = (ep1)->next_field;                                 \
-        if ((qp)->head == NULL)                                         \
-            (qp)->tail = NULL;                                          \
-    } while (0)
-
-
-
-/* Generic list macros */
-#define GENERIC_L_EMPTY(q) ((q).head == NULL)
-
-#define GENERIC_L_HEAD(q) ((q).head)
-
-#define GENERIC_L_ADD_EMPTY(qp, ep, next_field, prev_field) do {        \
-        MPIU_Assert (GENERIC_L_EMPTY (*(qp)));                          \
-        (qp)->head = ep;                                                \
-        (ep)->next_field = (ep)->prev_field = NULL;                     \
-    } while (0)
-
-#define GENERIC_L_ADD(qp, ep, next_field, prev_field) do {              \
-        if (GENERIC_L_EMPTY (*(qp)))                                    \
-            GENERIC_L_ADD_EMPTY (qp, ep, next_field, prev_field);       \
-        else                                                            \
-        {                                                               \
-            (ep)->prev_field = NULL;                                    \
-            (ep)->next_field = (qp)->head;                              \
-            (qp)->head->prev_field = ep;                                \
-            (qp)->head = ep;                                            \
-        }                                                               \
-    } while (0)
-
-#define GENERIC_L_REMOVE(qp, ep, next_field, prev_field) do {   \
-        MPIU_Assert (!GENERIC_L_EMPTY (*(qp)));                 \
-        if ((ep)->prev_field)                                   \
-            ((ep)->prev_field)->next_field = (ep)->next_field;  \
-        else                                                    \
-            (qp)->head = (ep)->next_field;                      \
-        if ((ep)->next_field)                                   \
-            ((ep)->next_field)->prev_field  = (ep)->prev_field; \
-    } while (0)
-
-
-/* Generic stack macros */
-#define GENERIC_S_EMPTY(s) ((s).top == NULL)
-
-#define GENERIC_S_TOP(s) ((s).top)
-
-#define GENERIC_S_PUSH(sp, ep, next_field) do { \
-        (ep)->next_field = (sp)->top;           \
-        (sp)->top = ep;                         \
-    } while (0)
-
-/* PUSH_MULTIPLE pushes a linked list of elements onto the stack.  It
-   assumes that ep0 is the head of the linked list and ep1 is at the tail */
-#define GENERIC_S_PUSH_MULTIPLE(sp, ep0, ep1, next_field) do {  \
-        (ep1)->next_field = (sp)->top;                          \
-        (sp)->top = ep0;                                        \
-    } while (0)
-
-#define GENERIC_S_POP(sp, ep, next_field) do {  \
-        *(ep) = (sp)->top;                      \
-        (sp)->top = (*(ep))->next_field;        \
-    } while (0)
-#endif /* TCP_MODULE_QUEUE_H */

Deleted: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_send.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_send.c	2009-03-12 20:06:40 UTC (rev 4036)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_send.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -1,579 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2006 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include "wintcp_module_impl.h"
-
-#define NUM_PREALLOC_SENDQ 10
-#define MAX_SEND_IOV 10
-
-#define SENDQ_EMPTY(q) GENERIC_Q_EMPTY (q)
-#define SENDQ_HEAD(q) GENERIC_Q_HEAD (q)
-#define SENDQ_ENQUEUE(qp, ep) GENERIC_Q_ENQUEUE (qp, ep, dev.next)
-#define SENDQ_DEQUEUE(qp, ep) GENERIC_Q_DEQUEUE (qp, ep, dev.next)
-
-
-typedef struct MPID_nem_newtcp_module_send_q_element
-{
-    struct MPID_nem_newtcp_module_send_q_element *next;
-    size_t len;                        /* number of bytes left to send */
-    char *start;                       /* pointer to next byte to send */
-    MPID_nem_cell_ptr_t cell;
-    /*     char buf[MPID_NEM_MAX_PACKET_LEN];*/ /* data to be sent */
-} MPID_nem_newtcp_module_send_q_element_t;
-
-struct {MPID_nem_newtcp_module_send_q_element_t *top;} free_buffers = {0};
-
-#define ALLOC_Q_ELEMENT(e) do {                                                                                                         \
-        if (S_EMPTY (free_buffers))                                                                                                     \
-        {                                                                                                                               \
-            MPIU_CHKPMEM_MALLOC (*(e), MPID_nem_newtcp_module_send_q_element_t *, sizeof(MPID_nem_newtcp_module_send_q_element_t),      \
-                                 mpi_errno, "send queue element");                                                                      \
-        }                                                                                                                               \
-        else                                                                                                                            \
-        {                                                                                                                               \
-            S_POP (&free_buffers, e);                                                                                                   \
-        }                                                                                                                               \
-    } while (0)
-
-/* FREE_Q_ELEMENTS() frees a list if elements starting at e0 through e1 */
-#define FREE_Q_ELEMENTS(e0, e1) S_PUSH_MULTIPLE (&free_buffers, e0, e1)
-#define FREE_Q_ELEMENT(e) S_PUSH (&free_buffers, e)
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_send_init
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_send_init()
-{
-    int mpi_errno = MPI_SUCCESS;
-    int i;
-    MPIU_CHKPMEM_DECL (NUM_PREALLOC_SENDQ);
-    
-    /* preallocate sendq elements */
-    for (i = 0; i < NUM_PREALLOC_SENDQ; ++i)
-    {
-        MPID_nem_newtcp_module_send_q_element_t *e;
-        
-        MPIU_CHKPMEM_MALLOC (e, MPID_nem_newtcp_module_send_q_element_t *,
-                             sizeof(MPID_nem_newtcp_module_send_q_element_t), mpi_errno, "send queue element");
-        S_PUSH (&free_buffers, e);
-    }
-
-    MPIU_CHKPMEM_COMMIT();
-    return mpi_errno;
- fn_fail:
-    MPIU_CHKPMEM_REAP();
-    return mpi_errno;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_send
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_send (MPIDI_VC_t *vc, MPID_nem_cell_ptr_t cell, int datalen)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIU_Assert(0);
-    return mpi_errno;
-}
-
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_send_queued
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_send_queued (MPIDI_VC_t *vc)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPID_Request *sreq;
-    MPIDI_msg_sz_t offset;
-    MPID_IOV *iov;
-    int complete;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_SEND_QUEUED);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_SEND_QUEUED);
-
-    MPIU_Assert(vc != NULL);
-
-    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)))
-	goto fn_exit;
-
-    while (!SENDQ_EMPTY(VC_FIELD(vc, send_queue)))
-    {
-        sreq = SENDQ_HEAD(VC_FIELD(vc, send_queue));
-        
-        iov = &sreq->dev.iov[sreq->dev.iov_offset];
-
-/*         printf("sreq = %p sreq->dev.iov = %p iov = %p\n", sreq, sreq->dev.iov, iov); */
-/*         printf("iov[0].MPID_IOV_BUF = %p iov[0].MPID_IOV_LEN = %d iov_count = %d\n", iov[0].MPID_IOV_BUF, iov[0].MPID_IOV_LEN, sreq->dev.iov_count);//DARIUS */
-/*         printf("&iov[0].MPID_IOV_LEN = %p sreq->dev.iov_offset = %d\n", &iov[0].MPID_IOV_LEN, sreq->dev.iov_offset);//DARIUS */
-        MPIU_OSW_RETRYON_INTR((offset < 0), 
-            (mpi_errno = MPIU_SOCKW_Writev(VC_FIELD(vc, sc)->fd, iov, sreq->dev.iov_count, &offset)));
-        if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-        if(offset < 0){
-            offset = 0;
-        }
-
-        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write %d", offset);
-
-        complete = 1;
-        for (iov = &sreq->dev.iov[sreq->dev.iov_offset]; iov < &sreq->dev.iov[sreq->dev.iov_offset + sreq->dev.iov_count]; ++iov)
-        {
-            if (offset < iov->MPID_IOV_LEN)
-            {
-                iov->MPID_IOV_BUF = (char *)iov->MPID_IOV_BUF + offset;
-                iov->MPID_IOV_LEN -= offset;
-                /* iov_count should be equal to the number of iov's remaining */
-                sreq->dev.iov_count -= ((iov - sreq->dev.iov) - sreq->dev.iov_offset);
-                sreq->dev.iov_offset = iov - sreq->dev.iov;
-                complete = 0;
-                break;
-            }
-            offset -= iov->MPID_IOV_LEN;
-        }
-        if (!complete)
-        {
-            /* writev couldn't write the entire iov, give up for now */
-            break;
-        }
-        else
-        {
-            /* sent whole message */
-            int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
-
-            reqFn = sreq->dev.OnDataAvail;
-            if (!reqFn)
-            {
-                MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
-                MPIDI_CH3U_Request_complete(sreq);
-                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-                SENDQ_DEQUEUE(&VC_FIELD(vc, send_queue), &sreq);
-                break;
-            }
-
-            complete = 0;
-            mpi_errno = reqFn(vc, sreq, &complete);
-            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-            
-            if (complete)
-            {
-                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-                SENDQ_DEQUEUE(&VC_FIELD(vc, send_queue), &sreq);
-                break;
-            }
-            sreq->dev.iov_offset = 0;
-        }
-    }
-
-    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue))){
-        mpi_errno = MPID_nem_newtcp_module_conn_wr_disable(vc);
-        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-    }
-    
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_SEND_QUEUED);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_send_finalize
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_send_finalize()
-{
-    int mpi_errno = MPI_SUCCESS;
-
-    while (!S_EMPTY (free_buffers))
-    {
-        MPID_nem_newtcp_module_send_q_element_t *e;
-        S_POP (&free_buffers, &e);
-        MPIU_Free (e);
-    }
-    return mpi_errno;
-}
-
-/* MPID_nem_newtcp_module_conn_est -- this function is called when the
-   connection is finally extablished to send any pending sends */
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_conn_est
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_conn_est (MPIDI_VC_t *vc)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONN_EST);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONN_EST);
-
-    if (!SENDQ_EMPTY (VC_FIELD(vc, send_queue)))
-    {
-        mpi_errno = MPID_nem_newtcp_module_conn_wr_enable(vc);
-        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-        mpi_errno = MPID_nem_newtcp_module_send_queued (vc);
-        if (mpi_errno) MPIU_ERR_POP (mpi_errno);
-    }
-
- fn_fail:    
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONN_EST);
-    return mpi_errno;
-}
-
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_iStartContigMsg
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz,
-                                    MPID_Request **sreq_ptr)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPID_Request * sreq = NULL;
-    MPIDI_msg_sz_t offset = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_ISTARTCONTIGMSG);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_ISTARTCONTIGMSG);
-    
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
-    
-    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "newtcp_iStartContigMsg");
-    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
-    if (MPID_nem_newtcp_module_vc_is_connected(vc))
-    {
-        if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)))
-        {
-            MPID_IOV iov[2];
-
-            iov[0].MPID_IOV_BUF = hdr;
-            iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t);
-            iov[1].MPID_IOV_BUF = data;
-            iov[1].MPID_IOV_LEN = data_sz;
-        
-            MPIU_OSW_RETRYON_INTR((offset < 0),
-                (mpi_errno = MPIU_SOCKW_Writev(VC_FIELD(vc, sc)->fd, iov, 2, &offset)));
-            if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-            if(offset < 0){
-                offset = 0;
-            }
-
-            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write %d", offset);
-
-            if (offset == sizeof(MPIDI_CH3_PktGeneric_t) + data_sz)
-            {
-                /* sent whole message */
-                *sreq_ptr = NULL;
-                goto fn_exit;
-            }
-        }
-    }
-    else
-    {
-        mpi_errno = MPID_nem_newtcp_module_connect(vc);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    }
-
-    /* create and enqueue request */
-    MPIU_DBG_MSG (CH3_CHANNEL, VERBOSE, "enqueuing");
-
-    /* create a request */
-    sreq = MPID_Request_create();
-    MPIU_Assert (sreq != NULL);
-    MPIU_Object_set_ref (sreq, 2);
-    sreq->kind = MPID_REQUEST_SEND;
-
-    sreq->dev.OnDataAvail = 0;
-    sreq->ch.vc = vc;
-    sreq->dev.iov_offset = 0;
-
-/*     printf("&sreq->dev.pending_pkt = %p sizeof(MPIDI_CH3_PktGeneric_t) = %d\n", &sreq->dev.pending_pkt, sizeof(MPIDI_CH3_PktGeneric_t));//DARIUS */
-/*     printf("offset = %d\n", offset);//DARIUS */
-
-    if (offset < sizeof(MPIDI_CH3_PktGeneric_t))
-    {
-        sreq->dev.pending_pkt = *(MPIDI_CH3_PktGeneric_t *)hdr;
-        sreq->dev.iov[0].MPID_IOV_BUF = (char *)&sreq->dev.pending_pkt + offset;
-        sreq->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t) - offset ;
-        if (data_sz)
-        {
-            sreq->dev.iov[1].MPID_IOV_BUF = data;
-            sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
-            sreq->dev.iov_count = 2;
-        }
-        else
-            sreq->dev.iov_count = 1;
-    }
-    else
-    {
-        sreq->dev.iov[0].MPID_IOV_BUF = (char *)data + (offset - sizeof(MPIDI_CH3_PktGeneric_t));
-        sreq->dev.iov[0].MPID_IOV_LEN = data_sz - (offset - sizeof(MPIDI_CH3_PktGeneric_t));
-        sreq->dev.iov_count = 1;
-    }
-    
-    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0);
-
-/*     printf("sreq = %p sreq->dev.iov = %p\n", sreq, sreq->dev.iov); */
-/*     printf("sreq->dev.iov[0].MPID_IOV_BUF = %p\n", sreq->dev.iov[0].MPID_IOV_BUF);//DARIUS */
-/*     printf("sreq->dev.iov[0].MPID_IOV_LEN = %d\n", sreq->dev.iov[0].MPID_IOV_LEN);//DARIUS */
-/*     printf("&sreq->dev.iov[0].MPID_IOV_LEN = %p\n", &sreq->dev.iov[0].MPID_IOV_LEN);//DARIUS */
-
-    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)) && MPID_nem_newtcp_module_vc_is_connected(vc)){
-        mpi_errno = MPID_nem_newtcp_module_conn_wr_enable(vc);
-        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-    }
-    SENDQ_ENQUEUE(&VC_FIELD(vc, send_queue), sreq);
-
-    *sreq_ptr = sreq;
-    
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_ISTARTCONTIGMSG);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_iSendContig
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_iSendContig(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr, MPIDI_msg_sz_t hdr_sz,
-                                void *data, MPIDI_msg_sz_t data_sz)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_msg_sz_t offset = 0;
-    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_ISENDCONTIGMSG);
-
-    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_ISENDCONTIGMSG);
-    
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
-    
-    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "newtcp_iSendContig");
-
-    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
-    if (MPID_nem_newtcp_module_vc_is_connected(vc))
-    {
-        if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)))
-        {
-            MPID_IOV iov[2];
-
-            iov[0].MPID_IOV_BUF = hdr;
-            iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t);
-            iov[1].MPID_IOV_BUF = data;
-            iov[1].MPID_IOV_LEN = data_sz;
-        
-            MPIU_OSW_RETRYON_INTR((offset < 0),
-                (mpi_errno = MPIU_SOCKW_Writev(VC_FIELD(vc, sc)->fd, iov, 2, &offset)));
-            if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-            if(offset < 0){
-                offset = 0;
-            }
-
-            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write %d", offset);
-
-            if (offset == sizeof(MPIDI_CH3_PktGeneric_t) + data_sz)
-            {
-                /* sent whole message */
-                int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
-
-                reqFn = sreq->dev.OnDataAvail;
-                if (!reqFn)
-                {
-                    MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
-                    MPIDI_CH3U_Request_complete(sreq);
-                    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-                    goto fn_exit;
-                }
-                else
-                {
-                    int complete = 0;
-                
-                    mpi_errno = reqFn(vc, sreq, &complete);
-                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-
-                    if (complete)
-                    {
-                        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-                        goto fn_exit;
-                    }
-
-                    /* not completed: more to send */
-                    goto enqueue_request;
-                }
-            }
-        }
-    }
-    else
-    {
-        mpi_errno = MPID_nem_newtcp_module_connect(vc);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    }
-
-
-    /* save iov */
-    if (offset < sizeof(MPIDI_CH3_PktGeneric_t))
-    {
-        sreq->dev.pending_pkt = *(MPIDI_CH3_PktGeneric_t *)hdr;
-        sreq->dev.iov[0].MPID_IOV_BUF = (char *)&sreq->dev.pending_pkt + offset;
-        sreq->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t) - offset;
-        if (data_sz)
-        {
-            sreq->dev.iov[1].MPID_IOV_BUF = data;
-            sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
-            sreq->dev.iov_count = 2;
-        }
-        else
-            sreq->dev.iov_count = 1;
-    }
-    else
-    {
-        sreq->dev.iov[0].MPID_IOV_BUF = (char *)data + (offset - sizeof(MPIDI_CH3_PktGeneric_t));
-        sreq->dev.iov[0].MPID_IOV_LEN = data_sz - (offset - sizeof(MPIDI_CH3_PktGeneric_t));
-        sreq->dev.iov_count = 1;
-    }
-
- enqueue_request:
-    /* enqueue request */
-    MPIU_DBG_MSG (CH3_CHANNEL, VERBOSE, "enqueuing");
-    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0);
-
-    sreq->ch.vc = vc;
-    sreq->dev.iov_offset = 0;
-
-    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)) && MPID_nem_newtcp_module_vc_is_connected(vc)){
-        mpi_errno = MPID_nem_newtcp_module_conn_wr_enable(vc);
-        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-    }
-    SENDQ_ENQUEUE(&VC_FIELD(vc, send_queue), sreq);
-
- fn_exit:
-    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_ISENDCONTIGMSG);
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_SendNoncontig
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *header, MPIDI_msg_sz_t hdr_sz)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int iov_n;
-    MPID_IOV iov[MPID_IOV_LIMIT];
-    MPID_IOV *iov_p;
-    MPIDI_msg_sz_t offset;
-    int complete;
-
-    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "newtcp_SendNoncontig");
-    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_PktGeneric_t));
-    
-    iov[0].MPID_IOV_BUF = header;
-    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t);
-
-    iov_n = MPID_IOV_LIMIT - 1;
-    /* On the initial load of a send iov req, set the OnFinal action (null
-       for point-to-point) */
-    mpi_errno = MPIDI_CH3U_Request_load_send_iov(sreq, &iov[1], &iov_n);
-    MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|loadsendiov");
-
-    iov_n += 1;
-    offset = 0;
-
-    if (MPID_nem_newtcp_module_vc_is_connected(vc))
-    {
-        if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)))
-        {
-            MPIU_OSW_RETRYON_INTR((offset < 0),
-                (mpi_errno = MPIU_SOCKW_Writev(VC_FIELD(vc, sc)->fd, iov, iov_n, &offset)));
-            if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-            if(offset < 0){
-                offset = 0;
-            }
-
-            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write noncontig %d", offset);
-        }
-    }
-    else
-    {
-        mpi_errno = MPID_nem_newtcp_module_connect(vc);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-    }
-
-    if (offset < iov[0].MPID_IOV_LEN)
-    {
-        /* header was not yet sent, save it in req */
-        sreq->dev.pending_pkt = *(MPIDI_CH3_PktGeneric_t *)header;
-        iov[0].MPID_IOV_BUF = &sreq->dev.pending_pkt;
-        iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t);
-    }
-
-    /* check if whole iov was sent, and save any unsent portion of iov */
-    sreq->dev.iov_count = 0;
-    complete = 1;
-    for (iov_p = &iov[0]; iov_p < &iov[iov_n]; ++iov_p)
-    {
-        if (offset < iov_p->MPID_IOV_LEN)
-        {
-            sreq->dev.iov[sreq->dev.iov_count].MPID_IOV_BUF = (char *)iov_p->MPID_IOV_BUF + offset;
-            sreq->dev.iov[sreq->dev.iov_count].MPID_IOV_LEN = iov_p->MPID_IOV_LEN - offset;
-            offset = 0;
-            ++sreq->dev.iov_count;
-            complete = 0;
-        }
-        else
-            offset -= iov_p->MPID_IOV_LEN;
-    }
-        
-    if (complete)
-    {
-        /* sent whole iov */
-        int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
-
-        reqFn = sreq->dev.OnDataAvail;
-        if (!reqFn)
-        {
-            MPIDI_CH3U_Request_complete(sreq);
-            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-            goto fn_exit;
-        }
-
-        complete = 0;
-        mpi_errno = reqFn(vc, sreq, &complete);
-        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
-            
-        if (complete)
-        {
-            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
-            goto fn_exit;
-        }
-    }
-        
-    /* enqueue request */
-    MPIU_DBG_MSG (CH3_CHANNEL, VERBOSE, "enqueuing");
-    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0);
-        
-    sreq->ch.vc = vc;
-    sreq->dev.iov_offset = 0;
-        
-    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)) && MPID_nem_newtcp_module_vc_is_connected(vc)){
-        mpi_errno = MPID_nem_newtcp_module_conn_wr_enable(vc);
-        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
-    }
-    SENDQ_ENQUEUE(&VC_FIELD(vc, send_queue), sreq);
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    MPIU_Object_set_ref(sreq, 0);
-    MPIDI_CH3_Request_destroy(sreq);
-    goto fn_exit;
-}

Deleted: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_utility.c
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_utility.c	2009-03-12 20:06:40 UTC (rev 4036)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_utility.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -1,156 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *  (C) 2006 by Argonne National Laboratory.
- *      See COPYRIGHT in top-level directory.
- */
-
-#include "wintcp_module_impl.h"
-
-/* MPID_nem_newtcp_module_get_conninfo -- This function takes a VC
-   pointer as input and outputs the sockaddr, pg_id, and pg_rank of
-   the remote process associated with this VC.  [NOTE: I'm not sure
-   yet, if the pg_id parameters will be char* or char**.  I'd like to
-   avoid a copy on this.] */
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_get_conninfo
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_get_conninfo (struct MPIDI_VC *vc, struct sockaddr_in *addr, char **pg_id, int *pg_rank)
-{
-    int mpi_errno = MPI_SUCCESS;
-
-    *addr = VC_FIELD(vc, sock_id);
-    *pg_id = (char *)vc->pg->id;
-    *pg_rank = vc->pg_rank;
-    
-    return mpi_errno;
-}
-
-/* MPID_nem_newtcp_module_get_vc_from_conninfo -- This function takes
-   the pg_id and pg_rank and returns the corresponding VC. */
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_get_vc_from_conninfo
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_get_vc_from_conninfo (char *pg_id, int pg_rank, struct MPIDI_VC **vc)
-{
-    int mpi_errno = MPI_SUCCESS;
-    MPIDI_PG_t *pg;
-    
-    mpi_errno = MPIDI_PG_Find (pg_id, &pg);
-    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
-
-    MPIU_ERR_CHKANDJUMP1 (pg == NULL, mpi_errno, MPI_ERR_OTHER, "**intern", "**intern %s", "invalid PG");
-    MPIU_ERR_CHKANDJUMP1 (pg_rank < 0 || pg_rank > MPIDI_PG_Get_size (pg), mpi_errno, MPI_ERR_OTHER, "**intern", "**intern %s", "invalid pg_rank");
-        
-    MPIDI_PG_Get_vc (pg, pg_rank, vc);
-    
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-#undef FUNCNAME
-#define FUNCNAME set_sockopts
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-int MPID_nem_newtcp_module_set_sockopts (int fd)
-{
-    int mpi_errno = MPI_SUCCESS;
-    int option, flags;
-    int ret;
-    socklen_t len;
-
-    /* I heard you have to read the options after setting them in some implementations */
-
-    option = 1;
-    len = sizeof(int);
-
-    mpi_errno = MPIU_SOCKW_Sock_setopt(fd, IPPROTO_TCP, TCP_NODELAY, &option, len);
-    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-    option = MPID_NEM_NEWTCP_MODULE_RCVBUF_SZ;
-    len = sizeof(int);
-
-    mpi_errno = MPIU_SOCKW_Sock_setopt(fd, SOL_SOCKET, SO_RCVBUF, &option, len);
-    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-    option = MPID_NEM_NEWTCP_MODULE_SNDBUF_SZ;
-    len = sizeof(int);
-
-    mpi_errno = MPIU_SOCKW_Sock_setopt(fd, SOL_SOCKET, SO_SNDBUF, &option, len);
-    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-    
-    option = 1;
-    len = sizeof(int);
-
-    mpi_errno = MPIU_SOCKW_Sock_setopt(fd, SOL_SOCKET, SO_REUSEADDR, &option, len);
-    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
-    option = 1;
-    mpi_errno = MPIU_SOCKW_Sock_cntrl_nb(fd, option);
-    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
-
- fn_exit:
-    return mpi_errno;
- fn_fail:
-    goto fn_exit;
-}
-
-
-/*
-  MPID_NEM_NEWTCP_MODULE_SOCK_ERROR_EOF : connection failed
-  MPID_NEM_NEWTCP_MODULE_SOCK_CONNECTED : socket connected (connection success)
-  MPID_NEM_NEWTCP_MODULE_SOCK_NOEVENT   : No event on socket
-
-N1: some implementations do not set POLLERR when there is a pending error on socket.
-So, solution is to check for readability/writeablility and then call getsockopt.
-Again, getsockopt behaves differently in different implementations which  is handled
-safely here (per Stevens-Unix Network Programming)
-
-N2: As far as the socket code is concerned, it doesn't really differentiate whether
-there is an error in the socket or whether the peer has closed it (i.e we have received
-EOF and hence recv returns 0). Either way, we deccide the socket fd is not usable any
-more. So, same return code is used.
-A design decision is not to write also, if the peer has closed the socket. Please note that
-write will still be succesful, even if the peer has sent us FIN. Only the subsequent 
-write will fail. So, this function is made tight enough and this should be called
-before doing any read/write at least in the connection establishment state machine code.
-
-N3: return code MPID_NEM_NEWTCP_MODULE_SOCK_NOEVENT is used only by the code that wants to
-know whether the connect is still not complete after a non-blocking connect is issued.
-
-TODO: Make this a macro for performance, if needed based on the usage.
-FIXME : Above comments are inconsistent now with the changes. No check for EOF is 
-actually done now in this function.
-*/
-
-
-#undef FUNCNAME
-#define FUNCNAME MPID_nem_newtcp_module_check_sock_status
-#undef FCNAME
-#define FCNAME MPIDI_QUOTE(FUNCNAME)
-MPID_NEM_NEWTCP_MODULE_SOCK_STATUS_t 
-MPID_nem_newtcp_module_check_sock_status(MPIU_SOCKW_Waitset_sock_hnd_t fd_ws_hnd)
-{
-    int rc = MPID_NEM_NEWTCP_MODULE_SOCK_NOEVENT;
-
-    /* FIXME: At least on windows there is no guarantee that a successful socket call
-     * resets the socket error code
-     * We will assume that sock is connected for now and let the state machine handle
-     * errors
-     */
-    if(MPIU_SOCKW_Waitset_is_sock_readable(fd_ws_hnd) ||
-        MPIU_SOCKW_Waitset_is_sock_writeable(fd_ws_hnd)){
-        rc = MPID_NEM_NEWTCP_MODULE_SOCK_CONNECTED;
-    }
-    else{
-        /* Should we return no event for OOB data - EX ? */
-        rc = MPID_NEM_NEWTCP_MODULE_SOCK_NOEVENT;
-    }
-
- fn_exit:
-    return rc;
-}
-

Copied: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_poll.c (from rev 4035, mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_poll.c)
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_poll.c	                        (rev 0)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_poll.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -0,0 +1,160 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "wintcp_impl.h"
+#ifdef HAVE_ERRNO_H
+	#include <errno.h>
+#endif
+
+char *MPID_nem_newtcp_module_recv_buf = NULL; /* avoid common symbol */
+/* Allocate MPID_nem_newtcp_module_recv_buf (MPID_NEM_NEWTCP_MODULE_RECV_MAX_PKT_LEN bytes); returns an MPI error code. */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_poll_init
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_module_poll_init()
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIU_CHKPMEM_DECL(1);
+
+    MPIU_CHKPMEM_MALLOC(MPID_nem_newtcp_module_recv_buf, char*, MPID_NEM_NEWTCP_MODULE_RECV_MAX_PKT_LEN, mpi_errno, "NewTCP temporary buffer");
+    MPIU_CHKPMEM_COMMIT();
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    MPIU_CHKPMEM_REAP();
+    goto fn_exit;
+}
+
+/* Free MPID_nem_newtcp_module_recv_buf (the pointer itself is not reset); always returns MPI_SUCCESS. */
+int MPID_nem_newtcp_module_poll_finalize()
+{
+    MPIU_Free(MPID_nem_newtcp_module_recv_buf);
+    return MPI_SUCCESS;
+}
+
+/* Read from sc->fd: hand a new packet to MPID_nem_handle_pkt, or continue the VC's active receive. pfd is not used here. */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_recv_handler
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_module_recv_handler (struct pollfd *pfd, sockconn_t *sc)
+{
+    int mpi_errno = MPI_SUCCESS;
+    ssize_t bytes_recvd;
+
+    if (((MPIDI_CH3I_VC *)sc->vc->channel_private)->recv_active == NULL)
+    {
+        /* no receive is active on this VC: read a new message into the global MPID_nem_newtcp_module_recv_buf */
+        MPIU_OSW_RETRYON_INTR((bytes_recvd < 0), 
+            (mpi_errno = MPIU_SOCKW_Read(sc->fd, MPID_nem_newtcp_module_recv_buf, MPID_NEM_NEWTCP_MODULE_RECV_MAX_PKT_LEN, &bytes_recvd)));
+        if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+        if(bytes_recvd < 0){
+            /* no error but a negative count: nothing to process, return success (presumably would-block -- TODO confirm) */
+            goto fn_exit;
+        }
+        else if(bytes_recvd == 0){
+            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
+        }
+        else{
+            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "New recv %d", bytes_recvd);
+            mpi_errno = MPID_nem_handle_pkt(sc->vc, MPID_nem_newtcp_module_recv_buf, bytes_recvd);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+        }
+    }
+    else
+    {
+        /* there is a pending receive, receive it directly into the user buffer */
+        MPID_Request *rreq = ((MPIDI_CH3I_VC *)sc->vc->channel_private)->recv_active;
+        MPID_IOV *iov = &rreq->dev.iov[rreq->dev.iov_offset];
+        int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
+
+        MPIU_OSW_RETRYON_INTR((bytes_recvd < 0),
+            (mpi_errno = MPIU_SOCKW_Readv(sc->fd, iov, rreq->dev.iov_count, &bytes_recvd)));
+        if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+        if(bytes_recvd < 0){
+            /* no error but a negative count: nothing to process, return success (presumably would-block -- TODO confirm) */
+            goto fn_exit;
+        }
+        
+        if(bytes_recvd == 0){
+            MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**sock_closed");
+        }
+
+        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "Cont recv %d", bytes_recvd);
+
+        /* consume bytes_recvd across the iov entries; at the first entry not completely filled, save the position in rreq and return */
+        for (iov = &rreq->dev.iov[rreq->dev.iov_offset]; iov < &rreq->dev.iov[rreq->dev.iov_offset + rreq->dev.iov_count]; ++iov)
+        {
+            if (bytes_recvd < iov->MPID_IOV_LEN)
+            {
+                iov->MPID_IOV_BUF = (char *)iov->MPID_IOV_BUF + bytes_recvd;
+                iov->MPID_IOV_LEN -= bytes_recvd;
+                rreq->dev.iov_count = &rreq->dev.iov[rreq->dev.iov_offset + rreq->dev.iov_count] - iov;
+                rreq->dev.iov_offset = iov - rreq->dev.iov;
+                MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "bytes_recvd = %d", bytes_recvd);
+                MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "iov len = %d", iov->MPID_IOV_LEN);
+                MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "iov_offset = %d", rreq->dev.iov_offset);
+                goto fn_exit;
+            }
+            bytes_recvd -= iov->MPID_IOV_LEN;
+        }
+        
+        /* the whole iov has been received: complete the request, or let its OnDataAvail callback report completion */
+
+        reqFn = rreq->dev.OnDataAvail;
+        if (!reqFn)
+        {
+            MPIU_Assert(MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+            MPIDI_CH3U_Request_complete(rreq);
+            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "...complete");
+            ((MPIDI_CH3I_VC *)sc->vc->channel_private)->recv_active = NULL;
+        }
+        else
+        {
+            int complete = 0;
+                
+            mpi_errno = reqFn(sc->vc, rreq, &complete);
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+            if (complete)
+            {
+                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "...complete");
+                ((MPIDI_CH3I_VC *)sc->vc->channel_private)->recv_active = NULL;
+            }
+            else
+            {
+                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "...not complete");
+            }
+        }        
+    }
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+/* Poll entry point: delegates to MPID_nem_newtcp_module_connpoll() and returns its error code; in_or_out is not used. */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_poll
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_module_poll (MPID_nem_poll_dir_t in_or_out)
+{
+    int mpi_errno = MPI_SUCCESS;
+
+    mpi_errno = MPID_nem_newtcp_module_connpoll();
+    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}

Copied: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_queue.h (from rev 4035, mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_queue.h)
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_queue.h	                        (rev 0)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_queue.h	2009-03-12 20:33:40 UTC (rev 4037)
@@ -0,0 +1,134 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#ifndef TCP_MODULE_QUEUE_H
+#define TCP_MODULE_QUEUE_H
+
+/* Generic queue macros -- "next_field" should be set to the name of
+   the next pointer field in the element (e.g., "ch.newtcp_sendq_next") */
+
+#define PRINT_QUEUE(qp, next_field) do {        \
+    } while(0)       
+        
+
+#define GENERIC_Q_EMPTY(q) ((q).head == NULL)
+
+#define GENERIC_Q_HEAD(q) ((q).head)
+
+#define GENERIC_Q_ENQUEUE_EMPTY(qp, ep, next_field) do {        \
+        MPIU_Assert (GENERIC_Q_EMPTY (*(qp)));                  \
+        (qp)->head = (qp)->tail = ep;                           \
+        (ep)->next_field = NULL;                                \
+        PRINT_QUEUE (qp, next_field);                           \
+    } while (0)
+/* Append element ep to the tail of queue *qp; ep is evaluated more than once, so pass a side-effect-free expression. */
+#define GENERIC_Q_ENQUEUE(qp, ep, next_field) do {              \
+        if (GENERIC_Q_EMPTY (*(qp)))                            \
+            GENERIC_Q_ENQUEUE_EMPTY (qp, ep, next_field);       \
+        else { /* not "tail->next = tail = ep": unsequenced */  \
+            (qp)->tail->next_field = (ep);                      \
+            (qp)->tail = (ep);                                  \
+            (ep)->next_field = NULL;                            \
+        }                                                       \
+        PRINT_QUEUE (qp, next_field);                           \
+    } while (0)
+
+/* the _MULTIPLE routines assume that ep0 is the head and ep1 is the
+   tail of a linked list of elements.  The list is inserted on the end
+   of the queue. */
+#define GENERIC_Q_ENQUEUE_EMPTY_MULTIPLE(qp, ep0, ep1, next_field) do { \
+        MPIU_Assert (GENERIC_Q_EMPTY (*(qp)));                          \
+        (qp)->head = ep0;                                               \
+        (qp)->tail = ep1;                                               \
+        (ep1)->next_field = NULL;                                       \
+    } while (0)
+
+#define GENERIC_Q_ENQUEUE_MULTIPLE(qp, ep0, ep1, next_field) do {               \
+        if (GENERIC_Q_EMPTY (*(qp)))                                            \
+            GENERIC_Q_ENQUEUE_EMPTY_MULTIPLE (qp, ep0, ep1, next_field);        \
+        else                                                                    \
+        {                                                                       \
+            (qp)->tail->next_field = ep0;                                       \
+            (qp)->tail = ep1;                                                   \
+            (ep1)->next_field = NULL;                                           \
+        }                                                                       \
+    } while (0)
+
+
+#define GENERIC_Q_DEQUEUE(qp, epp, next_field) do {     \
+        MPIU_Assert (!GENERIC_Q_EMPTY (*(qp)));         \
+        *(epp) = (qp)->head;                            \
+        (qp)->head = (*(epp))->next_field;              \
+        if ((qp)->head == NULL)                         \
+            (qp)->tail = NULL;                          \
+    } while (0)
+
+/* remove the elements from the top of the queue starting with ep0 through ep1 */
+#define GENERIC_Q_REMOVE_ELEMENTS(qp, ep0, ep1, next_field) do {        \
+        MPIU_Assert (GENERIC_Q_HEAD (*(qp)) == (ep0));                  \
+        (qp)->head = (ep1)->next_field;                                 \
+        if ((qp)->head == NULL)                                         \
+            (qp)->tail = NULL;                                          \
+    } while (0)
+
+
+
+/* Generic list macros */
+#define GENERIC_L_EMPTY(q) ((q).head == NULL)
+
+#define GENERIC_L_HEAD(q) ((q).head)
+
+#define GENERIC_L_ADD_EMPTY(qp, ep, next_field, prev_field) do {        \
+        MPIU_Assert (GENERIC_L_EMPTY (*(qp)));                          \
+        (qp)->head = ep;                                                \
+        (ep)->next_field = (ep)->prev_field = NULL;                     \
+    } while (0)
+
+#define GENERIC_L_ADD(qp, ep, next_field, prev_field) do {              \
+        if (GENERIC_L_EMPTY (*(qp)))                                    \
+            GENERIC_L_ADD_EMPTY (qp, ep, next_field, prev_field);       \
+        else                                                            \
+        {                                                               \
+            (ep)->prev_field = NULL;                                    \
+            (ep)->next_field = (qp)->head;                              \
+            (qp)->head->prev_field = ep;                                \
+            (qp)->head = ep;                                            \
+        }                                                               \
+    } while (0)
+
+#define GENERIC_L_REMOVE(qp, ep, next_field, prev_field) do {   \
+        MPIU_Assert (!GENERIC_L_EMPTY (*(qp)));                 \
+        if ((ep)->prev_field)                                   \
+            ((ep)->prev_field)->next_field = (ep)->next_field;  \
+        else                                                    \
+            (qp)->head = (ep)->next_field;                      \
+        if ((ep)->next_field)                                   \
+            ((ep)->next_field)->prev_field  = (ep)->prev_field; \
+    } while (0)
+
+
+/* Generic stack macros */
+#define GENERIC_S_EMPTY(s) ((s).top == NULL)
+
+#define GENERIC_S_TOP(s) ((s).top)
+
+#define GENERIC_S_PUSH(sp, ep, next_field) do { \
+        (ep)->next_field = (sp)->top;           \
+        (sp)->top = ep;                         \
+    } while (0)
+
+/* PUSH_MULTIPLE pushes a linked list of elements onto the stack.  It
+   assumes that ep0 is the head of the linked list and ep1 is at the tail */
+#define GENERIC_S_PUSH_MULTIPLE(sp, ep0, ep1, next_field) do {  \
+        (ep1)->next_field = (sp)->top;                          \
+        (sp)->top = ep0;                                        \
+    } while (0)
+
+#define GENERIC_S_POP(sp, ep, next_field) do {  \
+        *(ep) = (sp)->top;                      \
+        (sp)->top = (*(ep))->next_field;        \
+    } while (0)
+#endif /* TCP_MODULE_QUEUE_H */

Copied: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_send.c (from rev 4035, mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_send.c)
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_send.c	                        (rev 0)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_send.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -0,0 +1,579 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "wintcp_impl.h"
+
+#define NUM_PREALLOC_SENDQ 10
+#define MAX_SEND_IOV 10
+
+#define SENDQ_EMPTY(q) GENERIC_Q_EMPTY (q)
+#define SENDQ_HEAD(q) GENERIC_Q_HEAD (q)
+#define SENDQ_ENQUEUE(qp, ep) GENERIC_Q_ENQUEUE (qp, ep, dev.next)
+#define SENDQ_DEQUEUE(qp, ep) GENERIC_Q_DEQUEUE (qp, ep, dev.next)
+
+
+typedef struct MPID_nem_newtcp_module_send_q_element /* one node of a singly linked list of send buffers */
+{
+    struct MPID_nem_newtcp_module_send_q_element *next; /* link to the following element */
+    size_t len;                        /* number of bytes left to send */
+    char *start;                       /* pointer to next byte to send */
+    MPID_nem_cell_ptr_t cell;
+    /*     char buf[MPID_NEM_MAX_PACKET_LEN];*/ /* data to be sent */
+} MPID_nem_newtcp_module_send_q_element_t;
+
+struct {MPID_nem_newtcp_module_send_q_element_t *top;} free_buffers = {0}; /* stack of recycled elements; starts out empty */
+
+#define ALLOC_Q_ELEMENT(e) do {   /* stores an element in *(e): reuse one from free_buffers if possible, else allocate */                                 \
+        if (S_EMPTY (free_buffers))                                                                                                     \
+        {   /* nothing cached: allocate; uses mpi_errno and the MPIU_CHKPMEM state of the calling function */                          \
+            MPIU_CHKPMEM_MALLOC (*(e), MPID_nem_newtcp_module_send_q_element_t *, sizeof(MPID_nem_newtcp_module_send_q_element_t),      \
+                                 mpi_errno, "send queue element");                                                                      \
+        }                                                                                                                               \
+        else                                                                                                                            \
+        {                                                                                                                               \
+            S_POP (&free_buffers, e);                                                                                                   \
+        }                                                                                                                               \
+    } while (0) /* NOTE(review): S_EMPTY/S_POP/S_PUSH are defined outside this file - presumably wrappers over GENERIC_S_*; confirm */
+
+/* FREE_Q_ELEMENTS() frees a list of elements starting at e0 through e1 by
+   pushing the whole list back onto free_buffers; FREE_Q_ELEMENT() does the
+   same for a single element.  No memory is released to the allocator here. */
+#define FREE_Q_ELEMENTS(e0, e1) S_PUSH_MULTIPLE (&free_buffers, e0, e1)
+#define FREE_Q_ELEMENT(e) S_PUSH (&free_buffers, e)
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_send_init
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* Preallocate NUM_PREALLOC_SENDQ send queue elements and cache them on the
+ * free_buffers stack.
+ *
+ * Returns MPI_SUCCESS, or the error code set by MPIU_CHKPMEM_MALLOC when an
+ * allocation fails.  On failure every element allocated by this call is
+ * released by MPIU_CHKPMEM_REAP, so free_buffers is rolled back to the top it
+ * had on entry; otherwise it would keep pointing at freed memory. */
+int MPID_nem_newtcp_module_send_init()
+{
+    int mpi_errno = MPI_SUCCESS;
+    int i;
+    /* stack top before any element of this call was pushed */
+    MPID_nem_newtcp_module_send_q_element_t *saved_top = free_buffers.top;
+    MPIU_CHKPMEM_DECL (NUM_PREALLOC_SENDQ);
+    
+    /* preallocate sendq elements */
+    for (i = 0; i < NUM_PREALLOC_SENDQ; ++i)
+    {
+        MPID_nem_newtcp_module_send_q_element_t *e;
+        
+        MPIU_CHKPMEM_MALLOC (e, MPID_nem_newtcp_module_send_q_element_t *,
+                             sizeof(MPID_nem_newtcp_module_send_q_element_t), mpi_errno, "send queue element");
+        S_PUSH (&free_buffers, e);
+    }
+
+    /* keep the allocations: ownership now rests with free_buffers */
+    MPIU_CHKPMEM_COMMIT();
+    return mpi_errno;
+ fn_fail:
+    MPIU_CHKPMEM_REAP();
+    /* the reaped elements were already linked into free_buffers: unlink them */
+    free_buffers.top = saved_top;
+    return mpi_errno;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_send
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_module_send (MPIDI_VC_t *vc, MPID_nem_cell_ptr_t cell, int datalen)
+{   /* Stub: none of vc, cell or datalen is used.  The function asserts
+     * unconditionally, so reaching it is treated as a programming error;
+     * when assertions are compiled out it simply returns MPI_SUCCESS. */
+    int mpi_errno = MPI_SUCCESS;
+    MPIU_Assert(0);
+    return mpi_errno;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_send_queued
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_module_send_queued (MPIDI_VC_t *vc)
+{   /* Try to write the requests waiting on vc's send_queue to the socket
+     * VC_FIELD(vc, sc)->fd.
+     *
+     * Returns immediately when the queue is empty on entry.  Otherwise the
+     * loop ends at the first partial write, or as soon as one request has
+     * been completed and dequeued.  If the loop leaves the queue empty,
+     * MPID_nem_newtcp_module_conn_wr_disable(vc) is called.
+     *
+     * Returns MPI_SUCCESS or the error code propagated through MPIU_ERR_POP. */
+    int mpi_errno = MPI_SUCCESS;
+    MPID_Request *sreq;
+    MPIDI_msg_sz_t offset; /* NOTE(review): no initialiser; the first read is the RETRYON_INTR condition - confirm the macro runs the write first */
+    MPID_IOV *iov;
+    int complete;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_SEND_QUEUED);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_SEND_QUEUED);
+
+    MPIU_Assert(vc != NULL);
+
+    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)))
+	goto fn_exit;
+
+    while (!SENDQ_EMPTY(VC_FIELD(vc, send_queue)))
+    {
+        sreq = SENDQ_HEAD(VC_FIELD(vc, send_queue));
+        
+        iov = &sreq->dev.iov[sreq->dev.iov_offset]; /* first iov entry not yet fully sent */
+
+/*         printf("sreq = %p sreq->dev.iov = %p iov = %p\n", sreq, sreq->dev.iov, iov); */
+/*         printf("iov[0].MPID_IOV_BUF = %p iov[0].MPID_IOV_LEN = %d iov_count = %d\n", iov[0].MPID_IOV_BUF, iov[0].MPID_IOV_LEN, sreq->dev.iov_count);//DARIUS */
+/*         printf("&iov[0].MPID_IOV_LEN = %p sreq->dev.iov_offset = %d\n", &iov[0].MPID_IOV_LEN, sreq->dev.iov_offset);//DARIUS */
+        MPIU_OSW_RETRYON_INTR((offset < 0), 
+            (mpi_errno = MPIU_SOCKW_Writev(VC_FIELD(vc, sc)->fd, iov, sreq->dev.iov_count, &offset)));
+        if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+        if(offset < 0){ /* a negative count is treated as "nothing written" */
+            offset = 0;
+        }
+
+        MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write %d", offset);
+
+        complete = 1; /* stays 1 only if offset covers every remaining iov entry */
+        for (iov = &sreq->dev.iov[sreq->dev.iov_offset]; iov < &sreq->dev.iov[sreq->dev.iov_offset + sreq->dev.iov_count]; ++iov)
+        {
+            if (offset < iov->MPID_IOV_LEN)
+            {   /* this entry was only partly written: trim it and remember where to resume */
+                iov->MPID_IOV_BUF = (char *)iov->MPID_IOV_BUF + offset;
+                iov->MPID_IOV_LEN -= offset;
+                /* iov_count should be equal to the number of iov's remaining */
+                sreq->dev.iov_count -= ((iov - sreq->dev.iov) - sreq->dev.iov_offset);
+                sreq->dev.iov_offset = iov - sreq->dev.iov;
+                complete = 0;
+                break;
+            }
+            offset -= iov->MPID_IOV_LEN; /* entry fully written: consume its length */
+        }
+        if (!complete)
+        {
+            /* writev couldn't write the entire iov, give up for now */
+            break;
+        }
+        else
+        {
+            /* sent whole message */
+            int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
+
+            reqFn = sreq->dev.OnDataAvail;
+            if (!reqFn)
+            {   /* no continuation handler: the request is finished */
+                MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+                MPIDI_CH3U_Request_complete(sreq);
+                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+                SENDQ_DEQUEUE(&VC_FIELD(vc, send_queue), &sreq);
+                break;
+            }
+
+            complete = 0;
+            mpi_errno = reqFn(vc, sreq, &complete); /* handler reports through 'complete' whether the request is done */
+            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            
+            if (complete)
+            {
+                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+                SENDQ_DEQUEUE(&VC_FIELD(vc, send_queue), &sreq);
+                break;
+            }
+            sreq->dev.iov_offset = 0; /* not complete: the same request is written again from iov[0] on the next pass */
+        }
+    }
+
+    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue))){ /* nothing left to send on this vc */
+        mpi_errno = MPID_nem_newtcp_module_conn_wr_disable(vc);
+        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+    }
+    
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_SEND_QUEUED);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_send_finalize
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_module_send_finalize()
+{
+    /* Hand every element still cached on the free_buffers stack back to the
+     * allocator.  Always returns MPI_SUCCESS. */
+    MPID_nem_newtcp_module_send_q_element_t *elem;
+
+    for (;;)
+    {
+        if (S_EMPTY (free_buffers))
+            break;
+
+        S_POP (&free_buffers, &elem);
+        MPIU_Free (elem);
+    }
+
+    return MPI_SUCCESS;
+}
+
+/* MPID_nem_newtcp_module_conn_est -- this function is called when the
+   connection is finally established, to send any pending sends */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_conn_est
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_module_conn_est (MPIDI_VC_t *vc)
+{   /* If vc's send_queue is non-empty, enable write events for vc and then
+     * try to flush the queue with MPID_nem_newtcp_module_send_queued().
+     * Does nothing when the queue is empty.  Returns MPI_SUCCESS or the
+     * error code of whichever call failed. */
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONN_EST);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONN_EST);
+
+    if (!SENDQ_EMPTY (VC_FIELD(vc, send_queue)))
+    {
+        mpi_errno = MPID_nem_newtcp_module_conn_wr_enable(vc);
+        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+        mpi_errno = MPID_nem_newtcp_module_send_queued (vc);
+        if (mpi_errno) MPIU_ERR_POP (mpi_errno);
+    }
+
+ fn_fail:    /* also reached on success: this label serves as the single exit point */
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_MODULE_CONN_EST);
+    return mpi_errno;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_iStartContigMsg
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_iStartContigMsg(MPIDI_VC_t *vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz,
+                                    MPID_Request **sreq_ptr)
+{   /* Send a header followed by one contiguous data buffer on vc.
+     *
+     * vc        connection to send on; a connect is started if it is not
+     *           connected yet
+     * hdr       packet header; sizeof(MPIDI_CH3_PktGeneric_t) bytes are
+     *           written or copied regardless of hdr_sz
+     * hdr_sz    only checked by the assertion below
+     * data      payload buffer of data_sz bytes; it is referenced, not
+     *           copied, when the message has to be queued
+     * sreq_ptr  out: NULL if the whole message was written immediately,
+     *           otherwise the newly created request that was enqueued
+     *
+     * Returns MPI_SUCCESS or the error code propagated via MPIU_ERR_POP;
+     * *sreq_ptr is not assigned on the error paths. */
+    int mpi_errno = MPI_SUCCESS;
+    MPID_Request * sreq = NULL;
+    MPIDI_msg_sz_t offset = 0; /* bytes already written; stays 0 if no write is attempted */
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_ISTARTCONTIGMSG);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_ISTARTCONTIGMSG);
+    
+    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    
+    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "newtcp_iStartContigMsg");
+    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
+    if (MPID_nem_newtcp_module_vc_is_connected(vc))
+    {
+        if (SENDQ_EMPTY(VC_FIELD(vc, send_queue))) /* write directly only when nothing is queued ahead of this message */
+        {
+            MPID_IOV iov[2];
+
+            iov[0].MPID_IOV_BUF = hdr;
+            iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t);
+            iov[1].MPID_IOV_BUF = data;
+            iov[1].MPID_IOV_LEN = data_sz;
+        
+            MPIU_OSW_RETRYON_INTR((offset < 0),
+                (mpi_errno = MPIU_SOCKW_Writev(VC_FIELD(vc, sc)->fd, iov, 2, &offset)));
+            if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+            if(offset < 0){ /* a negative count is treated as "nothing written" */
+                offset = 0;
+            }
+
+            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write %d", offset);
+
+            if (offset == sizeof(MPIDI_CH3_PktGeneric_t) + data_sz)
+            {
+                /* sent whole message */
+                *sreq_ptr = NULL;
+                goto fn_exit;
+            }
+        }
+    }
+    else
+    {
+        mpi_errno = MPID_nem_newtcp_module_connect(vc);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    }
+
+    /* create and enqueue request */
+    MPIU_DBG_MSG (CH3_CHANNEL, VERBOSE, "enqueuing");
+
+    /* create a request */
+    sreq = MPID_Request_create();
+    MPIU_Assert (sreq != NULL);
+    MPIU_Object_set_ref (sreq, 2); /* NOTE(review): presumably one reference for the caller and one for the queue - confirm */
+    sreq->kind = MPID_REQUEST_SEND;
+
+    sreq->dev.OnDataAvail = 0; /* no continuation handler */
+    sreq->ch.vc = vc;
+    sreq->dev.iov_offset = 0;
+
+/*     printf("&sreq->dev.pending_pkt = %p sizeof(MPIDI_CH3_PktGeneric_t) = %d\n", &sreq->dev.pending_pkt, sizeof(MPIDI_CH3_PktGeneric_t));//DARIUS */
+/*     printf("offset = %d\n", offset);//DARIUS */
+
+    if (offset < sizeof(MPIDI_CH3_PktGeneric_t))
+    {   /* header not fully sent: copy it into the request and send the rest from that copy */
+        sreq->dev.pending_pkt = *(MPIDI_CH3_PktGeneric_t *)hdr;
+        sreq->dev.iov[0].MPID_IOV_BUF = (char *)&sreq->dev.pending_pkt + offset;
+        sreq->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t) - offset ;
+        if (data_sz)
+        {
+            sreq->dev.iov[1].MPID_IOV_BUF = data;
+            sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
+            sreq->dev.iov_count = 2;
+        }
+        else
+            sreq->dev.iov_count = 1;
+    }
+    else
+    {   /* header fully sent: only the unsent tail of the data remains */
+        sreq->dev.iov[0].MPID_IOV_BUF = (char *)data + (offset - sizeof(MPIDI_CH3_PktGeneric_t));
+        sreq->dev.iov[0].MPID_IOV_LEN = data_sz - (offset - sizeof(MPIDI_CH3_PktGeneric_t));
+        sreq->dev.iov_count = 1;
+    }
+    
+    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0);
+
+/*     printf("sreq = %p sreq->dev.iov = %p\n", sreq, sreq->dev.iov); */
+/*     printf("sreq->dev.iov[0].MPID_IOV_BUF = %p\n", sreq->dev.iov[0].MPID_IOV_BUF);//DARIUS */
+/*     printf("sreq->dev.iov[0].MPID_IOV_LEN = %d\n", sreq->dev.iov[0].MPID_IOV_LEN);//DARIUS */
+/*     printf("&sreq->dev.iov[0].MPID_IOV_LEN = %p\n", &sreq->dev.iov[0].MPID_IOV_LEN);//DARIUS */
+
+    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)) && MPID_nem_newtcp_module_vc_is_connected(vc)){ /* queue goes from empty to non-empty on a connected vc */
+        mpi_errno = MPID_nem_newtcp_module_conn_wr_enable(vc);
+        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+    }
+    SENDQ_ENQUEUE(&VC_FIELD(vc, send_queue), sreq);
+
+    *sreq_ptr = sreq;
+    
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_ISTARTCONTIGMSG);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_iSendContig
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_iSendContig(MPIDI_VC_t *vc, MPID_Request *sreq, void *hdr, MPIDI_msg_sz_t hdr_sz,
+                                void *data, MPIDI_msg_sz_t data_sz)
+{   /* Send a header followed by one contiguous data buffer on vc, using the
+     * request sreq supplied by the caller.
+     *
+     * If the vc is connected and nothing is queued, the message is written
+     * directly.  A complete write either completes sreq (no OnDataAvail
+     * handler) or calls the handler.  Whatever remains unsent is stored in
+     * sreq->dev.iov and sreq is appended to vc's send_queue.  If the vc is
+     * not connected, a connect is started and the whole message is queued.
+     *
+     * hdr_sz is only checked by the assertion; sizeof(MPIDI_CH3_PktGeneric_t)
+     * bytes of hdr are written or copied.
+     *
+     * Returns MPI_SUCCESS or the error code propagated via MPIU_ERR_POP. */
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_msg_sz_t offset = 0; /* bytes already written; stays 0 if no write is attempted */
+    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWTCP_ISENDCONTIGMSG);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWTCP_ISENDCONTIGMSG);
+    
+    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t));
+    
+    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "newtcp_iSendContig");
+
+    MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *)hdr);
+    if (MPID_nem_newtcp_module_vc_is_connected(vc))
+    {
+        if (SENDQ_EMPTY(VC_FIELD(vc, send_queue))) /* write directly only when nothing is queued ahead of this message */
+        {
+            MPID_IOV iov[2];
+
+            iov[0].MPID_IOV_BUF = hdr;
+            iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t);
+            iov[1].MPID_IOV_BUF = data;
+            iov[1].MPID_IOV_LEN = data_sz;
+        
+            MPIU_OSW_RETRYON_INTR((offset < 0),
+                (mpi_errno = MPIU_SOCKW_Writev(VC_FIELD(vc, sc)->fd, iov, 2, &offset)));
+            if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+            if(offset < 0){ /* a negative count is treated as "nothing written" */
+                offset = 0;
+            }
+
+            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write %d", offset);
+
+            if (offset == sizeof(MPIDI_CH3_PktGeneric_t) + data_sz)
+            {
+                /* sent whole message */
+                int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
+
+                reqFn = sreq->dev.OnDataAvail;
+                if (!reqFn)
+                {   /* no continuation handler: the request is finished */
+                    MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
+                    MPIDI_CH3U_Request_complete(sreq);
+                    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+                    goto fn_exit;
+                }
+                else
+                {
+                    int complete = 0;
+                
+                    mpi_errno = reqFn(vc, sreq, &complete);
+                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+
+                    if (complete)
+                    {
+                        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+                        goto fn_exit;
+                    }
+
+                    /* not completed: more to send */
+                    goto enqueue_request; /* skips "save iov": sreq->dev.iov is expected to have been filled by the handler (see assert below) */
+                }
+            }
+        }
+    }
+    else
+    {
+        mpi_errno = MPID_nem_newtcp_module_connect(vc);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    }
+
+
+    /* save iov */
+    if (offset < sizeof(MPIDI_CH3_PktGeneric_t))
+    {   /* header not fully sent: copy it into the request and send the rest from that copy */
+        sreq->dev.pending_pkt = *(MPIDI_CH3_PktGeneric_t *)hdr;
+        sreq->dev.iov[0].MPID_IOV_BUF = (char *)&sreq->dev.pending_pkt + offset;
+        sreq->dev.iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t) - offset;
+        if (data_sz)
+        {
+            sreq->dev.iov[1].MPID_IOV_BUF = data;
+            sreq->dev.iov[1].MPID_IOV_LEN = data_sz;
+            sreq->dev.iov_count = 2;
+        }
+        else
+            sreq->dev.iov_count = 1;
+    }
+    else
+    {   /* header fully sent: only the unsent tail of the data remains */
+        sreq->dev.iov[0].MPID_IOV_BUF = (char *)data + (offset - sizeof(MPIDI_CH3_PktGeneric_t));
+        sreq->dev.iov[0].MPID_IOV_LEN = data_sz - (offset - sizeof(MPIDI_CH3_PktGeneric_t));
+        sreq->dev.iov_count = 1;
+    }
+
+ enqueue_request:
+    /* enqueue request */
+    MPIU_DBG_MSG (CH3_CHANNEL, VERBOSE, "enqueuing");
+    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0);
+
+    sreq->ch.vc = vc;
+    sreq->dev.iov_offset = 0;
+
+    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)) && MPID_nem_newtcp_module_vc_is_connected(vc)){ /* queue goes from empty to non-empty on a connected vc */
+        mpi_errno = MPID_nem_newtcp_module_conn_wr_enable(vc);
+        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+    }
+    SENDQ_ENQUEUE(&VC_FIELD(vc, send_queue), sreq);
+
+ fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWTCP_ISENDCONTIGMSG);
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_SendNoncontig
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_SendNoncontig(MPIDI_VC_t *vc, MPID_Request *sreq, void *header, MPIDI_msg_sz_t hdr_sz)
+{   /* Send a header plus the data described by sreq on vc.
+     *
+     * A local iov is built with the header in slot 0 and the entries loaded
+     * by MPIDI_CH3U_Request_load_send_iov() after it.  If the vc is connected
+     * and nothing is queued, the iov is written directly; otherwise (or after
+     * a partial write) the unsent portion is copied into sreq->dev.iov and
+     * sreq is appended to vc's send_queue.  If the vc is not connected, a
+     * connect is started first.
+     *
+     * hdr_sz is only checked by the assertion; sizeof(MPIDI_CH3_PktGeneric_t)
+     * bytes of header are written or copied.
+     *
+     * Returns MPI_SUCCESS or an error code.  On every error path sreq's
+     * reference count is set to 0 and the request is destroyed. */
+    int mpi_errno = MPI_SUCCESS;
+    int iov_n;
+    MPID_IOV iov[MPID_IOV_LIMIT];
+    MPID_IOV *iov_p;
+    MPIDI_msg_sz_t offset;
+    int complete;
+
+    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "newtcp_SendNoncontig");
+    MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_PktGeneric_t));
+    
+    iov[0].MPID_IOV_BUF = header;
+    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t);
+
+    iov_n = MPID_IOV_LIMIT - 1; /* slot 0 is taken by the header */
+    /* On the initial load of a send iov req, set the OnFinal action (null
+       for point-to-point) */
+    mpi_errno = MPIDI_CH3U_Request_load_send_iov(sreq, &iov[1], &iov_n);
+    MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|loadsendiov");
+
+    iov_n += 1; /* count the header entry as well */
+    offset = 0;
+
+    if (MPID_nem_newtcp_module_vc_is_connected(vc))
+    {
+        if (SENDQ_EMPTY(VC_FIELD(vc, send_queue))) /* write directly only when nothing is queued ahead of this message */
+        {
+            MPIU_OSW_RETRYON_INTR((offset < 0),
+                (mpi_errno = MPIU_SOCKW_Writev(VC_FIELD(vc, sc)->fd, iov, iov_n, &offset)));
+            if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+            if(offset < 0){ /* a negative count is treated as "nothing written" */
+                offset = 0;
+            }
+
+            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write noncontig %d", offset);
+        }
+    }
+    else
+    {
+        mpi_errno = MPID_nem_newtcp_module_connect(vc);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+    }
+
+    if (offset < iov[0].MPID_IOV_LEN)
+    {
+        /* header was not yet sent, save it in req */
+        sreq->dev.pending_pkt = *(MPIDI_CH3_PktGeneric_t *)header;
+        iov[0].MPID_IOV_BUF = &sreq->dev.pending_pkt;
+        iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_PktGeneric_t);
+    }
+
+    /* check if whole iov was sent, and save any unsent portion of iov */
+    sreq->dev.iov_count = 0;
+    complete = 1;
+    for (iov_p = &iov[0]; iov_p < &iov[iov_n]; ++iov_p)
+    {
+        if (offset < iov_p->MPID_IOV_LEN)
+        {   /* entry not fully written: store its unsent tail in the request */
+            sreq->dev.iov[sreq->dev.iov_count].MPID_IOV_BUF = (char *)iov_p->MPID_IOV_BUF + offset;
+            sreq->dev.iov[sreq->dev.iov_count].MPID_IOV_LEN = iov_p->MPID_IOV_LEN - offset;
+            offset = 0; /* every later entry is stored whole */
+            ++sreq->dev.iov_count;
+            complete = 0;
+        }
+        else
+            offset -= iov_p->MPID_IOV_LEN; /* entry fully written: consume its length */
+    }
+        
+    if (complete)
+    {
+        /* sent whole iov */
+        int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *);
+
+        reqFn = sreq->dev.OnDataAvail;
+        if (!reqFn)
+        {   /* no continuation handler: the request is finished */
+            MPIDI_CH3U_Request_complete(sreq);
+            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+            goto fn_exit;
+        }
+
+        complete = 0;
+        mpi_errno = reqFn(vc, sreq, &complete);
+        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
+            
+        if (complete)
+        {
+            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
+            goto fn_exit;
+        }
+    }
+        
+    /* enqueue request */
+    MPIU_DBG_MSG (CH3_CHANNEL, VERBOSE, "enqueuing");
+    MPIU_Assert(sreq->dev.iov_count >= 1 && sreq->dev.iov[0].MPID_IOV_LEN > 0);
+        
+    sreq->ch.vc = vc;
+    sreq->dev.iov_offset = 0;
+        
+    if (SENDQ_EMPTY(VC_FIELD(vc, send_queue)) && MPID_nem_newtcp_module_vc_is_connected(vc)){ /* queue goes from empty to non-empty on a connected vc */
+        mpi_errno = MPID_nem_newtcp_module_conn_wr_enable(vc);
+        if(mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
+    }
+    SENDQ_ENQUEUE(&VC_FIELD(vc, send_queue), sreq);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    MPIU_Object_set_ref(sreq, 0); /* drop all references so the destroy below releases the request */
+    MPIDI_CH3_Request_destroy(sreq);
+    goto fn_exit;
+}

Copied: mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_utility.c (from rev 4035, mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_module_utility.c)
===================================================================
--- mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_utility.c	                        (rev 0)
+++ mpich2/trunk/src/mpid/ch3/channels/nemesis/nemesis/netmod/wintcp_module/wintcp_utility.c	2009-03-12 20:33:40 UTC (rev 4037)
@@ -0,0 +1,156 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *  (C) 2006 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "wintcp_impl.h"
+
+/* MPID_nem_newtcp_module_get_conninfo -- This function takes a VC
+   pointer as input and outputs the sockaddr, pg_id, and pg_rank of
+   the remote process associated with this VC.  [NOTE: I'm not sure
+   yet, if the pg_id parameters will be char* or char**.  I'd like to
+   avoid a copy on this.] */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_get_conninfo
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+int MPID_nem_newtcp_module_get_conninfo (struct MPIDI_VC *vc, struct sockaddr_in *addr, char **pg_id, int *pg_rank)
+{
+    /* Report the remote endpoint of vc through the three out-parameters:
+     * its rank, the id of its process group (the pointer is handed out,
+     * the id itself is not copied) and its socket address.
+     * Always returns MPI_SUCCESS. */
+    *pg_rank = vc->pg_rank;
+    *pg_id = (char *)vc->pg->id;
+    *addr = VC_FIELD(vc, sock_id);
+
+    return MPI_SUCCESS;
+}
+
+/* MPID_nem_newtcp_module_get_vc_from_conninfo -- This function takes
+   the pg_id and pg_rank and returns the corresponding VC. */
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_get_vc_from_conninfo
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* Look up the VC of rank pg_rank in the process group identified by pg_id.
+ *
+ * pg_id    id of the process group, passed to MPIDI_PG_Find()
+ * pg_rank  rank inside that group; must satisfy 0 <= pg_rank < group size
+ * vc       out: the matching VC; left untouched on error
+ *
+ * Returns MPI_SUCCESS, the error from MPIDI_PG_Find(), or an MPI_ERR_OTHER
+ * "**intern" error when the group is unknown or the rank is out of range. */
+int MPID_nem_newtcp_module_get_vc_from_conninfo (char *pg_id, int pg_rank, struct MPIDI_VC **vc)
+{
+    int mpi_errno = MPI_SUCCESS;
+    MPIDI_PG_t *pg;
+    
+    mpi_errno = MPIDI_PG_Find (pg_id, &pg);
+    if (mpi_errno) MPIU_ERR_POP (mpi_errno);
+
+    MPIU_ERR_CHKANDJUMP1 (pg == NULL, mpi_errno, MPI_ERR_OTHER, "**intern", "**intern %s", "invalid PG");
+    /* valid ranks are 0 .. size-1, so a rank equal to the size must be rejected too */
+    MPIU_ERR_CHKANDJUMP1 (pg_rank < 0 || pg_rank >= MPIDI_PG_Get_size (pg), mpi_errno, MPI_ERR_OTHER, "**intern", "**intern %s", "invalid pg_rank");
+        
+    MPIDI_PG_Get_vc (pg, pg_rank, vc);
+    
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_set_sockopts
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+/* Configure socket fd for use by this module: set TCP_NODELAY, set the
+ * receive and send buffer sizes to MPID_NEM_NEWTCP_MODULE_RCVBUF_SZ and
+ * MPID_NEM_NEWTCP_MODULE_SNDBUF_SZ, set SO_REUSEADDR, and finally call
+ * MPIU_SOCKW_Sock_cntrl_nb(fd, 1) (NOTE(review): presumably switches the
+ * socket to non-blocking mode - confirm against the wrapper).
+ *
+ * Returns MPI_SUCCESS, or the error code of the first call that fails; the
+ * remaining options are not attempted in that case. */
+int MPID_nem_newtcp_module_set_sockopts (int fd)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int option;
+    socklen_t len;
+
+    /* Some implementations reportedly require the options to be read back
+       after they are set; this function does not read them back. */
+
+    option = 1;
+    len = sizeof(int);
+
+    mpi_errno = MPIU_SOCKW_Sock_setopt(fd, IPPROTO_TCP, TCP_NODELAY, &option, len);
+    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    option = MPID_NEM_NEWTCP_MODULE_RCVBUF_SZ;
+    len = sizeof(int);
+
+    mpi_errno = MPIU_SOCKW_Sock_setopt(fd, SOL_SOCKET, SO_RCVBUF, &option, len);
+    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    option = MPID_NEM_NEWTCP_MODULE_SNDBUF_SZ;
+    len = sizeof(int);
+
+    mpi_errno = MPIU_SOCKW_Sock_setopt(fd, SOL_SOCKET, SO_SNDBUF, &option, len);
+    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+    
+    option = 1;
+    len = sizeof(int);
+
+    mpi_errno = MPIU_SOCKW_Sock_setopt(fd, SOL_SOCKET, SO_REUSEADDR, &option, len);
+    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+    option = 1;
+    mpi_errno = MPIU_SOCKW_Sock_cntrl_nb(fd, option);
+    if(mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);
+
+ fn_exit:
+    return mpi_errno;
+ fn_fail:
+    goto fn_exit;
+}
+
+
+/*
+  MPID_NEM_NEWTCP_MODULE_SOCK_ERROR_EOF : connection failed
+  MPID_NEM_NEWTCP_MODULE_SOCK_CONNECTED : socket connected (connection success)
+  MPID_NEM_NEWTCP_MODULE_SOCK_NOEVENT   : No event on socket
+
+N1: some implementations do not set POLLERR when there is a pending error on socket.
+So, the solution is to check for readability/writability and then call getsockopt.
+Again, getsockopt behaves differently in different implementations, which is handled
+safely here (per Stevens, Unix Network Programming)
+
+N2: As far as the socket code is concerned, it doesn't really differentiate whether
+there is an error in the socket or whether the peer has closed it (i.e. we have received
+EOF and hence recv returns 0). Either way, we decide the socket fd is not usable any
+more. So, the same return code is used.
+A design decision is not to write either, if the peer has closed the socket. Please note that
+a write will still be successful, even if the peer has sent us FIN. Only the subsequent 
+write will fail. So, this function is made tight enough and this should be called
+before doing any read/write at least in the connection establishment state machine code.
+
+N3: return code MPID_NEM_NEWTCP_MODULE_SOCK_NOEVENT is used only by the code that wants to
+know whether the connect is still not complete after a non-blocking connect is issued.
+
+TODO: Make this a macro for performance, if needed based on the usage.
+FIXME : Above comments are inconsistent now with the changes. No check for EOF is 
+actually done now in this function.
+*/
+
+
+#undef FUNCNAME
+#define FUNCNAME MPID_nem_newtcp_module_check_sock_status
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+MPID_NEM_NEWTCP_MODULE_SOCK_STATUS_t 
+MPID_nem_newtcp_module_check_sock_status(MPIU_SOCKW_Waitset_sock_hnd_t fd_ws_hnd)
+{
+    /* Classify the waitset entry fd_ws_hnd: a socket reported readable or
+     * writeable counts as connected, anything else as "no event".
+     *
+     * FIXME: At least on windows there is no guarantee that a successful
+     * socket call resets the socket error code, so no error query is made
+     * here.  The socket is assumed to be connected for now and the state
+     * machine is left to handle errors.
+     */
+    if (MPIU_SOCKW_Waitset_is_sock_readable(fd_ws_hnd))
+        return MPID_NEM_NEWTCP_MODULE_SOCK_CONNECTED;
+
+    if (MPIU_SOCKW_Waitset_is_sock_writeable(fd_ws_hnd))
+        return MPID_NEM_NEWTCP_MODULE_SOCK_CONNECTED;
+
+    /* Should we return no event for OOB data - EX ? */
+    return MPID_NEM_NEWTCP_MODULE_SOCK_NOEVENT;
+}
+



More information about the mpich2-commits mailing list