[mpich2-commits] r6683 - in mpich2/trunk/src/mpi/romio/adio: ad_xfs common include

robl at mcs.anl.gov robl at mcs.anl.gov
Wed May 19 15:52:43 CDT 2010


Author: robl
Date: 2010-05-19 15:52:43 -0500 (Wed, 19 May 2010)
New Revision: 6683

Modified:
   mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_hints.c
   mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_open.c
   mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_read.c
   mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_write.c
   mpich2/trunk/src/mpi/romio/adio/common/ad_iwrite.c
   mpich2/trunk/src/mpi/romio/adio/include/adio.h
   mpich2/trunk/src/mpi/romio/adio/include/adioi.h
Log:
from Michael Raymond <mraymond at sgi.com>: clean up XFS direct i/o


Modified: mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_hints.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_hints.c	2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_hints.c	2010-05-19 20:52:43 UTC (rev 6683)
@@ -8,22 +8,62 @@
 #include "ad_xfs.h"
 #include "adio_extern.h"
 
+static unsigned xfs_direct_read_chunk_size;
+static unsigned xfs_direct_write_chunk_size;
+
 void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
 {
-    char *value;
+    char *value, * c;
     int flag;
+    static char xfs_initialized = 0;
 
     if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
 
-    /* the nightly builds say somthing is calling MPI_Info_set w/ a null info,
-     * so protect the calls to MPI_Info_set */
-    if (fd->info != MPI_INFO_NULL ) {
-	    MPI_Info_set(fd->info, "direct_read", "false");
-	    MPI_Info_set(fd->info, "direct_write", "false");
-	    fd->direct_read = fd->direct_write = 0;
-    }
-	
-    /* has user specified values for keys "direct_read" and "direct wirte"? */
+    MPI_Info_set(fd->info, "direct_read", "false");
+    MPI_Info_set(fd->info, "direct_write", "false");
+    fd->direct_read = fd->direct_write = 0;
+
+	if (!xfs_initialized) {
+		xfs_initialized = 1;
+		c = getenv("MPIO_DIRECT_READ_CHUNK_SIZE");
+		if (c) {
+			int io;
+			io = atoi(c);
+			if (io <= 0) {
+				fprintf(stderr,
+"MPI: Ignoring an invalid setting for MPIO_DIRECT_READ_CHUNK_SIZE.\n"
+"     It must be set to a positive integer value.\n");
+			} else {
+				xfs_direct_read_chunk_size = io;
+			}
+		} else {
+			xfs_direct_read_chunk_size = 0;
+		}
+
+		c = getenv("MPIO_DIRECT_WRITE_CHUNK_SIZE");
+		if (c) {
+			int io;
+			io = atoi(c);
+			if (io <= 0) {
+				fprintf(stderr,
+"MPI: Ignoring an invalid setting for MPIO_DIRECT_WRITE_CHUNK_SIZE.\n"
+"     It must be set to a positive integer value.\n");
+			} else {
+				xfs_direct_write_chunk_size = io;
+			}
+		} else {
+			xfs_direct_write_chunk_size = 0;
+		}
+	}
+
+	if (!fd->hints->initialized) {
+		fd->hints->fs_hints.xfs.read_chunk_sz =
+			xfs_direct_read_chunk_size;
+		fd->hints->fs_hints.xfs.write_chunk_sz =
+			xfs_direct_write_chunk_size;
+	}
+
+    /* has user specified values for keys "direct_read" and "direct write"? */
     if (users_info != MPI_INFO_NULL) {
 	value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
 
@@ -47,8 +87,10 @@
     /* set the values for collective I/O and data sieving parameters */
     ADIOI_GEN_SetInfo(fd, users_info, error_code);
 
+    /* Environment variables override MPI_Info hints */
     if (ADIOI_Direct_read) fd->direct_read = 1;
     if (ADIOI_Direct_write) fd->direct_write = 1;
+
     /* environment variables checked in ADIO_Init */
 
     *error_code = MPI_SUCCESS;

Modified: mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_open.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_open.c	2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_open.c	2010-05-19 20:52:43 UTC (rev 6683)
@@ -5,26 +5,26 @@
  *   See COPYRIGHT notice in top-level directory.
  */
 
+#define _GNU_SOURCE          // for O_DIRECT
+
 #include "ad_xfs.h"
+#include <sys/ioctl.h>
 #ifdef HAVE_STDDEF_H
 #include <stddef.h>
 #endif
 
-#if defined(MPISGI)
-#include <mpitypedefs.h>
-#include <mpifunctions.h>
-#endif
-
 #ifndef HAVE_LSEEK64
 #define lseek64 lseek
 #endif
 
 void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
 {
-    int perm, amode, amode_direct;
+    int perm, amode, amode_direct, factor;
     unsigned int old_mask;
     struct dioattr st;
     static char myname[] = "ADIOI_XFS_OPEN";
+    unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
+    unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
 
     if (fd->perm == ADIO_PERM_NULL) {
 	old_mask = umask(022);
@@ -53,7 +53,7 @@
     fd->fd_direct = open(fd->filename, amode_direct, perm);
     if (fd->fd_direct != -1) {
 
-#if defined(LINUX) && defined(MPISGI)
+#if defined(MPISGI)
 	ioctl(fd->fd_direct, XFS_IOC_DIOINFO, &st);
 #else
 	fcntl(fd->fd_direct, F_DIOINFO, &st);
@@ -61,8 +61,35 @@
 
 	fd->d_mem = st.d_mem;
 	fd->d_miniosz = st.d_miniosz;
-	fd->d_maxiosz = st.d_maxiosz;
 
+	if (read_chunk_sz == 0) {
+		fd->hints->fs_hints.xfs.read_chunk_sz = st.d_maxiosz;
+	} else {
+		/*
+		 * MPIO_DIRECT_READ_CHUNK_SIZE was set.
+		 * Make read_chunk_sz a multiple of d_miniosz.
+		 */
+		factor = read_chunk_sz / fd->d_miniosz;
+		if (factor == 0 || read_chunk_sz != fd->d_miniosz * factor) {
+			fd->hints->fs_hints.xfs.read_chunk_sz =
+				fd->d_miniosz * (factor + 1);
+		}
+	}
+
+	if (write_chunk_sz == 0) {
+		fd->hints->fs_hints.xfs.write_chunk_sz = st.d_maxiosz;
+	} else {
+		/*
+		 * MPIO_DIRECT_WRITE_CHUNK_SIZE was set. 
+		 * Make write_chunk_sz a multiple of d_miniosz.
+		 */
+		factor = write_chunk_sz / fd->d_miniosz;
+		if (factor == 0 || write_chunk_sz != fd->d_miniosz * factor) {
+			fd->hints->fs_hints.xfs.write_chunk_sz =
+				fd->d_miniosz * (factor + 1);
+		}
+	}
+
 	if (fd->d_mem > XFS_MEMALIGN) {
 	    FPRINTF(stderr, "MPI: Run-time Direct-IO memory alignment, %d, does not match compile-time value, %d.\n",
 		fd->d_mem, XFS_MEMALIGN);

Modified: mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_read.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_read.c	2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_read.c	2010-05-19 20:52:43 UTC (rev 6683)
@@ -102,6 +102,7 @@
               ADIO_Offset offset, int *err)
 {
     int ntimes, rem, newrem, i, size, nbytes;
+    unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
 
     /* memory buffer is aligned, offset in file is aligned,
        io_size may or may not be of the right size.
@@ -109,33 +110,33 @@
        use buffered I/O for remaining. */
 
     if (!(len % fd->d_miniosz) && 
-	(len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
+	(len >= fd->d_miniosz) && (len <= read_chunk_sz))
 	*err = pread(fd->fd_direct, buf, len, offset);
     else if (len < fd->d_miniosz)
 	*err = pread(fd->fd_sys, buf, len, offset);
-    else if (len > fd->d_maxiosz) {
-	ntimes = len/(fd->d_maxiosz);
-	rem = len - ntimes * fd->d_maxiosz;
+    else if (len > read_chunk_sz) {
+	ntimes = len/(read_chunk_sz);
+	rem = len - ntimes * read_chunk_sz;
 	nbytes = 0;
 	for (i=0; i<ntimes; i++) {
-	    nbytes += pread(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
-			 fd->d_maxiosz, offset);
-	    offset += fd->d_maxiosz;
+	    nbytes += pread(fd->fd_direct, ((char *)buf) + i * read_chunk_sz,
+			 read_chunk_sz, offset);
+	    offset += read_chunk_sz;
 	}
 	if (rem) {
 	    if (!(rem % fd->d_miniosz))
 		nbytes += pread(fd->fd_direct, 
-		     ((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
+		     ((char *)buf) + ntimes * read_chunk_sz, rem, offset);
 	    else {
 		newrem = rem % fd->d_miniosz;
 		size = rem - newrem;
 		if (size) {
 		    nbytes += pread(fd->fd_direct, 
-		         ((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
+		         ((char *)buf) + ntimes * read_chunk_sz, size, offset);
 		    offset += size;
 		}
 		nbytes += pread(fd->fd_sys, 
-	              ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
+	              ((char *)buf) + ntimes * read_chunk_sz + size, newrem, offset);
 	    }
 	}
 	*err = nbytes;

Modified: mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_write.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_write.c	2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_write.c	2010-05-19 20:52:43 UTC (rev 6683)
@@ -13,14 +13,15 @@
 
 /* style: allow:free:2 sig:0 */
 
-static void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len, 
-					     ADIO_Offset offset, int *err);
+static int ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf,
+						  ADIO_Offset len, ADIO_Offset offset);
 
 void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, 
                      MPI_Datatype datatype, int file_ptr_type,
 		     ADIO_Offset offset, ADIO_Status *status, int *error_code)
 {
-    int err=-1, datatype_size, len, diff, size, nbytes;
+    int err=-1, datatype_size, diff, size;
+    ssize_t len;
     void *newbuf;
     static char myname[] = "ADIOI_XFS_WRITECONTIG";
 
@@ -31,44 +32,48 @@
 
     if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
 
-    if (!(fd->direct_write))     /* direct I/O not enabled */
+    if (!(fd->direct_write)) {    /* direct I/O not enabled */
 	err = pwrite(fd->fd_sys, buf, len, offset);
-    else {       /* direct I/O enabled */
+	if (err < 0) {goto leaving;}
+    } else {       /* direct I/O enabled */
 
 	/* (1) if mem_aligned && file_aligned 
                     use direct I/O to write up to correct io_size
                     use buffered I/O for remaining  */
 
-	if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) 
-	    ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset, &err);
+	if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) {
+	    err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset);
+	    if (err < 0) {goto leaving;}
 
         /* (2) if !file_aligned
                     use buffered I/O to write up to file_aligned
                     At that point, if still mem_aligned, use (1)
    		        else copy into aligned buf and then use (1) */
-	else if (offset % fd->d_miniosz) {
+	} else if (offset % fd->d_miniosz) {
 	    diff = fd->d_miniosz - (offset % fd->d_miniosz);
 	    diff = ADIOI_MIN(diff, len);
-	    nbytes = pwrite(fd->fd_sys, buf, diff, offset);
+	    err = pwrite(fd->fd_sys, buf, diff, offset);
+	    if (err < 0) {goto leaving;}
 
 	    buf = ((char *) buf) + diff;
 	    offset += diff;
 	    size = len - diff;
 	    if (!(((long) buf) % fd->d_mem)) {
-		ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset, &err);
-		nbytes += err;
+		err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset);
+		if (err < 0) {goto leaving;}
 	    }
 	    else {
 		newbuf = (void *) memalign(XFS_MEMALIGN, size);
 		if (newbuf) {
 		    memcpy(newbuf, buf, size);
-		    ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
-		    nbytes += err;
+		    err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset);
 		    ADIOI_Free(newbuf);
+		    if (err < 0) {goto leaving;}
+		} else {
+		    err = pwrite(fd->fd_sys, buf, size, offset);
+		    if (err < 0) {goto leaving;}
 		}
-		else nbytes += pwrite(fd->fd_sys, buf, size, offset);
 	    }
-	    err = nbytes;
 	}
 
         /* (3) if !mem_aligned && file_aligned
@@ -77,19 +82,22 @@
 	    newbuf = (void *) memalign(XFS_MEMALIGN, len);
 	    if (newbuf) {
 		memcpy(newbuf, buf, len);
-		ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset, &err);
+		err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset);
 		ADIOI_Free(newbuf);
+	    } else {
+		 err = pwrite(fd->fd_sys, buf, len, offset);
 	    }
-	    else err = pwrite(fd->fd_sys, buf, len, offset);
+
+	    if (err < 0) {goto leaving;}
 	}
     }
 
-    if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += err;
+    if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
 
 #ifdef HAVE_STATUS_SET_BYTES
-    if (err != -1) MPIR_Status_set_bytes(status, datatype, err);
+    if (err != -1) MPIR_Status_set_bytes(status, datatype, len);
 #endif
-
+leaving:
     if (err == -1) {
 	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
 					   myname, __LINE__, MPI_ERR_IO, "**io",
@@ -99,10 +107,13 @@
 }
 
 
-void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len, 
-              ADIO_Offset offset, int *err)
+static int
+ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len, 
+              ADIO_Offset offset)
 {
-    int ntimes, rem, newrem, i, size, nbytes;
+    unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
+    ADIO_Offset nbytes, rem, newrem, size;
+    int ntimes, i;
 
     /* memory buffer is aligned, offset in file is aligned,
        io_size may or may not be of the right size.
@@ -110,42 +121,50 @@
        use buffered I/O for remaining. */
 
     if (!(len % fd->d_miniosz) && 
-	(len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
-	*err = pwrite(fd->fd_direct, buf, len, offset);
-    else if (len < fd->d_miniosz)
-	*err = pwrite(fd->fd_sys, buf, len, offset);
-    else if (len > fd->d_maxiosz) {
-	ntimes = len/(fd->d_maxiosz);
-	rem = len - ntimes * fd->d_maxiosz;
+	 (len >= fd->d_miniosz) && (len <= write_chunk_sz)) {
+	nbytes = pwrite(fd->fd_direct, buf, len, offset);
+	if (nbytes < 0) {return -1;}
+    } else if (len < fd->d_miniosz) {
+	nbytes = pwrite(fd->fd_sys, buf, len, offset);
+	if (nbytes < 0) {return -1;}
+    } else if (len > write_chunk_sz) {
+	ntimes = len/(write_chunk_sz);
+	rem = len - ntimes * write_chunk_sz;
 	nbytes = 0;
 	for (i=0; i<ntimes; i++) {
-	    nbytes += pwrite(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
-			 fd->d_maxiosz, offset);
-	    offset += fd->d_maxiosz;
+	    nbytes = pwrite(fd->fd_direct, ((char *)buf) + i * write_chunk_sz,
+			 write_chunk_sz, offset);
+	    offset += write_chunk_sz;
+	    if (nbytes < 0) {return -1;}
 	}
 	if (rem) {
-	    if (!(rem % fd->d_miniosz))
-		nbytes += pwrite(fd->fd_direct, 
-		             ((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
-	    else {
+	    if (!(rem % fd->d_miniosz)) {
+		nbytes = pwrite(fd->fd_direct, 
+		             ((char *)buf) + ntimes * write_chunk_sz, rem, offset);
+		if (nbytes < 0) {return -1;}
+	    } else {
 		newrem = rem % fd->d_miniosz;
 		size = rem - newrem;
 		if (size) {
-		    nbytes += pwrite(fd->fd_direct, 
-		            ((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
+		    nbytes = pwrite(fd->fd_direct, 
+		            ((char *)buf) + ntimes * write_chunk_sz, size, offset);
 		    offset += size;
+		    if (nbytes < 0) {return -1;}
 		}
-		nbytes += pwrite(fd->fd_sys, 
-	              ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
+		nbytes = pwrite(fd->fd_sys, 
+	              ((char *)buf) + ntimes * write_chunk_sz + size, newrem, offset);
+		if (nbytes < 0) {return -1;}
 	    }
 	}
-	*err = nbytes;
     }
     else {
 	rem = len % fd->d_miniosz;
 	size = len - rem;
 	nbytes = pwrite(fd->fd_direct, buf, size, offset);
-	nbytes += pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
-	*err = nbytes;
+	if (nbytes < 0) {return -1;}
+	nbytes = pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
+	if (nbytes < 0) {return -1;}
     }
+
+    return 0;
 }

Modified: mpich2/trunk/src/mpi/romio/adio/common/ad_iwrite.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/common/ad_iwrite.c	2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/common/ad_iwrite.c	2010-05-19 20:52:43 UTC (rev 6683)
@@ -86,10 +86,24 @@
     int error_code;
     struct aiocb *aiocbp;
     ADIOI_AIO_Request *aio_req;
+#if defined(ROMIO_XFS)
+    unsigned maxiosz = wr ? fd->hints->fs_hints.xfs.write_chunk_sz :
+	    fd->hints->fs_hints.xfs.read_chunk_sz;
+#endif /* ROMIO_XFS */
 
-
     fd_sys = fd->fd_sys;
 
+#if defined(ROMIO_XFS)
+    /* Use Direct I/O if desired and properly aligned */
+    if (fd->fns == &ADIO_XFS_operations &&
+	 ((wr && fd->direct_write) || (!wr && fd->direct_read)) &&
+	 !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) && 
+	 !(len % fd->d_miniosz) && (len >= fd->d_miniosz) && 
+	 (len <= maxiosz)) {
+	    fd_sys = fd->fd_direct;
+    }
+#endif /* ROMIO_XFS */
+
     aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
     aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1);
     aiocbp->aio_offset = offset;

Modified: mpich2/trunk/src/mpi/romio/adio/include/adio.h
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/include/adio.h	2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/include/adio.h	2010-05-19 20:52:43 UTC (rev 6683)
@@ -195,7 +195,6 @@
     unsigned d_mem;          /* data buffer memory alignment */
     unsigned d_miniosz;      /* min xfer size, xfer size multiple,
                                 and file seek offset alignment */
-    unsigned d_maxiosz;      /* max xfer size */
     ADIO_Offset fp_ind;      /* individual file pointer in MPI-IO (in bytes)*/
     ADIO_Offset fp_sys_posn; /* current location of the system file-pointer
                                 in bytes */

Modified: mpich2/trunk/src/mpi/romio/adio/include/adioi.h
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/include/adioi.h	2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/include/adioi.h	2010-05-19 20:52:43 UTC (rev 6683)
@@ -70,6 +70,10 @@
                     int coll_threshold;
                     int ds_in_coll;
             } lustre;
+		struct {
+			unsigned read_chunk_sz; /* chunk size for direct reads */
+			unsigned write_chunk_sz; /* chunk size for direct writes */
+		} xfs;
     } fs_hints;
 
 };



More information about the mpich2-commits mailing list