[mpich2-commits] r6683 - in mpich2/trunk/src/mpi/romio/adio: ad_xfs common include
robl at mcs.anl.gov
robl at mcs.anl.gov
Wed May 19 15:52:43 CDT 2010
Author: robl
Date: 2010-05-19 15:52:43 -0500 (Wed, 19 May 2010)
New Revision: 6683
Modified:
mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_hints.c
mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_open.c
mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_read.c
mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_write.c
mpich2/trunk/src/mpi/romio/adio/common/ad_iwrite.c
mpich2/trunk/src/mpi/romio/adio/include/adio.h
mpich2/trunk/src/mpi/romio/adio/include/adioi.h
Log:
from Michael Raymond <mraymond at sgi.com>: clean up XFS direct i/o
Modified: mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_hints.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_hints.c 2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_hints.c 2010-05-19 20:52:43 UTC (rev 6683)
@@ -8,22 +8,62 @@
#include "ad_xfs.h"
#include "adio_extern.h"
+static unsigned xfs_direct_read_chunk_size;
+static unsigned xfs_direct_write_chunk_size;
+
void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
- char *value;
+ char *value, * c;
int flag;
+ static char xfs_initialized = 0;
if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
- /* the nightly builds say somthing is calling MPI_Info_set w/ a null info,
- * so protect the calls to MPI_Info_set */
- if (fd->info != MPI_INFO_NULL ) {
- MPI_Info_set(fd->info, "direct_read", "false");
- MPI_Info_set(fd->info, "direct_write", "false");
- fd->direct_read = fd->direct_write = 0;
- }
-
- /* has user specified values for keys "direct_read" and "direct wirte"? */
+ MPI_Info_set(fd->info, "direct_read", "false");
+ MPI_Info_set(fd->info, "direct_write", "false");
+ fd->direct_read = fd->direct_write = 0;
+
+ if (!xfs_initialized) {
+ xfs_initialized = 1;
+ c = getenv("MPIO_DIRECT_READ_CHUNK_SIZE");
+ if (c) {
+ int io;
+ io = atoi(c);
+ if (io <= 0) {
+ fprintf(stderr,
+"MPI: Ignoring an invalid setting for MPIO_DIRECT_READ_CHUNK_SIZE.\n"
+" It must be set to a positive integer value.\n");
+ } else {
+ xfs_direct_read_chunk_size = io;
+ }
+ } else {
+ xfs_direct_read_chunk_size = 0;
+ }
+
+ c = getenv("MPIO_DIRECT_WRITE_CHUNK_SIZE");
+ if (c) {
+ int io;
+ io = atoi(c);
+ if (io <= 0) {
+ fprintf(stderr,
+"MPI: Ignoring an invalid setting for MPIO_DIRECT_WRITE_CHUNK_SIZE.\n"
+" It must be set to a positive integer value.\n");
+ } else {
+ xfs_direct_write_chunk_size = io;
+ }
+ } else {
+ xfs_direct_write_chunk_size = 0;
+ }
+ }
+
+ if (!fd->hints->initialized) {
+ fd->hints->fs_hints.xfs.read_chunk_sz =
+ xfs_direct_read_chunk_size;
+ fd->hints->fs_hints.xfs.write_chunk_sz =
+ xfs_direct_write_chunk_size;
+ }
+
+ /* has user specified values for keys "direct_read" and "direct write"? */
if (users_info != MPI_INFO_NULL) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
@@ -47,8 +87,10 @@
/* set the values for collective I/O and data sieving parameters */
ADIOI_GEN_SetInfo(fd, users_info, error_code);
+ /* Environment variables override MPI_Info hints */
if (ADIOI_Direct_read) fd->direct_read = 1;
if (ADIOI_Direct_write) fd->direct_write = 1;
+
/* environment variables checked in ADIO_Init */
*error_code = MPI_SUCCESS;
Modified: mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_open.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_open.c 2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_open.c 2010-05-19 20:52:43 UTC (rev 6683)
@@ -5,26 +5,26 @@
* See COPYRIGHT notice in top-level directory.
*/
+#define _GNU_SOURCE // for O_DIRECT
+
#include "ad_xfs.h"
+#include <sys/ioctl.h>
#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif
-#if defined(MPISGI)
-#include <mpitypedefs.h>
-#include <mpifunctions.h>
-#endif
-
#ifndef HAVE_LSEEK64
#define lseek64 lseek
#endif
void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
{
- int perm, amode, amode_direct;
+ int perm, amode, amode_direct, factor;
unsigned int old_mask;
struct dioattr st;
static char myname[] = "ADIOI_XFS_OPEN";
+ unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
+ unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
if (fd->perm == ADIO_PERM_NULL) {
old_mask = umask(022);
@@ -53,7 +53,7 @@
fd->fd_direct = open(fd->filename, amode_direct, perm);
if (fd->fd_direct != -1) {
-#if defined(LINUX) && defined(MPISGI)
+#if defined(MPISGI)
ioctl(fd->fd_direct, XFS_IOC_DIOINFO, &st);
#else
fcntl(fd->fd_direct, F_DIOINFO, &st);
@@ -61,8 +61,35 @@
fd->d_mem = st.d_mem;
fd->d_miniosz = st.d_miniosz;
- fd->d_maxiosz = st.d_maxiosz;
+ if (read_chunk_sz == 0) {
+ fd->hints->fs_hints.xfs.read_chunk_sz = st.d_maxiosz;
+ } else {
+ /*
+ * MPIO_DIRECT_READ_CHUNK_SIZE was set.
+ * Make read_chunk_sz a multiple of d_miniosz.
+ */
+ factor = read_chunk_sz / fd->d_miniosz;
+ if (factor == 0 || read_chunk_sz != fd->d_miniosz * factor) {
+ fd->hints->fs_hints.xfs.read_chunk_sz =
+ fd->d_miniosz * (factor + 1);
+ }
+ }
+
+ if (write_chunk_sz == 0) {
+ fd->hints->fs_hints.xfs.write_chunk_sz = st.d_maxiosz;
+ } else {
+ /*
+ * MPIO_DIRECT_WRITE_CHUNK_SIZE was set.
+ * Make write_chunk_sz a multiple of d_miniosz.
+ */
+ factor = write_chunk_sz / fd->d_miniosz;
+ if (factor == 0 || write_chunk_sz != fd->d_miniosz * factor) {
+ fd->hints->fs_hints.xfs.write_chunk_sz =
+ fd->d_miniosz * (factor + 1);
+ }
+ }
+
if (fd->d_mem > XFS_MEMALIGN) {
FPRINTF(stderr, "MPI: Run-time Direct-IO memory alignment, %d, does not match compile-time value, %d.\n",
fd->d_mem, XFS_MEMALIGN);
Modified: mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_read.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_read.c 2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_read.c 2010-05-19 20:52:43 UTC (rev 6683)
@@ -102,6 +102,7 @@
ADIO_Offset offset, int *err)
{
int ntimes, rem, newrem, i, size, nbytes;
+ unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
/* memory buffer is aligned, offset in file is aligned,
io_size may or may not be of the right size.
@@ -109,33 +110,33 @@
use buffered I/O for remaining. */
if (!(len % fd->d_miniosz) &&
- (len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
+ (len >= fd->d_miniosz) && (len <= read_chunk_sz))
*err = pread(fd->fd_direct, buf, len, offset);
else if (len < fd->d_miniosz)
*err = pread(fd->fd_sys, buf, len, offset);
- else if (len > fd->d_maxiosz) {
- ntimes = len/(fd->d_maxiosz);
- rem = len - ntimes * fd->d_maxiosz;
+ else if (len > read_chunk_sz) {
+ ntimes = len/(read_chunk_sz);
+ rem = len - ntimes * read_chunk_sz;
nbytes = 0;
for (i=0; i<ntimes; i++) {
- nbytes += pread(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
- fd->d_maxiosz, offset);
- offset += fd->d_maxiosz;
+ nbytes += pread(fd->fd_direct, ((char *)buf) + i * read_chunk_sz,
+ read_chunk_sz, offset);
+ offset += read_chunk_sz;
}
if (rem) {
if (!(rem % fd->d_miniosz))
nbytes += pread(fd->fd_direct,
- ((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
+ ((char *)buf) + ntimes * read_chunk_sz, rem, offset);
else {
newrem = rem % fd->d_miniosz;
size = rem - newrem;
if (size) {
nbytes += pread(fd->fd_direct,
- ((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
+ ((char *)buf) + ntimes * read_chunk_sz, size, offset);
offset += size;
}
nbytes += pread(fd->fd_sys,
- ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
+ ((char *)buf) + ntimes * read_chunk_sz + size, newrem, offset);
}
}
*err = nbytes;
Modified: mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_write.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_write.c 2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/ad_xfs/ad_xfs_write.c 2010-05-19 20:52:43 UTC (rev 6683)
@@ -13,14 +13,15 @@
/* style: allow:free:2 sig:0 */
-static void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
- ADIO_Offset offset, int *err);
+static int ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf,
+ ADIO_Offset len, ADIO_Offset offset);
void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
- int err=-1, datatype_size, len, diff, size, nbytes;
+ int err=-1, datatype_size, diff, size;
+ ssize_t len;
void *newbuf;
static char myname[] = "ADIOI_XFS_WRITECONTIG";
@@ -31,44 +32,48 @@
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
- if (!(fd->direct_write)) /* direct I/O not enabled */
+ if (!(fd->direct_write)) { /* direct I/O not enabled */
err = pwrite(fd->fd_sys, buf, len, offset);
- else { /* direct I/O enabled */
+ if (err < 0) {goto leaving;}
+ } else { /* direct I/O enabled */
/* (1) if mem_aligned && file_aligned
use direct I/O to write up to correct io_size
use buffered I/O for remaining */
- if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz))
- ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset, &err);
+ if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) {
+ err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset);
+ if (err < 0) {goto leaving;}
/* (2) if !file_aligned
use buffered I/O to write up to file_aligned
At that point, if still mem_aligned, use (1)
else copy into aligned buf and then use (1) */
- else if (offset % fd->d_miniosz) {
+ } else if (offset % fd->d_miniosz) {
diff = fd->d_miniosz - (offset % fd->d_miniosz);
diff = ADIOI_MIN(diff, len);
- nbytes = pwrite(fd->fd_sys, buf, diff, offset);
+ err = pwrite(fd->fd_sys, buf, diff, offset);
+ if (err < 0) {goto leaving;}
buf = ((char *) buf) + diff;
offset += diff;
size = len - diff;
if (!(((long) buf) % fd->d_mem)) {
- ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset, &err);
- nbytes += err;
+ err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset);
+ if (err < 0) {goto leaving;}
}
else {
newbuf = (void *) memalign(XFS_MEMALIGN, size);
if (newbuf) {
memcpy(newbuf, buf, size);
- ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
- nbytes += err;
+ err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset);
ADIOI_Free(newbuf);
+ if (err < 0) {goto leaving;}
+ } else {
+ err = pwrite(fd->fd_sys, buf, size, offset);
+ if (err < 0) {goto leaving;}
}
- else nbytes += pwrite(fd->fd_sys, buf, size, offset);
}
- err = nbytes;
}
/* (3) if !mem_aligned && file_aligned
@@ -77,19 +82,22 @@
newbuf = (void *) memalign(XFS_MEMALIGN, len);
if (newbuf) {
memcpy(newbuf, buf, len);
- ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset, &err);
+ err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset);
ADIOI_Free(newbuf);
+ } else {
+ err = pwrite(fd->fd_sys, buf, len, offset);
}
- else err = pwrite(fd->fd_sys, buf, len, offset);
+
+ if (err < 0) {goto leaving;}
}
}
- if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += err;
+ if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
#ifdef HAVE_STATUS_SET_BYTES
- if (err != -1) MPIR_Status_set_bytes(status, datatype, err);
+ if (err != -1) MPIR_Status_set_bytes(status, datatype, len);
#endif
-
+leaving:
if (err == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO, "**io",
@@ -99,10 +107,13 @@
}
-void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
- ADIO_Offset offset, int *err)
+static int
+ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len,
+ ADIO_Offset offset)
{
- int ntimes, rem, newrem, i, size, nbytes;
+ unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
+ ADIO_Offset nbytes, rem, newrem, size;
+ int ntimes, i;
/* memory buffer is aligned, offset in file is aligned,
io_size may or may not be of the right size.
@@ -110,42 +121,50 @@
use buffered I/O for remaining. */
if (!(len % fd->d_miniosz) &&
- (len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
- *err = pwrite(fd->fd_direct, buf, len, offset);
- else if (len < fd->d_miniosz)
- *err = pwrite(fd->fd_sys, buf, len, offset);
- else if (len > fd->d_maxiosz) {
- ntimes = len/(fd->d_maxiosz);
- rem = len - ntimes * fd->d_maxiosz;
+ (len >= fd->d_miniosz) && (len <= write_chunk_sz)) {
+ nbytes = pwrite(fd->fd_direct, buf, len, offset);
+ if (nbytes < 0) {return -1;}
+ } else if (len < fd->d_miniosz) {
+ nbytes = pwrite(fd->fd_sys, buf, len, offset);
+ if (nbytes < 0) {return -1;}
+ } else if (len > write_chunk_sz) {
+ ntimes = len/(write_chunk_sz);
+ rem = len - ntimes * write_chunk_sz;
nbytes = 0;
for (i=0; i<ntimes; i++) {
- nbytes += pwrite(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
- fd->d_maxiosz, offset);
- offset += fd->d_maxiosz;
+ nbytes = pwrite(fd->fd_direct, ((char *)buf) + i * write_chunk_sz,
+ write_chunk_sz, offset);
+ offset += write_chunk_sz;
+ if (nbytes < 0) {return -1;}
}
if (rem) {
- if (!(rem % fd->d_miniosz))
- nbytes += pwrite(fd->fd_direct,
- ((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
- else {
+ if (!(rem % fd->d_miniosz)) {
+ nbytes = pwrite(fd->fd_direct,
+ ((char *)buf) + ntimes * write_chunk_sz, rem, offset);
+ if (nbytes < 0) {return -1;}
+ } else {
newrem = rem % fd->d_miniosz;
size = rem - newrem;
if (size) {
- nbytes += pwrite(fd->fd_direct,
- ((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
+ nbytes = pwrite(fd->fd_direct,
+ ((char *)buf) + ntimes * write_chunk_sz, size, offset);
offset += size;
+ if (nbytes < 0) {return -1;}
}
- nbytes += pwrite(fd->fd_sys,
- ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
+ nbytes = pwrite(fd->fd_sys,
+ ((char *)buf) + ntimes * write_chunk_sz + size, newrem, offset);
+ if (nbytes < 0) {return -1;}
}
}
- *err = nbytes;
}
else {
rem = len % fd->d_miniosz;
size = len - rem;
nbytes = pwrite(fd->fd_direct, buf, size, offset);
- nbytes += pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
- *err = nbytes;
+ if (nbytes < 0) {return -1;}
+ nbytes = pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
+ if (nbytes < 0) {return -1;}
}
+
+ return 0;
}
Modified: mpich2/trunk/src/mpi/romio/adio/common/ad_iwrite.c
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/common/ad_iwrite.c 2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/common/ad_iwrite.c 2010-05-19 20:52:43 UTC (rev 6683)
@@ -86,10 +86,24 @@
int error_code;
struct aiocb *aiocbp;
ADIOI_AIO_Request *aio_req;
+#if defined(ROMIO_XFS)
+ unsigned maxiosz = wr ? fd->hints->fs_hints.xfs.write_chunk_sz :
+ fd->hints->fs_hints.xfs.read_chunk_sz;
+#endif /* ROMIO_XFS */
-
fd_sys = fd->fd_sys;
+#if defined(ROMIO_XFS)
+ /* Use Direct I/O if desired and properly aligned */
+ if (fd->fns == &ADIO_XFS_operations &&
+ ((wr && fd->direct_write) || (!wr && fd->direct_read)) &&
+ !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) &&
+ !(len % fd->d_miniosz) && (len >= fd->d_miniosz) &&
+ (len <= maxiosz)) {
+ fd_sys = fd->fd_direct;
+ }
+#endif /* ROMIO_XFS */
+
aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1);
aiocbp->aio_offset = offset;
Modified: mpich2/trunk/src/mpi/romio/adio/include/adio.h
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/include/adio.h 2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/include/adio.h 2010-05-19 20:52:43 UTC (rev 6683)
@@ -195,7 +195,6 @@
unsigned d_mem; /* data buffer memory alignment */
unsigned d_miniosz; /* min xfer size, xfer size multiple,
and file seek offset alignment */
- unsigned d_maxiosz; /* max xfer size */
ADIO_Offset fp_ind; /* individual file pointer in MPI-IO (in bytes)*/
ADIO_Offset fp_sys_posn; /* current location of the system file-pointer
in bytes */
Modified: mpich2/trunk/src/mpi/romio/adio/include/adioi.h
===================================================================
--- mpich2/trunk/src/mpi/romio/adio/include/adioi.h 2010-05-19 20:48:41 UTC (rev 6682)
+++ mpich2/trunk/src/mpi/romio/adio/include/adioi.h 2010-05-19 20:52:43 UTC (rev 6683)
@@ -70,6 +70,10 @@
int coll_threshold;
int ds_in_coll;
} lustre;
+ struct {
+ unsigned read_chunk_sz; /* chunk size for direct reads */
+ unsigned write_chunk_sz; /* chunk size for direct writes */
+ } xfs;
} fs_hints;
};
More information about the mpich2-commits mailing list