IOR
aiori-MPIIO.c
Go to the documentation of this file.
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  */
4 /******************************************************************************\
5 * *
6 * Copyright (c) 2003, The Regents of the University of California *
7 * See the file COPYRIGHT for a complete copyright notice and license. *
8 * *
9 ********************************************************************************
10 *
11 * Implement abstract I/O interface for MPIIO.
12 *
13 \******************************************************************************/
14 
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <sys/stat.h>
22 
23 #include "ior.h"
24 #include "iordef.h"
25 #include "aiori.h"
26 #include "utilities.h"
27 
28 #ifndef MPIAPI
29 #define MPIAPI /* defined as __stdcall on Windows */
30 #endif
31 
32 /**************************** P R O T O T Y P E S *****************************/
33 
34 static IOR_offset_t SeekOffset(MPI_File, IOR_offset_t, IOR_param_t *);
35 
36 static void *MPIIO_Create(char *, IOR_param_t *);
37 static void *MPIIO_Open(char *, IOR_param_t *);
38 static IOR_offset_t MPIIO_Xfer(int, void *, IOR_size_t *,
40 static void MPIIO_Close(void *, IOR_param_t *);
41 static char* MPIIO_GetVersion();
42 static void MPIIO_Fsync(void *, IOR_param_t *);
43 
44 
45 /************************** D E C L A R A T I O N S ***************************/
46 
48  .name = "MPIIO",
49  .name_legacy = NULL,
50  .create = MPIIO_Create,
51  .open = MPIIO_Open,
52  .xfer = MPIIO_Xfer,
53  .close = MPIIO_Close,
54  .delete = MPIIO_Delete,
55  .get_version = MPIIO_GetVersion,
56  .fsync = MPIIO_Fsync,
57  .get_file_size = MPIIO_GetFileSize,
58  .statfs = aiori_posix_statfs,
59  .mkdir = aiori_posix_mkdir,
60  .rmdir = aiori_posix_rmdir,
61  .access = MPIIO_Access,
62  .stat = aiori_posix_stat,
63 };
64 
65 /***************************** F U N C T I O N S ******************************/
66 
67 /*
68  * Try to access a file through the MPIIO interface.
69  */
70 int MPIIO_Access(const char *path, int mode, IOR_param_t *param)
71 {
72  if(param->dryRun){
73  return MPI_SUCCESS;
74  }
75  MPI_File fd;
76  int mpi_mode = MPI_MODE_UNIQUE_OPEN;
77  MPI_Info mpiHints = MPI_INFO_NULL;
78 
79  if ((mode & W_OK) && (mode & R_OK))
80  mpi_mode |= MPI_MODE_RDWR;
81  else if (mode & W_OK)
82  mpi_mode |= MPI_MODE_WRONLY;
83  else
84  mpi_mode |= MPI_MODE_RDONLY;
85 
86  SetHints(&mpiHints, param->hintsFileName);
87 
88  int ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode, mpiHints, &fd);
89 
90  if (!ret)
91  MPI_File_close(&fd);
92 
93  if (mpiHints != MPI_INFO_NULL)
94  MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed");
95  return ret;
96 }
97 
98 /*
99  * Create and open a file through the MPIIO interface.
100  */
101 static void *MPIIO_Create(char *testFileName, IOR_param_t * param)
102 {
103  if(param->dryRun){
104  return 0;
105  }
106  return MPIIO_Open(testFileName, param);
107 }
108 
109 /*
110  * Open a file through the MPIIO interface. Setup file view.
111  */
112 static void *MPIIO_Open(char *testFileName, IOR_param_t * param)
113 {
114  int fd_mode = (int)0,
115  offsetFactor,
116  tasksPerFile,
117  transfersPerBlock = param->blockSize / param->transferSize;
118  struct fileTypeStruct {
119  int globalSizes[2], localSizes[2], startIndices[2];
120  } fileTypeStruct;
121  MPI_File *fd;
122  MPI_Comm comm;
123  MPI_Info mpiHints = MPI_INFO_NULL;
124 
125  fd = (MPI_File *) malloc(sizeof(MPI_File));
126  if (fd == NULL)
127  ERR("malloc failed()");
128 
129  *fd = 0;
130 
131  /* set IOR file flags to MPIIO flags */
132  /* -- file open flags -- */
133  if (param->openFlags & IOR_RDONLY) {
134  fd_mode |= MPI_MODE_RDONLY;
135  }
136  if (param->openFlags & IOR_WRONLY) {
137  fd_mode |= MPI_MODE_WRONLY;
138  }
139  if (param->openFlags & IOR_RDWR) {
140  fd_mode |= MPI_MODE_RDWR;
141  }
142  if (param->openFlags & IOR_APPEND) {
143  fd_mode |= MPI_MODE_APPEND;
144  }
145  if (param->openFlags & IOR_CREAT) {
146  fd_mode |= MPI_MODE_CREATE;
147  }
148  if (param->openFlags & IOR_EXCL) {
149  fd_mode |= MPI_MODE_EXCL;
150  }
151  if (param->openFlags & IOR_TRUNC) {
152  fprintf(stdout, "File truncation not implemented in MPIIO\n");
153  }
154  if (param->openFlags & IOR_DIRECT) {
155  fprintf(stdout, "O_DIRECT not implemented in MPIIO\n");
156  }
157 
158  /*
159  * MPI_MODE_UNIQUE_OPEN mode optimization eliminates the overhead of file
160  * locking. Only open a file in this mode when the file will not be con-
161  * currently opened elsewhere, either inside or outside the MPI environment.
162  */
163  fd_mode |= MPI_MODE_UNIQUE_OPEN;
164 
165  if (param->filePerProc) {
166  comm = MPI_COMM_SELF;
167  } else {
168  comm = testComm;
169  }
170 
171  SetHints(&mpiHints, param->hintsFileName);
172  /*
173  * note that with MP_HINTS_FILTERED=no, all key/value pairs will
174  * be in the info object. The info object that is attached to
175  * the file during MPI_File_open() will only contain those pairs
176  * deemed valid by the implementation.
177  */
178  /* show hints passed to file */
179  if (rank == 0 && param->showHints) {
180  fprintf(stdout, "\nhints passed to MPI_File_open() {\n");
181  ShowHints(&mpiHints);
182  fprintf(stdout, "}\n");
183  }
184  if(! param->dryRun){
185  MPI_CHECKF(MPI_File_open(comm, testFileName, fd_mode, mpiHints, fd),
186  "cannot open file: %s", testFileName);
187  }
188 
189  /* show hints actually attached to file handle */
190  if (rank == 0 && param->showHints && ! param->dryRun) {
191  if (mpiHints != MPI_INFO_NULL)
192  MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed");
193  MPI_CHECK(MPI_File_get_info(*fd, &mpiHints),
194  "cannot get file info");
195  fprintf(stdout, "\nhints returned from opened file {\n");
196  ShowHints(&mpiHints);
197  fprintf(stdout, "}\n");
198  }
199 
200  /* preallocate space for file */
201  if (param->preallocate && param->open == WRITE && ! param->dryRun) {
202  MPI_CHECK(MPI_File_preallocate(*fd,
203  (MPI_Offset) (param->segmentCount
204  *
205  param->blockSize *
206  param->numTasks)),
207  "cannot preallocate file");
208  }
209  /* create file view */
210  if (param->useFileView) {
211  /* create contiguous transfer datatype */
212  MPI_CHECK(MPI_Type_contiguous
213  (param->transferSize / sizeof(IOR_size_t),
214  MPI_LONG_LONG_INT, &param->transferType),
215  "cannot create contiguous datatype");
216  MPI_CHECK(MPI_Type_commit(&param->transferType),
217  "cannot commit datatype");
218  if (param->filePerProc) {
219  offsetFactor = 0;
220  tasksPerFile = 1;
221  } else {
222  offsetFactor = (rank + rankOffset) % param->numTasks;
223  tasksPerFile = param->numTasks;
224  }
225 
226  /*
227  * create file type using subarray
228  */
229  fileTypeStruct.globalSizes[0] = 1;
230  fileTypeStruct.globalSizes[1] =
231  transfersPerBlock * tasksPerFile;
232  fileTypeStruct.localSizes[0] = 1;
233  fileTypeStruct.localSizes[1] = transfersPerBlock;
234  fileTypeStruct.startIndices[0] = 0;
235  fileTypeStruct.startIndices[1] =
236  transfersPerBlock * offsetFactor;
237 
238  MPI_CHECK(MPI_Type_create_subarray
239  (2, fileTypeStruct.globalSizes,
240  fileTypeStruct.localSizes,
241  fileTypeStruct.startIndices, MPI_ORDER_C,
243  "cannot create subarray");
244  MPI_CHECK(MPI_Type_commit(&param->fileType),
245  "cannot commit datatype");
246 
247  if(! param->dryRun){
248  MPI_CHECK(MPI_File_set_view(*fd, (MPI_Offset) 0,
250  param->fileType, "native",
251  (MPI_Info) MPI_INFO_NULL),
252  "cannot set file view");
253  }
254  }
255  if (mpiHints != MPI_INFO_NULL)
256  MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed");
257  return ((void *)fd);
258 }
259 
260 /*
261  * Write or read access to file using the MPIIO interface.
262  */
263 static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer,
264  IOR_offset_t length, IOR_param_t * param)
265 {
266  /* NOTE: The second arg is (void *) for reads, and (const void *)
267  for writes. Therefore, one of the two sets of assignments below
268  will get "assignment from incompatible pointer-type" warnings,
269  if we only use this one set of signatures. */
270 
271  if(param->dryRun)
272  return length;
273 
274  int (MPIAPI * Access) (MPI_File, void *, int,
275  MPI_Datatype, MPI_Status *);
276  int (MPIAPI * Access_at) (MPI_File, MPI_Offset, void *, int,
277  MPI_Datatype, MPI_Status *);
278  int (MPIAPI * Access_all) (MPI_File, void *, int,
279  MPI_Datatype, MPI_Status *);
280  int (MPIAPI * Access_at_all) (MPI_File, MPI_Offset, void *, int,
281  MPI_Datatype, MPI_Status *);
282  /*
283  * this needs to be properly implemented:
284  *
285  * int (*Access_ordered)(MPI_File, void *, int,
286  * MPI_Datatype, MPI_Status *);
287  */
288  MPI_Status status;
289 
290  /* point functions to appropriate MPIIO calls */
291  if (access == WRITE) { /* WRITE */
292  Access = (int (MPIAPI *)(MPI_File, void *, int,
293  MPI_Datatype, MPI_Status *)) MPI_File_write;
294  Access_at = (int (MPIAPI *)(MPI_File, MPI_Offset, void *, int,
295  MPI_Datatype, MPI_Status *)) MPI_File_write_at;
296  Access_all = (int (MPIAPI *) (MPI_File, void *, int,
297  MPI_Datatype, MPI_Status *)) MPI_File_write_all;
298  Access_at_all = (int (MPIAPI *) (MPI_File, MPI_Offset, void *, int,
299  MPI_Datatype, MPI_Status *)) MPI_File_write_at_all;
300  /*
301  * this needs to be properly implemented:
302  *
303  * Access_ordered = MPI_File_write_ordered;
304  */
305  } else { /* READ or CHECK */
306  Access = MPI_File_read;
307  Access_at = MPI_File_read_at;
308  Access_all = MPI_File_read_all;
309  Access_at_all = MPI_File_read_at_all;
310  /*
311  * this needs to be properly implemented:
312  *
313  * Access_ordered = MPI_File_read_ordered;
314  */
315  }
316 
317  /*
318  * 'useFileView' uses derived datatypes and individual file pointers
319  */
320  if (param->useFileView) {
321  /* find offset in file */
322  if (SeekOffset(*(MPI_File *) fd, param->offset, param) <
323  0) {
324  /* if unsuccessful */
325  length = -1;
326  } else {
327  /*
328  * 'useStridedDatatype' fits multi-strided pattern into a datatype;
329  * must use 'length' to determine repetitions (fix this for
330  * multi-segments someday, WEL):
331  * e.g., 'IOR -s 2 -b 32K -t 32K -a MPIIO -S'
332  */
333  if (param->useStridedDatatype) {
334  length = param->segmentCount;
335  } else {
336  length = 1;
337  }
338  if (param->collective) {
339  /* individual, collective call */
340  MPI_CHECK(Access_all
341  (*(MPI_File *) fd, buffer, length,
342  param->transferType, &status),
343  "cannot access collective");
344  } else {
345  /* individual, noncollective call */
346  MPI_CHECK(Access
347  (*(MPI_File *) fd, buffer, length,
348  param->transferType, &status),
349  "cannot access noncollective");
350  }
351  length *= param->transferSize; /* for return value in bytes */
352  }
353  } else {
354  /*
355  * !useFileView does not use derived datatypes, but it uses either
356  * shared or explicit file pointers
357  */
358  if (param->useSharedFilePointer) {
359  /* find offset in file */
360  if (SeekOffset
361  (*(MPI_File *) fd, param->offset, param) < 0) {
362  /* if unsuccessful */
363  length = -1;
364  } else {
365  /* shared, collective call */
366  /*
367  * this needs to be properly implemented:
368  *
369  * MPI_CHECK(Access_ordered(fd.MPIIO, buffer, length,
370  * MPI_BYTE, &status),
371  * "cannot access shared, collective");
372  */
373  fprintf(stdout,
374  "useSharedFilePointer not implemented\n");
375  }
376  } else {
377  if (param->collective) {
378  /* explicit, collective call */
379  MPI_CHECK(Access_at_all
380  (*(MPI_File *) fd, param->offset,
381  buffer, length, MPI_BYTE, &status),
382  "cannot access explicit, collective");
383  } else {
384  /* explicit, noncollective call */
385  MPI_CHECK(Access_at
386  (*(MPI_File *) fd, param->offset,
387  buffer, length, MPI_BYTE, &status),
388  "cannot access explicit, noncollective");
389  }
390  }
391  }
392  if((access == WRITE) && (param->fsyncPerWrite == TRUE))
393  MPIIO_Fsync(fd, param);
394  return (length);
395 }
396 
397 /*
398  * Perform fsync().
399  */
400 static void MPIIO_Fsync(void *fdp, IOR_param_t * param)
401 {
402  if(param->dryRun)
403  return;
404  if (MPI_File_sync(*(MPI_File *)fdp) != MPI_SUCCESS)
405  EWARN("fsync() failed");
406 }
407 
408 /*
409  * Close a file through the MPIIO interface.
410  */
411 static void MPIIO_Close(void *fd, IOR_param_t * param)
412 {
413  if(! param->dryRun){
414  MPI_CHECK(MPI_File_close((MPI_File *) fd), "cannot close file");
415  }
416  if ((param->useFileView == TRUE) && (param->fd_fppReadCheck == NULL)) {
417  /*
418  * need to free the datatype, so done in the close process
419  */
420  MPI_CHECK(MPI_Type_free(&param->fileType),
421  "cannot free MPI file datatype");
422  MPI_CHECK(MPI_Type_free(&param->transferType),
423  "cannot free MPI transfer datatype");
424  }
425  free(fd);
426 }
427 
428 /*
429  * Delete a file through the MPIIO interface.
430  */
431 void MPIIO_Delete(char *testFileName, IOR_param_t * param)
432 {
433  if(param->dryRun)
434  return;
435  MPI_CHECKF(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL),
436  "cannot delete file: %s", testFileName);
437 }
438 
/*
 * Determine api version.
 *
 * Returns a pointer to a static buffer holding the MPI version reported by
 * MPI_Get_version(), formatted as "(major.minor)".  The buffer is
 * overwritten by every call and must not be freed by the caller.
 */
static char* MPIIO_GetVersion()
{
        /* "{0}" instead of "{}": an empty initializer is not valid before C23 */
        static char ver[1024] = {0};
        int version, subversion;

        MPI_CHECK(MPI_Get_version(&version, &subversion), "cannot get MPI version");
        /* bounded formatting keeps the write inside the static buffer */
        snprintf(ver, sizeof(ver), "(%d.%d)", version, subversion);
        return ver;
}
450 
451 /*
452  * Seek to offset in file using the MPIIO interface.
453  */
455  IOR_param_t * param)
456 {
457  int offsetFactor, tasksPerFile;
458  IOR_offset_t tempOffset;
459 
460  tempOffset = offset;
461 
462  if (param->filePerProc) {
463  offsetFactor = 0;
464  tasksPerFile = 1;
465  } else {
466  offsetFactor = (rank + rankOffset) % param->numTasks;
467  tasksPerFile = param->numTasks;
468  }
469  if (param->useFileView) {
470  /* recall that offsets in a file view are
471  counted in units of transfer size */
472  if (param->filePerProc) {
473  tempOffset = tempOffset / param->transferSize;
474  } else {
475  /*
476  * this formula finds a file view offset for a task
477  * from an absolute offset
478  */
479  tempOffset = ((param->blockSize / param->transferSize)
480  * (tempOffset /
481  (param->blockSize * tasksPerFile)))
482  + (((tempOffset % (param->blockSize * tasksPerFile))
483  - (offsetFactor * param->blockSize))
484  / param->transferSize);
485  }
486  }
487  MPI_CHECK(MPI_File_seek(fd, tempOffset, MPI_SEEK_SET),
488  "cannot seek offset");
489  return (offset);
490 }
491 
492 /*
493  * Use MPI_File_get_size() to return aggregate file size.
494  * NOTE: This function is used by the HDF5 and NCMPI backends.
495  */
497  char *testFileName)
498 {
499  if(test->dryRun)
500  return 0;
501  IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum;
502  MPI_File fd;
503  MPI_Comm comm;
504  MPI_Info mpiHints = MPI_INFO_NULL;
505 
506  if (test->filePerProc == TRUE) {
507  comm = MPI_COMM_SELF;
508  } else {
509  comm = testComm;
510  }
511 
512  SetHints(&mpiHints, test->hintsFileName);
513  MPI_CHECK(MPI_File_open(comm, testFileName, MPI_MODE_RDONLY,
514  mpiHints, &fd),
515  "cannot open file to get file size");
516  MPI_CHECK(MPI_File_get_size(fd, (MPI_Offset *) & aggFileSizeFromStat),
517  "cannot get file size");
518  MPI_CHECK(MPI_File_close(&fd), "cannot close file");
519  if (mpiHints != MPI_INFO_NULL)
520  MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed");
521 
522  if (test->filePerProc == TRUE) {
523  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1,
524  MPI_LONG_LONG_INT, MPI_SUM, testComm),
525  "cannot total data moved");
526  aggFileSizeFromStat = tmpSum;
527  } else {
528  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1,
529  MPI_LONG_LONG_INT, MPI_MIN, testComm),
530  "cannot total data moved");
531  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1,
532  MPI_LONG_LONG_INT, MPI_MAX, testComm),
533  "cannot total data moved");
534  if (tmpMin != tmpMax) {
535  if (rank == 0) {
536  WARN("inconsistent file size by different tasks");
537  }
538  /* incorrect, but now consistent across tasks */
539  aggFileSizeFromStat = tmpMin;
540  }
541  }
542 
543  return (aggFileSizeFromStat);
544 }
int showHints
Definition: ior.h:132
MPI_Datatype fileType
Definition: ior.h:168
#define ERR(MSG)
Definition: iordef.h:184
void ShowHints(MPI_Info *mpiHints)
Definition: utilities.c:515
int filePerProc
Definition: ior.h:111
IOR_offset_t segmentCount
Definition: ior.h:123
int useStridedDatatype
Definition: ior.h:131
int useSharedFilePointer
Definition: ior.h:130
IOR_offset_t transferSize
Definition: ior.h:125
int aiori_posix_rmdir(const char *path, IOR_param_t *param)
Definition: aiori.c:185
MPI_Datatype transferType
Definition: ior.h:167
#define IOR_APPEND
Definition: aiori.h:37
int aiori_posix_mkdir(const char *path, mode_t mode, IOR_param_t *param)
Definition: aiori.c:180
#define IOR_RDONLY
Definition: aiori.h:34
unsigned int openFlags
Definition: ior.h:88
int fsyncPerWrite
Definition: ior.h:162
#define MPI_CHECKF(MPI_STATUS, FORMAT,...)
Definition: iordef.h:204
#define WRITE
Definition: iordef.h:95
#define EWARN(MSG)
Definition: iordef.h:169
int aiori_posix_statfs(const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t *param)
Definition: aiori.c:155
#define IOR_CREAT
Definition: aiori.h:38
static void * MPIIO_Create(char *, IOR_param_t *)
Definition: aiori-MPIIO.c:101
#define IOR_EXCL
Definition: aiori.h:40
char * hintsFileName
Definition: ior.h:95
MPI_Comm testComm
Definition: utilities.c:60
#define IOR_TRUNC
Definition: aiori.h:39
#define MPI_CHECK(MPI_STATUS, MSG)
Definition: iordef.h:224
static IOR_offset_t MPIIO_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *)
Definition: aiori-MPIIO.c:263
int dryRun
Definition: ior.h:98
Definition: ior.h:48
void * fd_fppReadCheck
Definition: ior.h:147
void MPIIO_Delete(char *testFileName, IOR_param_t *param)
Definition: aiori-MPIIO.c:431
static IOR_param_t param
Definition: mdtest.c:170
static IOR_offset_t SeekOffset(MPI_File, IOR_offset_t, IOR_param_t *)
Definition: aiori-MPIIO.c:454
int MPIIO_Access(const char *path, int mode, IOR_param_t *param)
Definition: aiori-MPIIO.c:70
#define IOR_WRONLY
Definition: aiori.h:35
static char * MPIIO_GetVersion()
Definition: aiori-MPIIO.c:442
int rankOffset
Definition: utilities.c:58
int useFileView
Definition: ior.h:129
static void MPIIO_Fsync(void *, IOR_param_t *)
Definition: aiori-MPIIO.c:400
long long int IOR_size_t
Definition: iordef.h:123
#define WARN(MSG)
Definition: iordef.h:144
static void MPIIO_Close(void *, IOR_param_t *)
Definition: aiori-MPIIO.c:411
int numTasks
Definition: ior.h:99
static void * MPIIO_Open(char *, IOR_param_t *)
Definition: aiori-MPIIO.c:112
ior_aiori_t mpiio_aiori
Definition: aiori-MPIIO.c:47
int collective
Definition: ior.h:122
IOR_offset_t offset
Definition: ior.h:126
int open
Definition: ior.h:108
int aiori_posix_stat(const char *path, struct stat *buf, IOR_param_t *param)
Definition: aiori.c:195
void SetHints(MPI_Info *mpiHints, char *hintsFileName)
Definition: utilities.c:454
#define IOR_RDWR
Definition: aiori.h:36
int preallocate
Definition: ior.h:128
#define MPIAPI
Definition: aiori-MPIIO.c:29
char * name
Definition: aiori.h:68
long long int IOR_offset_t
Definition: iordef.h:122
int rank
Definition: utilities.c:57
IOR_offset_t blockSize
Definition: ior.h:124
#define TRUE
Definition: iordef.h:75
#define IOR_DIRECT
Definition: aiori.h:41
IOR_offset_t MPIIO_GetFileSize(IOR_param_t *test, MPI_Comm testComm, char *testFileName)
Definition: aiori-MPIIO.c:496
#define NULL
Definition: iordef.h:79