IOR
mdtest.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2003, The Regents of the University of California.
3  * Produced at the Lawrence Livermore National Laboratory.
4  * Written by Christopher J. Morrone <morrone@llnl.gov>,
5  * Bill Loewe <loewe@loewe.net>, Tyce McLarty <mclarty@llnl.gov>,
6  * and Ryan Kroiss <rrkroiss@lanl.gov>.
7  * All rights reserved.
8  * UCRL-CODE-155800
9  *
10  * Please read the COPYRIGHT file.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License (as published by
14  * the Free Software Foundation) version 2, dated June 1991.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * terms and conditions of the GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  *
25  * CVS info:
26  * $RCSfile: mdtest.c,v $
27  * $Revision: 1.4 $
28  * $Date: 2013/11/27 17:05:31 $
29  * $Author: brettkettering $
30  */
31 #include <limits.h>
32 #include <math.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stdbool.h>
36 #include <inttypes.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <stdarg.h>
40 
41 #include "option.h"
42 #include "utilities.h"
43 
44 #if HAVE_SYS_PARAM_H
45 #include <sys/param.h>
46 #endif
47 
48 #if HAVE_SYS_MOUNT_H
49 #include <sys/mount.h>
50 #endif
51 
52 #if HAVE_SYS_STATFS_H
53 #include <sys/statfs.h>
54 #endif
55 
56 #if HAVE_SYS_STATVFS_H
57 #include <sys/statvfs.h>
58 #endif
59 
60 #include <fcntl.h>
61 #include <string.h>
62 
63 #if HAVE_STRINGS_H
64 #include <strings.h>
65 #endif
66 
67 #include <unistd.h>
68 #include <dirent.h>
69 #include <errno.h>
70 #include <time.h>
71 #include <sys/time.h>
72 
73 #include "aiori.h"
74 #include "ior.h"
75 #include "mdtest.h"
76 
77 #include <mpi.h>
78 
79 #pragma GCC diagnostic ignored "-Wformat-overflow"
80 
81 #ifdef HAVE_LUSTRE_LUSTREAPI
82 #include <lustre/lustreapi.h>
83 #endif /* HAVE_LUSTRE_LUSTREAPI */
84 
/* Permission bits for files created by the benchmark (rw-rw-r--). */
#define FILEMODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH)
/* Permission bits for directories created by the benchmark (rwxrwxr-x). */
#define DIRMODE (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH)
#define RELEASE_VERS META_VERSION
/* Base name of the per-iteration test directory. */
#define TEST_DIR "test-dir"
/* A progress message is logged every ITEM_COUNT items. */
#define ITEM_COUNT 25000

/*
 * printf conversion used for item counters.
 * NOTE(review): the counters are uint64_t; "%lu" only matches where
 * uint64_t is unsigned long -- consider "%" PRIu64.
 */
#define LLU "%lu"
92 
/*
 * Run-time options and scratch state of the benchmark; the functions in this
 * file reach it through the object named `o`.
 * NOTE(review): several members used by the code (e.g. path_count, backend,
 * leaf_only, num_dirs_in_tree) and the closing line of this definition are
 * not visible in this listing.
 */
93 typedef struct {
94  int size;
/* item order used when stat/read run in random mode (indexed by item index) */
95  uint64_t *rand_array;
/* full test directory path, rebuilt by prep_testdir() from testdirpath */
96  char testdir[MAX_PATHLEN];
97  char testdirpath[MAX_PATHLEN];
98  char base_tree_name[MAX_PATHLEN];
/* path list filled in by parse_dirpath() */
99  char **filenames;
100  char hostname[MAX_PATHLEN];
/* per-phase item name prefixes of the form "mdtest.<n>." */
101  char mk_name[MAX_PATHLEN];
102  char stat_name[MAX_PATHLEN];
103  char read_name[MAX_PATHLEN];
104  char rm_name[MAX_PATHLEN];
/* per-phase directories used when each task has its own tree */
105  char unique_mk_dir[MAX_PATHLEN];
106  char unique_chdir_dir[MAX_PATHLEN];
107  char unique_stat_dir[MAX_PATHLEN];
108  char unique_read_dir[MAX_PATHLEN];
109  char unique_rm_dir[MAX_PATHLEN];
110  char unique_rm_uni_dir[MAX_PATHLEN];
114 
115 
/* when non-zero, phase_end() runs an MPI barrier */
116  int barriers;
/* number of sub-directories per tree level and number of tree levels */
126  unsigned branch_factor;
127  int depth;
128  int random_buffer_offset; /* user settable value, otherwise random */
129 
130  /*
131  * This is likely a small value, but it's sometimes computed by
132  * branch_factor^(depth+1), so we'll make it a larger variable,
133  * just in case.
134  */
136  /*
137  * As we start moving towards Exascale, we could have billions
138  * of files in a directory. Make room for that possibility with
139  * a larger variable.
140  */
141  uint64_t items;
142  uint64_t items_per_dir;
143  uint64_t num_dirs_in_tree_calc; /* this is a workaround until the overall code is refactored */
/* bytes written to each created file / read from each file (0 disables) */
156  size_t write_bytes;
158  size_t read_bytes;
162  int nstride; /* neighbor stride */
164  #ifdef HAVE_LUSTRE_LUSTREAPI
165  int global_dir_layout;
166  #endif /* HAVE_LUSTRE_LUSTREAPI */
167  char * saveRankDetailsCSV; /* save the details about the performance to a file */
168 
170  pid_t pid;
171  uid_t uid;
172 
173  /* Use the POSIX backend by default */
177  char * api;
179 
181 
182 
183 /* This structure describes the processing status for stonewalling */
184 typedef struct{
/* reference timestamp; CHECK_STONE_WALL compares GetTimeStamp() against it */
185  double start_time;
186 
/* NOTE(review): stone_wall_timer_seconds is used by CHECK_STONE_WALL but its
   declaration is not visible in this listing */
188 
/* first item index processed by the create/remove helper loops */
189  uint64_t items_start;
/* number of items processed; set by the helpers when they stop or finish */
190  uint64_t items_done;
191 
/* upper bound (exclusive) of the item index in the helper loops */
192  uint64_t items_per_dir;
194 
195 #define CHECK_STONE_WALL(p) (((p)->stone_wall_timer_seconds != 0) && ((GetTimeStamp() - (p)->start_time) > (p)->stone_wall_timer_seconds))
196 
197 /* for making/removing unique directory && stating/deleting subdirectory */
199 
200 /* a helper function for passing debug and verbose messages.
201  use the MACRO as it will insert __LINE__ for you.
202  Pass the verbose level for root to print, then the verbose level for anyone to print.
203  Pass -1 to suppress the print for anyone.
204  Then do the standard printf stuff. This function adds the newline for you.
205 */
206 #define VERBOSE(root,any,...) VerboseMessage(root,any,__LINE__,__VA_ARGS__)
207 void VerboseMessage (int root_level, int any_level, int line, char * format, ...) {
208  if ((rank==0 && verbose >= root_level) || (any_level > 0 && verbose >= any_level)) {
209  char buffer[1024];
210  va_list args;
211  va_start (args, format);
212  vsnprintf (buffer, 1024, format, args);
213  va_end (args);
214  if (root_level == 0 && any_level == -1) {
215  /* No header when it is just the standard output */
216  fprintf( out_logfile, "%s\n", buffer );
217  } else {
218  /* add a header when the verbose is greater than 0 */
219  fprintf( out_logfile, "V-%d: Rank %3d Line %5d %s\n", root_level, rank, line, buffer );
220  }
221  fflush(out_logfile);
222  }
223 }
224 
/*
 * Shift the timestamps t[0] .. t[tcount] by the time elapsed since
 * t[tcount] was taken, so that t[tcount] becomes the current time.
 *   t      - array of timestamps holding at least tcount+1 entries
 *   tcount - index of the most recent timestamp
 */
void offset_timers(double * t, int tcount) {
    /* VerboseMessage already prefixes the "V-1:" header, so it is not
       repeated in the message text */
    VERBOSE(1,-1,"Entering offset_timers..." );

    const double toffset = GetTimeStamp() - t[tcount];
    for (int i = 0; i <= tcount; i++) {
        t[i] += toffset;
    }
}
236 
237 void parse_dirpath(char *dirpath_arg) {
238  char * tmp, * token;
239  char delimiter_string[3] = { '@', '\n', '\0' };
240  int i = 0;
241 
242 
243  VERBOSE(1,-1, "Entering parse_dirpath on %s...", dirpath_arg );
244 
245  tmp = dirpath_arg;
246 
247  if (* tmp != '\0') o.path_count++;
248  while (* tmp != '\0') {
249  if (* tmp == '@') {
250  o.path_count++;
251  }
252  tmp++;
253  }
254  // prevent changes to the original dirpath_arg
255  dirpath_arg = strdup(dirpath_arg);
256  o.filenames = (char **) safeMalloc(o.path_count * sizeof(char **));
257 
258  token = strtok(dirpath_arg, delimiter_string);
259  while (token != NULL) {
260  o.filenames[i] = token;
261  token = strtok(NULL, delimiter_string);
262  i++;
263  }
264 }
265 
266 static void prep_testdir(int j, int dir_iter){
267  int pos = sprintf(o.testdir, "%s", o.testdirpath);
268  if ( o.testdir[strlen( o.testdir ) - 1] != '/' ) {
269  pos += sprintf(& o.testdir[pos], "/");
270  }
271  pos += sprintf(& o.testdir[pos], "%s", TEST_DIR);
272  pos += sprintf(& o.testdir[pos], ".%d-%d", j, dir_iter);
273 }
274 
/*
 * Called at the end of each benchmark phase.
 * When o.call_sync is set, a backend without a sync method is reported
 * through FAIL. When o.barriers is set, the ranks of testComm meet in an
 * MPI barrier.
 * NOTE(review): the statement numbered 280 is not visible in this listing;
 * presumably it invokes the backend's sync method -- confirm.
 */
275 static void phase_end(){
276  if (o.call_sync){
277  if(! o.backend->sync){
278  FAIL("Error, backend does not provide the sync method, but you requested to use sync.\n");
279  }
281  }
282 
283  if (o.barriers) {
284  MPI_Barrier(testComm);
285  }
286 }
287 
288 /*
289  * This function copies the unique directory name for a given option to
290  * the "to" parameter. Some memory must be allocated to the "to" parameter.
291  */
292 
293 void unique_dir_access(int opt, char *to) {
294  if (opt == MK_UNI_DIR) {
295  MPI_Barrier(testComm);
296  sprintf( to, "%s/%s", o.testdir, o.unique_chdir_dir );
297  } else if (opt == STAT_SUB_DIR) {
298  sprintf( to, "%s/%s", o.testdir, o.unique_stat_dir );
299  } else if (opt == READ_SUB_DIR) {
300  sprintf( to, "%s/%s", o.testdir, o.unique_read_dir );
301  } else if (opt == RM_SUB_DIR) {
302  sprintf( to, "%s/%s", o.testdir, o.unique_rm_dir );
303  } else if (opt == RM_UNI_DIR) {
304  sprintf( to, "%s/%s", o.testdir, o.unique_rm_uni_dir );
305  }
306  VERBOSE(1,-1,"Entering unique_dir_access, set it to %s", to );
307 }
308 
309 static void create_remove_dirs (const char *path, bool create, uint64_t itemNum) {
310  char curr_item[MAX_PATHLEN];
311  const char *operation = create ? "create" : "remove";
312 
313  if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) {
314  VERBOSE(3,5,"dir: "LLU"", operation, itemNum);
315  }
316 
317  //create dirs
318  sprintf(curr_item, "%s/dir.%s%" PRIu64, path, create ? o.mk_name : o.rm_name, itemNum);
319  VERBOSE(3,5,"create_remove_items_helper (dirs %s): curr_item is '%s'", operation, curr_item);
320 
321  if (create) {
322  if (o.backend->mkdir(curr_item, DIRMODE, o.backend_options) == -1) {
323  EWARNF("unable to create directory %s", curr_item);
324  }
325  } else {
326  if (o.backend->rmdir(curr_item, o.backend_options) == -1) {
327  EWARNF("unable to remove directory %s", curr_item);
328  }
329  }
330 }
331 
332 static void remove_file (const char *path, uint64_t itemNum) {
333  char curr_item[MAX_PATHLEN];
334 
335  if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) {
336  VERBOSE(3,5,"remove file: "LLU"\n", itemNum);
337  }
338 
339  //remove files
340  sprintf(curr_item, "%s/file.%s"LLU"", path, o.rm_name, itemNum);
341  VERBOSE(3,5,"create_remove_items_helper (non-dirs remove): curr_item is '%s'", curr_item);
342  if (!(o.shared_file && rank != 0)) {
343  o.backend->delete (curr_item, o.backend_options);
344  }
345 }
346 
347 
/*
 * Create the file "<path>/file.<o.mk_name><itemNum>" and optionally write
 * (and read back) o.write_bytes bytes at offset 0.
 *   path    - parent directory
 *   itemNum - number of the item
 * Three creation modes are used:
 *   - o.make_node:          backend mknod, then return (no write, no close)
 *   - o.collective_creates: backend open with IOR_WRONLY | IOR_CREAT
 *   - otherwise:            backend create, with o.hints.filePerProc set to
 *                           !o.shared_file
 * Failures are reported as warnings.
 * NOTE(review): the statements numbered 399-400 and 411 are not visible in
 * this listing.
 */
348 static void create_file (const char *path, uint64_t itemNum) {
349  char curr_item[MAX_PATHLEN];
350  aiori_fd_t *aiori_fh = NULL;
351 
/* progress message every ITEM_COUNT items */
352  if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) {
353  VERBOSE(3,5,"create file: "LLU"", itemNum);
354  }
355 
356  //create files
357  sprintf(curr_item, "%s/file.%s"LLU"", path, o.mk_name, itemNum);
358  VERBOSE(3,5,"create_remove_items_helper (non-dirs create): curr_item is '%s'", curr_item);
359 
360  if (o.make_node) {
361  int ret;
362  VERBOSE(3,5,"create_remove_items_helper : mknod..." );
363 
364  ret = o.backend->mknod (curr_item);
365  if (ret != 0)
366  EWARNF("unable to mknode file %s", curr_item);
367 
/* mknod yields no handle, so there is nothing to write or close */
368  return;
369  } else if (o.collective_creates) {
370  VERBOSE(3,5,"create_remove_items_helper (collective): open..." );
371 
372  aiori_fh = o.backend->open (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options);
373  if (NULL == aiori_fh){
374  EWARNF("unable to open file %s", curr_item);
375  return;
376  }
377 
378  /*
379  * !collective_creates
380  */
381  } else {
382  o.hints.filePerProc = ! o.shared_file;
383  VERBOSE(3,5,"create_remove_items_helper (non-collective, shared): open..." );
384 
385  aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options);
386  if (NULL == aiori_fh){
387  EWARNF("unable to create file %s", curr_item);
388  return;
389  }
390  }
391 
392  if (o.write_bytes > 0) {
393  VERBOSE(3,5,"create_remove_items_helper: write..." );
394 
395  /*
396  * According to Bill Loewe, writes are only done one time, so they are always at
397  * offset 0 (zero).
398  */
401 
/* a short write is reported but the file is still verified/closed below */
402  if ( o.write_bytes != (size_t) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) {
403  EWARNF("unable to write file %s", curr_item);
404  }
405 
406  if (o.verify_write) {
/* overwrite the first byte before reading the data back into the same buffer */
407  o.write_buffer[0] = 42;
408  if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) {
409  EWARNF("unable to verify write (read/back) file %s", curr_item);
410  }
412  }
413  }
414 
415  VERBOSE(3,5,"create_remove_items_helper: close..." );
416  o.backend->close (aiori_fh, o.backend_options);
417 }
418 
419 /* helper for creating/removing items */
420 void create_remove_items_helper(const int dirs, const int create, const char *path,
421  uint64_t itemNum, rank_progress_t * progress) {
422 
423  VERBOSE(1,-1,"Entering create_remove_items_helper on %s", path );
424 
425  for (uint64_t i = progress->items_start; i < progress->items_per_dir ; ++i) {
426  if (!dirs) {
427  if (create) {
428  create_file (path, itemNum + i);
429  } else {
430  remove_file (path, itemNum + i);
431  }
432  } else {
433  create_remove_dirs (path, create, itemNum + i);
434  }
435  if(CHECK_STONE_WALL(progress)){
436  if(progress->items_done == 0){
437  progress->items_done = i + 1;
438  }
439  return;
440  }
441  }
442  progress->items_done = progress->items_per_dir;
443 }
444 
445 /* helper function to do collective operations */
446 void collective_helper(const int dirs, const int create, const char* path, uint64_t itemNum, rank_progress_t * progress) {
447  char curr_item[MAX_PATHLEN];
448 
449  VERBOSE(1,-1,"Entering collective_helper on %s", path );
450  for (uint64_t i = progress->items_start ; i < progress->items_per_dir ; ++i) {
451  if (dirs) {
452  create_remove_dirs (path, create, itemNum + i);
453  continue;
454  }
455 
456  sprintf(curr_item, "%s/file.%s"LLU"", path, create ? o.mk_name : o.rm_name, itemNum+i);
457  VERBOSE(3,5,"create file: %s", curr_item);
458 
459  if (create) {
460  aiori_fd_t *aiori_fh;
461 
462  //create files
463  aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options);
464  if (NULL == aiori_fh) {
465  EWARNF("unable to create file %s", curr_item);
466  }else{
467  o.backend->close (aiori_fh, o.backend_options);
468  }
469  } else if (!(o.shared_file && rank != 0)) {
470  //remove files
471  o.backend->delete (curr_item, o.backend_options);
472  }
473  if(CHECK_STONE_WALL(progress)){
474  progress->items_done = i + 1;
475  return;
476  }
477  }
478  progress->items_done = progress->items_per_dir;
479 }
480 
/*
 * Walks the directory tree and creates/removes the items of every visited
 * directory.
 *   currDepth  - depth of this call; 0 handles the items at `path` itself
 *   dirs       - non-zero: items are directories, zero: items are files
 *   create     - non-zero: create, zero: remove
 *   collective - non-zero: use collective_helper, else create_remove_items_helper
 *   path       - directory this call starts from
 *   dirNum     - number of the first branch directory at this level
 *   progress   - stonewall bookkeeping handed to the helpers
 * In leaf-only mode (o.leaf_only) items are only touched at depth o.depth.
 * NOTE(review): the statement numbered 534, which opens the recursive call
 * inside the branch loop, is not visible in this listing.
 */
481 /* recursive function to create and remove files/directories from the
482  directory tree */
483 void create_remove_items(int currDepth, const int dirs, const int create, const int collective, const char *path, uint64_t dirNum, rank_progress_t * progress) {
484  unsigned i;
485  char dir[MAX_PATHLEN];
486  char temp_path[MAX_PATHLEN];
487  unsigned long long currDir = dirNum;
488 
489 
490  VERBOSE(1,-1,"Entering create_remove_items on %s, currDepth = %d...", path, currDepth );
491 
492 
493  memset(dir, 0, MAX_PATHLEN);
494  strcpy(temp_path, path);
495 
496  VERBOSE(3,5,"create_remove_items (start): temp_path is '%s'", temp_path );
497 
498  if (currDepth == 0) {
499  /* create items at this depth */
500  if (! o.leaf_only || (o.depth == 0 && o.leaf_only)) {
501  if (collective) {
502  collective_helper(dirs, create, temp_path, 0, progress);
503  } else {
504  create_remove_items_helper(dirs, create, temp_path, 0, progress);
505  }
506  }
507 
/* descend into the first tree level; both counters are incremented first */
508  if (o.depth > 0) {
509  create_remove_items(++currDepth, dirs, create,
510  collective, temp_path, ++dirNum, progress);
511  }
512 
513  } else if (currDepth <= o.depth) {
514  /* iterate through the branches */
515  for (i=0; i< o.branch_factor; i++) {
516 
517  /* determine the current branch and append it to the path */
518  sprintf(dir, "%s.%llu/", o.base_tree_name, currDir);
519  strcat(temp_path, "/");
520  strcat(temp_path, dir);
521 
522  VERBOSE(3,5,"create_remove_items (for loop): temp_path is '%s'", temp_path );
523 
524  /* create the items in this branch */
525  if (! o.leaf_only || (o.leaf_only && currDepth == o.depth)) {
526  if (collective) {
527  collective_helper(dirs, create, temp_path, currDir* o.items_per_dir, progress);
528  } else {
529  create_remove_items_helper(dirs, create, temp_path, currDir*o.items_per_dir, progress);
530  }
531  }
532 
533  /* make the recursive call for the next level below this branch */
535  ++currDepth,
536  dirs,
537  create,
538  collective,
539  temp_path,
540  ( currDir * ( unsigned long long ) o.branch_factor ) + 1,
541  progress
542  );
/* undo the increment made for the recursive call */
543  currDepth--;
544 
545  /* reset the path */
546  strcpy(temp_path, path);
547  currDir++;
548  }
549  }
550 }
551 
552 /* stats all of the items created as specified by the input parameters */
553 void mdtest_stat(const int random, const int dirs, const long dir_iter, const char *path, rank_progress_t * progress) {
554  struct stat buf;
555  uint64_t parent_dir, item_num = 0;
556  char item[MAX_PATHLEN], temp[MAX_PATHLEN];
557 
558  VERBOSE(1,-1,"Entering mdtest_stat on %s", path );
559 
560  uint64_t stop_items = o.items;
561 
562  if( o.directory_loops != 1 ){
563  stop_items = o.items_per_dir;
564  }
565 
566  /* iterate over all of the item IDs */
567  for (uint64_t i = 0 ; i < stop_items ; ++i) {
568  /*
569  * It doesn't make sense to pass the address of the array because that would
570  * be like passing char **. Tested it on a Cray and it seems to work either
571  * way, but it seems that it is correct without the "&".
572  *
573  memset(&item, 0, MAX_PATHLEN);
574  */
575  memset(item, 0, MAX_PATHLEN);
576  memset(temp, 0, MAX_PATHLEN);
577 
578 
579  /* determine the item number to stat */
580  if (random) {
581  item_num = o.rand_array[i];
582  } else {
583  item_num = i;
584  }
585 
586  /* make adjustments if in leaf only mode*/
587  if (o.leaf_only) {
588  item_num += o.items_per_dir *
589  (o.num_dirs_in_tree - (uint64_t) pow( o.branch_factor, o.depth ));
590  }
591 
592  /* create name of file/dir to stat */
593  if (dirs) {
594  if ( (i % ITEM_COUNT == 0) && (i != 0)) {
595  VERBOSE(3,5,"stat dir: "LLU"", i);
596  }
597  sprintf(item, "dir.%s"LLU"", o.stat_name, item_num);
598  } else {
599  if ( (i % ITEM_COUNT == 0) && (i != 0)) {
600  VERBOSE(3,5,"stat file: "LLU"", i);
601  }
602  sprintf(item, "file.%s"LLU"", o.stat_name, item_num);
603  }
604 
605  /* determine the path to the file/dir to be stat'ed */
606  parent_dir = item_num / o.items_per_dir;
607 
608  if (parent_dir > 0) { //item is not in tree's root directory
609 
610  /* prepend parent directory to item's path */
611  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
612  strcpy(item, temp);
613 
614  //still not at the tree's root dir
615  while (parent_dir > o.branch_factor) {
616  parent_dir = (uint64_t) ((parent_dir-1) / o.branch_factor);
617  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
618  strcpy(item, temp);
619  }
620  }
621 
622  /* Now get item to have the full path */
623  sprintf( temp, "%s/%s", path, item );
624  strcpy( item, temp );
625 
626  /* below temp used to be hiername */
627  VERBOSE(3,5,"mdtest_stat %4s: %s", (dirs ? "dir" : "file"), item);
628  if (-1 == o.backend->stat (item, &buf, o.backend_options)) {
629  EWARNF("unable to stat %s %s", dirs ? "directory" : "file", item);
630  }
631  }
632 }
633 
/*
 * Opens, optionally reads and verifies, and closes every file selected by
 * the input parameters.
 *   random   - non-zero: take the item numbers from o.rand_array
 *   dirs     - when non-zero no file name is generated for the item
 *   dir_iter - not used by this function
 *   path     - root of the tree that holds the items
 * o.items items are visited, or o.items_per_dir when o.directory_loops is
 * not 1. Verification mismatches are counted in o.verification_error.
 * NOTE(review): the statement numbered 645 is not visible in this listing;
 * presumably it allocates read_buffer (it is released with
 * aligned_buffer_free at the end) -- confirm.
 */
634 /* reads all of the items created as specified by the input parameters */
635 void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
636  uint64_t parent_dir, item_num = 0;
637  char item[MAX_PATHLEN], temp[MAX_PATHLEN];
638  aiori_fd_t *aiori_fh;
639 
640  VERBOSE(1,-1,"Entering mdtest_read on %s", path );
641  char *read_buffer;
642 
643  /* allocate read buffer */
644  if (o.read_bytes > 0) {
646  memset(read_buffer, -1, o.read_bytes);
647  }
648 
649  uint64_t stop_items = o.items;
650 
651  if( o.directory_loops != 1 ){
652  stop_items = o.items_per_dir;
653  }
654 
655  /* iterate over all of the item IDs */
656  for (uint64_t i = 0 ; i < stop_items ; ++i) {
657  /*
658  * It doesn't make sense to pass the address of the array because that would
659  * be like passing char **. Tested it on a Cray and it seems to work either
660  * way, but it seems that it is correct without the "&".
661  *
662  * NTH: Both are technically correct in C.
663  *
664  * memset(&item, 0, MAX_PATHLEN);
665  */
666  memset(item, 0, MAX_PATHLEN);
667  memset(temp, 0, MAX_PATHLEN);
668 
669  /* determine the item number to read */
670  if (random) {
671  item_num = o.rand_array[i];
672  } else {
673  item_num = i;
674  }
675 
676  /* make adjustments if in leaf only mode*/
677  if (o.leaf_only) {
678  item_num += o.items_per_dir *
679  (o.num_dirs_in_tree - (uint64_t) pow (o.branch_factor, o.depth));
680  }
681 
682  /* create name of file to read */
683  if (!dirs) {
684  if ((i%ITEM_COUNT == 0) && (i != 0)) {
685  VERBOSE(3,5,"read file: "LLU"", i);
686  }
687  sprintf(item, "file.%s"LLU"", o.read_name, item_num);
688  }
689 
690  /* determine the path to the file/dir to be read'ed */
691  parent_dir = item_num / o.items_per_dir;
692 
693  if (parent_dir > 0) { //item is not in tree's root directory
694 
695  /* prepend parent directory to item's path */
696  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
697  strcpy(item, temp);
698 
699  /* still not at the tree's root dir */
700  while (parent_dir > o.branch_factor) {
701  parent_dir = (unsigned long long) ((parent_dir-1) / o.branch_factor);
702  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
703  strcpy(item, temp);
704  }
705  }
706 
707  /* Now get item to have the full path */
708  sprintf( temp, "%s/%s", path, item );
709  strcpy( item, temp );
710 
711  /* below temp used to be hiername */
712  VERBOSE(3,5,"mdtest_read file: %s", item);
713 
714  /* open file for reading */
715  aiori_fh = o.backend->open (item, O_RDONLY, o.backend_options);
716  if (NULL == aiori_fh) {
717  EWARNF("unable to open file %s", item);
718  continue;
719  }
720 
721  /* read file */
722  if (o.read_bytes > 0) {
/* overwrite the first byte so that stale buffer content cannot pass the check below */
723  read_buffer[0] = 42;
724  if (o.read_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) read_buffer, o.read_bytes, 0, o.backend_options)) {
725  EWARNF("unable to read file %s", item);
/* NOTE(review): this continue skips the close below, so the handle opened above is not closed on a failed read */
726  continue;
727  }
728  if(o.verify_read){
729  int pretend_rank = (2 * o.nstride + rank) % o.size;
730  if (o.shared_file) {
731  pretend_rank = rank;
732  }
733  o.verification_error += verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank);
734  }else if((o.read_bytes >= 8 && ((uint64_t*) read_buffer)[0] != item_num) || (o.read_bytes < 8 && read_buffer[0] != (char) item_num)){
735  // do a lightweight check, whose cost is negligible
736  o.verification_error++;
737  }
738  }
739 
740  /* close file */
741  o.backend->close (aiori_fh, o.backend_options);
742  }
743  if(o.read_bytes){
744  aligned_buffer_free(read_buffer, o.gpu_memory_flags);
745  }
746 }
747 
748 /* This method should be called by rank 0. It subsequently does all of
749  the creates and removes for the other ranks */
750 void collective_create_remove(const int create, const int dirs, const int ntasks, const char *path, rank_progress_t * progress) {
751  char temp[MAX_PATHLEN];
752 
753  VERBOSE(1,-1,"Entering collective_create_remove on %s", path );
754 
755  /* rank 0 does all of the creates and removes for all of the ranks */
756  for (int i = 0 ; i < ntasks ; ++i) {
757  memset(temp, 0, MAX_PATHLEN);
758 
759  strcpy(temp, o.testdir);
760  strcat(temp, "/");
761 
762  /* set the base tree name appropriately */
763  if (o.unique_dir_per_task) {
764  sprintf(o.base_tree_name, "mdtest_tree.%d", i);
765  } else {
766  sprintf(o.base_tree_name, "mdtest_tree");
767  }
768 
769  /* Setup to do I/O to the appropriate test dir */
770  strcat(temp, o.base_tree_name);
771  strcat(temp, ".0");
772 
773  /* set all item names appropriately */
774  if (! o.shared_file) {
775  sprintf(o.mk_name, "mdtest.%d.", (i+(0*o.nstride))%ntasks);
776  sprintf(o.stat_name, "mdtest.%d.", (i+(1*o.nstride))%ntasks);
777  sprintf(o.read_name, "mdtest.%d.", (i+(2*o.nstride))%ntasks);
778  sprintf(o.rm_name, "mdtest.%d.", (i+(3*o.nstride))%ntasks);
779  }
780  if (o.unique_dir_per_task) {
781  VERBOSE(3,5,"i %d nstride %d ntasks %d", i, o.nstride, ntasks);
782  sprintf(o.unique_mk_dir, "%s/mdtest_tree.%d.0", o.testdir,
783  (i+(0*o.nstride))%ntasks);
784  sprintf(o.unique_chdir_dir, "%s/mdtest_tree.%d.0", o.testdir,
785  (i+(1*o.nstride))%ntasks);
786  sprintf(o.unique_stat_dir, "%s/mdtest_tree.%d.0", o.testdir,
787  (i+(2*o.nstride))%ntasks);
788  sprintf(o.unique_read_dir, "%s/mdtest_tree.%d.0", o.testdir,
789  (i+(3*o.nstride))%ntasks);
790  sprintf(o.unique_rm_dir, "%s/mdtest_tree.%d.0", o.testdir,
791  (i+(4*o.nstride))%ntasks);
792  sprintf(o.unique_rm_uni_dir, "%s", o.testdir);
793  }
794 
795  /* Now that everything is set up as it should be, do the create or remove */
796  VERBOSE(3,5,"collective_create_remove (create_remove_items): temp is '%s'", temp);
797 
798  create_remove_items(0, dirs, create, 1, temp, 0, progress);
799  }
800 
801  /* reset all of the item names */
802  if (o.unique_dir_per_task) {
803  sprintf(o.base_tree_name, "mdtest_tree.0");
804  } else {
805  sprintf(o.base_tree_name, "mdtest_tree");
806  }
807  if (! o.shared_file) {
808  sprintf(o.mk_name, "mdtest.%d.", (0+(0*o.nstride))%ntasks);
809  sprintf(o.stat_name, "mdtest.%d.", (0+(1*o.nstride))%ntasks);
810  sprintf(o.read_name, "mdtest.%d.", (0+(2*o.nstride))%ntasks);
811  sprintf(o.rm_name, "mdtest.%d.", (0+(3*o.nstride))%ntasks);
812  }
813  if (o.unique_dir_per_task) {
814  sprintf(o.unique_mk_dir, "%s/mdtest_tree.%d.0", o.testdir,
815  (0+(0*o.nstride))%ntasks);
816  sprintf(o.unique_chdir_dir, "%s/mdtest_tree.%d.0", o.testdir,
817  (0+(1*o.nstride))%ntasks);
818  sprintf(o.unique_stat_dir, "%s/mdtest_tree.%d.0", o.testdir,
819  (0+(2*o.nstride))%ntasks);
820  sprintf(o.unique_read_dir, "%s/mdtest_tree.%d.0", o.testdir,
821  (0+(3*o.nstride))%ntasks);
822  sprintf(o.unique_rm_dir, "%s/mdtest_tree.%d.0", o.testdir,
823  (0+(4*o.nstride))%ntasks);
824  sprintf(o.unique_rm_uni_dir, "%s", o.testdir);
825  }
826 }
827 
828 void rename_dir_test(const int dirs, const long dir_iter, const char *path, rank_progress_t * progress) {
829  uint64_t parent_dir, item_num = 0;
830  char item[MAX_PATHLEN], temp[MAX_PATHLEN];
831  char item_last[MAX_PATHLEN];
832 
833  if(o.backend->rename == NULL){
834  WARN("Backend doesn't support rename\n");
835  return;
836  }
837 
838  VERBOSE(1,-1,"Entering mdtest_rename on %s", path );
839 
840  uint64_t stop_items = o.items;
841 
842  if( o.directory_loops != 1 ){
843  stop_items = o.items_per_dir;
844  }
845 
846  if(stop_items == 1) return;
847 
848  /* iterate over all of the item IDs */
849  char first_item_name[MAX_PATHLEN];
850  for (uint64_t i = 0 ; i < stop_items; ++i) {
851  item_num = i;
852  /* make adjustments if in leaf only mode*/
853  if (o.leaf_only) {
854  item_num += o.items_per_dir * (o.num_dirs_in_tree - (uint64_t) pow( o.branch_factor, o.depth ));
855  }
856 
857  /* create name of file/dir to stat */
858  if (dirs) {
859  sprintf(item, "dir.%s"LLU"", o.stat_name, item_num);
860  } else {
861  sprintf(item, "file.%s"LLU"", o.stat_name, item_num);
862  }
863 
864  /* determine the path to the file/dir to be stat'ed */
865  parent_dir = item_num / o.items_per_dir;
866 
867  if (parent_dir > 0) { //item is not in tree's root directory
868  /* prepend parent directory to item's path */
869  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
870  strcpy(item, temp);
871 
872  //still not at the tree's root dir
873  while (parent_dir > o.branch_factor) {
874  parent_dir = (uint64_t) ((parent_dir-1) / o.branch_factor);
875  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
876  strcpy(item, temp);
877  }
878  }
879 
880  /* Now get item to have the full path */
881  sprintf( temp, "%s/%s", path, item );
882  strcpy( item, temp );
883 
884  VERBOSE(3,5,"mdtest_rename %4s: %s", (dirs ? "dir" : "file"), item);
885  if(i == 0){
886  sprintf(first_item_name, "%s-XX", item);
887  strcpy(item_last, first_item_name);
888  }else if(i == stop_items - 1){
889  strcpy(item, first_item_name);
890  }
891  if (-1 == o.backend->rename(item, item_last, o.backend_options)) {
892  EWARNF("unable to rename %s %s", dirs ? "directory" : "file", item);
893  }
894 
895  strcpy(item_last, item);
896  }
897 }
898 
899 static void updateResult(mdtest_results_t * res, mdtest_test_num_t test, uint64_t item_count, int t, double * times, double * tBefore){
900  res->time[test] = times[t] - times[t-1];
901  if(tBefore){
902  res->time_before_barrier[test] = tBefore[t] - times[t-1];
903  }else{
904  res->time_before_barrier[test] = res->time[test];
905  }
906  res->rate[test] = item_count/res->time[test];
907  res->rate_before_barrier[test] = item_count/res->time_before_barrier[test];
908  res->items[test] = item_count;
909  res->stonewall_last_item[test] = o.items;
910 }
911 
912 void directory_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) {
913  int size;
914  double t[6] = {0};
915  double tBefore[6] = {0};
916  char temp_path[MAX_PATHLEN];
917  mdtest_results_t * res = & o.summary_table[iteration];
918 
919  MPI_Comm_size(testComm, &size);
920 
921  VERBOSE(1,-1,"Entering directory_test on %s", path );
922 
923  tBefore[0] = GetTimeStamp();
924  MPI_Barrier(testComm);
925  t[0] = GetTimeStamp();
926 
927  /* create phase */
928  if(o.create_only) {
930  progress->items_done = 0;
931  progress->start_time = GetTimeStamp();
932  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
933  prep_testdir(iteration, dir_iter);
934  if (o.unique_dir_per_task) {
935  unique_dir_access(MK_UNI_DIR, temp_path);
936  if (! o.time_unique_dir_overhead) {
937  offset_timers(t, 0);
938  }
939  } else {
940  sprintf( temp_path, "%s/%s", o.testdir, path );
941  }
942 
943  VERBOSE(3,-1,"directory_test: create path is '%s'", temp_path );
944 
945  /* "touch" the files */
946  if (o.collective_creates) {
947  if (rank == 0) {
948  collective_create_remove(1, 1, ntasks, temp_path, progress);
949  }
950  } else {
951  /* create directories */
952  create_remove_items(0, 1, 1, 0, temp_path, 0, progress);
953  }
954  }
955  progress->stone_wall_timer_seconds = 0;
956  }
957 
958  tBefore[1] = GetTimeStamp();
959  phase_end();
960  t[1] = GetTimeStamp();
961 
962  /* stat phase */
963  if (o.stat_only) {
964  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
965  prep_testdir(iteration, dir_iter);
966  if (o.unique_dir_per_task) {
967  unique_dir_access(STAT_SUB_DIR, temp_path);
968  if (! o.time_unique_dir_overhead) {
969  offset_timers(t, 1);
970  }
971  } else {
972  sprintf( temp_path, "%s/%s", o.testdir, path );
973  }
974 
975  VERBOSE(3,5,"stat path is '%s'", temp_path );
976 
977  /* stat directories */
978  if (o.random_seed > 0) {
979  mdtest_stat(1, 1, dir_iter, temp_path, progress);
980  } else {
981  mdtest_stat(0, 1, dir_iter, temp_path, progress);
982  }
983  }
984  }
985  tBefore[2] = GetTimeStamp();
986  phase_end();
987  t[2] = GetTimeStamp();
988  if (o.rename_dirs && o.items > 1) { // moved close to execution
989  updateResult(res, MDTEST_DIR_RENAME_NUM, o.items, 4, t, tBefore);
990  }
991  /* read phase */
992  if (o.read_only) {
993  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
994  prep_testdir(iteration, dir_iter);
995  if (o.unique_dir_per_task) {
996  unique_dir_access(READ_SUB_DIR, temp_path);
997  if (! o.time_unique_dir_overhead) {
998  offset_timers(t, 2);
999  }
1000  } else {
1001  sprintf( temp_path, "%s/%s", o.testdir, path );
1002  }
1003 
1004  VERBOSE(3,5,"directory_test: read path is '%s'", temp_path );
1005 
1006  /* read directories */
1007  if (o.random_seed > 0) {
1008  ; /* N/A */
1009  } else {
1010  ; /* N/A */
1011  }
1012  }
1013  }
1014  tBefore[3] = GetTimeStamp();
1015  phase_end();
1016 
1017  t[3] = GetTimeStamp();
1018  if(o.rename_dirs){
1019  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1020  prep_testdir(iteration, dir_iter);
1021  if (o.unique_dir_per_task) {
1022  unique_dir_access(STAT_SUB_DIR, temp_path);
1023  if (! o.time_unique_dir_overhead) {
1024  offset_timers(t, 1);
1025  }
1026  } else {
1027  sprintf( temp_path, "%s/%s", o.testdir, path );
1028  }
1029 
1030  VERBOSE(3,5,"rename path is '%s'", temp_path );
1031 
1032  rename_dir_test(1, dir_iter, temp_path, progress);
1033  }
1034  }
1035  tBefore[4] = GetTimeStamp();
1036  phase_end();
1037 
1038  t[4] = GetTimeStamp();
1039  if (o.rename_dirs && o.items > 1) { // moved close to execution
1040  updateResult(res, MDTEST_DIR_RENAME_NUM, o.items, 4, t, tBefore);
1041  }
1042 
1043  if (o.remove_only) {
1044  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1045  prep_testdir(iteration, dir_iter);
1046  if (o.unique_dir_per_task) {
1047  unique_dir_access(RM_SUB_DIR, temp_path);
1048  if (!o.time_unique_dir_overhead) {
1049  offset_timers(t, 3);
1050  }
1051  } else {
1052  sprintf( temp_path, "%s/%s", o.testdir, path );
1053  }
1054 
1055  VERBOSE(3,5,"directory_test: remove directories path is '%s'", temp_path );
1056 
1057  /* remove directories */
1058  if (o.collective_creates) {
1059  if (rank == 0) {
1060  collective_create_remove(0, 1, ntasks, temp_path, progress);
1061  }
1062  } else {
1063  create_remove_items(0, 1, 0, 0, temp_path, 0, progress);
1064  }
1065  }
1066  }
1067 
1068  tBefore[5] = GetTimeStamp();
1069  phase_end();
1070  t[5] = GetTimeStamp();
1071 
1072  if (o.remove_only) {
1073  if (o.unique_dir_per_task) {
1074  unique_dir_access(RM_UNI_DIR, temp_path);
1075  } else {
1076  sprintf( temp_path, "%s/%s", o.testdir, path );
1077  }
1078 
1079  VERBOSE(3,5,"directory_test: remove unique directories path is '%s'\n", temp_path );
1080  }
1081 
1083  offset_timers(t, 5);
1084  }
1085 
1086  /* calculate times */
1087  if (o.create_only) {
1088  updateResult(res, MDTEST_DIR_CREATE_NUM, o.items, 1, t, tBefore);
1089  }
1090  if (o.stat_only) {
1091  updateResult(res, MDTEST_DIR_STAT_NUM, o.items, 2, t, tBefore);
1092  }
1093  if (o.read_only) {
1094  updateResult(res, MDTEST_DIR_READ_NUM, o.items, 3, t, tBefore);
1095  }
1096  if (o.remove_only) {
1097  updateResult(res, MDTEST_DIR_REMOVE_NUM, o.items, 5, t, tBefore);
1098  }
1099  VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[0]);
1100  VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], o.summary_table[iteration].rate[1]);
1101  VERBOSE(1,-1," Directory rename : %14.3f sec, %14.3f ops/sec", t[4] - t[3], o.summary_table[iteration].rate[MDTEST_DIR_RENAME_NUM]);
1102  VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", t[5] - t[4], o.summary_table[iteration].rate[4]);
1103 }
1104 
1105 /* Returns if the stonewall was hit
 *
 * Collective over testComm: the body issues one MPI_Allreduce and two
 * MPI_Reduce calls, so every rank of testComm has to enter it.
 *
 * iteration    - index into o.summary_table that receives the stonewall values
 * items_done   - item count reported by the calling rank
 * tstart       - not referenced by any statement visible in this copy
 * out_max_iter - receives the maximum of items_done over all ranks
 *
 * Return value: 1 when o.items differs from sum_accessed / o.size, else 0.
 *
 * NOTE(review): the embedded listing numbers jump from 1111 to 1113, so one
 * source line appears to be missing here - presumably the statement that
 * consumes tstart. Confirm against the upstream file.
 */
1106 int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, uint64_t * out_max_iter){
1107  int hit = 0;
1108  long long unsigned max_iter = 0;
1109 
1110  VERBOSE(1,1,"stonewall hit with %lld items", (long long) items_done );
/* Every rank learns the largest item count reached by any rank.
 * NOTE(review): both buffers are unsigned, the MPI datatype is the signed
 * MPI_LONG_LONG_INT - confirm the counts always stay below 2^63. */
1111  MPI_Allreduce(& items_done, & max_iter, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm);
1113  o.summary_table[iteration].stonewall_last_item[MDTEST_FILE_CREATE_NUM] = items_done;
1114  *out_max_iter = max_iter;
1115 
1116  // continue to the maximum...
/* Minimum and sum are reduced to rank 0 only; on all other ranks
 * min_accessed and sum_accessed keep their initial value 0. */
1117  long long min_accessed = 0;
1118  MPI_Reduce(& items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm);
1119  long long sum_accessed = 0;
1120  MPI_Reduce(& items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm);
1121  o.summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed;
1122  o.summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * o.size;
1123 
/* NOTE(review): on ranks other than 0 sum_accessed is still 0 at this point,
 * so there the test compares o.items against 0 - confirm this is intended. */
1124  if(o.items != (sum_accessed / o.size)){
1125  VERBOSE(0,-1, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / o.size);
1126  hit = 1;
1127  }
1128 
1129  return hit;
1130 }
1131 
/*
 * Create phase of the file test.
 *
 * For every directory loop the test directory is prepared with prep_testdir(),
 * the working path is written to temp_path (through unique_dir_access() when
 * o.unique_dir_per_task is set, otherwise "<o.testdir>/<path>") and the files
 * are created with create_remove_items(). With o.collective_creates rank 0
 * additionally calls collective_create_remove() first and all ranks then meet
 * in a barrier.
 *
 * iteration - forwarded to prep_testdir() and updateStoneWallIterations()
 * ntasks    - forwarded to collective_create_remove()
 * path      - path component appended to o.testdir in the shared-directory case
 * progress  - per-rank progress record, read and updated below
 * t         - timer array; t[0] is passed on as the stonewall start time and
 *             the array is handed to offset_timers()
 *
 * NOTE(review): the embedded listing numbers skip 1158, 1170 and 1174, so three
 * source lines are missing from this copy. As shown the braces do not balance;
 * line 1158 presumably opened the conditional that guards the stonewall
 * handling. Confirm against the upstream file before editing this function.
 */
1132 void file_test_create(const int iteration, const int ntasks, const char *path, rank_progress_t * progress, double *t){
1133  char temp_path[MAX_PATHLEN];
1134  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1135  prep_testdir(iteration, dir_iter);
1136 
1137  if (o.unique_dir_per_task) {
1138  unique_dir_access(MK_UNI_DIR, temp_path);
1139  VERBOSE(5,5,"operating on %s", temp_path);
1140  if (! o.time_unique_dir_overhead) {
1141  offset_timers(t, 0);
1142  }
1143  } else {
/* NOTE(review): unbounded sprintf into a MAX_PATHLEN buffer */
1144  sprintf( temp_path, "%s/%s", o.testdir, path );
1145  }
1146 
1147  VERBOSE(3,-1,"file_test: create path is '%s'", temp_path );
1148  /* "touch" the files */
1149  if (o.collective_creates) {
1150  if (rank == 0) {
1151  collective_create_remove(1, 0, ntasks, temp_path, progress);
1152  }
/* the other ranks wait here until rank 0 has finished */
1153  MPI_Barrier(testComm);
1154  }
1155 
1156  /* create files */
1157  create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
/* (listing line 1158 is absent from this copy) */
1159  // hit the stonewall
1160  uint64_t max_iter = 0;
/* items finished in this loop plus those of the directory loops before it */
1161  uint64_t items_done = progress->items_done + dir_iter * o.items_per_dir;
1162  int hit = updateStoneWallIterations(iteration, items_done, t[0], & max_iter);
/* the second create pass below starts at items_done and uses max_iter,
 * the maximum over all ranks, as items_per_dir */
1163  progress->items_start = items_done;
1164  progress->items_per_dir = max_iter;
1165  if (hit){
1166  progress->stone_wall_timer_seconds = 0;
1167  VERBOSE(1,1,"stonewall: %lld of %lld", (long long) progress->items_start, (long long) progress->items_per_dir);
1168  create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
1169  // now reset the values
/* (listing line 1170 is absent from this copy) */
1171  o.items = progress->items_done;
1172  }
1173  if (o.stoneWallingStatusFile){
/* (listing line 1174 is absent from this copy, the branch body is not visible) */
1175  }
1176  // reset stone wall timer to allow proper cleanup
1177  progress->stone_wall_timer_seconds = 0;
1178  // at the moment, stonewall can be done only with one directory_loop, so we can return here safely
1179  break;
1180  }
1181  }
1182 }
1183 
/*
 * Run the file phases (create, stat, read, remove) of one iteration and store
 * the timings in o.summary_table[iteration].
 *
 * Each phase is bracketed by two time stamps: tBefore[n] is taken before
 * phase_end() (before the initial MPI_Barrier for n == 0) and t[n] after it.
 * Both arrays are passed to updateResult() together with the phase index.
 *
 * iteration - index of the current iteration
 * ntasks    - forwarded to collective_create_remove() / file_test_create()
 * path      - path component appended to o.testdir when no unique directory
 *             per task is used
 * progress  - per-rank progress record shared by the phases
 *
 * NOTE(review): the embedded listing numbers skip 1199, 1207, 1323, 1328 and
 * 1347, so five source lines are missing from this copy and the braces do not
 * balance as shown. Confirm against the upstream file before editing.
 */
1184 void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) {
1185  int size;
1186  double t[5] = {0};
1187  double tBefore[5] = {0};
1188  char temp_path[MAX_PATHLEN];
1189  MPI_Comm_size(testComm, &size);
1190 
1191  VERBOSE(3,5,"Entering file_test on %s", path);
1192 
1193  tBefore[0] = GetTimeStamp();
1194  MPI_Barrier(testComm);
1195  t[0] = GetTimeStamp();
1196 
1197  /* create phase */
1198  if (o.create_only ) {
/* (listing line 1199 is absent from this copy) */
1200  progress->items_done = 0;
1201  progress->start_time = GetTimeStamp();
1202  file_test_create(iteration, ntasks, path, progress, t);
1203  }else{
/* no create phase in this run: the item count is taken from the
 * stonewalling status file when one is configured */
1204  if (o.stoneWallingStatusFile){
1205  int64_t expected_items;
1206  /* The number of items depends on the stonewalling file */
/* (listing line 1207 is absent from this copy; no visible statement assigns
 * expected_items before it is read below) */
1208  if(expected_items >= 0){
1209  if(o.directory_loops > 1){
1210  o.directory_loops = expected_items / o.items_per_dir;
1211  o.items = o.items_per_dir;
1212  }else{
1213  o.items = expected_items;
1214  progress->items_per_dir = o.items;
1215  }
1216  }
1217  if (rank == 0) {
1218  if(expected_items == -1){
1219  WARN("Could not read stonewall status file");
1220  }else {
1221  VERBOSE(1,1, "Read stonewall status; items: "LLU"\n", o.items);
1222  }
1223  }
1224  }
1225  }
1226 
1227  tBefore[1] = GetTimeStamp();
1228  phase_end();
1229  t[1] = GetTimeStamp();
1230 
1231  /* stat phase */
1232  if (o.stat_only ) {
1233  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1234  prep_testdir(iteration, dir_iter);
1235  if (o.unique_dir_per_task) {
1236  unique_dir_access(STAT_SUB_DIR, temp_path);
1237  if (!o.time_unique_dir_overhead) {
1238  offset_timers(t, 1);
1239  }
1240  } else {
1241  sprintf( temp_path, "%s/%s", o.testdir, path );
1242  }
1243 
1244  VERBOSE(3,5,"file_test: stat path is '%s'", temp_path );
1245 
1246  /* stat files */
/* first argument is 1 when a random seed is configured, otherwise 0 */
1247  mdtest_stat((o.random_seed > 0 ? 1 : 0), 0, dir_iter, temp_path, progress);
1248  }
1249  }
1250 
1251  tBefore[2] = GetTimeStamp();
1252  phase_end();
1253  t[2] = GetTimeStamp();
1254 
1255  /* read phase */
1256  if (o.read_only ) {
1257  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1258  prep_testdir(iteration, dir_iter);
1259  if (o.unique_dir_per_task) {
1260  unique_dir_access(READ_SUB_DIR, temp_path);
1261  if (! o.time_unique_dir_overhead) {
1262  offset_timers(t, 2);
1263  }
1264  } else {
1265  sprintf( temp_path, "%s/%s", o.testdir, path );
1266  }
1267 
1268  VERBOSE(3,5,"file_test: read path is '%s'", temp_path );
1269 
1270  /* read files */
1271  if (o.random_seed > 0) {
1272  mdtest_read(1,0, dir_iter, temp_path);
1273  } else {
1274  mdtest_read(0,0, dir_iter, temp_path);
1275  }
1276  }
1277  }
1278 
1279  tBefore[3] = GetTimeStamp();
1280  phase_end();
1281  t[3] = GetTimeStamp();
1282 
/* remove phase */
1283  if (o.remove_only) {
1284  progress->items_start = 0;
1285 
1286  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1287  prep_testdir(iteration, dir_iter);
1288  if (o.unique_dir_per_task) {
1289  unique_dir_access(RM_SUB_DIR, temp_path);
1290  if (! o.time_unique_dir_overhead) {
1291  offset_timers(t, 3);
1292  }
1293  } else {
1294  sprintf( temp_path, "%s/%s", o.testdir, path );
1295  }
1296 
1297  VERBOSE(3,5,"file_test: rm directories path is '%s'", temp_path );
1298 
1299  if (o.collective_creates) {
1300  if (rank == 0) {
1301  collective_create_remove(0, 0, ntasks, temp_path, progress);
1302  }
1303  } else {
/* NOTE(review): the message says "create" although this is the remove phase */
1304  VERBOSE(3,5,"gonna create %s", temp_path);
1305  create_remove_items(0, 0, 0, 0, temp_path, 0, progress);
1306  }
1307  }
1308  }
1309 
1310  tBefore[4] = GetTimeStamp();
1311  phase_end();
1312  t[4] = GetTimeStamp();
1313  if (o.remove_only) {
1314  if (o.unique_dir_per_task) {
1315  unique_dir_access(RM_UNI_DIR, temp_path);
1316  } else {
/* NOTE(review): copies path alone, without the o.testdir prefix used by the
 * other phases; temp_path is only logged afterwards */
1317  strcpy( temp_path, path );
1318  }
1319 
1320  VERBOSE(3,5,"file_test: rm unique directories path is '%s'", temp_path );
1321  }
1322 
/* (listing line 1323 is absent from this copy; it presumably opened the
 * conditional that the closing brace after offset_timers belongs to) */
1324  offset_timers(t, 4);
1325  }
1326 
1327  if(o.num_dirs_in_tree_calc){ /* this is temporary fix needed when using -n and -i together */
/* (listing line 1328 is absent from this copy, the branch body is not visible) */
1329  }
1330 
1331  mdtest_results_t * res = & o.summary_table[iteration];
1332  /* calculate times */
1333  if (o.create_only) {
1334  updateResult(res, MDTEST_FILE_CREATE_NUM, o.items, 1, t, tBefore);
1335  }
1336  if (o.stat_only) {
1337  updateResult(res, MDTEST_FILE_STAT_NUM, o.items, 2, t, tBefore);
1338  }
1339  if (o.read_only) {
1340  updateResult(res, MDTEST_FILE_READ_NUM, o.items, 3, t, tBefore);
1341  }
1342  if (o.remove_only) {
1343  updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items, 4, t, tBefore);
1344  }
1345 
/* NOTE(review): the rate[] indices 4..7 below are hard-coded while the
 * updateResult() calls above use MDTEST_FILE_*_NUM - confirm they agree */
1346  VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[4]);
/* (listing line 1347 is absent from this copy; it presumably opened the
 * conditional around the stonewall line) */
1348  VERBOSE(1,-1," File creation (stonewall): %14.3f sec, %14.3f ops/sec", o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM], o.summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]);
1349  }
1350  VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], o.summary_table[iteration].rate[5]);
1351  VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], o.summary_table[iteration].rate[6]);
1352  VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], o.summary_table[iteration].rate[7]);
1353 }
1354 
1355 char const * mdtest_test_name(int i){
1356  switch (i) {
1357  case MDTEST_DIR_CREATE_NUM: return "Directory creation";
1358  case MDTEST_DIR_STAT_NUM: return "Directory stat";
1359  case MDTEST_DIR_READ_NUM: return "Directory read";
1360  case MDTEST_DIR_REMOVE_NUM: return "Directory removal";
1361  case MDTEST_DIR_RENAME_NUM: return "Directory rename";
1362  case MDTEST_FILE_CREATE_NUM: return "File creation";
1363  case MDTEST_FILE_STAT_NUM: return "File stat";
1364  case MDTEST_FILE_READ_NUM: return "File read";
1365  case MDTEST_FILE_REMOVE_NUM: return "File removal";
1366  case MDTEST_TREE_CREATE_NUM: return "Tree creation";
1367  case MDTEST_TREE_REMOVE_NUM: return "Tree removal";
1368  default: return "ERR INVALID TESTNAME :";
1369  }
1370  return NULL;
1371 }
1372 
1373 /*
1374  * Store the results of each process in a file
1375  */
1376 static void StoreRankInformation(int iterations, mdtest_results_t * agg){
1377  const size_t size = sizeof(mdtest_results_t) * iterations;
1378  if(rank == 0){
1379  FILE* fd = fopen(o.saveRankDetailsCSV, "a");
1380  if (fd == NULL){
1381  FAIL("Cannot open saveRankPerformanceDetails file for writes!");
1382  }
1383 
1384  mdtest_results_t * results = safeMalloc(size * o.size);
1385  MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, results, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1386 
1387  char buff[4096];
1388  char * cpos = buff;
1389  cpos += sprintf(cpos, "all,%llu", (long long unsigned) o.items);
1390  for(int e = 0; e < MDTEST_LAST_NUM; e++){
1391  if(agg->items[e] == 0){
1392  cpos += sprintf(cpos, ",,");
1393  }else{
1394  cpos += sprintf(cpos, ",%.10e,%.10e", agg->items[e] / agg->time[e], agg->time[e]);
1395  }
1396  }
1397  cpos += sprintf(cpos, "\n");
1398  int ret = fwrite(buff, cpos - buff, 1, fd);
1399 
1400  for(int iter = 0; iter < iterations; iter++){
1401  for(int i=0; i < o.size; i++){
1402  mdtest_results_t * cur = & results[i * iterations + iter];
1403  cpos = buff;
1404  cpos += sprintf(cpos, "%d,", i);
1405  for(int e = 0; e < MDTEST_TREE_CREATE_NUM; e++){
1406  if(cur->items[e] == 0){
1407  cpos += sprintf(cpos, ",,");
1408  }else{
1409  cpos += sprintf(cpos, ",%.10e,%.10e", cur->items[e] / cur->time_before_barrier[e], cur->time_before_barrier[e]);
1410  }
1411  }
1412  cpos += sprintf(cpos, "\n");
1413  ret = fwrite(buff, cpos - buff, 1, fd);
1414  if(ret != 1){
1415  WARN("Couln't append to saveRankPerformanceDetailsCSV file\n");
1416  break;
1417  }
1418  }
1419  }
1420  fclose(fd);
1421  free(results);
1422  }else{
1423  /* this is a hack for now assuming all datatypes in the structure are double */
1424  MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, NULL, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1425  }
1426 }
1427 
1428 static mdtest_results_t* get_result_index(mdtest_results_t* all_results, int proc, int iter, int interation_count){
1429  return & all_results[proc * interation_count + iter];
1430 }
1431 
1432 static void summarize_results_rank0(int iterations, mdtest_results_t * all_results, int print_time) {
1433  int start, stop;
1434  double min, max, mean, sd, sum, var, curr = 0;
1435  double imin, imax, imean, isum, icur; // calculation per iteration
1436  char const * access;
1437  /* if files only access, skip entries 0-3 (the dir tests) */
1438  if (o.files_only && ! o.dirs_only) {
1439  start = MDTEST_FILE_CREATE_NUM;
1440  } else {
1441  start = 0;
1442  }
1443 
1444  /* if directories only access, skip entries 4-7 (the file tests) */
1445  if (o.dirs_only && !o.files_only) {
1446  stop = MDTEST_FILE_CREATE_NUM;
1447  } else {
1448  stop = MDTEST_TREE_CREATE_NUM;
1449  }
1450 
1451  /* special case: if no directory or file tests, skip all */
1452  if (!o.dirs_only && !o.files_only) {
1453  start = stop = 0;
1454  }
1455 
1456  if(o.print_all_proc){
1457  fprintf(out_logfile, "\nPer process result (%s):\n", print_time ? "time" : "rate");
1458  for (int j = 0; j < iterations; j++) {
1459  fprintf(out_logfile, "iteration: %d\n", j);
1460  for (int i = start; i < MDTEST_LAST_NUM; i++) {
1461  access = mdtest_test_name(i);
1462  if(access == NULL){
1463  continue;
1464  }
1465  fprintf(out_logfile, "Test %s", access);
1466  for (int k=0; k < o.size; k++) {
1467  mdtest_results_t * cur = get_result_index(all_results, k, j, iterations);
1468  if(print_time){
1469  curr = cur->time_before_barrier[i];
1470  }else{
1471  curr = cur->rate_before_barrier[i];
1472  }
1473  fprintf(out_logfile, "%c%e", (k==0 ? ' ': ','), curr);
1474  }
1475  fprintf(out_logfile, "\n");
1476  }
1477  }
1478  }
1479 
1480  VERBOSE(0, -1, "\nSUMMARY %s: (of %d iterations)", print_time ? "time" : "rate", iterations);
1481  VERBOSE(0, -1,
1482  " Operation per Rank: Max Min Mean "
1483  " per Iteration: Max Min Mean Std Dev");
1484  VERBOSE(0, -1,
1485  " --------- --- --- ---- "
1486  " --- --- ---- -------");
1487  for (int i = start; i < stop; i++) {
1488  min = 1e308;
1489  max = 0;
1490  sum = var = 0;
1491  imin = 1e308;
1492  isum = imax = 0;
1493  double iter_result[iterations];
1494  for (int j = 0; j < iterations; j++) {
1495  icur = print_time ? 0 : 1e308;
1496  for (int k = 0; k < o.size; k++) {
1497  mdtest_results_t * cur = get_result_index(all_results, k, j, iterations);
1498  if(print_time){
1499  curr = cur->time_before_barrier[i];
1500  }else{
1501  curr = cur->rate_before_barrier[i];
1502  }
1503  if (min > curr) {
1504  min = curr;
1505  }
1506  if (max < curr) {
1507  max = curr;
1508  }
1509  sum += curr;
1510 
1511  if (print_time) {
1512  curr = cur->time[i];
1513  if (icur < curr) {
1514  icur = curr;
1515  }
1516  } else {
1517  curr = cur->rate[i];
1518  if (icur > curr) {
1519  icur = curr;
1520  }
1521  }
1522  }
1523 
1524  if (icur > imax) {
1525  imax = icur;
1526  }
1527  if (icur < imin) {
1528  imin = icur;
1529  }
1530  isum += icur;
1531  if(print_time){
1532  iter_result[j] = icur;
1533  }else{
1534  iter_result[j] = icur * o.size;
1535  }
1536  }
1537  mean = sum / iterations / o.size;
1538  imean = isum / iterations;
1539  if(! print_time){
1540  imax *= o.size;
1541  imin *= o.size;
1542  isum *= o.size;
1543  imean *= o.size;
1544  }
1545  for (int j = 0; j < iterations; j++) {
1546  var += (imean - iter_result[j]) * (imean - iter_result[j]);
1547  }
1548  var = var / (iterations - 1);
1549  sd = sqrt(var);
1550  access = mdtest_test_name(i);
1551  if (i != 2) {
1552  fprintf(out_logfile, " %-22s ", access);
1553  fprintf(out_logfile, "%14.3f ", max);
1554  fprintf(out_logfile, "%14.3f ", min);
1555  fprintf(out_logfile, "%14.3f ", mean);
1556  fprintf(out_logfile, "%18.3f ", imax);
1557  fprintf(out_logfile, "%14.3f ", imin);
1558  fprintf(out_logfile, "%14.3f ", imean);
1559  fprintf(out_logfile, "%14.3f\n", iterations == 1 ? 0 : sd);
1560  fflush(out_logfile);
1561  }
1562  }
1563 
1564  /* calculate tree create/remove rates, applies only to Rank 0 */
1565  for (int i = MDTEST_TREE_CREATE_NUM; i < MDTEST_LAST_NUM; i++) {
1566  min = imin = 1e308;
1567  max = imax = 0;
1568  sum = var = 0;
1569  for (int j = 0; j < iterations; j++) {
1570  if(print_time){
1571  curr = o.summary_table[j].time[i];
1572  }else{
1573  curr = o.summary_table[j].rate[i];
1574  }
1575  if (min > curr) {
1576  min = curr;
1577  }
1578  if (max < curr) {
1579  max = curr;
1580  }
1581  sum += curr;
1582  if(curr > imax){
1583  imax = curr;
1584  }
1585  if(curr < imin){
1586  imin = curr;
1587  }
1588  }
1589 
1590  mean = sum / (iterations);
1591 
1592  for (int j = 0; j < iterations; j++) {
1593  if(print_time){
1594  curr = o.summary_table[j].time[i];
1595  }else{
1596  curr = o.summary_table[j].rate[i];
1597  }
1598  var += (mean - curr)*(mean - curr);
1599  }
1600  var = var / (iterations - 1);
1601  sd = sqrt(var);
1602  access = mdtest_test_name(i);
1603  fprintf(out_logfile, " %-22s ", access);
1604  fprintf(out_logfile, "%14.3f ", max);
1605  fprintf(out_logfile, "%14.3f ", min);
1606  fprintf(out_logfile, "%14.3f ", mean);
1607  fprintf(out_logfile, "%18.3f ", imax);
1608  fprintf(out_logfile, "%14.3f ", imin);
1609  fprintf(out_logfile, "%14.3f ", sum / iterations);
1610  fprintf(out_logfile, "%14.3f\n", iterations == 1 ? 0 : sd);
1611  fflush(out_logfile);
1612  }
1613 }
1614 
1615 /*
1616  Output the results and summarize them into rank 0's o.summary_table
1617  */
1618 void summarize_results(int iterations, mdtest_results_t * results) {
1619  const size_t size = sizeof(mdtest_results_t) * iterations;
1620  mdtest_results_t * all_results = NULL;
1621  if(rank == 0){
1622  all_results = safeMalloc(size * o.size);
1623  memset(all_results, 0, size * o.size);
1624  MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, all_results, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1625  // calculate the aggregated values for all processes
1626  for(int j=0; j < iterations; j++){
1627  for(int i=0; i < MDTEST_LAST_NUM; i++){
1628  //double sum_rate = 0;
1629  double max_time = 0;
1630  double max_stonewall_time = 0;
1631  uint64_t sum_items = 0;
1632 
1633  // reduce over the processes
1634  for(int p=0; p < o.size; p++){
1635  mdtest_results_t * cur = get_result_index(all_results, p, j, iterations);
1636  //sum_rate += all_results[p + j*p]->rate[i];
1637  double t = cur->time[i];
1638  max_time = max_time < t ? t : max_time;
1639 
1640  sum_items += cur->items[i];
1641 
1642  t = cur->stonewall_time[i];
1643  max_stonewall_time = max_stonewall_time < t ? t : max_stonewall_time;
1644  }
1645 
1646  results[j].items[i] = sum_items;
1647  results[j].time[i] = max_time;
1648  results[j].stonewall_time[i] = max_stonewall_time;
1649  if(sum_items == 0){
1650  results[j].rate[i] = 0.0;
1651  }else{
1652  results[j].rate[i] = sum_items / max_time;
1653  }
1654 
1655  /* These results have already been reduced to Rank 0 */
1656  results[j].stonewall_item_sum[i] = o.summary_table[j].stonewall_item_sum[i];
1657  results[j].stonewall_item_min[i] = o.summary_table[j].stonewall_item_min[i];
1658  results[j].stonewall_time[i] = o.summary_table[j].stonewall_time[i];
1659  }
1660  }
1661  }else{
1662  MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, NULL, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1663  }
1664 
1665  /* share global results across processes as these are returned by the API */
1666  MPI_Bcast(results, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1667 
1668  /* update relevant result values with local values as these are returned by the API */
1669  for(int j=0; j < iterations; j++){
1670  for(int i=0; i < MDTEST_LAST_NUM; i++){
1671  results[j].time_before_barrier[i] = o.summary_table[j].time_before_barrier[i];
1672  results[j].stonewall_last_item[i] = o.summary_table[j].stonewall_last_item[i];
1673  }
1674  }
1675 
1676  if(rank != 0){
1677  return;
1678  }
1679 
1680  if (o.print_rate_and_time){
1681  summarize_results_rank0(iterations, all_results, 0);
1682  summarize_results_rank0(iterations, all_results, 1);
1683  }else{
1684  summarize_results_rank0(iterations, all_results, o.print_time);
1685  }
1686 
1687  free(all_results);
1688 }
1689 
1690 /* Checks to see if the test setup is valid. If it isn't, fail.
 *
 * Besides validating, the body also fills in defaults: when none of the phase
 * flags is set all of them (including rename_dirs) are enabled, when neither
 * dirs_only nor files_only is set both are enabled, and shared_file clears
 * dirs_only. If o.saveRankDetailsCSV is set, rank 0 truncates that file and
 * writes the CSV header.
 *
 * NOTE(review): the embedded listing numbers jump from 1690 to 1692; the line
 * carrying the function signature is missing from this copy.
 */
1692 
/* NOTE(review): as written, the condition is also true whenever barriers are
 * disabled, even without a stone wall timer - confirm the intended grouping */
1693  if (((o.stone_wall_timer_seconds > 0) && (o.branch_factor > 1)) || ! o.barriers) {
1694  FAIL( "Error, stone wall timer does only work with a branch factor <= 1 (current is %d) and with barriers\n", o.branch_factor);
1695  }
1696 
1697  if (!o.create_only && ! o.stat_only && ! o.read_only && !o.remove_only && !o.rename_dirs) {
1698  o.create_only = o.stat_only = o.read_only = o.remove_only = o.rename_dirs = 1;
1699  VERBOSE(1,-1,"main: Setting create/stat/read/remove_only to True" );
1700  }
1701 
1702  VERBOSE(1,-1,"Entering md_validate_tests..." );
1703 
1704  /* if dirs_only and files_only were both left unset, set both now */
1705  if (!o.dirs_only && !o.files_only) {
1706  o.dirs_only = o.files_only = 1;
1707  }
1708 
1709  /* if shared file 'S' access, no directory tests */
1710  if (o.shared_file) {
1711  o.dirs_only = 0;
1712  }
1713 
1714  /* check for no barriers with shifting processes for different phases.
1715  that is, one may not specify both -B and -N as it will introduce
1716  race conditions that may cause errors stat'ing or deleting after
1717  creates.
1718  */
/* NOTE(review): this and several checks below are evaluated on rank 0 only,
 * others on every rank */
1719  if (( o.barriers == 0 ) && ( o.nstride != 0 ) && ( rank == 0 )) {
1720  FAIL( "Possible race conditions will occur: -B not compatible with -N");
1721  }
1722 
1723  /* check for collective_creates incompatibilities */
1724  if (o.shared_file && o.collective_creates && rank == 0) {
1725  FAIL("-c not compatible with -S");
1726  }
1727  if (o.path_count > 1 && o.collective_creates && rank == 0) {
1728  FAIL("-c not compatible with multiple test directories");
1729  }
1730  if (o.collective_creates && !o.barriers) {
1731  FAIL("-c not compatible with -B");
1732  }
1733 
1734  /* check for shared file incompatibilities */
1735  if (o.unique_dir_per_task && o.shared_file && rank == 0) {
1736  FAIL("-u not compatible with -S");
1737  }
1738 
1739  /* check multiple directory paths and strided option */
1740  if (o.path_count > 1 && o.nstride > 0) {
1741  FAIL("cannot have multiple directory paths with -N strides between neighbor tasks");
1742  }
1743 
1744  /* check for shared directory and multiple directories incompatibility */
1745  if (o.path_count > 1 && o.unique_dir_per_task != 1) {
1746  FAIL("shared directory mode is not compatible with multiple directory paths");
1747  }
1748 
1749  /* check if more directory paths than ranks */
1750  if (o.path_count > o.size) {
1751  FAIL("cannot have more directory paths than MPI tasks");
1752  }
1753 
1754  /* check depth */
1755  if (o.depth < 0) {
1756  FAIL("depth must be greater than or equal to zero");
1757  }
1758  /* check branch_factor */
/* NOTE(review): the test rejects branch_factor < 1 while the message speaks
 * of "greater than or equal to zero" */
1759  if (o.branch_factor < 1 && o.depth > 0) {
1760  FAIL("branch factor must be greater than or equal to zero");
1761  }
1762  /* check for valid number of items */
1763  if ((o.items > 0) && (o.items_per_dir > 0)) {
1764  if(o.unique_dir_per_task){
1765  FAIL("only specify the number of items or the number of items per directory");
1766  }else if( o.items % o.items_per_dir != 0){
1767  FAIL("items must be a multiple of items per directory");
1768  }
1769  }
1770  /* check for using mknod */
1771  if (o.write_bytes > 0 && o.make_node) {
1772  FAIL("-k not compatible with -w");
1773  }
1774 
1775  if(o.verify_read && ! o.read_only)
1776  FAIL("Verify read requires that the read test is used");
1777 
1778  if(o.verify_read && o.read_bytes <= 0)
1779  FAIL("Verify read requires that read bytes is > 0");
1780 
1781  if(o.read_only && o.read_bytes <= 0)
1782  WARN("Read bytes is 0, thus, a read test will actually just open/close");
1783 
1784  if(o.create_only && o.read_only && o.read_bytes > o.write_bytes)
1785  FAIL("When writing and reading files, read bytes must be smaller than write bytes");
1786 
1787  if (rank == 0 && o.saveRankDetailsCSV){
1788  // check that the file is writeable, truncate it and add header
1789  FILE* fd = fopen(o.saveRankDetailsCSV, "w");
1790  if (fd == NULL){
1791  FAIL("Cannot open saveRankPerformanceDetails file for write!");
1792  }
1793  char * head = "rank,items";
/* NOTE(review): the result of this first fwrite is overwritten in the loop
 * below without being checked */
1794  int ret = fwrite(head, strlen(head), 1, fd);
/* one "rate-<name>,time-<name>" column pair per test index */
1795  for(int e = 0; e < MDTEST_LAST_NUM; e++){
1796  char buf[1024];
1797  const char * str = mdtest_test_name(e);
1798 
1799  sprintf(buf, ",rate-%s,time-%s", str, str);
1800  ret = fwrite(buf, strlen(buf), 1, fd);
1801  if(ret != 1){
1802  FAIL("Cannot write header to saveRankPerformanceDetails file");
1803  }
1804  }
1805  fwrite("\n", 1, 1, fd);
1806  fclose(fd);
1807  }
1808 }
1809 
1810 void show_file_system_size(char *file_system) {
1811  char real_path[MAX_PATHLEN];
1812  char file_system_unit_str[MAX_PATHLEN] = "GiB";
1813  char inode_unit_str[MAX_PATHLEN] = "Mi";
1814  int64_t file_system_unit_val = 1024 * 1024 * 1024;
1815  int64_t inode_unit_val = 1024 * 1024;
1816  int64_t total_file_system_size,
1817  free_file_system_size,
1818  total_inodes,
1819  free_inodes;
1820  double total_file_system_size_hr,
1821  used_file_system_percentage,
1822  used_inode_percentage;
1823  ior_aiori_statfs_t stat_buf;
1824  int ret;
1825 
1826  VERBOSE(1,-1,"Entering show_file_system_size on %s", file_system );
1827 
1828  ret = o.backend->statfs (file_system, &stat_buf, o.backend_options);
1829  if (0 != ret) {
1830  FAIL("unable to stat file system %s", file_system);
1831  }
1832 
1833  total_file_system_size = stat_buf.f_blocks * stat_buf.f_bsize;
1834  free_file_system_size = stat_buf.f_bfree * stat_buf.f_bsize;
1835 
1836  used_file_system_percentage = (1 - ((double)free_file_system_size
1837  / (double)total_file_system_size)) * 100;
1838  total_file_system_size_hr = (double)total_file_system_size
1839  / (double)file_system_unit_val;
1840  if (total_file_system_size_hr > 1024) {
1841  total_file_system_size_hr = total_file_system_size_hr / 1024;
1842  strcpy(file_system_unit_str, "TiB");
1843  }
1844 
1845  /* inodes */
1846  total_inodes = stat_buf.f_files;
1847  free_inodes = stat_buf.f_ffree;
1848 
1849  used_inode_percentage = (1 - ((double)free_inodes/(double)total_inodes))
1850  * 100;
1851 
1852  if (realpath(file_system, real_path) == NULL) {
1853  WARN("unable to use realpath() on file system");
1854  }
1855 
1856 
1857  /* show results */
1858  VERBOSE(0,-1,"Path: %s", real_path);
1859  VERBOSE(0,-1,"FS: %.1f %s Used FS: %2.1f%% Inodes: %.1f %s Used Inodes: %2.1f%%\n",
1860  total_file_system_size_hr, file_system_unit_str, used_file_system_percentage,
1861  (double)total_inodes / (double)inode_unit_val, inode_unit_str, used_inode_percentage);
1862 
1863  return;
1864 }
1865 
1867  int currDepth, char* path, int dirNum, rank_progress_t * progress) {
/*
 * Recursively create (create != 0) or remove (create == 0) the directory tree
 * below `path`.
 *
 * currDepth == 0: handles the root "<path>/<o.base_tree_name>.<dirNum>/" and
 *                 recurses once with depth and directory number incremented.
 * 0 < currDepth <= o.depth: handles o.branch_factor sub-directories named
 *                 "<o.base_tree_name>.<n>/" with n counting up from dirNum;
 *                 the children of directory n start at o.branch_factor*n + 1.
 * currDepth > o.depth: nothing is done, which ends the recursion.
 *
 * A directory is created before the recursion into it and removed after the
 * recursion has returned. mkdir/rmdir failures are only reported with EWARNF.
 * `progress` is passed on to the recursive calls and is not used otherwise in
 * the visible body.
 *
 * NOTE(review): the embedded listing numbers jump from 1864 to 1867; the first
 * line of the signature (name, return type and the `create` parameter) is
 * missing from this copy.
 */
1868 
1869  unsigned i;
1870  char dir[MAX_PATHLEN];
1871 
1872 
1873  VERBOSE(1,5,"Entering create_remove_directory_tree on %s, currDepth = %d...", path, currDepth );
1874 
1875  if (currDepth == 0) {
/* NOTE(review): unbounded sprintf into a MAX_PATHLEN buffer */
1876  sprintf(dir, "%s/%s.%d/", path, o.base_tree_name, dirNum);
1877 
1878  if (create) {
1879  VERBOSE(2,5,"Making directory '%s'", dir);
1880  if (-1 == o.backend->mkdir (dir, DIRMODE, o.backend_options)) {
1881  EWARNF("unable to create tree directory '%s'", dir);
1882  }
1883 #ifdef HAVE_LUSTRE_LUSTREAPI
1884  /* internal node for branching, can be non-striped for children */
1885  if (o.global_dir_layout && \
1886  llapi_dir_set_default_lmv_stripe(dir, -1, 0,
1887  LMV_HASH_TYPE_FNV_1A_64,
1888  NULL) == -1) {
1889  FAIL("Unable to reset to global default directory layout");
1890  }
1891 #endif /* HAVE_LUSTRE_LUSTREAPI */
1892  }
1893 
1894  create_remove_directory_tree(create, ++currDepth, dir, ++dirNum, progress);
1895 
1896  if (!create) {
1897  VERBOSE(2,5,"Remove directory '%s'", dir);
1898  if (-1 == o.backend->rmdir(dir, o.backend_options)) {
1899  EWARNF("Unable to remove directory %s", dir);
1900  }
1901  }
1902  } else if (currDepth <= o.depth) {
1903 
/* temp_path = parent path plus the name of the sub-directory in progress */
1904  char temp_path[MAX_PATHLEN];
1905  strcpy(temp_path, path);
1906  int currDir = dirNum;
1907 
1908  for (i=0; i < o.branch_factor; i++) {
1909  sprintf(dir, "%s.%d/", o.base_tree_name, currDir);
1910  strcat(temp_path, dir);
1911 
1912  if (create) {
1913  VERBOSE(2,5,"Making directory '%s'", temp_path);
1914  if (-1 == o.backend->mkdir(temp_path, DIRMODE, o.backend_options)) {
1915  EWARNF("Unable to create directory %s", temp_path);
1916  }
1917  }
1918 
/* currDepth is incremented for the call and restored right after it */
1919  create_remove_directory_tree(create, ++currDepth,
1920  temp_path, (o.branch_factor*currDir)+1, progress);
1921  currDepth--;
1922 
1923  if (!create) {
1924  VERBOSE(2,5,"Remove directory '%s'", temp_path);
1925  if (-1 == o.backend->rmdir(temp_path, o.backend_options)) {
1926  EWARNF("Unable to remove directory %s", temp_path);
1927  }
1928  }
1929 
/* reset temp_path to the parent path for the next sibling */
1930  strcpy(temp_path, path);
1931  currDir++;
1932  }
1933  }
1934 }
1935 
1936 static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t * summary_table){
1937  rank_progress_t progress_o;
1938  memset(& progress_o, 0 , sizeof(progress_o));
1939  progress_o.stone_wall_timer_seconds = 0;
1940  progress_o.items_per_dir = o.items_per_dir;
1941  rank_progress_t * progress = & progress_o;
1942 
1943  /* start and end times of directory tree create/remove */
1944  double startCreate, endCreate;
1945  int k;
1946 
1947  VERBOSE(1,-1,"main: * iteration %d *", j+1);
1948 
1949  if(o.create_only){
1950  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1951  if (rank >= o.path_count) {
1952  continue;
1953  }
1954  prep_testdir(j, dir_iter);
1955 
1956  VERBOSE(2,5,"main (for j loop): making o.testdir, '%s'", o.testdir );
1957  if (o.backend->access(o.testdir, F_OK, o.backend_options) != 0) {
1958  if (o.backend->mkdir(o.testdir, DIRMODE, o.backend_options) != 0) {
1959  EWARNF("Unable to create test directory %s", o.testdir);
1960  }
1961 #ifdef HAVE_LUSTRE_LUSTREAPI
1962  /* internal node for branching, can be non-striped for children */
1963  if (o.global_dir_layout && o.unique_dir_per_task && llapi_dir_set_default_lmv_stripe(o.testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) {
1964  EWARNF("Unable to reset to global default directory layout");
1965  }
1966 #endif /* HAVE_LUSTRE_LUSTREAPI */
1967  }
1968  }
1969 
1970  /* create hierarchical directory structure */
1971  MPI_Barrier(testComm);
1972 
1973  startCreate = GetTimeStamp();
1974  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1975  prep_testdir(j, dir_iter);
1976 
1977  if (o.unique_dir_per_task) {
1978  if (o.collective_creates && (rank == 0)) {
1979  /*
1980  * This is inside two loops, one of which already uses "i" and the other uses "j".
1981  * I don't know how this ever worked. I'm changing this loop to use "k".
1982  */
1983  for (k=0; k < o.size; k++) {
1984  sprintf(o.base_tree_name, "mdtest_tree.%d", k);
1985 
1986  VERBOSE(3,5,"main (create hierarchical directory loop-collective): Calling create_remove_directory_tree with '%s'", o.testdir );
1987  /*
1988  * Let's pass in the path to the directory we most recently made so that we can use
1989  * full paths in the other calls.
1990  */
1991  create_remove_directory_tree(1, 0, o.testdir, 0, progress);
1992  if(CHECK_STONE_WALL(progress)){
1993  o.size = k;
1994  break;
1995  }
1996  }
1997  } else if (! o.collective_creates) {
1998  VERBOSE(3,5,"main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '%s'", o.testdir );
1999  /*
2000  * Let's pass in the path to the directory we most recently made so that we can use
2001  * full paths in the other calls.
2002  */
2003  create_remove_directory_tree(1, 0, o.testdir, 0, progress);
2004  }
2005  } else {
2006  if (rank == 0) {
2007  VERBOSE(3,5,"main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '%s'", o.testdir );
2008 
2009  /*
2010  * Let's pass in the path to the directory we most recently made so that we can use
2011  * full paths in the other calls.
2012  */
2013  create_remove_directory_tree(1, 0 , o.testdir, 0, progress);
2014  }
2015  }
2016  }
2017  MPI_Barrier(testComm);
2018  endCreate = GetTimeStamp();
2019  summary_table->rate[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree / (endCreate - startCreate);
2020  summary_table->time[MDTEST_TREE_CREATE_NUM] = (endCreate - startCreate);
2021  summary_table->items[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree;
2023  VERBOSE(1,-1,"V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[MDTEST_TREE_CREATE_NUM]);
2024  }
2025 
2026  sprintf(o.unique_mk_dir, "%s.0", o.base_tree_name);
2027  sprintf(o.unique_chdir_dir, "%s.0", o.base_tree_name);
2028  sprintf(o.unique_stat_dir, "%s.0", o.base_tree_name);
2029  sprintf(o.unique_read_dir, "%s.0", o.base_tree_name);
2030  sprintf(o.unique_rm_dir, "%s.0", o.base_tree_name);
2031  o.unique_rm_uni_dir[0] = 0;
2032 
2033  if (! o.unique_dir_per_task) {
2034  VERBOSE(3,-1,"V-3: main: Using unique_mk_dir, '%s'", o.unique_mk_dir );
2035  }
2036 
2037  if (rank < i) {
2038  if (! o.shared_file) {
2039  sprintf(o.mk_name, "mdtest.%d.", (rank+(0*o.nstride))%i);
2040  sprintf(o.stat_name, "mdtest.%d.", (rank+(1*o.nstride))%i);
2041  sprintf(o.read_name, "mdtest.%d.", (rank+(2*o.nstride))%i);
2042  sprintf(o.rm_name, "mdtest.%d.", (rank+(3*o.nstride))%i);
2043  }
2044  if (o.unique_dir_per_task) {
2045  VERBOSE(3,5,"i %d nstride %d", i, o.nstride);
2046  sprintf(o.unique_mk_dir, "mdtest_tree.%d.0", (rank+(0*o.nstride))%i);
2047  sprintf(o.unique_chdir_dir, "mdtest_tree.%d.0", (rank+(1*o.nstride))%i);
2048  sprintf(o.unique_stat_dir, "mdtest_tree.%d.0", (rank+(2*o.nstride))%i);
2049  sprintf(o.unique_read_dir, "mdtest_tree.%d.0", (rank+(3*o.nstride))%i);
2050  sprintf(o.unique_rm_dir, "mdtest_tree.%d.0", (rank+(4*o.nstride))%i);
2051  o.unique_rm_uni_dir[0] = 0;
2052  VERBOSE(5,5,"mk_dir %s chdir %s stat_dir %s read_dir %s rm_dir %s\n", o.unique_mk_dir, o.unique_chdir_dir, o.unique_stat_dir, o.unique_read_dir, o.unique_rm_dir);
2053  }
2054 
2055  VERBOSE(3,-1,"V-3: main: Copied unique_mk_dir, '%s', to topdir", o.unique_mk_dir );
2056 
2057  if (o.dirs_only && ! o.shared_file) {
2058  if (o.pre_delay) {
2059  DelaySecs(o.pre_delay);
2060  }
2061  directory_test(j, i, o.unique_mk_dir, progress);
2062  }
2063  if (o.files_only) {
2064  if (o.pre_delay) {
2065  DelaySecs(o.pre_delay);
2066  }
2067  VERBOSE(3,5,"will file_test on %s", o.unique_mk_dir);
2068 
2069  file_test(j, i, o.unique_mk_dir, progress);
2070  }
2071  }
2072 
2073  /* remove directory structure */
2074  if (! o.unique_dir_per_task) {
2075  VERBOSE(3,-1,"main: Using o.testdir, '%s'", o.testdir );
2076  }
2077 
2078  MPI_Barrier(testComm);
2079  if (o.remove_only) {
2080  progress->items_start = 0;
2081  startCreate = GetTimeStamp();
2082  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
2083  prep_testdir(j, dir_iter);
2084  if (o.unique_dir_per_task) {
2085  if (o.collective_creates && (rank == 0)) {
2086  /*
2087  * This is inside two loops, one of which already uses "i" and the other uses "j".
2088  * I don't know how this ever worked. I'm changing this loop to use "k".
2089  */
2090  for (k=0; k < o.size; k++) {
2091  sprintf(o.base_tree_name, "mdtest_tree.%d", k);
2092 
2093  VERBOSE(3,-1,"main (remove hierarchical directory loop-collective): Calling create_remove_directory_tree with '%s'", o.testdir );
2094 
2095  /*
2096  * Let's pass in the path to the directory we most recently made so that we can use
2097  * full paths in the other calls.
2098  */
2099  create_remove_directory_tree(0, 0, o.testdir, 0, progress);
2100  if(CHECK_STONE_WALL(progress)){
2101  o.size = k;
2102  break;
2103  }
2104  }
2105  } else if (! o.collective_creates) {
2106  VERBOSE(3,-1,"main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with '%s'", o.testdir );
2107 
2108  /*
2109  * Let's pass in the path to the directory we most recently made so that we can use
2110  * full paths in the other calls.
2111  */
2112  create_remove_directory_tree(0, 0, o.testdir, 0, progress);
2113  }
2114  } else {
2115  if (rank == 0) {
2116  VERBOSE(3,-1,"V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '%s'", o.testdir );
2117 
2118  /*
2119  * Let's pass in the path to the directory we most recently made so that we can use
2120  * full paths in the other calls.
2121  */
2122  create_remove_directory_tree(0, 0 , o.testdir, 0, progress);
2123  }
2124  }
2125  }
2126 
2127  MPI_Barrier(testComm);
2128  endCreate = GetTimeStamp();
2129  summary_table->rate[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree / (endCreate - startCreate);
2130  summary_table->time[MDTEST_TREE_REMOVE_NUM] = endCreate - startCreate;
2131  summary_table->items[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree;
2133  VERBOSE(1,-1,"main Tree removal : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[MDTEST_TREE_REMOVE_NUM]);
2134  VERBOSE(2,-1,"main (at end of for j loop): Removing o.testdir of '%s'\n", o.testdir );
2135 
2136  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
2137  prep_testdir(j, dir_iter);
2138  if ((rank < o.path_count) && o.backend->access(o.testdir, F_OK, o.backend_options) == 0) {
2139  //if (( rank == 0 ) && access(o.testdir, F_OK) == 0) {
2140  if (o.backend->rmdir(o.testdir, o.backend_options) == -1) {
2141  EWARNF("unable to remove directory %s", o.testdir);
2142  }
2143  }
2144  }
2145  } else {
2146  summary_table->rate[MDTEST_TREE_REMOVE_NUM] = 0;
2147  }
2148 }
2149 
2151  o = (mdtest_options_t) {
2152  .barriers = 1,
2153  .branch_factor = 1,
2154  .random_buffer_offset = -1
2155  };
2156 }
2157 
2158 mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out) {
2159  testComm = world_com;
2160  out_logfile = world_out;
2161  out_resultfile = world_out;
2162 
2163  init_clock(world_com);
2164 
2165  mdtest_init_args();
2166  int i, j;
2167  int numNodes;
2168  int numTasksOnNode0 = 0;
2169  MPI_Group worldgroup, testgroup;
2170  struct {
2171  int first;
2172  int last;
2173  int stride;
2174  } range = {0, 0, 1};
2175  int first = 1;
2176  int last = 0;
2177  int stride = 1;
2178  int iterations = 1;
2179  int created_root_dir = 0; // was the root directory existing or newly created
2180 
2181  verbose = 0;
2182  int no_barriers = 0;
2183  char * path = "./out";
2184  int randomize = 0;
2185  char APIs[1024];
2186  char APIs_legacy[1024];
2187  aiori_supported_apis(APIs, APIs_legacy, MDTEST);
2188  char apiStr[1024];
2189  sprintf(apiStr, "API for I/O [%s]", APIs);
2190  memset(& o.hints, 0, sizeof(o.hints));
2191 
2192  option_help options [] = {
2193  {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & o.api},
2194  {'b', NULL, "branching factor of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.branch_factor},
2195  {'d', NULL, "the directory in which the tests will run", OPTION_OPTIONAL_ARGUMENT, 's', & path},
2196  {'B', NULL, "no barriers between phases", OPTION_OPTIONAL_ARGUMENT, 'd', & no_barriers},
2197  {'C', NULL, "only create files/dirs", OPTION_FLAG, 'd', & o.create_only},
2198  {'T', NULL, "only stat files/dirs", OPTION_FLAG, 'd', & o.stat_only},
2199  {'E', NULL, "only read files/dir", OPTION_FLAG, 'd', & o.read_only},
2200  {'r', NULL, "only remove files or directories left behind by previous runs", OPTION_FLAG, 'd', & o.remove_only},
2201  {'D', NULL, "perform test on directories only (no files)", OPTION_FLAG, 'd', & o.dirs_only},
2202  {'e', NULL, "bytes to read from each file", OPTION_OPTIONAL_ARGUMENT, 'l', & o.read_bytes},
2203  {'f', NULL, "first number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & first},
2204  {'F', NULL, "perform test on files only (no directories)", OPTION_FLAG, 'd', & o.files_only},
2205 #ifdef HAVE_LUSTRE_LUSTREAPI
2206  {'g', NULL, "global default directory layout for test subdirectories (deletes inherited striping layout)", OPTION_FLAG, 'd', & o.global_dir_layout},
2207 #endif /* HAVE_LUSTRE_LUSTREAPI */
2208  {'G', NULL, "Offset for the data in the read/write buffer, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_buffer_offset},
2209  {'i', NULL, "number of iterations the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & iterations},
2210  {'I', NULL, "number of items per directory in tree", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items_per_dir},
2211  {'k', NULL, "use mknod to create file", OPTION_FLAG, 'd', & o.make_node},
2212  {'l', NULL, "last number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & last},
2213  {'L', NULL, "files only at leaf level of tree", OPTION_FLAG, 'd', & o.leaf_only},
2214  {'n', NULL, "every process will creat/stat/read/remove # directories and files", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items},
2215  {'N', NULL, "stride # between tasks for file/dir operation (local=0; set to 1 to avoid client cache)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.nstride},
2216  {'p', NULL, "pre-iteration delay (in seconds)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.pre_delay},
2217  {'P', NULL, "print rate AND time", OPTION_FLAG, 'd', & o.print_rate_and_time},
2218  {0, "print-all-procs", "all processes print an excerpt of their results", OPTION_FLAG, 'd', & o.print_all_proc},
2219  {'R', NULL, "random access to files (only for stat)", OPTION_FLAG, 'd', & randomize},
2220  {0, "random-seed", "random seed for -R", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_seed},
2221  {'s', NULL, "stride between the number of tasks for each test", OPTION_OPTIONAL_ARGUMENT, 'd', & stride},
2222  {'S', NULL, "shared file access (file only, no directories)", OPTION_FLAG, 'd', & o.shared_file},
2223  {'c', NULL, "collective creates: task 0 does all creates", OPTION_FLAG, 'd', & o.collective_creates},
2224  {'t', NULL, "time unique working directory overhead", OPTION_FLAG, 'd', & o.time_unique_dir_overhead},
2225  {'u', NULL, "unique working directory for each task", OPTION_FLAG, 'd', & o.unique_dir_per_task},
2226  {'v', NULL, "verbosity (each instance of option increments by one)", OPTION_FLAG, 'd', & verbose},
2227  {'V', NULL, "verbosity value", OPTION_OPTIONAL_ARGUMENT, 'd', & verbose},
2228  {'w', NULL, "bytes to write to each file after it is created", OPTION_OPTIONAL_ARGUMENT, 'l', & o.write_bytes},
2229  {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase and files)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stone_wall_timer_seconds},
2230  {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & o.stoneWallingStatusFile},
2231  {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & o.verify_read},
2232  {0, "verify-write", "Verify the data after a write by reading it back immediately", OPTION_FLAG, 'd', & o.verify_write},
2233  {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & o.sync_file},
2234  {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync},
2235  {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth},
2236  {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time},
2237  {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags},
2238  {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors},
2239  {0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV},
2240  LAST_OPTION
2241  };
2243  option_parse(argc, argv, global_options);
2244  o.backend = aiori_select(o.api);
2245  if (o.backend == NULL)
2246  ERR("Unrecognized I/O API");
2247  if (! o.backend->enable_mdtest)
2248  ERR("Backend doesn't support MDTest");
2249  o.backend_options = airoi_update_module_options(o.backend, global_options);
2250 
2251  free(global_options->modules);
2252  free(global_options);
2253 
2254  MPI_Comm_rank(testComm, &rank);
2255  MPI_Comm_size(testComm, &o.size);
2256 
2257  if(o.backend->xfer_hints){
2258  o.backend->xfer_hints(& o.hints);
2259  }
2260  if(o.backend->check_params){
2262  }
2263  if (o.backend->initialize){
2265  }
2266 
2267  o.pid = getpid();
2268  o.uid = getuid();
2269 
2270  numNodes = GetNumNodes(testComm);
2271  numTasksOnNode0 = GetNumTasksOnNode0(testComm);
2272 
2273  char cmd_buffer[4096];
2274  strncpy(cmd_buffer, argv[0], 4096);
2275  for (i = 1; i < argc; i++) {
2276  snprintf(&cmd_buffer[strlen(cmd_buffer)], 4096-strlen(cmd_buffer), " '%s'", argv[i]);
2277  }
2278 
2279  VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp());
2280  VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, o.size, numNodes);
2281  VERBOSE(0,-1,"Command line used: %s", cmd_buffer);
2282 
2283  /* adjust special variables */
2284  o.barriers = ! no_barriers;
2285  if (path != NULL){
2286  parse_dirpath(path);
2287  }
2288  if( randomize > 0 ){
2289  if (o.random_seed == 0) {
2290  /* Ensure all procs have the same random number */
2291  o.random_seed = time(NULL);
2292  MPI_Barrier(testComm);
2293  MPI_Bcast(& o.random_seed, 1, MPI_INT, 0, testComm);
2294  }
2295  o.random_seed += rank;
2296  }
2297  if( o.random_buffer_offset == -1 ){
2298  o.random_buffer_offset = time(NULL);
2299  MPI_Bcast(& o.random_buffer_offset, 1, MPI_INT, 0, testComm);
2300  }
2301  if ((o.items > 0) && (o.items_per_dir > 0) && (! o.unique_dir_per_task)) {
2303  }else{
2304  o.directory_loops = 1;
2305  }
2307 
2308  // option_print_current(options);
2309  VERBOSE(1,-1, "api : %s", o.api);
2310  VERBOSE(1,-1, "barriers : %s", ( o.barriers ? "True" : "False" ));
2311  VERBOSE(1,-1, "collective_creates : %s", ( o.collective_creates ? "True" : "False" ));
2312  VERBOSE(1,-1, "create_only : %s", ( o.create_only ? "True" : "False" ));
2313  VERBOSE(1,-1, "dirpath(s):" );
2314  for ( i = 0; i < o.path_count; i++ ) {
2315  VERBOSE(1,-1, "\t%s", o.filenames[i] );
2316  }
2317  VERBOSE(1,-1, "dirs_only : %s", ( o.dirs_only ? "True" : "False" ));
2318  VERBOSE(1,-1, "read_bytes : "LLU"", o.read_bytes );
2319  VERBOSE(1,-1, "read_only : %s", ( o.read_only ? "True" : "False" ));
2320  VERBOSE(1,-1, "first : %d", first );
2321  VERBOSE(1,-1, "files_only : %s", ( o.files_only ? "True" : "False" ));
2322 #ifdef HAVE_LUSTRE_LUSTREAPI
2323  VERBOSE(1,-1, "global_dir_layout : %s", ( o.global_dir_layout ? "True" : "False" ));
2324 #endif /* HAVE_LUSTRE_LUSTREAPI */
2325  VERBOSE(1,-1, "iterations : %d", iterations );
2326  VERBOSE(1,-1, "items_per_dir : "LLU"", o.items_per_dir );
2327  VERBOSE(1,-1, "last : %d", last );
2328  VERBOSE(1,-1, "leaf_only : %s", ( o.leaf_only ? "True" : "False" ));
2329  VERBOSE(1,-1, "items : "LLU"", o.items );
2330  VERBOSE(1,-1, "nstride : %d", o.nstride );
2331  VERBOSE(1,-1, "pre_delay : %d", o.pre_delay );
2332  VERBOSE(1,-1, "remove_only : %s", ( o.leaf_only ? "True" : "False" ));
2333  VERBOSE(1,-1, "random_seed : %d", o.random_seed );
2334  VERBOSE(1,-1, "stride : %d", stride );
2335  VERBOSE(1,-1, "shared_file : %s", ( o.shared_file ? "True" : "False" ));
2336  VERBOSE(1,-1, "time_unique_dir_overhead: %s", ( o.time_unique_dir_overhead ? "True" : "False" ));
2337  VERBOSE(1,-1, "stone_wall_timer_seconds: %d", o.stone_wall_timer_seconds);
2338  VERBOSE(1,-1, "stat_only : %s", ( o.stat_only ? "True" : "False" ));
2339  VERBOSE(1,-1, "unique_dir_per_task : %s", ( o.unique_dir_per_task ? "True" : "False" ));
2340  VERBOSE(1,-1, "write_bytes : "LLU"", o.write_bytes );
2341  VERBOSE(1,-1, "sync_file : %s", ( o.sync_file ? "True" : "False" ));
2342  VERBOSE(1,-1, "call_sync : %s", ( o.call_sync ? "True" : "False" ));
2343  VERBOSE(1,-1, "depth : %d", o.depth );
2344  VERBOSE(1,-1, "make_node : %d", o.make_node );
2345 
2346  /* setup total number of items and number of items per dir */
2347  if (o.depth <= 0) {
2348  o.num_dirs_in_tree = 1;
2349  } else {
2350  if (o.branch_factor < 1) {
2351  o.num_dirs_in_tree = 1;
2352  } else if (o.branch_factor == 1) {
2353  o.num_dirs_in_tree = o.depth + 1;
2354  } else {
2355  o.num_dirs_in_tree = (pow(o.branch_factor, o.depth+1) - 1) / (o.branch_factor - 1);
2356  }
2357  }
2358  if (o.items_per_dir > 0) {
2359  if(o.items == 0){
2360  if (o.leaf_only) {
2361  o.items = o.items_per_dir * (uint64_t) pow(o.branch_factor, o.depth);
2362  } else {
2364  }
2365  }else{
2367  }
2368  } else {
2369  if (o.leaf_only) {
2370  if (o.branch_factor <= 1) {
2371  o.items_per_dir = o.items;
2372  } else {
2373  o.items_per_dir = (uint64_t) (o.items / pow(o.branch_factor, o.depth));
2374  o.items = o.items_per_dir * (uint64_t) pow(o.branch_factor, o.depth);
2375  }
2376  } else {
2379  }
2380  }
2381 
2382  /* initialize rand_array */
2383  if (o.random_seed > 0) {
2384  srand(o.random_seed);
2385 
2386  uint64_t s;
2387 
2388  o.rand_array = (uint64_t *) safeMalloc( o.items * sizeof(*o.rand_array));
2389 
2390  for (s=0; s < o.items; s++) {
2391  o.rand_array[s] = s;
2392  }
2393 
2394  /* shuffle list randomly */
2395  uint64_t n = o.items;
2396  while (n>1) {
2397  n--;
2398 
2399  /*
2400  * Generate a random number in the range 0 .. n
2401  *
2402  * rand() returns a number from 0 .. RAND_MAX. Divide that
2403  * by RAND_MAX and you get a floating point number in the
2404  * range 0 .. 1. Multiply that by n and you get a number in
2405  * the range 0 .. n.
2406  */
2407  uint64_t k = ( uint64_t ) ((( double )rand() / ( double )RAND_MAX ) * ( double )n );
2408 
2409  /*
2410  * Now move the nth element to the kth (randomly chosen)
2411  * element, and the kth element to the nth element.
2412  */
2413 
2414  uint64_t tmp = o.rand_array[k];
2415  o.rand_array[k] = o.rand_array[n];
2416  o.rand_array[n] = tmp;
2417  }
2418  }
2419 
2420  /* allocate and initialize write buffer with # */
2421  if (o.write_bytes > 0) {
2424  }
2425 
2426  /* setup directory path to work in */
2427  if (o.path_count == 0) { /* special case where no directory path provided with '-d' option */
2428  char *ret = getcwd(o.testdirpath, MAX_PATHLEN);
2429  if (ret == NULL) {
2430  FAIL("Unable to get current working directory on %s", o.testdirpath);
2431  }
2432  o.path_count = 1;
2433  } else {
2434  strcpy(o.testdirpath, o.filenames[rank % o.path_count]);
2435  }
2436 
2437  /* if directory does not exist, create it */
2438  if ((rank < o.path_count) && o.backend->access(o.testdirpath, F_OK, o.backend_options) != 0) {
2439  if (o.backend->mkdir(o.testdirpath, DIRMODE, o.backend_options) != 0) {
2440  EWARNF("Unable to create test directory path %s", o.testdirpath);
2441  }
2442  created_root_dir = 1;
2443  }
2444 
2445  /* display disk usage */
2446  VERBOSE(3,-1,"main (before display_freespace): o.testdirpath is '%s'", o.testdirpath );
2447 
2449  int tasksBlockMapping = QueryNodeMapping(testComm, true);
2450 
2451  /* set the shift to mimic IOR and shift by procs per node */
2452  if (o.nstride > 0) {
2453  if ( numNodes > 1 && tasksBlockMapping ) {
2454  /* the user set the stride presumably to get the consumer tasks on a different node than the producer tasks
2455  however, if the mpirun scheduler placed the tasks by-slot (in a contiguous block) then we need to adjust the shift by ppn */
2456  o.nstride *= numTasksOnNode0;
2457  }
2458  VERBOSE(0,5,"Shifting ranks by %d for each phase.", o.nstride);
2459  }
2460 
2461  VERBOSE(3,-1,"main (after display_freespace): o.testdirpath is '%s'", o.testdirpath );
2462 
2463  if (rank == 0) {
2464  if (o.random_seed > 0) {
2465  VERBOSE(0,-1,"random seed: %d", o.random_seed);
2466  }
2467  }
2468 
2469  if (gethostname(o.hostname, MAX_PATHLEN) == -1) {
2470  perror("gethostname");
2471  MPI_Abort(testComm, 2);
2472  }
2473 
2474  if (last == 0) {
2475  first = o.size;
2476  last = o.size;
2477  }
2478 
2479  /* setup summary table for recording results */
2480  o.summary_table = (mdtest_results_t *) safeMalloc(iterations * sizeof(mdtest_results_t));
2481  memset(o.summary_table, 0, iterations * sizeof(mdtest_results_t));
2482 
2483  if (o.unique_dir_per_task) {
2484  sprintf(o.base_tree_name, "mdtest_tree.%d", rank);
2485  } else {
2486  sprintf(o.base_tree_name, "mdtest_tree");
2487  }
2488 
2489  mdtest_results_t * aggregated_results = safeMalloc(iterations * sizeof(mdtest_results_t));
2490 
2491  /* default use shared directory */
2492  strcpy(o.mk_name, "mdtest.shared.");
2493  strcpy(o.stat_name, "mdtest.shared.");
2494  strcpy(o.read_name, "mdtest.shared.");
2495  strcpy(o.rm_name, "mdtest.shared.");
2496 
2497  MPI_Comm_group(testComm, &worldgroup);
2498 
2499  /* Run the tests */
2500  for (i = first; i <= last && i <= o.size; i += stride) {
2501  range.last = i - 1;
2502  MPI_Group_range_incl(worldgroup, 1, (void *)&range, &testgroup);
2503  MPI_Comm_create(testComm, testgroup, &testComm);
2504  if (rank == 0) {
2505  uint64_t items_all = i * o.items;
2506  if(o.num_dirs_in_tree_calc){
2507  items_all *= o.num_dirs_in_tree_calc;
2508  }
2509  if (o.files_only && o.dirs_only) {
2510  VERBOSE(0,-1,"%d tasks, "LLU" files/directories", i, items_all);
2511  } else if (o.files_only) {
2512  if (! o.shared_file) {
2513  VERBOSE(0,-1,"%d tasks, "LLU" files", i, items_all);
2514  }
2515  else {
2516  VERBOSE(0,-1,"%d tasks, 1 file", i);
2517  }
2518  } else if (o.dirs_only) {
2519  VERBOSE(0,-1,"%d tasks, "LLU" directories", i, items_all);
2520  }
2521  }
2522  VERBOSE(1,-1,"");
2523  VERBOSE(1,-1," Operation Duration Rate");
2524  VERBOSE(1,-1," --------- -------- ----");
2525 
2526  for (j = 0; j < iterations; j++) {
2527  // keep track of the current status for stonewalling
2528  mdtest_iteration(i, j, testgroup, & o.summary_table[j]);
2529  }
2530  summarize_results(iterations, aggregated_results);
2531  if(o.saveRankDetailsCSV){
2532  StoreRankInformation(iterations, aggregated_results);
2533  }
2534  if (i == 1 && stride > 1) {
2535  i = 0;
2536  }
2537 
2538  int total_errors = 0;
2539  MPI_Reduce(& o.verification_error, & total_errors, 1, MPI_INT, MPI_SUM, 0, testComm);
2540  if(rank == 0 && total_errors){
2541  VERBOSE(0, -1, "\nERROR: verifying the data on read (%lld errors)! Take the performance values with care!\n", total_errors);
2542  }
2543 
2544  MPI_Comm_free(&testComm);
2545  MPI_Group_free(&testgroup);
2546  }
2547 
2548  MPI_Group_free(&worldgroup);
2549  testComm = world_com;
2550 
2551  if (created_root_dir && o.remove_only && o.backend->rmdir(o.testdirpath, o.backend_options) != 0) {
2552  FAIL("Unable to remove test directory path %s", o.testdirpath);
2553  }
2554 
2555  VERBOSE(0,-1,"-- finished at %s --\n", PrintTimestamp());
2556 
2557  if (o.random_seed > 0) {
2558  free(o.rand_array);
2559  }
2560 
2561  if (o.backend->finalize){
2563  }
2564 
2565  if (o.write_bytes > 0) {
2567  }
2568  free(o.summary_table);
2569 
2570  return aggregated_results;
2571 }
char * stoneWallingStatusFile
Definition: mdtest.c:112
option_module * modules
Definition: option.h:36
Definition: aiori.h:120
double time_before_barrier[MDTEST_TREE_CREATE_NUM]
Definition: mdtest.h:28
uint64_t stonewall_item_sum[MDTEST_LAST_NUM]
Definition: mdtest.h:35
double rate[MDTEST_LAST_NUM]
Definition: mdtest.h:25
char read_name[MAX_PATHLEN]
Definition: mdtest.c:103
#define VERBOSE(root, any,...)
Definition: mdtest.c:206
uint64_t f_blocks
Definition: aiori.h:53
char stat_name[MAX_PATHLEN]
Definition: mdtest.c:102
int directory_loops
Definition: mdtest.c:144
mdtest_test_num_t
Definition: mdtest.h:8
uint64_t items
Definition: mdtest.c:141
uint64_t f_bfree
Definition: aiori.h:54
int print_all_proc
Definition: mdtest.c:147
char unique_rm_dir[MAX_PATHLEN]
Definition: mdtest.c:109
#define LAST_OPTION
Definition: option.h:39
uint64_t num_dirs_in_tree_calc
Definition: mdtest.c:143
char rm_name[MAX_PATHLEN]
Definition: mdtest.c:104
char unique_rm_uni_dir[MAX_PATHLEN]
Definition: mdtest.c:110
int(* mknod)(char *)
Definition: aiori.h:91
void * backend_options
Definition: mdtest.c:175
void * airoi_update_module_options(const ior_aiori_t *backend, options_all_t *opt)
Definition: aiori.c:93
void VerboseMessage(int root_level, int any_level, int line, char *format,...)
Definition: mdtest.c:207
FILE * out_logfile
Definition: utilities.c:72
int option_parse(int argc, char **argv, options_all_t *opt_all)
Definition: option.c:414
static void create_file(const char *path, uint64_t itemNum)
Definition: mdtest.c:348
void md_validate_tests()
Definition: mdtest.c:1691
void mdtest_init_args()
Definition: mdtest.c:2150
int time_unique_dir_overhead
Definition: mdtest.c:154
int stone_wall_timer_seconds
Definition: mdtest.c:157
aiori_xfer_hint_t hints
Definition: mdtest.c:176
char mk_name[MAX_PATHLEN]
Definition: mdtest.c:101
int64_t ReadStoneWallingIterations(char *const filename, MPI_Comm com)
Definition: utilities.c:791
mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *world_out)
Definition: mdtest.c:2158
static void updateResult(mdtest_results_t *res, mdtest_test_num_t test, uint64_t item_count, int t, double *times, double *tBefore)
Definition: mdtest.c:899
uint64_t stonewall_item_min[MDTEST_LAST_NUM]
Definition: mdtest.h:34
void parse_dirpath(char *dirpath_arg)
Definition: mdtest.c:237
#define DIRMODE
Definition: mdtest.c:86
void(* delete)(char *, aiori_mod_opt_t *module_options)
Definition: aiori.h:100
int(* statfs)(const char *, ior_aiori_statfs_t *, aiori_mod_opt_t *module_options)
Definition: aiori.h:104
int unique_dir_per_task
Definition: mdtest.c:153
uint64_t num_dirs_in_tree
Definition: mdtest.c:135
int(* mkdir)(const char *path, mode_t mode, aiori_mod_opt_t *module_options)
Definition: aiori.h:105
uint64_t items_start
Definition: mdtest.c:189
#define min(a, b)
Definition: md-workbench.c:26
int QueryNodeMapping(MPI_Comm comm, int print_nodemap)
Definition: utilities.c:287
uint64_t items_per_dir
Definition: mdtest.c:142
char testdirpath[MAX_PATHLEN]
Definition: mdtest.c:97
uint64_t f_ffree
Definition: aiori.h:57
#define FAIL(...)
Definition: aiori-debug.h:12
void offset_timers(double *t, int tcount)
Definition: mdtest.c:225
uint64_t * rand_array
Definition: mdtest.c:95
unsigned branch_factor
Definition: mdtest.c:126
static mdtest_options_t o
Definition: mdtest.c:180
char * api
Definition: mdtest.c:177
int(* access)(const char *path, int mode, aiori_mod_opt_t *module_options)
Definition: aiori.h:107
static void prep_testdir(int j, int dir_iter)
Definition: mdtest.c:266
void create_remove_items(int currDepth, const int dirs, const int create, const int collective, const char *path, uint64_t dirNum, rank_progress_t *progress)
Definition: mdtest.c:483
static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t *summary_table)
Definition: mdtest.c:1936
char const * mdtest_test_name(int i)
Definition: mdtest.c:1355
void mdtest_read(int random, int dirs, const long dir_iter, char *path)
Definition: mdtest.c:635
int stone_wall_timer_seconds
Definition: mdtest.c:187
void show_file_system_size(char *file_system)
Definition: mdtest.c:1810
#define WRITE
Definition: iordef.h:86
char unique_stat_dir[MAX_PATHLEN]
Definition: mdtest.c:107
char hostname[MAX_PATHLEN]
Definition: mdtest.c:100
int verify_write
Definition: mdtest.c:121
double start_time
Definition: mdtest.c:185
void rename_dir_test(const int dirs, const long dir_iter, const char *path, rank_progress_t *progress)
Definition: mdtest.c:828
int(* rmdir)(const char *path, aiori_mod_opt_t *module_options)
Definition: aiori.h:106
#define READ
Definition: iordef.h:88
void mdtest_stat(const int random, const int dirs, const long dir_iter, const char *path, rank_progress_t *progress)
Definition: mdtest.c:553
char * PrintTimestamp()
Definition: utilities.c:776
#define IOR_CREAT
Definition: aiori.h:32
const ior_aiori_t * aiori_select(const char *api)
Definition: aiori.c:237
char base_tree_name[MAX_PATHLEN]
Definition: mdtest.c:98
void collective_helper(const int dirs, const int create, const char *path, uint64_t itemNum, rank_progress_t *progress)
Definition: mdtest.c:446
void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t *progress)
Definition: mdtest.c:1184
double rate_before_barrier[MDTEST_LAST_NUM]
Definition: mdtest.h:26
double time[MDTEST_LAST_NUM]
Definition: mdtest.h:27
size_t read_bytes
Definition: mdtest.c:158
uint64_t f_files
Definition: aiori.h:56
MPI_Comm testComm
Definition: utilities.c:71
static option_help options[]
Definition: aiori-CEPHFS.c:54
int(* check_params)(aiori_mod_opt_t *)
Definition: aiori.h:113
uint64_t f_bsize
Definition: aiori.h:52
int verify_memory_pattern(int item, char *buffer, size_t bytes, int buff_offset, int pretendRank)
Definition: utilities.c:100
char * write_buffer
Definition: mdtest.c:111
uint64_t items[MDTEST_LAST_NUM]
Definition: mdtest.h:29
void init_clock(MPI_Comm com)
Definition: utilities.c:772
void(* initialize)(aiori_mod_opt_t *options)
Definition: aiori.h:109
void collective_create_remove(const int create, const int dirs, const int ntasks, const char *path, rank_progress_t *progress)
Definition: mdtest.c:750
#define WARN(MSG)
Definition: aiori-debug.h:32
static void summarize_results_rank0(int iterations, mdtest_results_t *all_results, int print_time)
Definition: mdtest.c:1432
char unique_mk_dir[MAX_PATHLEN]
Definition: mdtest.c:105
mdtest_results_t * summary_table
Definition: mdtest.c:169
int GetNumNodes(MPI_Comm comm)
Definition: utilities.c:331
static mdtest_results_t * get_result_index(mdtest_results_t *all_results, int proc, int iter, int interation_count)
Definition: mdtest.c:1428
int(* rename)(const char *oldpath, const char *newpath, aiori_mod_opt_t *module_options)
Definition: aiori.h:111
void(* xfer_hints)(aiori_xfer_hint_t *params)
Definition: aiori.h:96
void directory_test(const int iteration, const int ntasks, const char *path, rank_progress_t *progress)
Definition: mdtest.c:912
void create_remove_items_helper(const int dirs, const int create, const char *path, uint64_t itemNum, rank_progress_t *progress)
Definition: mdtest.c:420
void(* close)(aiori_fd_t *, aiori_mod_opt_t *module_options)
Definition: aiori.h:99
int(* stat)(const char *path, struct stat *buf, aiori_mod_opt_t *module_options)
Definition: aiori.h:108
FILE * out_resultfile
Definition: utilities.c:73
options_all_t * airoi_create_all_module_options(option_help *global_options)
Definition: aiori.c:107
int random_buffer_offset
Definition: mdtest.c:128
double GetTimeStamp(void)
Definition: utilities.c:731
void create_remove_directory_tree(int create, int currDepth, char *path, int dirNum, rank_progress_t *progress)
Definition: mdtest.c:1866
#define EWARNF(FORMAT,...)
Definition: aiori-debug.h:45
void aiori_supported_apis(char *APIs, char *APIs_legacy, enum bench_type type)
Definition: aiori.c:127
void generate_memory_pattern(char *buf, size_t bytes, int buff_offset, int rank)
Definition: utilities.c:86
uint64_t items_done
Definition: mdtest.c:190
aiori_fd_t *(* create)(char *, int iorflags, aiori_mod_opt_t *)
Definition: aiori.h:90
IOR_offset_t(* xfer)(int access, aiori_fd_t *, IOR_size_t *, IOR_offset_t size, IOR_offset_t offset, aiori_mod_opt_t *module_options)
Definition: aiori.h:97
static void remove_file(const char *path, uint64_t itemNum)
Definition: mdtest.c:332
int collective_creates
Definition: mdtest.c:155
void StoreStoneWallingIterations(char *const filename, int64_t count)
Definition: utilities.c:813
char testdir[MAX_PATHLEN]
Definition: mdtest.c:96
#define IOR_WRONLY
Definition: aiori.h:29
void file_test_create(const int iteration, const int ntasks, const char *path, rank_progress_t *progress, double *t)
Definition: mdtest.c:1132
static void StoreRankInformation(int iterations, mdtest_results_t *agg)
Definition: mdtest.c:1376
#define RELEASE_VERS
Definition: mdtest.c:87
static options_all_t * global_options
Definition: parse_options.c:41
char unique_chdir_dir[MAX_PATHLEN]
Definition: mdtest.c:106
int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, uint64_t *out_max_iter)
Definition: mdtest.c:1106
static void create_remove_dirs(const char *path, bool create, uint64_t itemNum)
Definition: mdtest.c:309
long long int IOR_size_t
Definition: iordef.h:110
size_t write_bytes
Definition: mdtest.c:156
uint64_t stonewall_last_item[MDTEST_LAST_NUM]
Definition: mdtest.h:33
void(* finalize)(aiori_mod_opt_t *options)
Definition: aiori.h:110
int gpu_memory_flags
Definition: mdtest.c:113
char * saveRankDetailsCSV
Definition: mdtest.c:167
bool enable_mdtest
Definition: aiori.h:115
uint64_t items_per_dir
Definition: mdtest.c:192
char ** filenames
Definition: mdtest.c:99
int verbose
Definition: utilities.c:70
char unique_read_dir[MAX_PATHLEN]
Definition: mdtest.c:108
static void phase_end()
Definition: mdtest.c:275
#define MAX_PATHLEN
Definition: utilities.h:31
int print_rate_and_time
Definition: mdtest.c:146
void ShowFileSystemSize(char *filename, const struct ior_aiori *backend, void *backend_options)
Definition: utilities.c:625
aiori_fd_t *(* open)(char *, int iorflags, aiori_mod_opt_t *)
Definition: aiori.h:92
double stonewall_time[MDTEST_LAST_NUM]
Definition: mdtest.h:32
int verification_error
Definition: mdtest.c:122
#define ERR(MSG)
Definition: aiori-debug.h:92
void DelaySecs(int delay)
Definition: utilities.c:828
#define TEST_DIR
Definition: mdtest.c:88
void(* sync)(aiori_mod_opt_t *)
Definition: aiori.h:114
int fsyncPerWrite
Definition: aiori.h:70
int aiori_warning_as_errors
Definition: ior.c:85
void aligned_buffer_free(void *buf, ior_memory_flags gpu)
Definition: utilities.c:973
int filePerProc
Definition: aiori.h:65
void summarize_results(int iterations, mdtest_results_t *results)
Definition: mdtest.c:1618
void unique_dir_access(int opt, char *to)
Definition: mdtest.c:293
int rank
Definition: utilities.c:68
#define ITEM_COUNT
Definition: mdtest.c:89
int GetNumTasksOnNode0(MPI_Comm comm)
Definition: utilities.c:406
const ior_aiori_t * backend
Definition: mdtest.c:174
#define CHECK_STONE_WALL(p)
Definition: mdtest.c:195
void update_write_memory_pattern(uint64_t item, char *buf, size_t bytes, int buff_offset, int rank)
Definition: utilities.c:78
void * safeMalloc(uint64_t size)
Definition: utilities.c:125
#define LLU
Definition: mdtest.c:91
#define NULL
Definition: iordef.h:70
void * aligned_buffer_alloc(size_t size, ior_memory_flags type)
Definition: utilities.c:924