19 #ifdef HAVE_GETCPU_SYSCALL 22 # include <sys/syscall.h> 37 #include <sys/types.h> 41 #include <cuda_runtime.h> 47 # include <sys/statvfs.h> 48 # elif (defined __APPLE__) 49 # include <sys/param.h> 50 # include <sys/mount.h> 52 # include <sys/statfs.h> 54 # include <sys/time.h> 80 ((uint64_t*) buf)[0] = item;
87 uint64_t * buffi = (uint64_t*) buf;
89 const uint64_t ranki = ((uint64_t)(rank + 1) << 32) + buff_offset;
90 const size_t size = bytes / 8;
92 for(
size_t i=1; i < size; i++){
93 buffi[i] = (i + 1) + ranki;
95 for(
size_t i=(bytes/8)*8; i < bytes; i++){
103 if((bytes >= 8 && ((uint64_t*) buffer)[0] != item) || (bytes < 8 && buffer[0] != (
char) item)){
107 uint64_t * buffi = (uint64_t*) buffer;
109 uint64_t rank_mod = ((uint64_t)(pretendRank + 1) << 32) + buff_offset;
111 for(
size_t i=1; i < bytes/8; i++){
112 uint64_t exp = (i + 1) + rank_mod;
117 for(
size_t i=(bytes/8)*8; i < bytes; i++){
118 if(buffer[i] != (
char) i){
126 void * d = malloc(size);
128 ERR(
"Could not malloc an array");
137 va_start(args, format);
138 vsnprintf(msg, 4096, format, args);
140 fprintf(
out_logfile,
"%s: Process %d: FAILED in %s, %s\n",
154 rc = sscanf(size_str,
" %d %% ", &percent);
157 if (percent > 100 || percent < 0)
158 ERR(
"percentage must be between 0 and 100");
161 page_size = sysconf(_SC_PAGESIZE);
163 page_size = getpagesize();
166 #ifdef _SC_PHYS_PAGES 167 num_pages = sysconf(_SC_PHYS_PAGES);
169 ERR(
"sysconf(_SC_PHYS_PAGES) is not supported");
171 ERR(
"sysconf(_SC_PHYS_PAGES) is not supported");
173 mem = page_size * num_pages;
175 return mem / 100 * percent;
197 "Unknown argument for -l %s; generic assumed\n", options->
buffer_type);
206 ERR(
"Unrecognized I/O API");
223 WARN(
"cannot use O_DIRECT");
224 # define O_DIRECT 000000 226 # define O_DIRECT O_DIRECTIO 243 static time_t currentTime;
244 char* currentTimePtr;
246 if ((currentTime = time(
NULL)) == -1)
247 ERR(
"cannot get current time");
249 #if (_POSIX_C_SOURCE >= 1 || _XOPEN_SOURCE || _BSD_SOURCE || _SVID_SOURCE || _POSIX_SOURCE) 250 static char threadSafeBuff[32];
251 if ((currentTimePtr = ctime_r(¤tTime, threadSafeBuff)) ==
NULL) {
252 ERR(
"cannot read current time");
255 if ((currentTimePtr = ctime(¤tTime)) ==
NULL) {
256 ERR(
"cannot read current time");
260 return (currentTimePtr);
274 for (i = 0; i < ((size /
sizeof(
IOR_size_t)) / 4); i++) {
275 for (j = 0; j < 4; j++) {
290 MPI_Comm_size(comm, &num_ranks);
291 int *node_map = (
int*)malloc(
sizeof(
int) * num_ranks);
296 FAIL(
"gethostname()");
303 MPI_Bcast(roothost,
MAX_PATHLEN, MPI_CHAR, 0, comm);
306 int same_as_root = strcmp(roothost,localhost) == 0;
307 MPI_Gather( &same_as_root, 1, MPI_INT, node_map, 1, MPI_INT, 0, comm);
308 if ( print_nodemap &&
rank==0) {
310 for (
int i = 0; i < num_ranks; i++ ) {
317 ret = node_map[1] == 1;
318 MPI_Bcast(&ret, 1, MPI_INT, 0, comm);
332 if (getenv(
"IOR_FAKE_NODES")){
333 int numNodes = atoi(getenv(
"IOR_FAKE_NODES"));
335 MPI_Comm_rank(comm, & rank);
337 printf(
"Fake number of node: using %d\n", numNodes);
342 MPI_Comm shared_comm;
344 int local_result = 0;
347 MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm),
348 "MPI_Comm_split_type() error");
349 MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank),
"MPI_Comm_rank() error");
350 local_result = shared_rank == 0? 1 : 0;
351 MPI_CHECK(MPI_Allreduce(&local_result, &numNodes, 1, MPI_INT, MPI_SUM, comm),
352 "MPI_Allreduce() error");
353 MPI_CHECK(MPI_Comm_free(&shared_comm),
"MPI_Comm_free() error");
358 int numTasksOnNode0 = 0;
363 return ((numTasks - 1) / numTasksOnNode0) + 1;
371 MPI_CHECK(MPI_Comm_size(comm, &numTasks),
"cannot get number of tasks");
407 if (getenv(
"IOR_FAKE_TASK_PER_NODES")){
408 int tasksPerNode = atoi(getenv(
"IOR_FAKE_TASK_PER_NODES"));
410 MPI_Comm_rank(comm, & rank);
412 printf(
"Fake tasks per node: using %d\n", tasksPerNode);
417 MPI_Comm shared_comm;
419 int tasks_on_node_rank0 = 0;
420 int local_result = 0;
422 MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm),
423 "MPI_Comm_split_type() error");
424 MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank),
"MPI_Comm_rank() error");
425 if (
rank == 0 && shared_rank == 0) {
426 MPI_CHECK(MPI_Comm_size(shared_comm, &local_result),
"MPI_Comm_size() error");
428 MPI_CHECK(MPI_Allreduce(&local_result, &tasks_on_node_rank0, 1, MPI_INT, MPI_SUM, comm),
429 "MPI_Allreduce() error");
430 MPI_CHECK(MPI_Comm_free(&shared_comm),
"MPI_Comm_free() error");
432 return tasks_on_node_rank0;
444 MPI_Comm_size(comm, & size);
453 fprintf(
out_logfile,
"V-1: Entering count_tasks_per_node...\n" );
458 FAIL(
"gethostname()");
462 for (i = 0; i < size-1; i++) {
463 MPI_Recv(hostname,
MAX_PATHLEN, MPI_CHAR, MPI_ANY_SOURCE,
464 MPI_ANY_TAG, comm, &status);
465 if (strcmp(hostname, localhost) == 0) {
471 MPI_Send(localhost,
MAX_PATHLEN, MPI_CHAR, 0, 0, comm);
473 MPI_Bcast(&count, 1, MPI_INT, 0, comm);
483 void ExtractHint(
char *settingVal,
char *valueVal,
char *hintString)
485 char *settingPtr, *valuePtr, *tmpPtr2;
488 settingPtr = (
char *)strtok(hintString,
" =");
489 valuePtr = (
char *)strtok(
NULL,
" =\t\r\n");
491 tmpPtr2 = (
char *) strstr(settingPtr,
"IOR_HINT__MPI__");
492 if (settingPtr == tmpPtr2) {
493 settingPtr += strlen(
"IOR_HINT__MPI__");
495 tmpPtr2 = (
char *) strstr(hintString,
"IOR_HINT__GPFS__");
497 if (settingPtr == tmpPtr2) {
498 settingPtr += strlen(
"IOR_HINT__GPFS__");
500 fprintf(
out_logfile,
"WARNING: Unable to set unknown hint type (not implemented.)\n");
504 strcpy(settingVal, settingPtr);
505 strcpy(valueVal, valuePtr);
511 void SetHints(MPI_Info * mpiHints,
char *hintsFileName)
528 MPI_CHECK(MPI_Info_create(mpiHints),
"cannot create info object");
531 for (i = 0; environ[i] !=
NULL; i++) {
533 if (strncmp(environ[i],
"IOR_HINT", strlen(
"IOR_HINT")) == 0) {
534 strcpy(hintString, environ[i]);
536 MPI_CHECK(MPI_Info_set(*mpiHints, settingVal, valueVal),
537 "cannot set info object");
542 if (hintsFileName !=
NULL && strcmp(hintsFileName,
"") != 0) {
545 fd = fopen(hintsFileName,
"r");
547 WARN(
"cannot open hints file");
552 (hintString,
"IOR_HINT",
553 strlen(
"IOR_HINT")) == 0) {
557 (*mpiHints, settingVal,
559 "cannot set info object");
564 ERR(
"cannot close hints file");
574 char key[MPI_MAX_INFO_VAL];
575 char value[MPI_MAX_INFO_VAL];
578 MPI_CHECK(MPI_Info_get_nkeys(*mpiHints, &nkeys),
579 "cannot get info object keys");
581 for (i = 0; i < nkeys; i++) {
582 MPI_CHECK(MPI_Info_get_nthkey(*mpiHints, i, key),
583 "cannot get info object key");
584 MPI_CHECK(MPI_Info_get(*mpiHints, key, MPI_MAX_INFO_VAL - 1,
586 "cannot get info object value");
600 rc = sscanf(size_str,
"%lld%c", &size, &range);
602 switch ((
int)range) {
616 }
else if (rc == 0) {
629 WARN(
"Backend doesn't implement statfs");
632 int ret = backend->
statfs(filename, & stat, backend_options);
634 WARN(
"Backend returned error during statfs");
637 long long int totalFileSystemSize;
638 long long int freeFileSystemSize;
639 long long int totalInodes;
640 long long int freeInodes;
641 double totalFileSystemSizeHR;
642 double usedFileSystemPercentage;
643 double usedInodePercentage;
644 char *fileSystemUnitStr;
648 usedFileSystemPercentage = (1 - ((double)freeFileSystemSize / (
double)totalFileSystemSize)) * 100;
649 totalFileSystemSizeHR = (double)totalFileSystemSize / (
double)(1<<30);
654 usedInodePercentage = (1 - ((double)freeInodes / (
double)totalInodes)) * 100;
656 fileSystemUnitStr =
"GiB";
657 if (totalFileSystemSizeHR > 1024) {
658 totalFileSystemSizeHR = (double)totalFileSystemSize / (
double)((
long long)1<<40);
659 fileSystemUnitStr =
"TiB";
664 "FS", totalFileSystemSizeHR, fileSystemUnitStr,
665 usedFileSystemPercentage);
667 (
double)totalInodes / (
double)(1<<20),
668 usedInodePercentage);
672 fprintf(
out_resultfile,
"\"Capacity\": \"%.1f %s\", \"Used Capacity\": \"%2.1f%%\",",
673 totalFileSystemSizeHR, fileSystemUnitStr,
674 usedFileSystemPercentage);
675 fprintf(
out_resultfile,
"\"Inodes\": \"%.1f Mi\", \"Used Inodes\" : \"%2.1f%%\"\n",
676 (
double)totalInodes / (
double)(1<<20),
677 usedInodePercentage);
688 int Regex(
char *
string,
char *pattern)
695 regcomp(®Ex, pattern, REG_EXTENDED);
696 if (regexec(®Ex,
string, 1, ®Match, 0) == 0) {
709 int uname(
struct utsname *name)
711 DWORD nodeNameSize =
sizeof(name->nodename) - 1;
713 memset(name, 0,
sizeof(
struct utsname));
714 if (!GetComputerNameEx
715 (ComputerNameDnsFullyQualified, name->nodename, &nodeNameSize))
716 ERR(
"GetComputerNameEx failed");
718 strncpy(name->sysname,
"Windows",
sizeof(name->sysname) - 1);
720 strncpy(name->release,
"-",
sizeof(name->release) - 1);
721 strncpy(name->version,
"-",
sizeof(name->version) - 1);
722 strncpy(name->machine,
"-",
sizeof(name->machine) - 1);
734 struct timeval timer;
736 if (gettimeofday(&timer, (
struct timezone *)
NULL) != 0)
737 ERR(
"cannot use gettimeofday()");
738 timeVal = (double)timer.tv_sec + ((
double)timer.tv_usec / 1000000);
752 double roottimestamp;
754 MPI_CHECK(MPI_Barrier(com),
"barrier error");
756 MPI_CHECK(MPI_Reduce(×tamp, &min, 1, MPI_DOUBLE,
758 "cannot reduce tasks' times");
759 MPI_CHECK(MPI_Reduce(×tamp, &max, 1, MPI_DOUBLE,
761 "cannot reduce tasks' times");
765 MPI_CHECK(MPI_Bcast(&roottimestamp, 1, MPI_DOUBLE, 0, com),
766 "cannot broadcast root's time");
777 static char datestring[80];
778 time_t cur_timestamp;
781 fprintf(
out_logfile,
"V-1: Entering PrintTimestamp...\n" );
785 cur_timestamp = time(
NULL);
786 strftime(datestring, 80,
"%m/%d/%Y %T", localtime(&cur_timestamp));
794 MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com);
797 FILE * out = fopen(filename,
"r");
800 MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com);
803 int ret = fscanf(out,
"%lld", & data);
808 MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com);
817 FILE * out = fopen(filename,
"w");
819 FAIL(
"Cannot write to the stonewalling file!");
821 fprintf(out,
"%lld", (
long long) count);
829 if (
rank == 0 && delay > 0) {
831 fprintf(
out_logfile,
"delaying %d seconds . . .\n", delay);
845 char m_str[8], g_str[8], t_str[8];
851 strcpy(m_str,
"MiB");
852 strcpy(g_str,
"GiB");
853 strcpy(t_str,
"TiB");
865 snprintf(valueStr,
MAX_STR-1,
"%.2f %s",
866 (
double)((
double)value / t), t_str);
868 snprintf(valueStr,
MAX_STR-1,
"%d %s", (
int)(value / t), t_str);
870 }
else if (value >= g) {
872 snprintf(valueStr,
MAX_STR-1,
"%.2f %s",
873 (
double)((
double)value / g), g_str);
875 snprintf(valueStr,
MAX_STR-1,
"%d %s", (
int)(value / g), g_str);
877 }
else if (value >= m) {
879 snprintf(valueStr,
MAX_STR-1,
"%.2f %s",
880 (
double)((
double)value / m), m_str);
882 snprintf(valueStr,
MAX_STR-1,
"%d %s", (
int)(value / m), m_str);
884 }
else if (value >= 0) {
885 snprintf(valueStr,
MAX_STR-1,
"%d bytes", (
int)value);
887 snprintf(valueStr,
MAX_STR-1,
"-");
892 #if defined(HAVE_GETCPU_SYSCALL) 897 return syscall(SYS_getcpu, core, chip,
NULL);
899 #elif defined(HAVE_RDTSCP_ASM) 904 __asm__
volatile(
"rdtscp" :
"=a" (a),
"=d" (d),
"=c" (c));
905 *chip = (c & 0xFFF000)>>12;
907 return ((
unsigned long)a) | (((
unsigned long)d) << 32);;
912 #warning GetProcessorAndCore is implemented as a dummy 933 if (cudaMallocManaged((
void**) & buf, size, cudaMemAttachGlobal) != cudaSuccess){
934 ERR(
"Cannot allocate buffer on GPU");
938 ERR(
"No CUDA supported, cannot allocate on the GPU");
941 #ifdef HAVE_GPU_DIRECT 942 if (cudaMalloc((
void**) & buf, size) != cudaSuccess){
943 ERR(
"Cannot allocate buffer on GPU");
947 ERR(
"No GPUDirect supported, cannot allocate on the GPU");
952 long pageSize = sysconf(_SC_PAGESIZE);
954 size_t pageSize = getpagesize();
957 pageMask = pageSize - 1;
958 buf =
safeMalloc(size + pageSize +
sizeof(
void *));
960 tmp = buf +
sizeof(
char *);
961 aligned = tmp + pageSize - ((size_t) tmp & pageMask);
964 tmp = aligned -
sizeof(
void *);
967 return (
void *)aligned;
977 if (cudaFree(buf) != cudaSuccess){
978 WARN(
"Cannot free buffer on GPU");
982 ERR(
"No CUDA supported, cannot free on the GPU");
985 free(*(
void **)((
char *)buf -
sizeof(
char *)));
char * HumanReadable(IOR_offset_t value, int base)
int GetNumTasks(MPI_Comm comm)
unsigned long GetProcessorAndCore(int *chip, int *core)
void ShowHints(MPI_Info *mpiHints)
unsigned int incompressibleSeed
void * airoi_update_module_options(const ior_aiori_t *backend, options_all_t *opt)
char *(* get_version)(void)
int64_t ReadStoneWallingIterations(char *const filename, MPI_Comm com)
enum OutputFormat_t outputFormat
int(* statfs)(const char *, ior_aiori_statfs_t *, aiori_mod_opt_t *module_options)
int QueryNodeMapping(MPI_Comm comm, int print_nodemap)
int setTimeStampSignature
IOR_offset_t StringToBytes(char *size_str)
#define MPI_CHECK(MPI_STATUS, MSG)
const ior_aiori_t * aiori_select(const char *api)
static double TimeDeviation(MPI_Comm com)
static option_help options[]
int verify_memory_pattern(int item, char *buffer, size_t bytes, int buff_offset, int pretendRank)
void init_clock(MPI_Comm com)
char * CurrentTimeString(void)
void updateParsedOptions(IOR_param_t *options, options_all_t *global_options)
int GetNumNodes(MPI_Comm comm)
double GetTimeStamp(void)
void generate_memory_pattern(char *buf, size_t bytes, int buff_offset, int rank)
static const ior_aiori_t * backend
void StoreStoneWallingIterations(char *const filename, int64_t count)
enum PACKET_TYPE dataPacketType
static options_all_t * global_options
int64_t string_to_bytes(char *size_str)
void ExtractHint(char *settingVal, char *valueVal, char *hintString)
void ShowFileSystemSize(char *filename, const struct ior_aiori *backend, void *backend_options)
const struct ior_aiori * backend
void SetHints(MPI_Info *mpiHints, char *hintsFileName)
void set_o_direct_flag(int *flag)
void FailMessage(int rank, const char *location, char *format,...)
void DelaySecs(int delay)
int Regex(char *string, char *pattern)
size_t NodeMemoryStringToBytes(char *size_str)
void aligned_buffer_free(void *buf, ior_memory_flags gpu)
void DumpBuffer(void *buffer, size_t size)
long long int IOR_offset_t
int GetNumTasksOnNode0(MPI_Comm comm)
void update_write_memory_pattern(uint64_t item, char *buf, size_t bytes, int buff_offset, int rank)
void * safeMalloc(uint64_t size)
void * aligned_buffer_alloc(size_t size, ior_memory_flags type)