00001
00098 #include <stdio.h>
00099 #include <stdarg.h>
00100 #include <time.h>
00101 #include <string.h>
00102
00103 #ifdef _CRAYMPP
00104 #define IPC_USE_SHMEM
00105 #include <intrinsics.h>
00106 #include <mpp/shmem.h>
00107
00108
00109 00110
00111 #ifdef _CRAYT3E
00112 #undef _CRAYT3E
00113 #include <mpp/sync_proto.h>
00114 #define _CRAYT3E
00115 #else
00116 #include <mpp/sync_proto.h>
00117 #endif
00118 #endif
00119
00120
00121 #ifndef IPC_USE_SHMEM
00122 #include <string.h>
00123 #endif
00124
00125 #include "ipc.h"
00126
00127
00128
00129
00130
00131
/** Per-PE output stagger: ipc_pe_msg_delay() waits this many seconds
 *  multiplied by the calling PE's number and by its scale argument. */
#define IPC_PE_MSG_DELAY 0.1

/** Size in bytes of the buffers that hold log file names. */
#define IPC_MAXFILENAMELENGTH 1024
00137
00138
00139
00140
00141
00142
00143
00144 int ipc_msg_level = IPC_STD;
00145 const char* ipc_msg_level_docstring =
00146 "Level of notification messages to display. Standard levels:\n\n"
00147
00148 " IPC_None \n"
00149 " IPC_Error \n"
00150 " IPC_Warning \n"
00151 " IPC_Caution \n"
00152 " IPC_Alert \n"
00153 " IPC_Summary \n"
00154 " IPC_Std \n"
00155 " IPC_Verbose \n"
00156 " IPC_Overwhelm\n\n"
00157
00158 "The last one is special in that it overrides the values of\n"
00159 "ipc_msg_level_forceall and ipc_msg_synch_level, forcing\n"
00160 "all PEs to present all messages and to synchronize while doing\n"
00161 "so.";
00162
00163
00164 int ipc_msg_forceall_level = IPC_NONE;
00165 const char* ipc_msg_forceall_level_docstring =
00166 "Messages with a debug level higher than this level (see ipc_msg_level\n"
00167 "for definitions) will be printed by all PEs who make the call, regardless\n"
00168 "of whether the call declares that only one should print. Useful for\n"
00169 "debugging on multiple PEs when some PEs are reporting errors or warnings\n"
00170 "in the totals yet no messages were displayed.";
00171
00172
00173 int ipc_msg_synch_level = IPC_ERROR;
00174 const char* ipc_msg_synch_level_docstring =
00175 "Attempt to print messages with a msg level higher than this level (see\n"
00176 "ipc_msg_level for definitions) in order of PE number, rather than\n"
00177 "interleaving the output from different PEs. The synchronization is\n"
00178 "currently accomplished only by having each PE delay for a short\n"
00179 "time proportional to its PE number. As a consequence, it slows\n"
00180 "down execution while not guaranteeing perfect order of output.\n"
00181 "However, it has the advantage of not failing catastrophically in\n"
00182 "error conditions where PEs become out of synch due to missed\n"
00183 "barriers, which is important for an error message handler.";
00184
00185
/** When nonzero, ipc_notify() calls ipc_abort() once the error count
 *  reaches this value. */
int ipc_exit_on_error_num = 0;

/** User-facing description of ipc_exit_on_error_num. */
const char *ipc_exit_on_error_num_docstring =
    "If nonzero, when this many errors have been reached, the program\n"
    "will exit automatically.";


/** Cap on the number of warning messages ipc_notify() prints. */
int ipc_max_warnings = 100;

/** User-facing description of ipc_max_warnings. */
const char *ipc_max_warnings_docstring =
    "Maximum number of warnings to be printed, per PE. If this limit is\n"
    "reached, further warnings will still increment the warning counter\n"
    "but no messages will be printed.";


/** Cap on the number of error messages ipc_notify() prints. */
int ipc_max_errors = 100;

/** User-facing description of ipc_max_errors. */
const char *ipc_max_errors_docstring =
    "Maximum number of error messages to be printed, per PE. If this limit\n"
    "is reached, further errors will still increment the error counter\n"
    "but no messages will be printed.";
00204
00205
00206
00207
00208
00209
00210
/** Stream written by ipc_log(); NULL until ipc_init_logfile() opens one. */
FILE *ipc_logfile = NULL;

/** Number of warning-level messages passed to ipc_notify() so far. */
int ipc_warnings = 0;

/** Number of error-level messages passed to ipc_notify() so far. */
int ipc_errors = 0;
00214
00215
00216
00217
00218
00219
00220
00221 ipc_status ipc_put_base(void *target_data, void *source_data,
00222 ipc_datatype datatype, size_t count, int process);
00223
00224 ipc_status ipc_get_base(void *target_data, void *source_data,
00225 ipc_datatype datatype, size_t count, int process);
00226
00227
00228
00229
00230
00231
00232
00233
00234
00236 int ipc_datatype_size(ipc_datatype datatype)
00237 {
00238 switch(datatype){
00239 case IPC_RAW8: return sizeof(i8 );
00240 case IPC_RAW32: return sizeof(i32 );
00241 #ifndef NO_I64
00242 case IPC_RAW64: return sizeof(i64 );
00243 #endif
00244 case IPC_CHAR: return sizeof(signed char );
00245 case IPC_SHORT: return sizeof(signed short );
00246 case IPC_INT: return sizeof(signed int );
00247 case IPC_LONG: return sizeof(signed long );
00248 #ifndef LONG_LONG_UNAVAILABLE
00249 case IPC_LONG_LONG: return sizeof(signed long long);
00250 #endif
00251 case IPC_UNSIGNED_CHAR: return sizeof(unsigned char );
00252 case IPC_UNSIGNED_SHORT: return sizeof(unsigned short );
00253 case IPC_UNSIGNED: return sizeof(unsigned int );
00254 case IPC_UNSIGNED_LONG: return sizeof(unsigned long );
00255 case IPC_FLOAT: return sizeof(float );
00256 case IPC_DOUBLE: return sizeof(double );
00257 #ifndef LONG_DOUBLE_UNAVAILABLE
00258 case IPC_LONG_DOUBLE: return sizeof(long double );
00259 #endif
00260 default:
00261 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_datatype %d unknown",datatype);
00262 return(sizeof(int));
00263 }
00264 }
00265
00266
00267
/**
 * Return the number of processes (PEs) taking part in the run.
 *
 * Builds without IPC_USE_SHMEM always report exactly one process;
 * SHMEM builds return _num_pes().
 */
int ipc_num_processes(void)
{
#ifndef IPC_USE_SHMEM
    return 1;
#else
    return _num_pes();
#endif
}
00280
00281
00282
/**
 * Return the number of the calling process (PE).
 *
 * Builds without IPC_USE_SHMEM always report process 0;
 * SHMEM builds return _my_pe().
 */
int ipc_my_process(void)
{
#ifndef IPC_USE_SHMEM
    return 0;
#else
    return _my_pe();
#endif
}
00292
00293
00294
00296 void ipc_barrier(void)
00297 {
00298 #ifdef IPC_USE_SHMEM
00299
00300 shmem_barrier_all();
00301 #else
00302 if (ipc_num_processes() != 1) {
00303 ipc_notify(IPC_ONE,IPC_ERROR,"ipc_barrier with multiple processes (%d) unimplemented",ipc_num_processes());
00304 exit(-1);
00305 }
00306 #endif
00307 }
00308
00309
00310
00312 void ipc_set_barrier(void)
00313 {
00314 #ifdef IPC_USE_SHMEM
00315
00316 #ifdef __cplusplus
00317 ipc_notify(IPC_ONE,IPC_ERROR,"ipc_set_barrier not implemented");
00318 #else
00319 set_barrier();
00320 #endif
00321
00322 #else
00323
00324 if (ipc_num_processes() != 1) {
00325 ipc_notify(IPC_ONE,IPC_ERROR,"ipc_set_barrier with multiple processes (%d) unimplemented",ipc_num_processes());
00326 exit(-1);
00327 }
00328
00329 #endif
00330 }
00331
00332
00333
00334
00335
00336
00342 #define IPC_PUT_RAW(bits) \
00343 ipc_status ipc_put ## bits(void *data, size_t count, int process) \
00344 { return ipc_put_base(data,data,IPC_RAW ## bits,count,process); }
00345 IPC_PUT_RAW(8)
00346 IPC_PUT_RAW(32)
00347 IPC_PUT_RAW(64)
00348 #undef IPC_PUT_RAW
00349
00350 #define IPC_GET_RAW(bits) \
00351 ipc_status ipc_get ## bits(void *data, size_t count, int process) \
00352 { return ipc_get_base(data,data,IPC_RAW ## bits,count,process); }
00353 IPC_GET_RAW(8)
00354 IPC_GET_RAW(32)
00355 IPC_GET_RAW(64)
00356 #undef IPC_GET_RAW
00357
00358
00359 #ifndef __cplusplus
00360 00361
00363 ipc_status ipc_put(void *data, ipc_datatype datatype, size_t count, int process)
00364 { return ipc_put_base(data,data,datatype,count,process); }
00365
00366 ipc_status ipc_get(void *data, ipc_datatype datatype, size_t count, int process)
00367 { return ipc_get_base(data,data,datatype,count,process); }
00368
00369 ipc_status ipc_put_to(void *target_data, void *source_data, ipc_datatype datatype, size_t count, int process)
00370 { return ipc_put_base(target_data,source_data,datatype,count,process); }
00371
00372 ipc_status ipc_get_to(void *target_data, void *source_data, ipc_datatype datatype, size_t count, int process)
00373 { return ipc_get_base(target_data,source_data,datatype,count,process); }
00375 #endif
00376
00377
00378 #ifdef __cplusplus
00379 00380 00381 00382 00383 00384 00385 00386 00387 00388
00390
00391 #ifdef NDEBUG
00392 #define IPC_CHECK_DATATYPE(name,ipc_type,data) (void)datatype
00393 #else
00394 #define IPC_CHECK_DATATYPE(name,ipc_type,data) \
00395 if (datatype != ipc_type && \
00396 !((datatype==IPC_RAW8 && sizeof(data)==1) || \
00397 (datatype==IPC_RAW32 && sizeof(data)==4) || \
00398 (datatype==IPC_RAW64 && sizeof(data)==8))) \
00399 ipc_notify(3,IPC_WARNING,"ipc_" #name " called with incorrect datatype (%d != %d, sizeof(data)=%d != %d)",\
00400 ipc_type,datatype,sizeof(data),ipc_datatype_size(ipc_type))
00401 #endif
00402
00403 #define IPC_CALL(name, c_type,ipc_type) \
00404 ipc_status ipc_ ## name (c_type *data, size_t count, int process) \
00405 { return ipc_ ## name ## _base(data,data,ipc_type,count,process); } \
00406 \
00407 ipc_status ipc_ ## name ## _to (c_type *target, c_type *source, size_t count, int process) \
00408 { return ipc_ ## name ## _base(target,source,ipc_type,count,process); } \
00409 \
00410 ipc_status ipc_ ## name \
00411 (c_type *data, ipc_datatype datatype, size_t count, int process) \
00412 { \
00413 IPC_CHECK_DATATYPE(name,ipc_type,data); \
00414 return ipc_ ## name ## _base(data,data,ipc_type,count,process); \
00415 } \
00416 \
00417 ipc_status ipc_ ## name ## _to \
00418 (c_type *target, c_type *source, ipc_datatype datatype, size_t count, int process) \
00419 { \
00420 IPC_CHECK_DATATYPE(name,ipc_type,source); \
00421 return ipc_ ## name ## _base(target,source,ipc_type,count,process); \
00422 }
00423
00424 IPC_CALL(put, signed short ,IPC_SHORT )
00425 IPC_CALL(put, signed int ,IPC_INT )
00426 IPC_CALL(put, signed long ,IPC_LONG )
00427 #ifndef LONG_LONG_UNAVAILABLE
00428 IPC_CALL(put, signed long long ,IPC_LONG_LONG )
00429 #endif
00430 IPC_CALL(put, unsigned char ,IPC_UNSIGNED_CHAR )
00431 IPC_CALL(put, unsigned short ,IPC_UNSIGNED_SHORT )
00432 IPC_CALL(put, unsigned int ,IPC_UNSIGNED )
00433 IPC_CALL(put, unsigned long ,IPC_UNSIGNED_LONG )
00434 IPC_CALL(put, float ,IPC_FLOAT )
00435 IPC_CALL(put, double ,IPC_DOUBLE )
00436 IPC_CALL(put, long double ,IPC_LONG_DOUBLE )
00437
00438 IPC_CALL(get, signed short ,IPC_SHORT )
00439 IPC_CALL(get, signed int ,IPC_INT )
00440 IPC_CALL(get, signed long ,IPC_LONG )
00441 #ifndef LONG_LONG_UNAVAILABLE
00442 IPC_CALL(get, signed long long ,IPC_LONG_LONG )
00443 #endif
00444 IPC_CALL(get, unsigned char ,IPC_UNSIGNED_CHAR )
00445 IPC_CALL(get, unsigned short ,IPC_UNSIGNED_SHORT )
00446 IPC_CALL(get, unsigned int ,IPC_UNSIGNED )
00447 IPC_CALL(get, unsigned long ,IPC_UNSIGNED_LONG )
00448 IPC_CALL(get, float ,IPC_FLOAT )
00449 IPC_CALL(get, double ,IPC_DOUBLE )
00450 IPC_CALL(get, long double ,IPC_LONG_DOUBLE )
00451 #undef IPC_CALL
00452
00453
00454 #endif
00455
00456
00457
00458
00459
00460
00461
00462 ipc_status ipc_put_base(void *target_data, void *source_data,
00463 ipc_datatype datatype, size_t count, int process)
00464 {
00465
00466 #ifdef IPC_USE_SHMEM
00467
00468 switch(ipc_datatype_size(datatype)){
00469 case 4:
00470 shmem_put32(target_data,source_data,count,process);
00471 break;
00472 case 8:
00473 shmem_put64(target_data,source_data,count,process);
00474 break;
00475 default:
00476 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_put called with unknown datatype: %d",datatype);
00477
00478 }
00479 #else
00480
00481 if (process != ipc_my_process()) {
00482 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_put to remote process unimplemented");
00483 exit(-1);
00484 }
00485 else if (target_data != source_data)
00486 memmove(target_data, source_data, count*ipc_datatype_size(datatype));
00487 #endif
00488
00489 return IPC_NO_ERROR;
00490 }
00491
00492
00493
00495 ipc_status ipc_get_base(void *target_data, void *source_data,
00496 ipc_datatype datatype, size_t count, int process)
00497 {
00498
00499 #ifdef IPC_USE_SHMEM
00500
00501 switch(ipc_datatype_size(datatype)){
00502 case 4:
00503 shmem_get32(target_data,source_data,count,process);
00504 break;
00505 case 8:
00506 shmem_get64(target_data,source_data,count,process);
00507 break;
00508 default:
00509 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_get called with unknown datatype: %d",datatype);
00510
00511 }
00512
00513 #else
00514
00515 if (process != ipc_my_process()) {
00516 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_get from remote process unimplemented");
00517 exit(-1);
00518 }
00519
00520 if (target_data != source_data)
00521 memmove(target_data, source_data, count*ipc_datatype_size(datatype));
00522 #endif
00523
00524 return IPC_NO_ERROR;
00525 }
00526
00527
00528
00529
00530 00531 00532 00533 00534
00535
00536
00548 void ipc_pe_msg_delay( double scale )
00549 {
00550 clock_t start=clock();
00551 while ((clock()-start)/(double)(CLOCKS_PER_SEC) < IPC_PE_MSG_DELAY*ipc_my_process()*scale);
00552 }
00553
00554
00555
/**
 * One-time library initialisation.
 *
 * Calls shmem_set_cache_inv() when compiled with _CRAYT3D defined;
 * otherwise does nothing.
 */
void ipc_init(void)
{
#if defined(_CRAYT3D)
    shmem_set_cache_inv();
#endif
}
00567
00568
00569
00576 void ipc_init_logfile(const char *basefilename)
00577 {
00578 static char oldname[IPC_MAXFILENAMELENGTH];
00579 char buf[IPC_MAXFILENAMELENGTH];
00580
00581 if (basefilename) {
00582 snprintf(buf,IPC_MAXFILENAMELENGTH,"%s.log",basefilename);
00583
00584 if (ipc_logfile && strncmp(basefilename,oldname,IPC_MAXFILENAMELENGTH)) {
00585 fclose(ipc_logfile);
00586 ipc_logfile=NULL;
00587 }
00588
00589 if (!ipc_logfile)
00590 ipc_logfile=fopen(buf,"w");
00591
00592 if (!ipc_logfile)
00593 ipc_notify(IPC_ALL,IPC_WARNING,"Runtime file %s could not be opened; log messages will be lost",buf);
00594
00595 snprintf(oldname,IPC_MAXFILENAMELENGTH,"%s",basefilename);
00596 }
00597 }
00598
00599
00600
00607 void ipc_log(int print_pe, const char *format, ...)
00608 {
00609 va_list args;
00610 va_start (args, format);
00611 if ((print_pe==IPC_ALL || print_pe==ipc_my_process()) && ipc_logfile)
00612 vfprintf (ipc_logfile,format, args);
00613 va_end (args);
00614 }
00615
00616
00617
/**
 * Intentionally empty.  ipc_notify() calls this each time it prints an
 * error-level message.
 */
void ipc_error(void)
{
}

/**
 * Intentionally empty.  ipc_notify() calls this each time it prints a
 * warning-level message.
 */
void ipc_warning(void)
{
}
00625
00626
00627
00629 int ipc_notify(int print_pe, int message_level, const char *format, ...)
00630 {
00631
00632 const int forcealltoprint = ipc_msg_forceall_level >= message_level || ipc_msg_level >=IPC_OVERWHELM;
00633 const int allpesprint = forcealltoprint || print_pe==IPC_ALL;
00634 const int thispeprints = allpesprint || print_pe==ipc_my_process();
00635
00636 const int iserror = IPC_ERROR >= message_level;
00637 const int iswarning = IPC_WARNING >= message_level && !iserror;
00638
00639 const int pasterrorlimit = iserror && ipc_errors++ >= ipc_max_errors;
00640 const int pastwarnlimit = iswarning && ipc_warnings++ >= ipc_max_warnings;
00641 const int pastlimit = pasterrorlimit || pastwarnlimit;
00642
00643 const int aterrorlimit = iserror && ipc_errors == ipc_max_errors;
00644 const int atwarnlimit = iswarning && ipc_warnings == ipc_max_warnings;
00645 const int atlimit = aterrorlimit || atwarnlimit;
00646
00647 const int printthismsg = thispeprints && ipc_msg_level >= message_level && !pastlimit;
00648 const int usedelay = ipc_num_processes() > 1 && (ipc_msg_synch_level >= message_level || ipc_msg_level>=IPC_OVERWHELM);
00649 const int labelpes = ipc_num_processes() > 1;
00650
00651 FILE* outputstream = (message_level <= IPC_ALERT ? stderr : stdout);
00652
00653 va_list args;
00654 va_start (args, format);
00655
00656
00657
00658 if (printthismsg) {
00659 if (usedelay) ipc_pe_msg_delay( 1.0 );
00660 if (labelpes) fprintf(outputstream,"PE = %3d: ",ipc_my_process());
00661 if (iserror) { fprintf(outputstream,"Error -- " ); ipc_error(); }
00662 if (iswarning){ fprintf(outputstream,"Warning -- "); ipc_warning(); }
00663
00664 vfprintf(outputstream, format, args);
00665 fprintf( outputstream,".\n");
00666
00667 if (atlimit)
00668 ipc_notify(print_pe,IPC_CAUTION,"Maximum number of %s reached (%d); the rest will be discarded",
00669 (iserror ? "errors" : "warnings"),
00670 (iserror ? ipc_errors : ipc_warnings));
00671 }
00672 va_end (args);
00673
00674
00675 if (iserror && ipc_exit_on_error_num && (ipc_errors >= ipc_exit_on_error_num))
00676 ipc_abort(IPC_EXIT_TOO_MANY_ERRORS,"Error limit reached",ipc_errors);
00677
00678
00679 return 0;
00680 }
00681
00682
00683
00689 void ipc_exit(int status,const char *format, ...)
00690 {
00691 va_list args;
00692 va_start (args, format);
00693
00694
00695 if (ipc_num_processes() > 1) {
00696 ipc_pe_msg_delay( 1.0 );
00697 fprintf(stderr,"PE = %3d: ",ipc_my_process());
00698 }
00699
00700 fprintf(stderr,"Exiting -- ");
00701
00702
00703 vfprintf(stderr, format, args);
00704 va_end (args);
00705
00706 fprintf(stderr,". There were %d error(s) and %d warning(s).\n",ipc_errors,ipc_warnings);
00707
00708
00709 if (ipc_my_process()==0 && ipc_logfile)
00710 fclose(ipc_logfile);
00711
00712
00713 ipc_barrier();
00714 exit(status);
00715 }
00716
00717
00718
00722 void ipc_abort(int status,const char *format, ...)
00723 {
00724 va_list args;
00725 va_start (args, format);
00726
00727
00728 fflush(NULL);
00729
00730
00731 if (ipc_num_processes() > 1) {
00732
00733 00734
00735 clock_t start=clock();
00736 while ((clock()-start)/(double)(CLOCKS_PER_SEC) < 5);
00737
00738 ipc_pe_msg_delay( 1.0 );
00739 fprintf(stderr,"PE = %3d: ",ipc_my_process());
00740 }
00741
00742 fprintf(stderr,"Aborting -- ");
00743
00744
00745 vfprintf(stderr, format, args);
00746 va_end (args);
00747
00748 fprintf(stderr,". There were %d error(s) and %d warning(s).\n",ipc_errors,ipc_warnings);
00749
00750
00751 if (ipc_my_process()==0 && ipc_logfile)
00752 fclose(ipc_logfile);
00753
00754 #ifdef _CRAYT3E
00755
00756 globalexit(status);
00757 #else
00758
00759 exit(status);
00760 #endif
00761 }
00762
00763