00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef AMROC_TIMING_H
00010 #define AMROC_TIMING_H
00011
#include <cassert>
#include <cstdio>
#include <iostream>
#include <vector>
00022
00023 #ifdef DAGH_NO_MPI
00024
00025 #include <sys/time.h>
00026 #include <time.h>
// Serial replacement for MPI_Wtime(), compiled only when DAGH_NO_MPI is
// defined. Returns the value reported by gettimeofday() as seconds, with the
// microsecond part folded in as a fraction.
static inline double MPI_Wtime() {
  struct timeval now;
  gettimeofday(&now, 0);
  const double whole_seconds = static_cast<double>(now.tv_sec);
  const double micro_seconds = static_cast<double>(now.tv_usec);
  return whole_seconds + micro_seconds * 0.000001;
}
00032
00033 #endif
00034
00035 #include "CommServer.h"
00036
00037 class Timing;
00038 extern Timing timing;
00039
// Instrumentation macros.
//
// With TIMING_AMR defined every macro forwards to the global `timing` object;
// otherwise every macro expands to nothing. Each expansion already ends in a
// semicolon, so a call site needs no semicolon of its own.
//
// Both branches must define exactly the same set of names, otherwise code
// that is instrumented with a macro fails to compile in one of the two
// configurations.
#ifdef TIMING_AMR

// Open a nested measurement / close it and charge the time to Timing::what.
#define START_WATCH timing.start();
#define END_WATCH(what) timing.stop(Timing::what);

#define END_WATCH_INITIALIZATION END_WATCH(INITIALIZATION)
#define END_WATCH_FLAGGING END_WATCH(FLAGGING)
#define END_WATCH_COMPOSING_WHOLE END_WATCH(COMPOSING_WHOLE)
#define END_WATCH_RECOMPOSING_WHOLE END_WATCH(RECOMPOSING_WHOLE)
#define END_WATCH_RECOMPOSING_INTERPOLATION END_WATCH(RECOMPOSING_INTERPOLATION)
#define END_WATCH_BOUNDARIES_SYNC END_WATCH(BOUNDARIES_SYNC)
#define END_WATCH_BOUNDARIES_INTERPOLATION END_WATCH(BOUNDARIES_INTERPOLATION)
#define END_WATCH_BOUNDARIES_EXTERNAL END_WATCH(BOUNDARIES_EXTERNAL)
// which==1 -> INTEGRATION_ESTIMATE, which==2 -> INTEGRATION_SHADOW,
// anything else -> INTEGRATION_MAIN. The argument is parenthesised so that an
// expression argument keeps its meaning; note that it may be evaluated twice.
#define END_WATCH_INTEGRATION(which) timing.stop(((which)==1)?Timing::INTEGRATION_ESTIMATE:((which)==2)?Timing::INTEGRATION_SHADOW:Timing::INTEGRATION_MAIN);
#define END_WATCH_SOURCE_INTEGRATION END_WATCH(SOURCE_INTEGRATION)
#define END_WATCH_FIXUP_WHOLE END_WATCH(FIXUP_WHOLE)
#define END_WATCH_FIXUP_SYNC END_WATCH(FIXUP_SYNC)
#define END_WATCH_CLUSTERING END_WATCH(CLUSTERING)
// Historical misspelling, kept so existing call sites continue to compile.
#define END_WATCH_OUPUT END_WATCH(OUTPUT)
// Correctly spelled alias of END_WATCH_OUPUT.
#define END_WATCH_OUTPUT END_WATCH(OUTPUT)
#define END_WATCH_EXAMINE END_WATCH(EXAMINE1)

// Reset the timer stack and start the outermost measurement / close it and
// charge the remaining time to Timing::MISC.
#define START_WATCH_WHOLE timing.start_timing();
#define END_WATCH_WHOLE timing.stop(Timing::MISC);

// Increment the counter Timing::what by 1, or by freq.
#define COUNT(what) timing.count(Timing::what);
#define COUNT_FREQ(what,freq) timing.count(Timing::what,freq);
#else

// Timing disabled: all macros expand to nothing.
#define START_WATCH_WHOLE
#define START_WATCH
#define END_WATCH(what)

#define END_WATCH_INITIALIZATION
#define END_WATCH_FLAGGING
#define END_WATCH_COMPOSING_WHOLE
#define END_WATCH_RECOMPOSING_WHOLE
#define END_WATCH_RECOMPOSING_INTERPOLATION
#define END_WATCH_BOUNDARIES_SYNC
#define END_WATCH_BOUNDARIES_INTERPOLATION
#define END_WATCH_BOUNDARIES_EXTERNAL
#define END_WATCH_INTEGRATION(which)
#define END_WATCH_SOURCE_INTEGRATION
#define END_WATCH_FIXUP_WHOLE
#define END_WATCH_FIXUP_SYNC
#define END_WATCH_CLUSTERING
#define END_WATCH_OUPUT
#define END_WATCH_OUTPUT
#define END_WATCH_EXAMINE
#define END_WATCH_WHOLE
#define COUNT(what)
#define COUNT_FREQ(what,freq)

#endif
00091
00099 class Timing {
00100 public:
00104 enum TimingAccounts {
00105 WHOLE,
00106
00107 MISC,
00108
00109
00110 INITIALIZATION, FLAGGING, CLUSTERING, GATHERCFL, OUTPUT,
00111 EXAMINE1, EXAMINE2, EXAMINE3, EXAMINE4,
00112
00113 PARTITION_INIT, PARTITION_CALC,
00114
00115 COMPOSING_WHOLE, COMPOSING_GLOBALLISTS_MERGE, COMPOSING_GDBCREATE, COMPOSING_COMMSERVERS,
00116
00117 RECOMPOSING_WHOLE, RECOMPOSING_INTERPOLATION, RECOMPOSING_DATASYNC,
00118 RECOMPOSING_BOXARRAYCOMM, RECOMPOSING_GRIDBOXCOMM, RECOMPOSING_COMMSERVERS,
00119 RECOMPOSING_GLOBALLISTS_NESTING, RECOMPOSING_GLOBALLISTS_REFINE, RECOMPOSING_GLOBALLISTS_MERGE,
00120 RECOMPOSING_GLOBALLISTS_GFLISTS, RECOMPOSING_GDBCREATE,
00121
00122 MEMORY_RESTART_CHECKPOINT, MEMORY_RESTART_WHOLE, MEMORY_RESTART_GDBCREATE, MEMORY_RESTART_COMMSERVERS,
00123
00124 RESTART_CHECKPOINT, RESTART_WHOLE, RESTART_GDBCREATE, RESTART_COMMSERVERS,
00125
00126 BOUNDARIES_SYNC, BOUNDARIES_INTERPOLATION, BOUNDARIES_EXTERNAL,
00127
00128 INTEGRATION_MAIN, INTEGRATION_ESTIMATE,
00129 INTEGRATION_SHADOW, SOURCE_INTEGRATION,
00130
00131 EX_INIT, EX_SSEND, EX_SRECV,
00132 GFSYNC_GETINFO, GFSYNC_CHECKREADY,
00133 GFSYNC_IWAIT, GFSYNC_ITEST, GFSYNC_OWAIT, GFSYNC_OTEST,
00134
00135 GFSYNC_READGHOSTS, GFSYNC_READGHOSTS_SELF, GFSYNC_WRITEGHOSTS,
00136 GFSYNC_READDATA, GFSYNC_WRITEDATA,
00137
00138 UPDATE_PATCH,
00139
00140 FIXUP_WHOLE, FIXUP_CORRECTION, FIXUP_SYNC,
00141
00142 GFM_FINDING_CELLS, GFM_GEOMETRY, GFM_TRANSFORM,
00143 GFM_EXTRAPOLATION, GFM_SETBNDRY,
00144 GFM_AUXILIARY_VALUES, GFM_SETBNDRY_WHOLE,
00145
00146 LS_SET_WHOLE, LS_SYNC, LS_CPT_TRANSFORM, LS_CPT_FLOODFILL,
00147
00148 ELC_RECEIVEBOUNDARY, ELC_SENDPRESSURE,
00149
00150 FLUID_CPL_RECEIVE_OVERHEAD, FLUID_CPL_SEND_OVERHEAD, FLUID_CPL_INTERPOLATE,
00151 FLUID_CPL_VELOCITY_SEARCH, FLUID_CPL_PRESSURE_CALCULATE, FLUID_CPL_ELC_GEOMETRY,
00152
00153
00154 _ACMAX };
00155
00159 enum CountingAccounts {
00160 GF_SYNC, GF_PATCHITERATOR,
00161
00162 CS_TEST, CS_WAIT,
00163
00164
00165 _CCMAX };
00166
00167 public:
00168
00169
00170
00175 inline void count(enum CountingAccounts account, int i=1)
00176 { counts[account] += i; }
00177
00178
00179
00180
00183 inline void start_timing()
00184 { tos = ×tack[0]; ntos = 0; *tos = 0.0; start_time = MPI_Wtime(); }
00185
00188 inline void start()
00189 { add_time(); tos++; ntos++; assert (ntos<MAXRECURSIONS); *tos = 0.0; }
00190
00194 inline void stop(enum TimingAccounts account)
00195 { add_time(); times[account] += *tos; calls[account]++; tos--; ntos--; assert (ntos>=-1); }
00196
00197 protected:
00198 inline void add_time() {
00199 double end_time = MPI_Wtime();
00200 *tos += end_time - start_time; start_time = end_time;
00201 }
00202
00203 public:
00204
00205
00206
00209 static void collect(MPI_Comm Comm) {
00210 #ifdef TIMING_AMR
00211 timing.collect_timing(Comm);
00212 #endif
00213 }
00214
00215 static void print(std::ostream& os) {
00216 #ifdef TIMING_AMR
00217 timing.print_local_times = false;
00218 timing.print_timing(os);
00219 #endif
00220 }
00221
00222 static void print_local(std::ostream& os) {
00223 #ifdef TIMING_AMR
00224 timing.print_local_times = true;
00225 timing.print_timing(os);
00226 #endif
00227 }
00228
00231 void collect_timing(MPI_Comm Comm, enum TimingAccounts ac,
00232 double* stat_times) {
00233 #ifdef DAGH_NO_MPI
00234 stat_times[0] = times[ac];
00235 stat_times[1] = times[ac];
00236 stat_times[2] = times[ac];
00237 #else
00238 MPI_Reduce(×[ac], &stat_times[0], 1, MPI_DOUBLE,
00239 MPI_MAX, 0, Comm);
00240 MPI_Reduce(×[ac], &stat_times[1], 1, MPI_DOUBLE,
00241 MPI_MIN, 0, Comm);
00242 MPI_Reduce(×[ac], &stat_times[2], 1, MPI_DOUBLE,
00243 MPI_SUM, 0, Comm);
00244
00245 int procs;
00246 MPI_Comm_size(Comm, &procs);
00247 stat_times[2] /= procs;
00248 #endif
00249 }
00250
00253 void collect_timing(MPI_Comm Comm) {
00254
00255 times[WHOLE] = calcsum(times, MISC, _ACMAX-1, 0.0);
00256 calls[WHOLE] = calcsum(calls, MISC, _ACMAX-1, (unsigned)0);
00257
00258
00259 stat_times_max.resize(_ACMAX);
00260 stat_times_min.resize(_ACMAX);
00261 stat_times_avg.resize(_ACMAX);
00262 #ifdef DAGH_NO_MPI
00263 stat_times_max = times;
00264 stat_times_min = times;
00265 stat_times_avg = times;
00266 #else
00267 MPI_Reduce(×[0], &stat_times_max[0], _ACMAX, MPI_DOUBLE,
00268 MPI_MAX, 0, Comm);
00269 MPI_Reduce(×[0], &stat_times_min[0], _ACMAX, MPI_DOUBLE,
00270 MPI_MIN, 0, Comm);
00271 MPI_Reduce(×[0], &stat_times_avg[0], _ACMAX, MPI_DOUBLE,
00272 MPI_SUM, 0, Comm);
00273
00274 int procs;
00275 MPI_Comm_size(Comm, &procs);
00276 for (int i=0; i<_ACMAX; i++)
00277 stat_times_avg[i] /= procs;
00278 #endif
00279 }
00280
00284 void print_timing(std::ostream& os) {
00285 cl(os,"CommServer - MPI_Waitsome ", CS_WAIT);
00286 cl(os," MPI_Testsome ", CS_TEST,true);
00287 cl(os,"GridFunction - Sync ", GF_SYNC);
00288 cl(os," PatchIterator ", GF_PATCHITERATOR,true);
00289 pl(os,"Initialization ", INITIALIZATION);
00290 pl(os,"Gather max. CFL in parallel ", GATHERCFL,true);
00291 pl(os,"Integration - Main ", INTEGRATION_MAIN);
00292 pl(os," Estimation ", INTEGRATION_ESTIMATE,true);
00293 pl(os," Coarsen ", INTEGRATION_SHADOW,true);
00294 pl(os," Source ", SOURCE_INTEGRATION,true);
00295 pl(os,"Fixup - Overhead ", FIXUP_WHOLE,true);
00296 pl(os," Correction ", FIXUP_CORRECTION,true);
00297 pl(os," Syncing ", FIXUP_SYNC,true);
00298 pl(os,"Boundary Val. - Physical ", BOUNDARIES_EXTERNAL);
00299 pl(os," Syncing ", BOUNDARIES_SYNC,true);
00300 pl(os," Interpolation", BOUNDARIES_INTERPOLATION,true);
00301 pl(os,"Regridding - Flagging ", FLAGGING,true);
00302 pl(os," Clustering ", CLUSTERING,true);
00303 pl(os,"Partition - Init ", PARTITION_INIT,true);
00304 pl(os," Calculate ", PARTITION_CALC,true);
00305 pl(os,"Composition - Overhead ", COMPOSING_WHOLE,true);
00306 pl(os," GlobalGridBoxList merge", COMPOSING_GLOBALLISTS_MERGE,true);
00307 pl(os," GridDataBlock create", COMPOSING_GDBCREATE,true);
00308 pl(os," GF CommServer creation", COMPOSING_COMMSERVERS,true);
00309 pl(os,"Recomposition - Overhead ", RECOMPOSING_WHOLE,true);
00310 pl(os," Interpolation", RECOMPOSING_INTERPOLATION,true);
00311 pl(os," Data syncing ", RECOMPOSING_DATASYNC,true);
00312 pl(os," GlobalBBoxList comm.", RECOMPOSING_BOXARRAYCOMM,true);
00313 pl(os," GlobalGridBoxList comm.", RECOMPOSING_GRIDBOXCOMM,true);
00314 pl(os," GlobalBBoxList nesting", RECOMPOSING_GLOBALLISTS_NESTING,true);
00315 pl(os," GlobalBBoxList refine ", RECOMPOSING_GLOBALLISTS_REFINE,true);
00316 pl(os," GlobalGridBoxList merge", RECOMPOSING_GLOBALLISTS_MERGE,true);
00317 pl(os," GF GridBoxLists create", RECOMPOSING_GLOBALLISTS_GFLISTS,true);
00318 pl(os," GridDataBlock create", RECOMPOSING_GDBCREATE,true);
00319 pl(os," GF CommServer creation", RECOMPOSING_COMMSERVERS,true);
00320 pl(os,"MemoryRestart - Checkpointing", MEMORY_RESTART_CHECKPOINT,true);
00321 pl(os,"MemoryRestart - Overhead ", MEMORY_RESTART_WHOLE,true);
00322 pl(os," GridDataBlock create", MEMORY_RESTART_GDBCREATE,true);
00323 pl(os," GF CommServer creation", MEMORY_RESTART_COMMSERVERS,true);
00324 pl(os,"Restart - Checkpointing ", RESTART_CHECKPOINT,true);
00325 pl(os,"Restart - Overhead ", RESTART_WHOLE,true);
00326 pl(os," GridDataBlock create", RESTART_GDBCREATE,true);
00327 pl(os," GF CommServer creation", RESTART_COMMSERVERS,true);
00328 pl(os,"ExchangeServ. - Init ", EX_INIT,true);
00329 pl(os," start sends ", EX_SSEND,true);
00330 pl(os," start recvs ", EX_SRECV,true);
00331 pl(os,"GFSync - gather depend. info ", GFSYNC_GETINFO,true);
00332 pl(os," check dependencies ", GFSYNC_CHECKREADY,true);
00333 pl(os," instant wait for msg ", GFSYNC_IWAIT,true);
00334 pl(os," instant test for msg ", GFSYNC_ITEST,true);
00335 pl(os," overlapped wait for msg ", GFSYNC_OWAIT,true);
00336 pl(os," overlapped test for msg ", GFSYNC_OTEST,true);
00337 pl(os," read ghosts ", GFSYNC_READGHOSTS,true);
00338 pl(os," read ghosts self ", GFSYNC_READGHOSTS_SELF,true);
00339 pl(os," write ghosts ", GFSYNC_WRITEGHOSTS,true);
00340 pl(os," read data ", GFSYNC_READDATA,true);
00341 pl(os," AMRSolver update patch ", UPDATE_PATCH,true);
00342 pl(os,"Ghost-fluid values - Overhead", GFM_SETBNDRY_WHOLE,true);
00343 pl(os," Finding internal cells ", GFM_FINDING_CELLS,true);
00344 pl(os," Calculating geometry info", GFM_GEOMETRY,true);
00345 pl(os," Calc. extrapolated values", GFM_TRANSFORM,true);
00346 pl(os," Extra- / Interpolation ", GFM_EXTRAPOLATION,true);
00347 pl(os," Setting auxiliary values ", GFM_AUXILIARY_VALUES,true);
00348 pl(os," Applying bndry values ", GFM_SETBNDRY,true);
00349 pl(os,"Level sets - Overhead ", LS_SET_WHOLE,true);
00350 pl(os," CPT - point_transform() ", LS_CPT_TRANSFORM,true);
00351 pl(os," CPT - flood_fill() ", LS_CPT_FLOODFILL,true);
00352 pl(os," Sync level set functions ", LS_SYNC,true);
00353 pl(os,"ELC - receive_mesh() ", ELC_RECEIVEBOUNDARY,true);
00354 pl(os," send_pressure() ", ELC_SENDPRESSURE,true);
00355 pl(os,"Coupling Send - Overhead ", FLUID_CPL_SEND_OVERHEAD,true);
00356 pl(os," pressure calculation() ", FLUID_CPL_PRESSURE_CALCULATE,true);
00357 pl(os," pressure interpolation() ", FLUID_CPL_INTERPOLATE,true);
00358 pl(os," ELC: face normal¢ers ", FLUID_CPL_ELC_GEOMETRY,true);
00359 pl(os,"Coupling Receive - Overhead ", FLUID_CPL_RECEIVE_OVERHEAD,true);
00360 pl(os," velocity search&constr. ", FLUID_CPL_VELOCITY_SEARCH,true);
00361 pl(os,"Output ", OUTPUT,true);
00362 pl(os,"Examined in detail 1 ", EXAMINE1,true);
00363 pl(os,"Examined in detail 2 ", EXAMINE2,true);
00364 pl(os,"Examined in detail 3 ", EXAMINE3,true);
00365 pl(os,"Examined in detail 4 ", EXAMINE4,true);
00366 pl(os,"Misc ", MISC);
00367 pl(os,"Whole time ", WHOLE);
00368 }
00369
00370 protected:
00374 template<typename T>
00375 inline T calcsum(typename std::vector<T> &data,
00376 int start, int end, T sum) {
00377 for (int i=start; i<=end; i++) sum += data[i];
00378 return sum;
00379 }
00380
00383 inline void pl(std::ostream& OS, const char *name,
00384 enum TimingAccounts ac, bool opt=false) {
00385 char str[500];
00386 if (print_local_times) {
00387 if (opt && times[ac]==0.) return;
00388 std::sprintf(str, " %s : %4.3fs (%3.2f%%) calls: %4ix",
00389 name,
00390 times[ac], (times[WHOLE]>0. ? 100.0*times[ac]/times[WHOLE] : 0.),
00391 calls[ac]
00392 );
00393 } else {
00394 if (opt && stat_times_avg[ac]==0.) return;
00395 std::sprintf(str, " %s : %4.3fs (%3.2f%%) balance: %2.3f diff: %4.2fs (%9.2fs - %4.2fs)",
00396 name,
00397 stat_times_avg[ac], (stat_times_avg[WHOLE]>0. ?
00398 100.0*stat_times_avg[ac]/stat_times_avg[WHOLE] : 0.),
00399 (stat_times_avg[ac]>0. ? stat_times_max[ac] / stat_times_avg[ac] : 1.),
00400 stat_times_max[ac] - stat_times_min[ac],
00401 stat_times_max[ac], stat_times_min[ac]
00402 );
00403 }
00404 OS << str << std::endl;
00405 }
00406
00407 inline void cl(std::ostream& OS, const char *name, enum CountingAccounts ac, bool opt=false) {
00408 if (opt && counts[ac]==0) return;
00409 OS << " " << name << " : " << counts[ac] << "x"
00410 << std::endl;
00411 }
00412
00413 public:
00414 Timing() {
00415 times.resize(_ACMAX, 0.0);
00416 calls.resize(_ACMAX, 0);
00417 counts.resize(_CCMAX, 0);
00418 timestack.resize(MAXRECURSIONS);
00419 }
00420
00421 protected:
00422 std::vector<double> times;
00423 std::vector<unsigned int> calls;
00424 std::vector<int> counts;
00425
00426 enum {MAXRECURSIONS=300};
00427 std::vector<double> timestack;
00428 double *tos;
00429 int ntos;
00430
00432 bool print_local_times;
00433 std::vector<double> stat_times_max;
00434 std::vector<double> stat_times_min;
00435 std::vector<double> stat_times_avg;
00436
00437 double start_time;
00438 };
00439
00440 #endif