//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
| 9 | |
| 10 | |
| 11 | #include "offload_timer.h" |
| 12 | |
| 13 | #ifdef __INTEL_COMPILER |
| 14 | #include <ia32intrin.h> |
| 15 | #else // __INTEL_COMPILER |
| 16 | #include <x86intrin.h> |
| 17 | #endif // __INTEL_COMPILER |
| 18 | |
| 19 | #include "offload_host.h" |
| 20 | #include <sstream> |
| 21 | #include <iostream> |
| 22 | #include <iomanip> |
| 23 | |
// Non-zero turns on collection of per-offload timing data: offload_timer_init
// allocates a timer record whenever this flag is set. Defined outside the
// TIMING_SUPPORT guard, so the symbol exists even when timing is compiled out.
int timer_enabled = 0;
| 25 | |
| 26 | #ifdef TIMING_SUPPORT |
| 27 | |
// Verbosity of the per-offload report. Zero disables report-driven timer
// allocation; values >= OFFLOAD_REPORT_2 make Offload_Report_Epilog also print
// the transferred byte counts.
int offload_report_level = 0;
// Switch for the report: offload_timer_init allocates a record for reporting
// only when both this flag and offload_report_level are non-zero.
int offload_report_enabled = 1;
| 30 | |
// Indentation (number of leading spaces) used by offload_host_phase_name when
// printing each host phase, indexed by host phase number. Deeper indentation
// appears to mark a sub-phase of the preceding, less indented entry.
// NOTE(review): entries are matched to phases purely by position; presumably
// the order mirrors the OffloadHostPhase enum - confirm against its definition.
static const int host_timer_prefix_spaces[] = {
    // NOTE(review): this label duplicates the one at index 4. Index 0 is the
    // value Offload_Report_Epilog reports as host CPU time, so this entry is
    // presumably the overall offload phase - verify and correct the label.
    /*c_offload_host_setup_buffers*/ 0,
    /*c_offload_host_initialize*/ 2,
    /*c_offload_host_target_acquire*/ 2,
    /*c_offload_host_wait_deps*/ 2,
    /*c_offload_host_setup_buffers*/ 2,
    /*c_offload_host_alloc_buffers*/ 4,
    /*c_offload_host_setup_misc_data*/ 2,
    /*c_offload_host_alloc_data_buffer*/ 4,
    /*c_offload_host_send_pointers*/ 2,
    /*c_offload_host_gather_inputs*/ 2,
    /*c_offload_host_map_in_data_buffer*/ 4,
    /*c_offload_host_unmap_in_data_buffer*/ 4,
    /*c_offload_host_start_compute*/ 2,
    /*c_offload_host_wait_compute*/ 2,
    /*c_offload_host_start_buffers_reads*/ 2,
    /*c_offload_host_scatter_outputs*/ 2,
    /*c_offload_host_map_out_data_buffer*/ 4,
    /*c_offload_host_unmap_out_data_buffer*/ 4,
    /*c_offload_host_wait_buffers_reads*/ 2,
    /*c_offload_host_destroy_buffers*/ 2
};
| 53 | |
// Indentation (number of leading spaces) used by offload_target_phase_name
// when printing each target phase, indexed by target phase number.
// NOTE(review): entries are matched to phases purely by position; presumably
// the order mirrors the target phase enum - confirm against its definition.
const static int target_timer_prefix_spaces[] = {
    /*c_offload_target_total_time*/ 0,
    /*c_offload_target_descriptor_setup*/ 2,
    /*c_offload_target_func_lookup*/ 2,
    /*c_offload_target_func_time*/ 2,
    /*c_offload_target_scatter_inputs*/ 4,
    /*c_offload_target_add_buffer_refs*/ 6,
    /*c_offload_target_compute*/ 4,
    /*c_offload_target_gather_outputs*/ 4,
    /*c_offload_target_release_buffer_refs*/ 6
};
| 65 | |
// Singly linked list of timer records, one per offload, in creation order.
// offload_timer_init appends at the tail; Offload_Timer_Print walks the list
// from the head; offload_report_free_data unlinks and frees a record.
static OffloadHostTimerData* timer_data_head;
static OffloadHostTimerData* timer_data_tail;
// Held by offload_timer_init while it appends a record to the list.
static mutex_t timer_data_mutex;
| 69 | |
// Forward declarations of the helpers that append an indented, padded phase
// label to a report stream; both are defined further down in this file.
static void offload_host_phase_name(std::stringstream &ss, int p_node);
static void offload_target_phase_name(std::stringstream &ss, int p_node);
| 72 | |
| 73 | extern void Offload_Timer_Print(void) |
| 74 | { |
| 75 | std::string buf; |
| 76 | std::stringstream ss; |
| 77 | const char *stars = |
| 78 | "**************************************************************"; |
| 79 | |
| 80 | ss << "\n\n" << stars << "\n"; |
| 81 | ss << " "; |
| 82 | ss << report_get_message_str(c_report_title) << "\n"; |
| 83 | ss << stars << "\n"; |
| 84 | double frequency = cpu_frequency; |
| 85 | |
| 86 | for (OffloadHostTimerData *pnode = timer_data_head; |
| 87 | pnode != 0; pnode = pnode->next) { |
| 88 | ss << " "; |
| 89 | ss << report_get_message_str(c_report_from_file) << " "<< pnode->file; |
| 90 | ss << report_get_message_str(c_report_line) << " " << pnode->line; |
| 91 | ss << "\n"; |
| 92 | for (int i = 0; i < c_offload_host_max_phase ; i++) { |
| 93 | ss << " "; |
| 94 | offload_host_phase_name(ss, i); |
| 95 | ss << " " << std::fixed << std::setprecision(5); |
| 96 | ss << (double)pnode->phases[i].total / frequency << "\n"; |
| 97 | } |
| 98 | |
| 99 | for (int i = 0; i < c_offload_target_max_phase ; i++) { |
| 100 | double time = 0; |
| 101 | if (pnode->target.frequency != 0) { |
| 102 | time = (double) pnode->target.phases[i].total / |
| 103 | (double) pnode->target.frequency; |
| 104 | } |
| 105 | ss << " "; |
| 106 | offload_target_phase_name(ss, i); |
| 107 | ss << " " << std::fixed << std::setprecision(5); |
| 108 | ss << time << "\n"; |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | buf = ss.str(); |
| 113 | fprintf(stdout, buf.data()); |
| 114 | fflush(stdout); |
| 115 | } |
| 116 | |
| 117 | extern void Offload_Report_Prolog(OffloadHostTimerData *pnode) |
| 118 | { |
| 119 | double frequency = cpu_frequency; |
| 120 | std::string buf; |
| 121 | std::stringstream ss; |
| 122 | |
| 123 | if (pnode) { |
| 124 | // [Offload] [Mic 0] [File] file.c |
| 125 | ss << "[" << report_get_message_str(c_report_offload) << "] ["; |
| 126 | ss << report_get_message_str(c_report_mic) << " "; |
| 127 | ss << pnode->card_number << "] ["; |
| 128 | ss << report_get_message_str(c_report_file); |
| 129 | ss << "] " << pnode->file << "\n"; |
| 130 | |
| 131 | // [Offload] [Mic 0] [Line] 1234 |
| 132 | ss << "[" << report_get_message_str(c_report_offload) << "] ["; |
| 133 | ss << report_get_message_str(c_report_mic) << " "; |
| 134 | ss << pnode->card_number << "] ["; |
| 135 | ss << report_get_message_str(c_report_line); |
| 136 | ss << "] " << pnode->line << "\n"; |
| 137 | |
| 138 | // [Offload] [Mic 0] [Tag] Tag 1 |
| 139 | ss << "[" << report_get_message_str(c_report_offload) << "] ["; |
| 140 | ss << report_get_message_str(c_report_mic) << " "; |
| 141 | ss << pnode->card_number << "] ["; |
| 142 | ss << report_get_message_str(c_report_tag); |
| 143 | ss << "] " << report_get_message_str(c_report_tag); |
| 144 | ss << " " << pnode->offload_number << "\n"; |
| 145 | |
| 146 | buf = ss.str(); |
| 147 | fprintf(stdout, buf.data()); |
| 148 | fflush(stdout); |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | extern void Offload_Report_Epilog(OffloadHostTimerData * timer_data) |
| 153 | { |
| 154 | double frequency = cpu_frequency; |
| 155 | std::string buf; |
| 156 | std::stringstream ss; |
| 157 | |
| 158 | OffloadHostTimerData *pnode = timer_data; |
| 159 | |
| 160 | if (!pnode) { |
| 161 | return; |
| 162 | } |
| 163 | ss << "[" << report_get_message_str(c_report_offload) << "] ["; |
| 164 | ss << report_get_message_str(c_report_host) << "] ["; |
| 165 | ss << report_get_message_str(c_report_tag) << " "; |
| 166 | ss << pnode->offload_number << "] ["; |
| 167 | ss << report_get_message_str(c_report_cpu_time) << "] "; |
| 168 | ss << std::fixed << std::setprecision(6); |
| 169 | ss << (double) pnode->phases[0].total / frequency; |
| 170 | ss << report_get_message_str(c_report_seconds) << "\n"; |
| 171 | |
| 172 | if (offload_report_level >= OFFLOAD_REPORT_2) { |
| 173 | ss << "[" << report_get_message_str(c_report_offload) << "] ["; |
| 174 | ss << report_get_message_str(c_report_mic); |
| 175 | ss << " " << pnode->card_number; |
| 176 | ss << "] [" << report_get_message_str(c_report_tag) << " "; |
| 177 | ss << pnode->offload_number << "] ["; |
| 178 | ss << report_get_message_str(c_report_cpu_to_mic_data) << "] "; |
| 179 | ss << pnode->sent_bytes << " "; |
| 180 | ss << report_get_message_str(c_report_bytes) << "\n"; |
| 181 | } |
| 182 | |
| 183 | double time = 0; |
| 184 | if (pnode->target.frequency != 0) { |
| 185 | time = (double) pnode->target.phases[0].total / |
| 186 | (double) pnode->target.frequency; |
| 187 | } |
| 188 | ss << "[" << report_get_message_str(c_report_offload) << "] ["; |
| 189 | ss << report_get_message_str(c_report_mic) << " "; |
| 190 | ss << pnode->card_number<< "] ["; |
| 191 | ss << report_get_message_str(c_report_tag) << " "; |
| 192 | ss << pnode->offload_number << "] ["; |
| 193 | ss << report_get_message_str(c_report_mic_time) << "] "; |
| 194 | ss << std::fixed << std::setprecision(6) << time; |
| 195 | ss << report_get_message_str(c_report_seconds) << "\n"; |
| 196 | |
| 197 | if (offload_report_level >= OFFLOAD_REPORT_2) { |
| 198 | ss << "[" << report_get_message_str(c_report_offload) << "] ["; |
| 199 | ss << report_get_message_str(c_report_mic); |
| 200 | ss << " " << pnode->card_number; |
| 201 | ss << "] [" << report_get_message_str(c_report_tag) << " "; |
| 202 | ss << pnode->offload_number << "] ["; |
| 203 | ss << report_get_message_str(c_report_mic_to_cpu_data) << "] "; |
| 204 | ss << pnode->received_bytes << " "; |
| 205 | ss << report_get_message_str(c_report_bytes) << "\n"; |
| 206 | } |
| 207 | ss << "\n"; |
| 208 | |
| 209 | buf = ss.str(); |
| 210 | fprintf(stdout, buf.data()); |
| 211 | fflush(stdout); |
| 212 | |
| 213 | offload_report_free_data(timer_data); |
| 214 | } |
| 215 | |
| 216 | extern void offload_report_free_data(OffloadHostTimerData * timer_data) |
| 217 | { |
| 218 | OffloadHostTimerData *pnode_last = NULL; |
| 219 | |
| 220 | for (OffloadHostTimerData *pnode = timer_data_head; |
| 221 | pnode != 0; pnode = pnode->next) { |
| 222 | if (timer_data == pnode) { |
| 223 | if (pnode_last) { |
| 224 | pnode_last->next = pnode->next; |
| 225 | } |
| 226 | else { |
| 227 | timer_data_head = pnode->next; |
| 228 | } |
| 229 | OFFLOAD_FREE(pnode); |
| 230 | break; |
| 231 | } |
| 232 | pnode_last = pnode; |
| 233 | } |
| 234 | } |
| 235 | |
// Appends `num` single-space strings to `ss`; a zero or negative count
// appends nothing.
static void fill_buf_with_spaces(std::stringstream &ss, int num)
{
    for (int written = 0; written < num; ++written) {
        ss << " ";
    }
}
| 242 | |
| 243 | static void offload_host_phase_name(std::stringstream &ss, int p_node) |
| 244 | { |
| 245 | int prefix_spaces; |
| 246 | int str_length; |
| 247 | int tail_length; |
| 248 | const int message_length = 40; |
| 249 | char const *str; |
| 250 | |
| 251 | str = report_get_host_stage_str(p_node); |
| 252 | prefix_spaces = host_timer_prefix_spaces[p_node]; |
| 253 | fill_buf_with_spaces(ss, prefix_spaces); |
| 254 | str_length = strlen(str); |
| 255 | ss << str; |
| 256 | tail_length = message_length - prefix_spaces - str_length; |
| 257 | tail_length = tail_length > 0? tail_length : 1; |
| 258 | fill_buf_with_spaces(ss, tail_length); |
| 259 | } |
| 260 | |
| 261 | static void offload_target_phase_name(std::stringstream &ss, int p_node) |
| 262 | { |
| 263 | int prefix_spaces; |
| 264 | int str_length; |
| 265 | const int message_length = 40; |
| 266 | int tail_length; |
| 267 | char const *str; |
| 268 | |
| 269 | str = report_get_target_stage_str(p_node); |
| 270 | prefix_spaces = target_timer_prefix_spaces[p_node]; |
| 271 | fill_buf_with_spaces(ss, prefix_spaces); |
| 272 | str_length = strlen(str); |
| 273 | ss << str; |
| 274 | tail_length = message_length - prefix_spaces - str_length; |
| 275 | tail_length = (tail_length > 0)? tail_length : 1; |
| 276 | fill_buf_with_spaces(ss, tail_length); |
| 277 | } |
| 278 | |
| 279 | void offload_timer_start(OffloadHostTimerData * timer_data, |
| 280 | OffloadHostPhase p_type) |
| 281 | { |
| 282 | timer_data->phases[p_type].start = _rdtsc(); |
| 283 | } |
| 284 | |
| 285 | void offload_timer_stop(OffloadHostTimerData * timer_data, |
| 286 | OffloadHostPhase p_type) |
| 287 | { |
| 288 | timer_data->phases[p_type].total += _rdtsc() - |
| 289 | timer_data->phases[p_type].start; |
| 290 | } |
| 291 | |
| 292 | void offload_timer_fill_target_data(OffloadHostTimerData * timer_data, |
| 293 | void *buf) |
| 294 | { |
| 295 | uint64_t *data = (uint64_t*) buf; |
| 296 | |
| 297 | timer_data->target.frequency = *data++; |
| 298 | for (int i = 0; i < c_offload_target_max_phase; i++) { |
| 299 | timer_data->target.phases[i].total = *data++; |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | void offload_timer_fill_host_sdata(OffloadHostTimerData * timer_data, |
| 304 | uint64_t sent_bytes) |
| 305 | { |
| 306 | if (timer_data) { |
| 307 | timer_data->sent_bytes += sent_bytes; |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | void offload_timer_fill_host_rdata(OffloadHostTimerData * timer_data, |
| 312 | uint64_t received_bytes) |
| 313 | { |
| 314 | if (timer_data) { |
| 315 | timer_data->received_bytes += received_bytes; |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | void offload_timer_fill_host_mic_num(OffloadHostTimerData * timer_data, |
| 320 | int card_number) |
| 321 | { |
| 322 | if (timer_data) { |
| 323 | timer_data->card_number = card_number; |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | OffloadHostTimerData* offload_timer_init(const char *file, int line) |
| 328 | { |
| 329 | static bool first_time = true; |
| 330 | OffloadHostTimerData* timer_data = NULL; |
| 331 | |
| 332 | timer_data_mutex.lock(); |
| 333 | { |
| 334 | if (timer_enabled || |
| 335 | (offload_report_level && offload_report_enabled)) { |
| 336 | timer_data = (OffloadHostTimerData*) |
| 337 | OFFLOAD_MALLOC(sizeof(OffloadHostTimerData), 0); |
| 338 | memset(timer_data, 0, sizeof(OffloadHostTimerData)); |
| 339 | |
| 340 | timer_data->offload_number = OFFLOAD_DEBUG_INCR_OFLD_NUM() - 1; |
| 341 | |
| 342 | if (timer_data_head == 0) { |
| 343 | timer_data_head = timer_data; |
| 344 | timer_data_tail = timer_data; |
| 345 | } |
| 346 | else { |
| 347 | timer_data_tail->next = timer_data; |
| 348 | timer_data_tail = timer_data; |
| 349 | } |
| 350 | |
| 351 | timer_data->file = file; |
| 352 | timer_data->line = line; |
| 353 | } |
| 354 | } |
| 355 | timer_data_mutex.unlock(); |
| 356 | return timer_data; |
| 357 | } |
| 358 | |
| 359 | #endif // TIMING_SUPPORT |