//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
9
10
11#include "offload_timer.h"
12
13#ifdef __INTEL_COMPILER
14#include <ia32intrin.h>
15#else // __INTEL_COMPILER
16#include <x86intrin.h>
17#endif // __INTEL_COMPILER
18
19#include "offload_host.h"
20#include <sstream>
21#include <iostream>
22#include <iomanip>
23
// Non-zero turns on collection of per-offload timing records: it is one of
// the conditions offload_timer_init() tests before allocating a record.
// Starts out disabled.
int timer_enabled = 0;
25
26#ifdef TIMING_SUPPORT
27
// Verbosity of the per-offload report. Zero means no report records are
// created by offload_timer_init(); Offload_Report_Epilog() adds the
// transferred-byte lines once the level reaches OFFLOAD_REPORT_2.
int offload_report_level = 0;

// Gate for the report: records are only created while this is non-zero
// (and offload_report_level is non-zero as well). Enabled by default.
int offload_report_enabled = 1;
30
// Number of leading spaces printed before each host phase name by
// offload_host_phase_name(); the table is indexed by the host phase number.
// A deeper indent marks a phase nested inside the preceding, shallower one.
//
// NOTE(review): the first entry was labelled c_offload_host_setup_buffers,
// which duplicates the label at index 4. It is presumably the overall
// "total offload" phase -- confirm against the OffloadHostPhase enum.
static const int host_timer_prefix_spaces[] = {
    0,  // [0]  labelled c_offload_host_setup_buffers (see note above)
    2,  // [1]  c_offload_host_initialize
    2,  // [2]  c_offload_host_target_acquire
    2,  // [3]  c_offload_host_wait_deps
    2,  // [4]  c_offload_host_setup_buffers
    4,  // [5]  c_offload_host_alloc_buffers
    2,  // [6]  c_offload_host_setup_misc_data
    4,  // [7]  c_offload_host_alloc_data_buffer
    2,  // [8]  c_offload_host_send_pointers
    2,  // [9]  c_offload_host_gather_inputs
    4,  // [10] c_offload_host_map_in_data_buffer
    4,  // [11] c_offload_host_unmap_in_data_buffer
    2,  // [12] c_offload_host_start_compute
    2,  // [13] c_offload_host_wait_compute
    2,  // [14] c_offload_host_start_buffers_reads
    2,  // [15] c_offload_host_scatter_outputs
    4,  // [16] c_offload_host_map_out_data_buffer
    4,  // [17] c_offload_host_unmap_out_data_buffer
    2,  // [18] c_offload_host_wait_buffers_reads
    2   // [19] c_offload_host_destroy_buffers
};
53
// Number of leading spaces printed before each target phase name by
// offload_target_phase_name(); the table is indexed by the target phase
// number.
static const int target_timer_prefix_spaces[] = {
    0,  // [0] c_offload_target_total_time
    2,  // [1] c_offload_target_descriptor_setup
    2,  // [2] c_offload_target_func_lookup
    2,  // [3] c_offload_target_func_time
    4,  // [4] c_offload_target_scatter_inputs
    6,  // [5] c_offload_target_add_buffer_refs
    4,  // [6] c_offload_target_compute
    4,  // [7] c_offload_target_gather_outputs
    6   // [8] c_offload_target_release_buffer_refs
};
65
66static OffloadHostTimerData* timer_data_head;
67static OffloadHostTimerData* timer_data_tail;
68static mutex_t timer_data_mutex;
69
70static void offload_host_phase_name(std::stringstream &ss, int p_node);
71static void offload_target_phase_name(std::stringstream &ss, int p_node);
72
73extern void Offload_Timer_Print(void)
74{
75 std::string buf;
76 std::stringstream ss;
77 const char *stars =
78 "**************************************************************";
79
80 ss << "\n\n" << stars << "\n";
81 ss << " ";
82 ss << report_get_message_str(c_report_title) << "\n";
83 ss << stars << "\n";
84 double frequency = cpu_frequency;
85
86 for (OffloadHostTimerData *pnode = timer_data_head;
87 pnode != 0; pnode = pnode->next) {
88 ss << " ";
89 ss << report_get_message_str(c_report_from_file) << " "<< pnode->file;
90 ss << report_get_message_str(c_report_line) << " " << pnode->line;
91 ss << "\n";
92 for (int i = 0; i < c_offload_host_max_phase ; i++) {
93 ss << " ";
94 offload_host_phase_name(ss, i);
95 ss << " " << std::fixed << std::setprecision(5);
96 ss << (double)pnode->phases[i].total / frequency << "\n";
97 }
98
99 for (int i = 0; i < c_offload_target_max_phase ; i++) {
100 double time = 0;
101 if (pnode->target.frequency != 0) {
102 time = (double) pnode->target.phases[i].total /
103 (double) pnode->target.frequency;
104 }
105 ss << " ";
106 offload_target_phase_name(ss, i);
107 ss << " " << std::fixed << std::setprecision(5);
108 ss << time << "\n";
109 }
110 }
111
112 buf = ss.str();
113 fprintf(stdout, buf.data());
114 fflush(stdout);
115}
116
117extern void Offload_Report_Prolog(OffloadHostTimerData *pnode)
118{
119 double frequency = cpu_frequency;
120 std::string buf;
121 std::stringstream ss;
122
123 if (pnode) {
124 // [Offload] [Mic 0] [File] file.c
125 ss << "[" << report_get_message_str(c_report_offload) << "] [";
126 ss << report_get_message_str(c_report_mic) << " ";
127 ss << pnode->card_number << "] [";
128 ss << report_get_message_str(c_report_file);
129 ss << "] " << pnode->file << "\n";
130
131 // [Offload] [Mic 0] [Line] 1234
132 ss << "[" << report_get_message_str(c_report_offload) << "] [";
133 ss << report_get_message_str(c_report_mic) << " ";
134 ss << pnode->card_number << "] [";
135 ss << report_get_message_str(c_report_line);
136 ss << "] " << pnode->line << "\n";
137
138 // [Offload] [Mic 0] [Tag] Tag 1
139 ss << "[" << report_get_message_str(c_report_offload) << "] [";
140 ss << report_get_message_str(c_report_mic) << " ";
141 ss << pnode->card_number << "] [";
142 ss << report_get_message_str(c_report_tag);
143 ss << "] " << report_get_message_str(c_report_tag);
144 ss << " " << pnode->offload_number << "\n";
145
146 buf = ss.str();
147 fprintf(stdout, buf.data());
148 fflush(stdout);
149 }
150}
151
152extern void Offload_Report_Epilog(OffloadHostTimerData * timer_data)
153{
154 double frequency = cpu_frequency;
155 std::string buf;
156 std::stringstream ss;
157
158 OffloadHostTimerData *pnode = timer_data;
159
160 if (!pnode) {
161 return;
162 }
163 ss << "[" << report_get_message_str(c_report_offload) << "] [";
164 ss << report_get_message_str(c_report_host) << "] [";
165 ss << report_get_message_str(c_report_tag) << " ";
166 ss << pnode->offload_number << "] [";
167 ss << report_get_message_str(c_report_cpu_time) << "] ";
168 ss << std::fixed << std::setprecision(6);
169 ss << (double) pnode->phases[0].total / frequency;
170 ss << report_get_message_str(c_report_seconds) << "\n";
171
172 if (offload_report_level >= OFFLOAD_REPORT_2) {
173 ss << "[" << report_get_message_str(c_report_offload) << "] [";
174 ss << report_get_message_str(c_report_mic);
175 ss << " " << pnode->card_number;
176 ss << "] [" << report_get_message_str(c_report_tag) << " ";
177 ss << pnode->offload_number << "] [";
178 ss << report_get_message_str(c_report_cpu_to_mic_data) << "] ";
179 ss << pnode->sent_bytes << " ";
180 ss << report_get_message_str(c_report_bytes) << "\n";
181 }
182
183 double time = 0;
184 if (pnode->target.frequency != 0) {
185 time = (double) pnode->target.phases[0].total /
186 (double) pnode->target.frequency;
187 }
188 ss << "[" << report_get_message_str(c_report_offload) << "] [";
189 ss << report_get_message_str(c_report_mic) << " ";
190 ss << pnode->card_number<< "] [";
191 ss << report_get_message_str(c_report_tag) << " ";
192 ss << pnode->offload_number << "] [";
193 ss << report_get_message_str(c_report_mic_time) << "] ";
194 ss << std::fixed << std::setprecision(6) << time;
195 ss << report_get_message_str(c_report_seconds) << "\n";
196
197 if (offload_report_level >= OFFLOAD_REPORT_2) {
198 ss << "[" << report_get_message_str(c_report_offload) << "] [";
199 ss << report_get_message_str(c_report_mic);
200 ss << " " << pnode->card_number;
201 ss << "] [" << report_get_message_str(c_report_tag) << " ";
202 ss << pnode->offload_number << "] [";
203 ss << report_get_message_str(c_report_mic_to_cpu_data) << "] ";
204 ss << pnode->received_bytes << " ";
205 ss << report_get_message_str(c_report_bytes) << "\n";
206 }
207 ss << "\n";
208
209 buf = ss.str();
210 fprintf(stdout, buf.data());
211 fflush(stdout);
212
213 offload_report_free_data(timer_data);
214}
215
216extern void offload_report_free_data(OffloadHostTimerData * timer_data)
217{
218 OffloadHostTimerData *pnode_last = NULL;
219
220 for (OffloadHostTimerData *pnode = timer_data_head;
221 pnode != 0; pnode = pnode->next) {
222 if (timer_data == pnode) {
223 if (pnode_last) {
224 pnode_last->next = pnode->next;
225 }
226 else {
227 timer_data_head = pnode->next;
228 }
229 OFFLOAD_FREE(pnode);
230 break;
231 }
232 pnode_last = pnode;
233 }
234}
235
// Appends `num` single-space strings to `ss`. A zero or negative count
// appends nothing.
static void fill_buf_with_spaces(std::stringstream &ss, int num)
{
    int remaining = num;
    while (remaining > 0) {
        ss << " ";
        --remaining;
    }
}
242
243static void offload_host_phase_name(std::stringstream &ss, int p_node)
244{
245 int prefix_spaces;
246 int str_length;
247 int tail_length;
248 const int message_length = 40;
249 char const *str;
250
251 str = report_get_host_stage_str(p_node);
252 prefix_spaces = host_timer_prefix_spaces[p_node];
253 fill_buf_with_spaces(ss, prefix_spaces);
254 str_length = strlen(str);
255 ss << str;
256 tail_length = message_length - prefix_spaces - str_length;
257 tail_length = tail_length > 0? tail_length : 1;
258 fill_buf_with_spaces(ss, tail_length);
259}
260
261static void offload_target_phase_name(std::stringstream &ss, int p_node)
262{
263 int prefix_spaces;
264 int str_length;
265 const int message_length = 40;
266 int tail_length;
267 char const *str;
268
269 str = report_get_target_stage_str(p_node);
270 prefix_spaces = target_timer_prefix_spaces[p_node];
271 fill_buf_with_spaces(ss, prefix_spaces);
272 str_length = strlen(str);
273 ss << str;
274 tail_length = message_length - prefix_spaces - str_length;
275 tail_length = (tail_length > 0)? tail_length : 1;
276 fill_buf_with_spaces(ss, tail_length);
277}
278
279void offload_timer_start(OffloadHostTimerData * timer_data,
280 OffloadHostPhase p_type)
281{
282 timer_data->phases[p_type].start = _rdtsc();
283}
284
285void offload_timer_stop(OffloadHostTimerData * timer_data,
286 OffloadHostPhase p_type)
287{
288 timer_data->phases[p_type].total += _rdtsc() -
289 timer_data->phases[p_type].start;
290}
291
292void offload_timer_fill_target_data(OffloadHostTimerData * timer_data,
293 void *buf)
294{
295 uint64_t *data = (uint64_t*) buf;
296
297 timer_data->target.frequency = *data++;
298 for (int i = 0; i < c_offload_target_max_phase; i++) {
299 timer_data->target.phases[i].total = *data++;
300 }
301}
302
303void offload_timer_fill_host_sdata(OffloadHostTimerData * timer_data,
304 uint64_t sent_bytes)
305{
306 if (timer_data) {
307 timer_data->sent_bytes += sent_bytes;
308 }
309}
310
311void offload_timer_fill_host_rdata(OffloadHostTimerData * timer_data,
312 uint64_t received_bytes)
313{
314 if (timer_data) {
315 timer_data->received_bytes += received_bytes;
316 }
317}
318
319void offload_timer_fill_host_mic_num(OffloadHostTimerData * timer_data,
320 int card_number)
321{
322 if (timer_data) {
323 timer_data->card_number = card_number;
324 }
325}
326
327OffloadHostTimerData* offload_timer_init(const char *file, int line)
328{
329 static bool first_time = true;
330 OffloadHostTimerData* timer_data = NULL;
331
332 timer_data_mutex.lock();
333 {
334 if (timer_enabled ||
335 (offload_report_level && offload_report_enabled)) {
336 timer_data = (OffloadHostTimerData*)
337 OFFLOAD_MALLOC(sizeof(OffloadHostTimerData), 0);
338 memset(timer_data, 0, sizeof(OffloadHostTimerData));
339
340 timer_data->offload_number = OFFLOAD_DEBUG_INCR_OFLD_NUM() - 1;
341
342 if (timer_data_head == 0) {
343 timer_data_head = timer_data;
344 timer_data_tail = timer_data;
345 }
346 else {
347 timer_data_tail->next = timer_data;
348 timer_data_tail = timer_data;
349 }
350
351 timer_data->file = file;
352 timer_data->line = line;
353 }
354 }
355 timer_data_mutex.unlock();
356 return timer_data;
357}
358
359#endif // TIMING_SUPPORT