blob: 069b604e1ea5a7c40cd765efb6c6b8d04bb47a0a [file] [log] [blame]
//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
9
10
11#include "offload_engine.h"
12#include <signal.h>
13#include <errno.h>
14
15#include <algorithm>
16#include <vector>
17
18#include "offload_host.h"
19#include "offload_table.h"
20
21const char* Engine::m_func_names[Engine::c_funcs_total] =
22{
23 "server_compute",
24#ifdef MYO_SUPPORT
25 "server_myoinit",
26 "server_myofini",
27#endif // MYO_SUPPORT
28 "server_init",
29 "server_var_table_size",
30 "server_var_table_copy"
31};
32
33// Symbolic representation of system signals. Fix for CQ233593
34const char* Engine::c_signal_names[Engine::c_signal_max] =
35{
36 "Unknown SIGNAL",
37 "SIGHUP", /* 1, Hangup (POSIX). */
38 "SIGINT", /* 2, Interrupt (ANSI). */
39 "SIGQUIT", /* 3, Quit (POSIX). */
40 "SIGILL", /* 4, Illegal instruction (ANSI). */
41 "SIGTRAP", /* 5, Trace trap (POSIX). */
42 "SIGABRT", /* 6, Abort (ANSI). */
43 "SIGBUS", /* 7, BUS error (4.2 BSD). */
44 "SIGFPE", /* 8, Floating-point exception (ANSI). */
45 "SIGKILL", /* 9, Kill, unblockable (POSIX). */
46 "SIGUSR1", /* 10, User-defined signal 1 (POSIX). */
47 "SIGSEGV", /* 11, Segmentation violation (ANSI). */
48 "SIGUSR2", /* 12, User-defined signal 2 (POSIX). */
49 "SIGPIPE", /* 13, Broken pipe (POSIX). */
50 "SIGALRM", /* 14, Alarm clock (POSIX). */
51 "SIGTERM", /* 15, Termination (ANSI). */
52 "SIGSTKFLT", /* 16, Stack fault. */
53 "SIGCHLD", /* 17, Child status has changed (POSIX). */
54 "SIGCONT", /* 18, Continue (POSIX). */
55 "SIGSTOP", /* 19, Stop, unblockable (POSIX). */
56 "SIGTSTP", /* 20, Keyboard stop (POSIX). */
57 "SIGTTIN", /* 21, Background read from tty (POSIX). */
58 "SIGTTOU", /* 22, Background write to tty (POSIX). */
59 "SIGURG", /* 23, Urgent condition on socket (4.2 BSD). */
60 "SIGXCPU", /* 24, CPU limit exceeded (4.2 BSD). */
61 "SIGXFSZ", /* 25, File size limit exceeded (4.2 BSD). */
62 "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD). */
63 "SIGPROF", /* 27, Profiling alarm clock (4.2 BSD). */
64 "SIGWINCH", /* 28, Window size change (4.3 BSD, Sun). */
65 "SIGIO", /* 29, I/O now possible (4.2 BSD). */
66 "SIGPWR", /* 30, Power failure restart (System V). */
67 "SIGSYS" /* 31, Bad system call. */
68};
69
70void Engine::init(void)
71{
72 if (!m_ready) {
73 mutex_locker_t locker(m_lock);
74
75 if (!m_ready) {
76 // start process if not done yet
77 if (m_process == 0) {
78 init_process();
79 }
80
81 // load penging images
82 load_libraries();
83
84 // and (re)build pointer table
85 init_ptr_data();
86
87 // it is ready now
88 m_ready = true;
89 }
90 }
91}
92
93void Engine::init_process(void)
94{
95 COIENGINE engine;
96 COIRESULT res;
97 const char **environ;
98
99 // create environment for the target process
100 environ = (const char**) mic_env_vars.create_environ_for_card(m_index);
101 if (environ != 0) {
102 for (const char **p = environ; *p != 0; p++) {
103 OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p);
104 }
105 }
106
107 // Create execution context in the specified device
108 OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
109 m_physical_index);
110 res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine);
111 check_result(res, c_get_engine_handle, m_index, res);
112
113 // Target executable should be available by the time when we
114 // attempt to initialize the device
115 if (__target_exe == 0) {
116 LIBOFFLOAD_ERROR(c_no_target_exe);
117 exit(1);
118 }
119
120 OFFLOAD_DEBUG_TRACE(2,
121 "Loading target executable \"%s\" from %p, size %lld\n",
122 __target_exe->name, __target_exe->data, __target_exe->size);
123
124 res = COI::ProcessCreateFromMemory(
125 engine, // in_Engine
126 __target_exe->name, // in_pBinaryName
127 __target_exe->data, // in_pBinaryBuffer
128 __target_exe->size, // in_BinaryBufferLength,
129 0, // in_Argc
130 0, // in_ppArgv
131 environ == 0, // in_DupEnv
132 environ, // in_ppAdditionalEnv
133 mic_proxy_io, // in_ProxyActive
134 mic_proxy_fs_root, // in_ProxyfsRoot
135 mic_buffer_size, // in_BufferSpace
136 mic_library_path, // in_LibrarySearchPath
137 __target_exe->origin, // in_FileOfOrigin
138 __target_exe->offset, // in_FileOfOriginOffset
139 &m_process // out_pProcess
140 );
141 check_result(res, c_process_create, m_index, res);
142
143 // get function handles
144 res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
145 m_func_names, m_funcs);
146 check_result(res, c_process_get_func_handles, m_index, res);
147
148 // initialize device side
149 pid_t pid = init_device();
150
151 // For IDB
152 if (__dbg_is_attached) {
153 // TODO: we have in-memory executable now.
154 // Check with IDB team what should we provide them now?
155 if (strlen(__target_exe->name) < MAX_TARGET_NAME) {
156 strcpy(__dbg_target_exe_name, __target_exe->name);
157 }
158 __dbg_target_so_pid = pid;
159 __dbg_target_id = m_physical_index;
160 __dbg_target_so_loaded();
161 }
162}
163
164void Engine::fini_process(bool verbose)
165{
166 if (m_process != 0) {
167 uint32_t sig;
168 int8_t ret;
169
170 // destroy target process
171 OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n",
172 m_index);
173
174 COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig);
175 m_process = 0;
176
177 if (res == COI_SUCCESS) {
178 OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n",
179 sig, ret);
180 if (verbose) {
181 if (sig != 0) {
182 LIBOFFLOAD_ERROR(
183 c_mic_process_exit_sig, m_index, sig,
184 c_signal_names[sig >= c_signal_max ? 0 : sig]);
185 }
186 else {
187 LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret);
188 }
189 }
190
191 // for idb
192 if (__dbg_is_attached) {
193 __dbg_target_so_unloaded();
194 }
195 }
196 else {
197 if (verbose) {
198 LIBOFFLOAD_ERROR(c_mic_process_exit, m_index);
199 }
200 }
201 }
202}
203
204void Engine::load_libraries()
205{
206 // load libraries collected so far
207 for (TargetImageList::iterator it = m_images.begin();
208 it != m_images.end(); it++) {
209 OFFLOAD_DEBUG_TRACE(2, "Loading library \"%s\" from %p, size %llu\n",
210 it->name, it->data, it->size);
211
212 // load library to the device
213 COILIBRARY lib;
214 COIRESULT res;
215 res = COI::ProcessLoadLibraryFromMemory(m_process,
216 it->data,
217 it->size,
218 it->name,
219 mic_library_path,
220 it->origin,
221 it->offset,
222 COI_LOADLIBRARY_V1_FLAGS,
223 &lib);
224
225 if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) {
226 check_result(res, c_load_library, m_index, res);
227 }
228 }
229 m_images.clear();
230}
231
232static bool target_entry_cmp(
233 const VarList::BufEntry &l,
234 const VarList::BufEntry &r
235)
236{
237 const char *l_name = reinterpret_cast<const char*>(l.name);
238 const char *r_name = reinterpret_cast<const char*>(r.name);
239 return strcmp(l_name, r_name) < 0;
240}
241
242static bool host_entry_cmp(
243 const VarTable::Entry *l,
244 const VarTable::Entry *r
245)
246{
247 return strcmp(l->name, r->name) < 0;
248}
249
250void Engine::init_ptr_data(void)
251{
252 COIRESULT res;
253 COIEVENT event;
254
255 // Prepare table of host entries
256 std::vector<const VarTable::Entry*> host_table(__offload_vars.begin(),
257 __offload_vars.end());
258
259 // no need to do anything further is host table is empty
260 if (host_table.size() <= 0) {
261 return;
262 }
263
264 // Get var table entries from the target.
265 // First we need to get size for the buffer to copy data
266 struct {
267 int64_t nelems;
268 int64_t length;
269 } params;
270
271 res = COI::PipelineRunFunction(get_pipeline(),
272 m_funcs[c_func_var_table_size],
273 0, 0, 0,
274 0, 0,
275 0, 0,
276 &params, sizeof(params),
277 &event);
278 check_result(res, c_pipeline_run_func, m_index, res);
279
280 res = COI::EventWait(1, &event, -1, 1, 0, 0);
281 check_result(res, c_event_wait, res);
282
283 if (params.length == 0) {
284 return;
285 }
286
287 // create buffer for target entries and copy data to host
288 COIBUFFER buffer;
289 res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1,
290 &m_process, &buffer);
291 check_result(res, c_buf_create, m_index, res);
292
293 COI_ACCESS_FLAGS flags = COI_SINK_WRITE;
294 res = COI::PipelineRunFunction(get_pipeline(),
295 m_funcs[c_func_var_table_copy],
296 1, &buffer, &flags,
297 0, 0,
298 &params.nelems, sizeof(params.nelems),
299 0, 0,
300 &event);
301 check_result(res, c_pipeline_run_func, m_index, res);
302
303 res = COI::EventWait(1, &event, -1, 1, 0, 0);
304 check_result(res, c_event_wait, res);
305
306 // patch names in target data
307 VarList::BufEntry *target_table;
308 COIMAPINSTANCE map_inst;
309 res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0,
310 0, &map_inst,
311 reinterpret_cast<void**>(&target_table));
312 check_result(res, c_buf_map, res);
313
314 VarList::table_patch_names(target_table, params.nelems);
315
316 // and sort entries
317 std::sort(target_table, target_table + params.nelems, target_entry_cmp);
318 std::sort(host_table.begin(), host_table.end(), host_entry_cmp);
319
320 // merge host and target entries and enter matching vars map
321 std::vector<const VarTable::Entry*>::const_iterator hi =
322 host_table.begin();
323 std::vector<const VarTable::Entry*>::const_iterator he =
324 host_table.end();
325 const VarList::BufEntry *ti = target_table;
326 const VarList::BufEntry *te = target_table + params.nelems;
327
328 while (hi != he && ti != te) {
329 int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name));
330 if (res == 0) {
331 // add matching entry to var map
332 std::pair<PtrSet::iterator, bool> res =
333 m_ptr_set.insert(PtrData((*hi)->addr, (*hi)->size));
334
335 // store address for new entries
336 if (res.second) {
337 PtrData *ptr = const_cast<PtrData*>(res.first.operator->());
338 ptr->mic_addr = ti->addr;
339 ptr->is_static = true;
340 }
341
342 hi++;
343 ti++;
344 }
345 else if (res < 0) {
346 hi++;
347 }
348 else {
349 ti++;
350 }
351 }
352
353 // cleanup
354 res = COI::BufferUnmap(map_inst, 0, 0, 0);
355 check_result(res, c_buf_unmap, res);
356
357 res = COI::BufferDestroy(buffer);
358 check_result(res, c_buf_destroy, res);
359}
360
361COIRESULT Engine::compute(
362 const std::list<COIBUFFER> &buffers,
363 const void* data,
364 uint16_t data_size,
365 void* ret,
366 uint16_t ret_size,
367 uint32_t num_deps,
368 const COIEVENT* deps,
369 COIEVENT* event
370) /* const */
371{
372 COIBUFFER *bufs;
373 COI_ACCESS_FLAGS *flags;
374 COIRESULT res;
375
376 // convert buffers list to array
377 int num_bufs = buffers.size();
378 if (num_bufs > 0) {
379 bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER));
380 flags = (COI_ACCESS_FLAGS*) alloca(num_bufs *
381 sizeof(COI_ACCESS_FLAGS));
382
383 int i = 0;
384 for (std::list<COIBUFFER>::const_iterator it = buffers.begin();
385 it != buffers.end(); it++) {
386 bufs[i] = *it;
387
388 // TODO: this should be fixed
389 flags[i++] = COI_SINK_WRITE;
390 }
391 }
392 else {
393 bufs = 0;
394 flags = 0;
395 }
396
397 // start computation
398 res = COI::PipelineRunFunction(get_pipeline(),
399 m_funcs[c_func_compute],
400 num_bufs, bufs, flags,
401 num_deps, deps,
402 data, data_size,
403 ret, ret_size,
404 event);
405 return res;
406}
407
408pid_t Engine::init_device(void)
409{
410 struct init_data {
411 int device_index;
412 int devices_total;
413 int console_level;
414 int offload_report_level;
415 } data;
416 COIRESULT res;
417 COIEVENT event;
418 pid_t pid;
419
420 OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init,
421 "Initializing device with logical index %d "
422 "and physical index %d\n",
423 m_index, m_physical_index);
424
425 // setup misc data
426 data.device_index = m_index;
427 data.devices_total = mic_engines_total;
428 data.console_level = console_enabled;
429 data.offload_report_level = offload_report_level;
430
431 res = COI::PipelineRunFunction(get_pipeline(),
432 m_funcs[c_func_init],
433 0, 0, 0, 0, 0,
434 &data, sizeof(data),
435 &pid, sizeof(pid),
436 &event);
437 check_result(res, c_pipeline_run_func, m_index, res);
438
439 res = COI::EventWait(1, &event, -1, 1, 0, 0);
440 check_result(res, c_event_wait, res);
441
442 OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", pid);
443
444 return pid;
445}
446
447// data associated with each thread
448struct Thread {
449 Thread(long* addr_coipipe_counter) {
450 m_addr_coipipe_counter = addr_coipipe_counter;
451 memset(m_pipelines, 0, sizeof(m_pipelines));
452 }
453
454 ~Thread() {
455#ifndef TARGET_WINNT
456 __sync_sub_and_fetch(m_addr_coipipe_counter, 1);
457#else // TARGET_WINNT
458 _InterlockedDecrement(m_addr_coipipe_counter);
459#endif // TARGET_WINNT
460 for (int i = 0; i < mic_engines_total; i++) {
461 if (m_pipelines[i] != 0) {
462 COI::PipelineDestroy(m_pipelines[i]);
463 }
464 }
465 }
466
467 COIPIPELINE get_pipeline(int index) const {
468 return m_pipelines[index];
469 }
470
471 void set_pipeline(int index, COIPIPELINE pipeline) {
472 m_pipelines[index] = pipeline;
473 }
474
475 AutoSet& get_auto_vars() {
476 return m_auto_vars;
477 }
478
479private:
480 long* m_addr_coipipe_counter;
481 AutoSet m_auto_vars;
482 COIPIPELINE m_pipelines[MIC_ENGINES_MAX];
483};
484
485COIPIPELINE Engine::get_pipeline(void)
486{
487 Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
488 if (thread == 0) {
489 thread = new Thread(&m_proc_number);
490 thread_setspecific(mic_thread_key, thread);
491 }
492
493 COIPIPELINE pipeline = thread->get_pipeline(m_index);
494 if (pipeline == 0) {
495 COIRESULT res;
496 int proc_num;
497
498#ifndef TARGET_WINNT
499 proc_num = __sync_fetch_and_add(&m_proc_number, 1);
500#else // TARGET_WINNT
501 proc_num = _InterlockedIncrement(&m_proc_number);
502#endif // TARGET_WINNT
503
504 if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
505 LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
506 LIBOFFLOAD_ABORT;
507 }
508 // create pipeline for this thread
509 res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
510 check_result(res, c_pipeline_create, m_index, res);
511
512 thread->set_pipeline(m_index, pipeline);
513 }
514 return pipeline;
515}
516
517AutoSet& Engine::get_auto_vars(void)
518{
519 Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
520 if (thread == 0) {
521 thread = new Thread(&m_proc_number);
522 thread_setspecific(mic_thread_key, thread);
523 }
524
525 return thread->get_auto_vars();
526}
527
528void Engine::destroy_thread_data(void *data)
529{
530 delete static_cast<Thread*>(data);
531}