blob: 38d5139212be529df451f6f32ea0745c26e91f74 [file] [log] [blame]
Jim Cownie33f7b242014-04-09 15:40:23 +00001//===----------------------------------------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is dual licensed under the MIT and the University of Illinois Open
6// Source Licenses. See LICENSE.txt for details.
7//
8//===----------------------------------------------------------------------===//
9
10
// Forward declarations: the following two functions are declared as friends
// in offload_engine.h, and Clang does not accept the 'static' specifier
// appearing after a friend declaration, so they are declared static here
// before offload_host.h is included.
static void __offload_init_library_once(void);
static void __offload_fini_library(void);
15
16#include "offload_host.h"
17#ifdef MYO_SUPPORT
18#include "offload_myo_host.h"
19#endif
20
21#include <malloc.h>
22#ifndef TARGET_WINNT
23#include <alloca.h>
24#include <elf.h>
25#endif // TARGET_WINNT
26#include <errno.h>
27#include <fcntl.h>
28#include <stdlib.h>
29#include <string.h>
30#include <sys/stat.h>
31#include <sys/types.h>
32#include <sys/stat.h>
33
34#include <algorithm>
35#include <bitset>
36
37#if defined(HOST_WINNT)
38#define PATH_SEPARATOR ";"
39#else
40#define PATH_SEPARATOR ":"
41#endif
42
// Yields timer_data->offload_number, or 0 when timer_data is null.
// Both the argument and the whole expansion are parenthesized so the macro
// composes correctly inside larger expressions (e.g. GET_OFFLOAD_NUMBER(p) + 1).
// Note: the argument is evaluated twice, so it must be free of side effects.
#define GET_OFFLOAD_NUMBER(timer_data) \
    ((timer_data) ? (timer_data)->offload_number : 0)
45
46#ifdef TARGET_WINNT
// Small subset of ELF declarations for Windows which is needed to compile
// this file. The ELF header is used to understand what binary type is
// contained in the target image - shared library or executable.

// Fixed-width aliases for the ELF64 field types used by Elf64_Ehdr below.
typedef uint16_t Elf64_Half;
typedef uint32_t Elf64_Word;
typedef uint64_t Elf64_Addr;
typedef uint64_t Elf64_Off;

// Number of bytes in the e_ident identification array.
#define EI_NIDENT 16

// e_type values of interest: executable file and shared object.
#define ET_EXEC 2
#define ET_DYN 3

// ELF64 file header; declared here only for TARGET_WINNT builds, where
// <elf.h> is not included.
typedef struct
{
    unsigned char e_ident[EI_NIDENT];
    Elf64_Half e_type;
    Elf64_Half e_machine;
    Elf64_Word e_version;
    Elf64_Addr e_entry;
    Elf64_Off e_phoff;
    Elf64_Off e_shoff;
    Elf64_Word e_flags;
    Elf64_Half e_ehsize;
    Elf64_Half e_phentsize;
    Elf64_Half e_phnum;
    Elf64_Half e_shentsize;
    Elf64_Half e_shnum;
    Elf64_Half e_shstrndx;
} Elf64_Ehdr;
78#endif // TARGET_WINNT
79
// Host console and file logging
const char *prefix;
int console_enabled = 0;
int offload_number = 0;

// Names of the tracing / reporting / timing controls (presumably environment
// variable names, going by the *_envname suffix - confirm at the use sites).
// All three point at string literals, so they are pointers to const char;
// binding a literal to a plain 'char *' is ill-formed in modern C++.
static const char *htrace_envname = "H_TRACE";
static const char *offload_report_envname = "OFFLOAD_REPORT";
static const char *timer_envname = "H_TIME";
88
// Trace information

// Printable names for a variable descriptor's copy direction. The table is
// indexed with VarDesc::direction.bits when descriptors are traced, so the
// order of the entries must follow the numeric direction encoding
// (NOTE(review): encoding is declared outside this file - confirm).
static const char* vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};
// Printable names for a variable descriptor's type. The table is indexed
// with VarDesc::type.src / type.dst (and with the low bits of the pointer
// array flags) when descriptors are traced, so the entry order must follow
// the numeric type codes (NOTE(review): codes are declared outside this
// file - confirm).
static const char* vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array",
    "c_func_ptr_array",
    "c_void_ptr_array",
    "c_string_ptr_array"
};
116
// Global engine table and related process-wide state. All of these start
// out zero / default-initialized here; they are filled in elsewhere.
Engine* mic_engines = 0;
uint32_t mic_engines_total = 0;
pthread_key_t mic_thread_key;
MicEnvVar mic_env_vars;
uint64_t cpu_frequency = 0;

// MIC_STACKSIZE (default: 12 MB)
uint32_t mic_stack_size = 12 * 1024 * 1024;

// MIC_BUFFERSIZE
uint64_t mic_buffer_size = 0;

// MIC_LD_LIBRARY_PATH
char* mic_library_path = 0;

// MIC_PROXY_IO (enabled by default)
bool mic_proxy_io = true;

// MIC_PROXY_FS_ROOT
char* mic_proxy_fs_root = 0;

// Threshold for creating buffers with large pages. A buffer is created
// with the large pages hint if its size reaches the threshold value.
// By default large pages are disabled right now (by setting the default
// value for the threshold to MAX) due to HSD 4114629.
uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL;
static const char *mic_use_2mb_buffers_envname =
    "MIC_USE_2MB_BUFFERS";

// Size threshold (default: 2 MB) associated with MIC_USE_ASYNC_BUFFER_WRITE.
// NOTE(review): presumably the size from which buffer writes are done
// asynchronously - confirm at the use site.
static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024;
static const char *mic_use_async_buffer_write_envname =
    "MIC_USE_ASYNC_BUFFER_WRITE";

// Size threshold (default: 2 MB) associated with MIC_USE_ASYNC_BUFFER_READ.
// NOTE(review): presumably the read-side counterpart of the value above -
// confirm at the use site.
static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024;
static const char *mic_use_async_buffer_read_envname =
    "MIC_USE_ASYNC_BUFFER_READ";

// device initialization type
OffloadInitType __offload_init_type = c_init_on_offload_all;
static const char *offload_init_envname = "OFFLOAD_INIT";

// active wait (enabled by default)
static bool __offload_active_wait = true;
static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";

// OMP_DEFAULT_DEVICE
int __omp_device_num = 0;
static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";

// The list of pending target libraries
static bool __target_libs;
static TargetImageList __target_libs_list;
static mutex_t __target_libs_lock;
// Held for the whole of OffloadDescriptor::offload_stack_memory_manager.
static mutex_t stack_alloc_lock;

// Target executable
TargetImage* __target_exe;
174
175static char * offload_get_src_base(void * ptr, uint8_t type)
176{
177 char *base;
178 if (VAR_TYPE_IS_PTR(type)) {
179 base = *static_cast<char**>(ptr);
180 }
181 else if (VAR_TYPE_IS_SCALAR(type)) {
182 base = static_cast<char*>(ptr);
183 }
184 else if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) {
185 ArrDesc *dvp;
186 if (VAR_TYPE_IS_DV_DATA_SLICE(type)) {
187 const arr_desc *ap = static_cast<const arr_desc*>(ptr);
188 dvp = (type == c_dv_data_slice) ?
189 reinterpret_cast<ArrDesc*>(ap->base) :
190 *reinterpret_cast<ArrDesc**>(ap->base);
191 }
192 else {
193 dvp = (type == c_dv_data) ?
194 static_cast<ArrDesc*>(ptr) :
195 *static_cast<ArrDesc**>(ptr);
196 }
197 base = reinterpret_cast<char*>(dvp->Base);
198 }
199 else {
200 base = NULL;
201 }
202 return base;
203}
204
205void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res)
206{
207 // special case for the 'process died' error
208 if (res == COI_PROCESS_DIED) {
209 m_device.fini_process(true);
210 }
211 else {
212 switch (msg) {
213 case c_buf_create:
214 if (res == COI_OUT_OF_MEMORY) {
215 msg = c_buf_create_out_of_mem;
216 }
217 /* fallthru */
218
219 case c_buf_create_from_mem:
220 case c_buf_get_address:
221 case c_pipeline_create:
222 case c_pipeline_run_func:
223 LIBOFFLOAD_ERROR(msg, m_device.get_logical_index(), res);
224 break;
225
226 case c_buf_read:
227 case c_buf_write:
228 case c_buf_copy:
229 case c_buf_map:
230 case c_buf_unmap:
231 case c_buf_destroy:
232 case c_buf_set_state:
233 LIBOFFLOAD_ERROR(msg, res);
234 break;
235
236 default:
237 break;
238 }
239 }
240
241 exit(1);
242}
243
244_Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const
245{
246 switch (res) {
247 case COI_SUCCESS:
248 return OFFLOAD_SUCCESS;
249
250 case COI_PROCESS_DIED:
251 return OFFLOAD_PROCESS_DIED;
252
253 case COI_OUT_OF_MEMORY:
254 return OFFLOAD_OUT_OF_MEMORY;
255
256 default:
257 return OFFLOAD_ERROR;
258 }
259}
260
261bool OffloadDescriptor::alloc_ptr_data(
262 PtrData* &ptr_data,
263 void *base,
264 int64_t disp,
265 int64_t size,
266 int64_t alloc_disp,
267 int align
268)
269{
270 // total length of base
271 int64_t length = disp + size;
272 bool is_new;
273
274 OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
275 base, length);
276
277 // add new entry
278 ptr_data = m_device.insert_ptr_data(base, length, is_new);
279 if (is_new) {
280
281 OFFLOAD_TRACE(3, "Added new association\n");
282
283 if (length > 0) {
284 OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
285 COIRESULT res;
286
287 // align should be a power of 2
288 if (align > 0 && (align & (align - 1)) == 0) {
289 // offset within mic_buffer. Can do offset optimization
290 // only when source address alignment satisfies requested
291 // alignment on the target (cq172736).
292 if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
293 ptr_data->mic_offset = reinterpret_cast<intptr_t>(base) & 4095;
294 }
295 }
296
297 // buffer size and flags
298 uint64_t buffer_size = length + ptr_data->mic_offset;
299 uint32_t buffer_flags = 0;
300
301 // create buffer with large pages if data length exceeds
302 // large page threshold
303 if (length >= __offload_use_2mb_buffers) {
304 buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
305 }
306
307 // create CPU buffer
308 OFFLOAD_DEBUG_TRACE_1(3,
309 GET_OFFLOAD_NUMBER(get_timer_data()),
310 c_offload_create_buf_host,
311 "Creating buffer from source memory %p, "
312 "length %lld\n", base, length);
313
314 // result is not checked because we can continue without cpu
315 // buffer. In this case we will use COIBufferRead/Write instead
316 // of COIBufferCopy.
317 COI::BufferCreateFromMemory(length,
318 COI_BUFFER_NORMAL,
319 0,
320 base,
321 1,
322 &m_device.get_process(),
323 &ptr_data->cpu_buf);
324
325 OFFLOAD_DEBUG_TRACE_1(3,
326 GET_OFFLOAD_NUMBER(get_timer_data()),
327 c_offload_create_buf_mic,
328 "Creating buffer for sink: size %lld, offset %d, "
329 "flags =0x%x\n", buffer_size - alloc_disp,
330 ptr_data->mic_offset, buffer_flags);
331
332 // create MIC buffer
333 res = COI::BufferCreate(buffer_size - alloc_disp,
334 COI_BUFFER_NORMAL,
335 buffer_flags,
336 0,
337 1,
338 &m_device.get_process(),
339 &ptr_data->mic_buf);
340 if (res != COI_SUCCESS) {
341 if (m_status != 0) {
342 m_status->result = translate_coi_error(res);
343 }
344 else if (m_is_mandatory) {
345 report_coi_error(c_buf_create, res);
346 }
347 ptr_data->alloc_ptr_data_lock.unlock();
348 return false;
349 }
350
351 // make buffer valid on the device.
352 res = COI::BufferSetState(ptr_data->mic_buf,
353 m_device.get_process(),
354 COI_BUFFER_VALID,
355 COI_BUFFER_NO_MOVE,
356 0, 0, 0);
357 if (res != COI_SUCCESS) {
358 if (m_status != 0) {
359 m_status->result = translate_coi_error(res);
360 }
361 else if (m_is_mandatory) {
362 report_coi_error(c_buf_set_state, res);
363 }
364 ptr_data->alloc_ptr_data_lock.unlock();
365 return false;
366 }
367
368 res = COI::BufferSetState(ptr_data->mic_buf,
369 COI_PROCESS_SOURCE,
370 COI_BUFFER_INVALID,
371 COI_BUFFER_NO_MOVE,
372 0, 0, 0);
373 if (res != COI_SUCCESS) {
374 if (m_status != 0) {
375 m_status->result = translate_coi_error(res);
376 }
377 else if (m_is_mandatory) {
378 report_coi_error(c_buf_set_state, res);
379 }
380 ptr_data->alloc_ptr_data_lock.unlock();
381 return false;
382 }
383 }
384
385 ptr_data->alloc_disp = alloc_disp;
386 ptr_data->alloc_ptr_data_lock.unlock();
387 }
388 else {
389 mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);
390
391 OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
392 "is_static %d\n",
393 ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
394 ptr_data->is_static);
395
396 // This is not a new entry. Make sure that provided address range fits
397 // into existing one.
398 MemRange addr_range(base, length - ptr_data->alloc_disp);
399 if (!ptr_data->cpu_addr.contains(addr_range)) {
400 LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
401 exit(1);
402 }
403
404 // if the entry is associated with static data it may not have buffers
405 // created because they are created on demand.
406 if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
407 return false;
408 }
409 }
410
411 return true;
412}
413
414bool OffloadDescriptor::find_ptr_data(
415 PtrData* &ptr_data,
416 void *base,
417 int64_t disp,
418 int64_t size,
419 bool report_error
420)
421{
422 // total length of base
423 int64_t length = disp + size;
424
425 OFFLOAD_TRACE(3, "Looking for association for data: addr %p, "
426 "length %lld\n", base, length);
427
428 // find existing association in pointer table
429 ptr_data = m_device.find_ptr_data(base);
430 if (ptr_data == 0) {
431 if (report_error) {
432 LIBOFFLOAD_ERROR(c_no_ptr_data, base);
433 exit(1);
434 }
435 OFFLOAD_TRACE(3, "Association does not exist\n");
436 return true;
437 }
438
439 OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
440 ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
441 ptr_data->is_static);
442
443 // make sure that provided address range fits into existing one
444 MemRange addr_range(base, length);
445 if (!ptr_data->cpu_addr.contains(addr_range)) {
446 if (report_error) {
447 LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
448 exit(1);
449 }
450 OFFLOAD_TRACE(3, "Existing association partially overlaps with "
451 "data address range\n");
452 ptr_data = 0;
453 return true;
454 }
455
456 // if the entry is associated with static data it may not have buffers
457 // created because they are created on demand.
458 if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
459 return false;
460 }
461
462 return true;
463}
464
465bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data)
466{
467 OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
468
469 if (ptr_data->cpu_buf == 0) {
470 OFFLOAD_TRACE(3, "Creating buffer from source memory %llx\n",
471 ptr_data->cpu_addr.start());
472
473 COIRESULT res = COI::BufferCreateFromMemory(
474 ptr_data->cpu_addr.length(),
475 COI_BUFFER_NORMAL,
476 0,
477 const_cast<void*>(ptr_data->cpu_addr.start()),
478 1, &m_device.get_process(),
479 &ptr_data->cpu_buf);
480
481 if (res != COI_SUCCESS) {
482 if (m_status != 0) {
483 m_status->result = translate_coi_error(res);
484 return false;
485 }
486 report_coi_error(c_buf_create_from_mem, res);
487 }
488 }
489
490 if (ptr_data->mic_buf == 0) {
491 OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
492 ptr_data->mic_addr);
493
494 COIRESULT res = COI::BufferCreateFromMemory(
495 ptr_data->cpu_addr.length(),
496 COI_BUFFER_NORMAL,
497 COI_SINK_MEMORY,
498 reinterpret_cast<void*>(ptr_data->mic_addr),
499 1, &m_device.get_process(),
500 &ptr_data->mic_buf);
501
502 if (res != COI_SUCCESS) {
503 if (m_status != 0) {
504 m_status->result = translate_coi_error(res);
505 return false;
506 }
507 report_coi_error(c_buf_create_from_mem, res);
508 }
509 }
510
511 return true;
512}
513
514bool OffloadDescriptor::init_mic_address(PtrData *ptr_data)
515{
516 if (ptr_data->mic_buf != 0 && ptr_data->mic_addr == 0) {
517 COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
518 &ptr_data->mic_addr);
519 if (res != COI_SUCCESS) {
520 if (m_status != 0) {
521 m_status->result = translate_coi_error(res);
522 }
523 else if (m_is_mandatory) {
524 report_coi_error(c_buf_get_address, res);
525 }
526 return false;
527 }
528 }
529 return true;
530}
531
532bool OffloadDescriptor::nullify_target_stack(
533 COIBUFFER targ_buf,
534 uint64_t size
535)
536{
537 char * ptr = (char*)malloc(size);
538 COIRESULT res;
539
540 memset(ptr, 0, size);
541 res = COI::BufferWrite(
542 targ_buf,
543 0,
544 ptr,
545 size,
546 COI_COPY_UNSPECIFIED,
547 0, 0, 0);
548 free(ptr);
549 if (res != COI_SUCCESS) {
550 if (m_status != 0) {
551 m_status->result = translate_coi_error(res);
552 return false;
553 }
554 report_coi_error(c_buf_write, res);
555 }
556 return true;
557}
558
559bool OffloadDescriptor::offload_stack_memory_manager(
560 const void * stack_begin,
561 int routine_id,
562 int buf_size,
563 int align,
564 bool *is_new)
565{
566 mutex_locker_t locker(stack_alloc_lock);
567
568 PersistData * new_el;
569 PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
570 PersistDataList::iterator it_end;
571 int erase = 0;
572
573 *is_new = false;
574
575 for (PersistDataList::iterator it = m_device.m_persist_list.begin();
576 it != m_device.m_persist_list.end(); it++) {
577 PersistData cur_el = *it;
578
579 if (stack_begin > it->stack_cpu_addr) {
580 // this stack data must be destroyed
581 m_destroy_stack.push_front(cur_el.stack_ptr_data);
582 it_end = it;
583 erase++;
584 }
585 else if (stack_begin == it->stack_cpu_addr) {
586 if (routine_id != it-> routine_id) {
587 // this stack data must be destroyed
588 m_destroy_stack.push_front(cur_el.stack_ptr_data);
589 it_end = it;
590 erase++;
591 break;
592 }
593 else {
594 // stack data is reused
595 m_stack_ptr_data = it->stack_ptr_data;
596 if (erase > 0) {
597 // all obsolete stack sections must be erased from the list
598 m_device.m_persist_list.erase(it_begin, ++it_end);
599
600 m_in_datalen +=
601 erase * sizeof(new_el->stack_ptr_data->mic_addr);
602 }
603 OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
604 m_stack_ptr_data->mic_addr);
605 return true;
606 }
607 }
608 else if (stack_begin < it->stack_cpu_addr) {
609 break;
610 }
611 }
612
613 if (erase > 0) {
614 // all obsolete stack sections must be erased from the list
615 m_device.m_persist_list.erase(it_begin, ++it_end);
616 m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);
617 }
618 // new stack table is created
619 new_el = new PersistData(stack_begin, routine_id, buf_size);
620 // create MIC buffer
621 COIRESULT res;
622 uint32_t buffer_flags = 0;
623
624 // create buffer with large pages if data length exceeds
625 // large page threshold
626 if (buf_size >= __offload_use_2mb_buffers) {
627 buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
628 }
629 res = COI::BufferCreate(buf_size,
630 COI_BUFFER_NORMAL,
631 buffer_flags,
632 0,
633 1,
634 &m_device.get_process(),
635 &new_el->stack_ptr_data->mic_buf);
636 if (res != COI_SUCCESS) {
637 if (m_status != 0) {
638 m_status->result = translate_coi_error(res);
639 }
640 else if (m_is_mandatory) {
641 report_coi_error(c_buf_create, res);
642 }
643 return false;
644 }
645 // make buffer valid on the device.
646 res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
647 m_device.get_process(),
648 COI_BUFFER_VALID,
649 COI_BUFFER_NO_MOVE,
650 0, 0, 0);
651 if (res != COI_SUCCESS) {
652 if (m_status != 0) {
653 m_status->result = translate_coi_error(res);
654 }
655 else if (m_is_mandatory) {
656 report_coi_error(c_buf_set_state, res);
657 }
658 return false;
659 }
660 res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
661 COI_PROCESS_SOURCE,
662 COI_BUFFER_INVALID,
663 COI_BUFFER_NO_MOVE,
664 0, 0, 0);
665 if (res != COI_SUCCESS) {
666 if (m_status != 0) {
667 m_status->result = translate_coi_error(res);
668 }
669 else if (m_is_mandatory) {
670 report_coi_error(c_buf_set_state, res);
671 }
672 return false;
673 }
674 // persistence algorithm requires target stack initialy to be nullified
675 if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
676 return false;
677 }
678
679 m_stack_ptr_data = new_el->stack_ptr_data;
680 init_mic_address(m_stack_ptr_data);
681 OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n",
682 m_stack_ptr_data->mic_addr);
683 m_device.m_persist_list.push_front(*new_el);
684 init_mic_address(new_el->stack_ptr_data);
685 *is_new = true;
686 return true;
687}
688
689bool OffloadDescriptor::setup_descriptors(
690 VarDesc *vars,
691 VarDesc2 *vars2,
692 int vars_total,
693 int entry_id,
694 const void *stack_addr
695)
696{
697 COIRESULT res;
698
699 OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers);
700
701 // make a copy of variable descriptors
702 m_vars_total = vars_total;
703 if (vars_total > 0) {
704 m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc));
705 memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc));
706 m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra));
707 }
708
709 // dependencies
710 m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * (m_vars_total + 1));
711 if (m_vars_total > 0) {
712 m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_vars_total);
713 }
714
715 // copyin/copyout data length
716 m_in_datalen = 0;
717 m_out_datalen = 0;
718
719 // First pass over variable descriptors
720 // - Calculate size of the input and output non-pointer data
721 // - Allocate buffers for input and output pointers
722 for (int i = 0; i < m_vars_total; i++) {
723 void* alloc_base = NULL;
724 int64_t alloc_disp = 0;
725 int64_t alloc_size;
726 bool src_is_for_mic = (m_vars[i].direction.out ||
727 m_vars[i].into == NULL);
728
729 const char *var_sname = "";
730 if (vars2 != NULL && i < vars_total) {
731 if (vars2[i].sname != NULL) {
732 var_sname = vars2[i].sname;
733 }
734 }
735 OFFLOAD_TRACE(2, " VarDesc %d, var=%s, %s, %s\n",
736 i, var_sname,
737 vardesc_direction_as_string[m_vars[i].direction.bits],
738 vardesc_type_as_string[m_vars[i].type.src]);
739 if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) {
740 OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
741 vardesc_type_as_string[m_vars[i].type.dst]);
742 }
743 OFFLOAD_TRACE(2,
744 " type_src=%d, type_dstn=%d, direction=%d, "
745 "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
746 "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n",
747 m_vars[i].type.src,
748 m_vars[i].type.dst,
749 m_vars[i].direction.bits,
750 m_vars[i].alloc_if,
751 m_vars[i].free_if,
752 m_vars[i].align,
753 m_vars[i].mic_offset,
754 m_vars[i].flags.bits,
755 m_vars[i].offset,
756 m_vars[i].size,
757 m_vars[i].count,
758 m_vars[i].ptr,
759 m_vars[i].into);
760
761 if (m_vars[i].alloc != NULL) {
762 // array descriptor
763 const arr_desc *ap =
764 static_cast<const arr_desc*>(m_vars[i].alloc);
765
766 // debug dump
767 __arr_desc_dump(" ", "ALLOC", ap, 0);
768
769 __arr_data_offset_and_length(ap, alloc_disp, alloc_size);
770
771 alloc_base = reinterpret_cast<void*>(ap->base);
772 }
773
774 m_vars_extra[i].cpu_disp = 0;
775 m_vars_extra[i].cpu_offset = 0;
776 m_vars_extra[i].src_data = 0;
777 m_vars_extra[i].read_rng_src = 0;
778 m_vars_extra[i].read_rng_dst = 0;
779 // flag is_arr_ptr_el is 1 only for var_descs generated
780 // for c_data_ptr_array type
781 if (i < vars_total) {
782 m_vars_extra[i].is_arr_ptr_el = 0;
783 }
784
785 switch (m_vars[i].type.src) {
786 case c_data_ptr_array:
787 {
788 const arr_desc *ap;
789 const VarDesc3 *vd3 =
790 static_cast<const VarDesc3*>(m_vars[i].ptr);
791 int flags = vd3->array_fields;
792 OFFLOAD_TRACE(2,
793 " pointer array flags = %04x\n", flags);
794 OFFLOAD_TRACE(2,
795 " pointer array type is %s\n",
796 vardesc_type_as_string[flags & 0x3f]);
797 ap = static_cast<const arr_desc*>(vd3->ptr_array);
798 __arr_desc_dump(" ", "ptr array", ap, 0);
799 if (m_vars[i].into) {
800 ap = static_cast<const arr_desc*>(m_vars[i].into);
801 __arr_desc_dump(
802 " ", "into array", ap, 0);
803 }
804 if ((flags & (1<<flag_align_is_array)) != 0) {
805 ap = static_cast<const arr_desc*>(vd3->align_array);
806 __arr_desc_dump(
807 " ", "align array", ap, 0);
808 }
809 if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
810 ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
811 __arr_desc_dump(
812 " ", "alloc_if array", ap, 0);
813 }
814 if ((flags & (1<<flag_free_if_is_array)) != 0) {
815 ap = static_cast<const arr_desc*>(vd3->free_if_array);
816 __arr_desc_dump(
817 " ", "free_if array", ap, 0);
818 }
819 if ((flags & (1<<flag_extent_start_is_array)) != 0) {
820 ap = static_cast<const arr_desc*>(vd3->extent_start);
821 __arr_desc_dump(
822 " ", "extent_start array", ap, 0);
823 } else if ((flags &
824 (1<<flag_extent_start_is_scalar)) != 0) {
825 OFFLOAD_TRACE(2,
826 " extent_start scalar = %d\n",
827 (int64_t)vd3->extent_start);
828 }
829 if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
830 ap = static_cast<const arr_desc*>
831 (vd3->extent_elements);
832 __arr_desc_dump(
833 " ", "extent_elements array", ap, 0);
834 } else if ((flags &
835 (1<<flag_extent_elements_is_scalar)) != 0) {
836 OFFLOAD_TRACE(2,
837 " extent_elements scalar = %d\n",
838 (int64_t)vd3->extent_elements);
839 }
840 if ((flags & (1<<flag_into_start_is_array)) != 0) {
841 ap = static_cast<const arr_desc*>(vd3->into_start);
842 __arr_desc_dump(
843 " ", "into_start array", ap, 0);
844 } else if ((flags &
845 (1<<flag_into_start_is_scalar)) != 0) {
846 OFFLOAD_TRACE(2,
847 " into_start scalar = %d\n",
848 (int64_t)vd3->into_start);
849 }
850 if ((flags & (1<<flag_into_elements_is_array)) != 0) {
851 ap = static_cast<const arr_desc*>(vd3->into_elements);
852 __arr_desc_dump(
853 " ", "into_elements array", ap, 0);
854 } else if ((flags &
855 (1<<flag_into_elements_is_scalar)) != 0) {
856 OFFLOAD_TRACE(2,
857 " into_elements scalar = %d\n",
858 (int64_t)vd3->into_elements);
859 }
860 if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
861 ap = static_cast<const arr_desc*>(vd3->alloc_start);
862 __arr_desc_dump(
863 " ", "alloc_start array", ap, 0);
864 } else if ((flags &
865 (1<<flag_alloc_start_is_scalar)) != 0) {
866 OFFLOAD_TRACE(2,
867 " alloc_start scalar = %d\n",
868 (int64_t)vd3->alloc_start);
869 }
870 if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
871 ap = static_cast<const arr_desc*>(vd3->alloc_elements);
872 __arr_desc_dump(
873 " ", "alloc_elements array", ap, 0);
874 } else if ((flags &
875 (1<<flag_alloc_elements_is_scalar)) != 0) {
876 OFFLOAD_TRACE(2,
877 " alloc_elements scalar = %d\n",
878 (int64_t)vd3->alloc_elements);
879 }
880 }
881 if (!gen_var_descs_for_pointer_array(i)) {
882 return false;
883 }
884 break;
885
886 case c_data:
887 case c_void_ptr:
888 case c_cean_var:
889 // In all uses later
890 // VarDesc.size will have the length of the data to be
891 // transferred
892 // VarDesc.disp will have an offset from base
893 if (m_vars[i].type.src == c_cean_var) {
894 // array descriptor
895 const arr_desc *ap =
896 static_cast<const arr_desc*>(m_vars[i].ptr);
897
898 // debug dump
899 __arr_desc_dump("", "IN/OUT", ap, 0);
900
901 // offset and length are derived from the array descriptor
902 __arr_data_offset_and_length(ap, m_vars[i].disp,
903 m_vars[i].size);
904 if (!is_arr_desc_contiguous(ap)) {
905 m_vars[i].flags.is_noncont_src = 1;
906 m_vars_extra[i].read_rng_src =
907 init_read_ranges_arr_desc(ap);
908 }
909 // all necessary information about length and offset is
910 // transferred in var descriptor. There is no need to send
911 // array descriptor to the target side.
912 m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
913 }
914 else {
915 m_vars[i].size *= m_vars[i].count;
916 m_vars[i].disp = 0;
917 }
918
919 if (m_vars[i].direction.bits) {
920 // make sure that transfer size > 0
921 if (m_vars[i].size <= 0) {
922 LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size);
923 exit(1);
924 }
925
926 if (m_vars[i].flags.is_static) {
927 PtrData *ptr_data;
928
929 // find data associated with variable
930 if (!find_ptr_data(ptr_data,
931 m_vars[i].ptr,
932 m_vars[i].disp,
933 m_vars[i].size,
934 false)) {
935 return false;
936 }
937
938 if (ptr_data != 0) {
939 // offset to base from the beginning of the buffer
940 // memory
941 m_vars[i].offset =
942 (char*) m_vars[i].ptr -
943 (char*) ptr_data->cpu_addr.start();
944 }
945 else {
946 m_vars[i].flags.is_static = false;
947 if (m_vars[i].into == NULL) {
948 m_vars[i].flags.is_static_dstn = false;
949 }
950 }
951 m_vars_extra[i].src_data = ptr_data;
952 }
953
954 if (m_is_openmp) {
955 if (m_vars[i].flags.is_static) {
956 // Static data is transferred only by omp target
957 // update construct which passes zeros for
958 // alloc_if and free_if.
959 if (m_vars[i].alloc_if || m_vars[i].free_if) {
960 m_vars[i].direction.bits = c_parameter_nocopy;
961 }
962 }
963 else {
964 AutoData *auto_data;
965 if (m_vars[i].alloc_if) {
966 auto_data = m_device.insert_auto_data(
967 m_vars[i].ptr, m_vars[i].size);
968 auto_data->add_reference();
969 }
970 else {
971 // TODO: what should be done if var is not in
972 // the table?
973 auto_data = m_device.find_auto_data(
974 m_vars[i].ptr);
975 }
976
977 // For automatic variables data is transferred
978 // only if alloc_if == 0 && free_if == 0
979 // or reference count is 1
980 if ((m_vars[i].alloc_if || m_vars[i].free_if) &&
981 auto_data != 0 &&
982 auto_data->get_reference() != 1) {
983 m_vars[i].direction.bits = c_parameter_nocopy;
984 }
985
986 // save data for later use
987 m_vars_extra[i].auto_data = auto_data;
988 }
989 }
990
991 if (m_vars[i].direction.in &&
992 !m_vars[i].flags.is_static) {
993 m_in_datalen += m_vars[i].size;
994
995 // for non-static target destination defined as CEAN
996 // expression we pass to target its size and dist
997 if (m_vars[i].into == NULL &&
998 m_vars[i].type.src == c_cean_var) {
999 m_in_datalen += 2 * sizeof(uint64_t);
1000 }
1001 m_need_runfunction = true;
1002 }
1003 if (m_vars[i].direction.out &&
1004 !m_vars[i].flags.is_static) {
1005 m_out_datalen += m_vars[i].size;
1006 m_need_runfunction = true;
1007 }
1008 }
1009 break;
1010
1011 case c_dv:
1012 if (m_vars[i].direction.bits ||
1013 m_vars[i].alloc_if ||
1014 m_vars[i].free_if) {
1015 ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr);
1016
1017 // debug dump
1018 __dv_desc_dump("IN/OUT", dvp);
1019
1020 // send dope vector contents excluding base
1021 m_in_datalen += m_vars[i].size - sizeof(uint64_t);
1022 m_need_runfunction = true;
1023 }
1024 break;
1025
1026 case c_string_ptr:
1027 if ((m_vars[i].direction.bits ||
1028 m_vars[i].alloc_if ||
1029 m_vars[i].free_if) &&
1030 m_vars[i].size == 0) {
1031 m_vars[i].size = 1;
1032 m_vars[i].count =
1033 strlen(*static_cast<char**>(m_vars[i].ptr)) + 1;
1034 }
1035 /* fallthru */
1036
1037 case c_data_ptr:
1038 if (m_vars[i].flags.is_stack_buf &&
1039 !m_vars[i].direction.bits &&
1040 m_vars[i].alloc_if) {
1041 // this var_desc is for stack buffer
1042 bool is_new;
1043
1044 if (!offload_stack_memory_manager(
1045 stack_addr, entry_id,
1046 m_vars[i].count, m_vars[i].align, &is_new)) {
1047 return false;
1048 }
1049 if (is_new) {
1050 m_compute_buffers.push_back(
1051 m_stack_ptr_data->mic_buf);
1052 m_device.m_persist_list.front().cpu_stack_addr =
1053 static_cast<char*>(m_vars[i].ptr);
1054 }
1055 else {
1056 m_vars[i].flags.sink_addr = 1;
1057 m_in_datalen += sizeof(m_stack_ptr_data->mic_addr);
1058 }
1059 m_vars[i].size = m_destroy_stack.size();
1060 m_vars_extra[i].src_data = m_stack_ptr_data;
1061 // need to add reference for buffer
1062 m_need_runfunction = true;
1063 break;
1064 }
1065 /* fallthru */
1066
1067 case c_cean_var_ptr:
1068 case c_dv_ptr:
1069 if (m_vars[i].type.src == c_cean_var_ptr) {
1070 // array descriptor
1071 const arr_desc *ap =
1072 static_cast<const arr_desc*>(m_vars[i].ptr);
1073
1074 // debug dump
1075 __arr_desc_dump("", "IN/OUT", ap, 1);
1076
1077 // offset and length are derived from the array descriptor
1078 __arr_data_offset_and_length(ap, m_vars[i].disp,
1079 m_vars[i].size);
1080
1081 if (!is_arr_desc_contiguous(ap)) {
1082 m_vars[i].flags.is_noncont_src = 1;
1083 m_vars_extra[i].read_rng_src =
1084 init_read_ranges_arr_desc(ap);
1085 }
1086 // all necessary information about length and offset is
1087 // transferred in var descriptor. There is no need to send
1088 // array descriptor to the target side.
1089 m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
1090 }
1091 else if (m_vars[i].type.src == c_dv_ptr) {
1092 // need to send DV to the device unless it is 'nocopy'
1093 if (m_vars[i].direction.bits ||
1094 m_vars[i].alloc_if ||
1095 m_vars[i].free_if) {
1096 ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr);
1097
1098 // debug dump
1099 __dv_desc_dump("IN/OUT", dvp);
1100
1101 m_vars[i].direction.bits = c_parameter_in;
1102 }
1103
1104 // no displacement
1105 m_vars[i].disp = 0;
1106 }
1107 else {
1108 // c_data_ptr or c_string_ptr
1109 m_vars[i].size *= m_vars[i].count;
1110 m_vars[i].disp = 0;
1111 }
1112
1113 if (m_vars[i].direction.bits ||
1114 m_vars[i].alloc_if ||
1115 m_vars[i].free_if) {
1116 PtrData *ptr_data;
1117
1118 // check that buffer length >= 0
1119 if (m_vars[i].alloc_if &&
1120 m_vars[i].disp + m_vars[i].size < 0) {
1121 LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
1122 exit(1);
1123 }
1124
1125 // base address
1126 void *base = *static_cast<void**>(m_vars[i].ptr);
1127
1128 // allocate buffer if we have no INTO and don't need
1129 // allocation for the ptr at target
1130 if (src_is_for_mic) {
1131 if (m_vars[i].flags.is_stack_buf) {
1132 // for stack persistent objects ptr data is created
1133 // by var_desc with number 0.
1134 // Its ptr_data is stored at m_stack_ptr_data
1135 ptr_data = m_stack_ptr_data;
1136 m_vars[i].flags.sink_addr = 1;
1137 }
1138 else if (m_vars[i].alloc_if) {
1139 // add new entry
1140 if (!alloc_ptr_data(
1141 ptr_data,
1142 base,
1143 (alloc_base != NULL) ?
1144 alloc_disp : m_vars[i].disp,
1145 (alloc_base != NULL) ?
1146 alloc_size : m_vars[i].size,
1147 alloc_disp,
1148 (alloc_base != NULL) ?
1149 0 : m_vars[i].align)) {
1150 return false;
1151 }
1152
1153 if (ptr_data->add_reference() == 0 &&
1154 ptr_data->mic_buf != 0) {
1155 // add buffer to the list of buffers that
1156 // are passed to dispatch call
1157 m_compute_buffers.push_back(
1158 ptr_data->mic_buf);
1159 }
1160 else {
1161 // will send buffer address to device
1162 m_vars[i].flags.sink_addr = 1;
1163 }
1164
1165 if (!ptr_data->is_static) {
1166 // need to add reference for buffer
1167 m_need_runfunction = true;
1168 }
1169 }
1170 else {
1171 bool error_if_not_found = true;
1172 if (m_is_openmp) {
1173 // For omp target update variable is ignored
1174 // if it does not exist.
1175 if (!m_vars[i].alloc_if &&
1176 !m_vars[i].free_if) {
1177 error_if_not_found = false;
1178 }
1179 }
1180
1181 // use existing association from pointer table
1182 if (!find_ptr_data(ptr_data,
1183 base,
1184 m_vars[i].disp,
1185 m_vars[i].size,
1186 error_if_not_found)) {
1187 return false;
1188 }
1189
1190 if (m_is_openmp) {
1191 // make var nocopy if it does not exist
1192 if (ptr_data == 0) {
1193 m_vars[i].direction.bits =
1194 c_parameter_nocopy;
1195 }
1196 }
1197
1198 if (ptr_data != 0) {
1199 m_vars[i].flags.sink_addr = 1;
1200 }
1201 }
1202
1203 if (ptr_data != 0) {
1204 if (m_is_openmp) {
1205 // data is transferred only if
1206 // alloc_if == 0 && free_if == 0
1207 // or reference count is 1
1208 if ((m_vars[i].alloc_if ||
1209 m_vars[i].free_if) &&
1210 ptr_data->get_reference() != 1) {
1211 m_vars[i].direction.bits =
1212 c_parameter_nocopy;
1213 }
1214 }
1215
1216 if (ptr_data->alloc_disp != 0) {
1217 m_vars[i].flags.alloc_disp = 1;
1218 m_in_datalen += sizeof(alloc_disp);
1219 }
1220
1221 if (m_vars[i].flags.sink_addr) {
1222 // get buffers's address on the sink
1223 if (!init_mic_address(ptr_data)) {
1224 return false;
1225 }
1226
1227 m_in_datalen += sizeof(ptr_data->mic_addr);
1228 }
1229
1230 if (!ptr_data->is_static && m_vars[i].free_if) {
1231 // need to decrement buffer reference on target
1232 m_need_runfunction = true;
1233 }
1234
1235 // offset to base from the beginning of the buffer
1236 // memory
1237 m_vars[i].offset = (char*) base -
1238 (char*) ptr_data->cpu_addr.start();
1239
1240 // copy other pointer properties to var descriptor
1241 m_vars[i].mic_offset = ptr_data->mic_offset;
1242 m_vars[i].flags.is_static = ptr_data->is_static;
1243 }
1244 }
1245 else {
1246 if (!find_ptr_data(ptr_data,
1247 base,
1248 m_vars[i].disp,
1249 m_vars[i].size,
1250 false)) {
1251 return false;
1252 }
1253 if (ptr_data) {
1254 m_vars[i].offset =
1255 (char*) base -
1256 (char*) ptr_data->cpu_addr.start();
1257 }
1258 }
1259
1260 // save pointer data
1261 m_vars_extra[i].src_data = ptr_data;
1262 }
1263 break;
1264
1265 case c_func_ptr:
1266 if (m_vars[i].direction.in) {
1267 m_in_datalen += __offload_funcs.max_name_length();
1268 }
1269 if (m_vars[i].direction.out) {
1270 m_out_datalen += __offload_funcs.max_name_length();
1271 }
1272 m_need_runfunction = true;
1273 break;
1274
1275 case c_dv_data:
1276 case c_dv_ptr_data:
1277 case c_dv_data_slice:
1278 case c_dv_ptr_data_slice:
1279 ArrDesc *dvp;
1280 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
1281 const arr_desc *ap;
1282 ap = static_cast<const arr_desc*>(m_vars[i].ptr);
1283
1284 dvp = (m_vars[i].type.src == c_dv_data_slice) ?
1285 reinterpret_cast<ArrDesc*>(ap->base) :
1286 *reinterpret_cast<ArrDesc**>(ap->base);
1287 }
1288 else {
1289 dvp = (m_vars[i].type.src == c_dv_data) ?
1290 static_cast<ArrDesc*>(m_vars[i].ptr) :
1291 *static_cast<ArrDesc**>(m_vars[i].ptr);
1292 }
1293
1294 // if allocatable dope vector isn't allocated don't
1295 // transfer its data
1296 if (!__dv_is_allocated(dvp)) {
1297 m_vars[i].direction.bits = c_parameter_nocopy;
1298 m_vars[i].alloc_if = 0;
1299 m_vars[i].free_if = 0;
1300 }
1301 if (m_vars[i].direction.bits ||
1302 m_vars[i].alloc_if ||
1303 m_vars[i].free_if) {
1304 const arr_desc *ap;
1305
1306 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
1307 ap = static_cast<const arr_desc*>(m_vars[i].ptr);
1308
1309 // debug dump
1310 __arr_desc_dump("", "IN/OUT", ap, 0);
1311 }
1312 if (!__dv_is_contiguous(dvp)) {
1313 m_vars[i].flags.is_noncont_src = 1;
1314 m_vars_extra[i].read_rng_src =
1315 init_read_ranges_dv(dvp);
1316 }
1317
1318 // size and displacement
1319 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
1320 // offset and length are derived from the
1321 // array descriptor
1322 __arr_data_offset_and_length(ap,
1323 m_vars[i].disp,
1324 m_vars[i].size);
1325 if (m_vars[i].direction.bits) {
1326 if (!is_arr_desc_contiguous(ap)) {
1327 if (m_vars[i].flags.is_noncont_src) {
1328 LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
1329 return false;
1330 }
1331 m_vars[i].flags.is_noncont_src = 1;
1332 m_vars_extra[i].read_rng_src =
1333 init_read_ranges_arr_desc(ap);
1334 }
1335 }
1336 }
1337 else {
1338 if (m_vars[i].flags.has_length) {
1339 m_vars[i].size =
1340 __dv_data_length(dvp, m_vars[i].count);
1341 }
1342 else {
1343 m_vars[i].size = __dv_data_length(dvp);
1344 }
1345 m_vars[i].disp = 0;
1346 }
1347
1348 // check that length >= 0
1349 if (m_vars[i].alloc_if &&
1350 (m_vars[i].disp + m_vars[i].size < 0)) {
1351 LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
1352 exit(1);
1353 }
1354
1355 // base address
1356 void *base = reinterpret_cast<void*>(dvp->Base);
1357 PtrData *ptr_data;
1358
1359 // allocate buffer if we have no INTO and don't need
1360 // allocation for the ptr at target
1361 if (src_is_for_mic) {
1362 if (m_vars[i].alloc_if) {
1363 // add new entry
1364 if (!alloc_ptr_data(
1365 ptr_data,
1366 base,
1367 (alloc_base != NULL) ?
1368 alloc_disp : m_vars[i].disp,
1369 (alloc_base != NULL) ?
1370 alloc_size : m_vars[i].size,
1371 alloc_disp,
1372 (alloc_base != NULL) ?
1373 0 : m_vars[i].align)) {
1374 return false;
1375 }
1376
1377 if (ptr_data->add_reference() == 0 &&
1378 ptr_data->mic_buf != 0) {
1379 // add buffer to the list of buffers
1380 // that are passed to dispatch call
1381 m_compute_buffers.push_back(
1382 ptr_data->mic_buf);
1383 }
1384 else {
1385 // will send buffer address to device
1386 m_vars[i].flags.sink_addr = 1;
1387 }
1388
1389 if (!ptr_data->is_static) {
1390 // need to add reference for buffer
1391 m_need_runfunction = true;
1392 }
1393 }
1394 else {
1395 bool error_if_not_found = true;
1396 if (m_is_openmp) {
1397 // For omp target update variable is ignored
1398 // if it does not exist.
1399 if (!m_vars[i].alloc_if &&
1400 !m_vars[i].free_if) {
1401 error_if_not_found = false;
1402 }
1403 }
1404
1405 // use existing association from pointer table
1406 if (!find_ptr_data(ptr_data,
1407 base,
1408 m_vars[i].disp,
1409 m_vars[i].size,
1410 error_if_not_found)) {
1411 return false;
1412 }
1413
1414 if (m_is_openmp) {
1415 // make var nocopy if it does not exist
1416 if (ptr_data == 0) {
1417 m_vars[i].direction.bits =
1418 c_parameter_nocopy;
1419 }
1420 }
1421
1422 if (ptr_data != 0) {
1423 // need to update base in dope vector on device
1424 m_vars[i].flags.sink_addr = 1;
1425 }
1426 }
1427
1428 if (ptr_data != 0) {
1429 if (m_is_openmp) {
1430 // data is transferred only if
1431 // alloc_if == 0 && free_if == 0
1432 // or reference count is 1
1433 if ((m_vars[i].alloc_if ||
1434 m_vars[i].free_if) &&
1435 ptr_data->get_reference() != 1) {
1436 m_vars[i].direction.bits =
1437 c_parameter_nocopy;
1438 }
1439 }
1440
1441 if (ptr_data->alloc_disp != 0) {
1442 m_vars[i].flags.alloc_disp = 1;
1443 m_in_datalen += sizeof(alloc_disp);
1444 }
1445
1446 if (m_vars[i].flags.sink_addr) {
1447 // get buffers's address on the sink
1448 if (!init_mic_address(ptr_data)) {
1449 return false;
1450 }
1451
1452 m_in_datalen += sizeof(ptr_data->mic_addr);
1453 }
1454
1455 if (!ptr_data->is_static && m_vars[i].free_if) {
1456 // need to decrement buffer reference on target
1457 m_need_runfunction = true;
1458 }
1459
1460 // offset to base from the beginning of the buffer
1461 // memory
1462 m_vars[i].offset =
1463 (char*) base -
1464 (char*) ptr_data->cpu_addr.start();
1465
1466 // copy other pointer properties to var descriptor
1467 m_vars[i].mic_offset = ptr_data->mic_offset;
1468 m_vars[i].flags.is_static = ptr_data->is_static;
1469 }
1470 }
1471 else { // !src_is_for_mic
1472 if (!find_ptr_data(ptr_data,
1473 base,
1474 m_vars[i].disp,
1475 m_vars[i].size,
1476 false)) {
1477 return false;
1478 }
1479 m_vars[i].offset = !ptr_data ? 0 :
1480 (char*) base -
1481 (char*) ptr_data->cpu_addr.start();
1482 }
1483
1484 // save pointer data
1485 m_vars_extra[i].src_data = ptr_data;
1486 }
1487 break;
1488
1489 default:
1490 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
1491 LIBOFFLOAD_ABORT;
1492 }
1493 if (m_vars[i].type.src == c_data_ptr_array) {
1494 continue;
1495 }
1496
1497 if (src_is_for_mic && m_vars[i].flags.is_stack_buf) {
1498 m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) -
1499 m_device.m_persist_list.front().cpu_stack_addr;
1500 }
1501 // if source is used at CPU save its offset and disp
1502 if (m_vars[i].into == NULL || m_vars[i].direction.in) {
1503 m_vars_extra[i].cpu_offset = m_vars[i].offset;
1504 m_vars_extra[i].cpu_disp = m_vars[i].disp;
1505 }
1506
1507 // If "into" is define we need to do the similar work for it
1508 if (!m_vars[i].into) {
1509 continue;
1510 }
1511
1512 int64_t into_disp =0, into_offset = 0;
1513
1514 switch (m_vars[i].type.dst) {
1515 case c_data_ptr_array:
1516 break;
1517 case c_data:
1518 case c_void_ptr:
1519 case c_cean_var: {
1520 int64_t size = m_vars[i].size;
1521
1522 if (m_vars[i].type.dst == c_cean_var) {
1523 // array descriptor
1524 const arr_desc *ap =
1525 static_cast<const arr_desc*>(m_vars[i].into);
1526
1527 // debug dump
1528 __arr_desc_dump(" ", "INTO", ap, 0);
1529
1530 // offset and length are derived from the array descriptor
1531 __arr_data_offset_and_length(ap, into_disp, size);
1532
1533 if (!is_arr_desc_contiguous(ap)) {
1534 m_vars[i].flags.is_noncont_dst = 1;
1535 m_vars_extra[i].read_rng_dst =
1536 init_read_ranges_arr_desc(ap);
1537 if (!cean_ranges_match(
1538 m_vars_extra[i].read_rng_src,
1539 m_vars_extra[i].read_rng_dst)) {
1540 LIBOFFLOAD_ERROR(c_ranges_dont_match);
1541 exit(1);
1542 }
1543 }
1544 m_vars[i].into = reinterpret_cast<void*>(ap->base);
1545 }
1546
1547 int64_t size_src = m_vars_extra[i].read_rng_src ?
1548 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
1549 m_vars[i].size;
1550 int64_t size_dst = m_vars_extra[i].read_rng_dst ?
1551 cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
1552 size;
1553 // It's supposed that "into" size must be not less
1554 // than src size
1555 if (size_src > size_dst) {
1556 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
1557 size_src, size_dst);
1558 exit(1);
1559 }
1560
1561 if (m_vars[i].direction.bits) {
1562 if (m_vars[i].flags.is_static_dstn) {
1563 PtrData *ptr_data;
1564
1565 // find data associated with variable
1566 if (!find_ptr_data(ptr_data, m_vars[i].into,
1567 into_disp, size, false)) {
1568 return false;
1569 }
1570 if (ptr_data != 0) {
1571 // offset to base from the beginning of the buffer
1572 // memory
1573 into_offset =
1574 (char*) m_vars[i].into -
1575 (char*) ptr_data->cpu_addr.start();
1576 }
1577 else {
1578 m_vars[i].flags.is_static_dstn = false;
1579 }
1580 m_vars_extra[i].dst_data = ptr_data;
1581 }
1582 }
1583
1584 if (m_vars[i].direction.in &&
1585 !m_vars[i].flags.is_static_dstn) {
1586 m_in_datalen += m_vars[i].size;
1587
1588 // for non-static target destination defined as CEAN
1589 // expression we pass to target its size and dist
1590 if (m_vars[i].type.dst == c_cean_var) {
1591 m_in_datalen += 2 * sizeof(uint64_t);
1592 }
1593 m_need_runfunction = true;
1594 }
1595 break;
1596 }
1597
1598 case c_dv:
1599 if (m_vars[i].direction.bits ||
1600 m_vars[i].alloc_if ||
1601 m_vars[i].free_if) {
1602 ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into);
1603
1604 // debug dump
1605 __dv_desc_dump("INTO", dvp);
1606
1607 // send dope vector contents excluding base
1608 m_in_datalen += m_vars[i].size - sizeof(uint64_t);
1609 m_need_runfunction = true;
1610 }
1611 break;
1612
1613 case c_string_ptr:
1614 case c_data_ptr:
1615 case c_cean_var_ptr:
1616 case c_dv_ptr: {
1617 int64_t size = m_vars[i].size;
1618
1619 if (m_vars[i].type.dst == c_cean_var_ptr) {
1620 // array descriptor
1621 const arr_desc *ap =
1622 static_cast<const arr_desc*>(m_vars[i].into);
1623
1624 // debug dump
1625 __arr_desc_dump(" ", "INTO", ap, 1);
1626
1627 // offset and length are derived from the array descriptor
1628 __arr_data_offset_and_length(ap, into_disp, size);
1629
1630 if (!is_arr_desc_contiguous(ap)) {
1631 m_vars[i].flags.is_noncont_src = 1;
1632 m_vars_extra[i].read_rng_dst =
1633 init_read_ranges_arr_desc(ap);
1634 if (!cean_ranges_match(
1635 m_vars_extra[i].read_rng_src,
1636 m_vars_extra[i].read_rng_dst)) {
1637 LIBOFFLOAD_ERROR(c_ranges_dont_match);
1638 }
1639 }
1640 m_vars[i].into = reinterpret_cast<char**>(ap->base);
1641 }
1642 else if (m_vars[i].type.dst == c_dv_ptr) {
1643 // need to send DV to the device unless it is 'nocopy'
1644 if (m_vars[i].direction.bits ||
1645 m_vars[i].alloc_if ||
1646 m_vars[i].free_if) {
1647 ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into);
1648
1649 // debug dump
1650 __dv_desc_dump("INTO", dvp);
1651
1652 m_vars[i].direction.bits = c_parameter_in;
1653 }
1654 }
1655
1656 int64_t size_src = m_vars_extra[i].read_rng_src ?
1657 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
1658 m_vars[i].size;
1659 int64_t size_dst = m_vars_extra[i].read_rng_dst ?
1660 cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
1661 size;
1662 // It's supposed that "into" size must be not less than
1663 // src size
1664 if (size_src > size_dst) {
1665 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
1666 size_src, size_dst);
1667 exit(1);
1668 }
1669
1670 if (m_vars[i].direction.bits) {
1671 PtrData *ptr_data;
1672
1673 // base address
1674 void *base = *static_cast<void**>(m_vars[i].into);
1675
1676 if (m_vars[i].direction.in) {
1677 // allocate buffer
1678 if (m_vars[i].flags.is_stack_buf) {
1679 // for stack persistent objects ptr data is created
1680 // by var_desc with number 0.
1681 // Its ptr_data is stored at m_stack_ptr_data
1682 ptr_data = m_stack_ptr_data;
1683 m_vars[i].flags.sink_addr = 1;
1684 }
1685 else if (m_vars[i].alloc_if) {
1686 // add new entry
1687 if (!alloc_ptr_data(
1688 ptr_data,
1689 base,
1690 (alloc_base != NULL) ?
1691 alloc_disp : into_disp,
1692 (alloc_base != NULL) ?
1693 alloc_size : size,
1694 alloc_disp,
1695 (alloc_base != NULL) ?
1696 0 : m_vars[i].align)) {
1697 return false;
1698 }
1699
1700 if (ptr_data->add_reference() == 0 &&
1701 ptr_data->mic_buf != 0) {
1702 // add buffer to the list of buffers that
1703 // are passed to dispatch call
1704 m_compute_buffers.push_back(
1705 ptr_data->mic_buf);
1706 }
1707 else {
1708 // will send buffer address to device
1709 m_vars[i].flags.sink_addr = 1;
1710 }
1711
1712 if (!ptr_data->is_static) {
1713 // need to add reference for buffer
1714 m_need_runfunction = true;
1715 }
1716 }
1717 else {
1718 // use existing association from pointer table
1719 if (!find_ptr_data(ptr_data, base, into_disp, size)) {
1720 return false;
1721 }
1722 m_vars[i].flags.sink_addr = 1;
1723 }
1724
1725 if (ptr_data->alloc_disp != 0) {
1726 m_vars[i].flags.alloc_disp = 1;
1727 m_in_datalen += sizeof(alloc_disp);
1728 }
1729
1730 if (m_vars[i].flags.sink_addr) {
1731 // get buffers's address on the sink
1732 if (!init_mic_address(ptr_data)) {
1733 return false;
1734 }
1735
1736 m_in_datalen += sizeof(ptr_data->mic_addr);
1737 }
1738
1739 if (!ptr_data->is_static && m_vars[i].free_if) {
1740 // need to decrement buffer reference on target
1741 m_need_runfunction = true;
1742 }
1743
1744 // copy other pointer properties to var descriptor
1745 m_vars[i].mic_offset = ptr_data->mic_offset;
1746 m_vars[i].flags.is_static_dstn = ptr_data->is_static;
1747 }
1748 else {
1749 if (!find_ptr_data(ptr_data,
1750 base,
1751 into_disp,
1752 m_vars[i].size,
1753 false)) {
1754 return false;
1755 }
1756 }
1757 if (ptr_data) {
1758 into_offset = ptr_data ?
1759 (char*) base -
1760 (char*) ptr_data->cpu_addr.start() :
1761 0;
1762 }
1763 // save pointer data
1764 m_vars_extra[i].dst_data = ptr_data;
1765 }
1766 break;
1767 }
1768
1769 case c_func_ptr:
1770 break;
1771
1772 case c_dv_data:
1773 case c_dv_ptr_data:
1774 case c_dv_data_slice:
1775 case c_dv_ptr_data_slice:
1776 if (m_vars[i].direction.bits ||
1777 m_vars[i].alloc_if ||
1778 m_vars[i].free_if) {
1779 const arr_desc *ap;
1780 ArrDesc *dvp;
1781 PtrData *ptr_data;
1782 int64_t disp;
1783 int64_t size;
1784
1785 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
1786 ap = static_cast<const arr_desc*>(m_vars[i].into);
1787
1788 // debug dump
1789 __arr_desc_dump(" ", "INTO", ap, 0);
1790
1791 dvp = (m_vars[i].type.dst == c_dv_data_slice) ?
1792 reinterpret_cast<ArrDesc*>(ap->base) :
1793 *reinterpret_cast<ArrDesc**>(ap->base);
1794 }
1795 else {
1796 dvp = (m_vars[i].type.dst == c_dv_data) ?
1797 static_cast<ArrDesc*>(m_vars[i].into) :
1798 *static_cast<ArrDesc**>(m_vars[i].into);
1799 }
1800 if (!__dv_is_contiguous(dvp)) {
1801 m_vars[i].flags.is_noncont_dst = 1;
1802 m_vars_extra[i].read_rng_dst =
1803 init_read_ranges_dv(dvp);
1804 }
1805 // size and displacement
1806 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
1807 // offset and length are derived from the array
1808 // descriptor
1809 __arr_data_offset_and_length(ap, into_disp, size);
1810 if (m_vars[i].direction.bits) {
1811 if (!is_arr_desc_contiguous(ap)) {
1812 if (m_vars[i].flags.is_noncont_dst) {
1813 LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
1814 return false;
1815 }
1816 m_vars[i].flags.is_noncont_dst = 1;
1817 m_vars_extra[i].read_rng_dst =
1818 init_read_ranges_arr_desc(ap);
1819 if (!cean_ranges_match(
1820 m_vars_extra[i].read_rng_src,
1821 m_vars_extra[i].read_rng_dst)) {
1822 LIBOFFLOAD_ERROR(c_ranges_dont_match);
1823 }
1824 }
1825 }
1826 }
1827 else {
1828 if (m_vars[i].flags.has_length) {
1829 size = __dv_data_length(dvp, m_vars[i].count);
1830 }
1831 else {
1832 size = __dv_data_length(dvp);
1833 }
1834 disp = 0;
1835 }
1836
1837 int64_t size_src =
1838 m_vars_extra[i].read_rng_src ?
1839 cean_get_transf_size(m_vars_extra[i].read_rng_src) :
1840 m_vars[i].size;
1841 int64_t size_dst =
1842 m_vars_extra[i].read_rng_dst ?
1843 cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
1844 size;
1845 // It's supposed that "into" size must be not less
1846 // than src size
1847 if (size_src > size_dst) {
1848 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
1849 size_src, size_dst);
1850 exit(1);
1851 }
1852
1853 // base address
1854 void *base = reinterpret_cast<void*>(dvp->Base);
1855
1856 // allocate buffer
1857 if (m_vars[i].direction.in) {
1858 if (m_vars[i].alloc_if) {
1859 // add new entry
1860 if (!alloc_ptr_data(
1861 ptr_data,
1862 base,
1863 (alloc_base != NULL) ?
1864 alloc_disp : into_disp,
1865 (alloc_base != NULL) ?
1866 alloc_size : size,
1867 alloc_disp,
1868 (alloc_base != NULL) ?
1869 0 : m_vars[i].align)) {
1870 return false;
1871 }
1872 if (ptr_data->add_reference() == 0 &&
1873 ptr_data->mic_buf !=0) {
1874 // add buffer to the list of buffers
1875 // that are passed to dispatch call
1876 m_compute_buffers.push_back(
1877 ptr_data->mic_buf);
1878 }
1879 else {
1880 // will send buffer address to device
1881 m_vars[i].flags.sink_addr = 1;
1882 }
1883
1884 if (!ptr_data->is_static) {
1885 // need to add reference for buffer
1886 m_need_runfunction = true;
1887 }
1888 }
1889 else {
1890 // use existing association from pointer table
1891 if (!find_ptr_data(ptr_data, base, into_disp, size)) {
1892 return false;
1893 }
1894
1895 // need to update base in dope vector on device
1896 m_vars[i].flags.sink_addr = 1;
1897 }
1898
1899 if (ptr_data->alloc_disp != 0) {
1900 m_vars[i].flags.alloc_disp = 1;
1901 m_in_datalen += sizeof(alloc_disp);
1902 }
1903
1904 if (m_vars[i].flags.sink_addr) {
1905 // get buffers's address on the sink
1906 if (!init_mic_address(ptr_data)) {
1907 return false;
1908 }
1909 m_in_datalen += sizeof(ptr_data->mic_addr);
1910 }
1911
1912 if (!ptr_data->is_static && m_vars[i].free_if) {
1913 // need to decrement buffer reference on target
1914 m_need_runfunction = true;
1915 }
1916
1917 // offset to base from the beginning of the buffer
1918 // memory
1919 into_offset =
1920 (char*) base - (char*) ptr_data->cpu_addr.start();
1921
1922 // copy other pointer properties to var descriptor
1923 m_vars[i].mic_offset = ptr_data->mic_offset;
1924 m_vars[i].flags.is_static_dstn = ptr_data->is_static;
1925 }
1926 else { // src_is_for_mic
1927 if (!find_ptr_data(ptr_data,
1928 base,
1929 into_disp,
1930 size,
1931 false)) {
1932 return false;
1933 }
1934 into_offset = !ptr_data ?
1935 0 :
1936 (char*) base - (char*) ptr_data->cpu_addr.start();
1937 }
1938
1939 // save pointer data
1940 m_vars_extra[i].dst_data = ptr_data;
1941 }
1942 break;
1943
1944 default:
1945 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
1946 LIBOFFLOAD_ABORT;
1947 }
1948 // if into is used at CPU save its offset and disp
1949 if (m_vars[i].direction.out) {
1950 m_vars_extra[i].cpu_offset = into_offset;
1951 m_vars_extra[i].cpu_disp = into_disp;
1952 }
1953 else {
1954 if (m_vars[i].flags.is_stack_buf) {
1955 into_offset = static_cast<char*>(m_vars[i].into) -
1956 m_device.m_persist_list.front().cpu_stack_addr;
1957 }
1958 m_vars[i].offset = into_offset;
1959 m_vars[i].disp = into_disp;
1960 }
1961 }
1962
1963 return true;
1964}
1965
1966bool OffloadDescriptor::setup_misc_data(const char *name)
1967{
1968 OffloadTimer timer(get_timer_data(), c_offload_host_setup_misc_data);
1969
1970 // we can skip run functon call together with wait if offloaded
1971 // region is empty and there is no user defined non-pointer IN/OUT data
1972 if (m_need_runfunction) {
1973 // variable descriptors are sent as input data
1974 m_in_datalen += m_vars_total * sizeof(VarDesc);
1975
1976 // timer data is sent as a part of the output data
1977 m_out_datalen += OFFLOAD_TIMER_DATALEN();
1978
1979 // max from input data and output data length
1980 uint64_t data_len = m_in_datalen > m_out_datalen ? m_in_datalen :
1981 m_out_datalen;
1982
1983 // Misc data has the following layout
1984 // <Function Descriptor>
1985 // <Function Name>
1986 // <In/Out Data> (optional)
1987 //
1988 // We can transfer copyin/copyout data in misc/return data which can
1989 // be passed to run function call if its size does not exceed
1990 // COI_PIPELINE_MAX_IN_MISC_DATA_LEN. Otherwise we have to allocate
1991 // buffer for it.
1992
1993 m_func_desc_size = sizeof(FunctionDescriptor) + strlen(name) + 1;
1994 m_func_desc_size = (m_func_desc_size + 7) & ~7;
1995
1996 int misc_data_offset = 0;
1997 int misc_data_size = 0;
1998 if (data_len > 0) {
1999 if (m_func_desc_size +
2000 m_in_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN &&
2001 m_out_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN) {
2002 // use misc/return data for copyin/copyout
2003 misc_data_offset = m_func_desc_size;
2004 misc_data_size = data_len;
2005 }
2006 else {
2007 OffloadTimer timer_buf(get_timer_data(),
2008 c_offload_host_alloc_data_buffer);
2009
2010 // send/receive data using buffer
2011 COIRESULT res = COI::BufferCreate(data_len,
2012 COI_BUFFER_NORMAL,
2013 0, 0,
2014 1, &m_device.get_process(),
2015 &m_inout_buf);
2016 if (res != COI_SUCCESS) {
2017 if (m_status != 0) {
2018 m_status->result = translate_coi_error(res);
2019 return false;
2020 }
2021 report_coi_error(c_buf_create, res);
2022 }
2023
2024 m_compute_buffers.push_back(m_inout_buf);
2025 m_destroy_buffers.push_back(m_inout_buf);
2026 }
2027 }
2028
2029 // initialize function descriptor
2030 m_func_desc = (FunctionDescriptor*) malloc(m_func_desc_size +
2031 misc_data_size);
2032 m_func_desc->console_enabled = console_enabled;
2033 m_func_desc->timer_enabled =
2034 timer_enabled || (offload_report_level && offload_report_enabled);
2035 m_func_desc->offload_report_level = offload_report_level;
2036 m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
2037 m_func_desc->in_datalen = m_in_datalen;
2038 m_func_desc->out_datalen = m_out_datalen;
2039 m_func_desc->vars_num = m_vars_total;
2040 m_func_desc->data_offset = misc_data_offset;
2041
2042 // append entry name
2043 strcpy(m_func_desc->data, name);
2044 }
2045
2046 return true;
2047}
2048
2049bool OffloadDescriptor::wait_dependencies(
2050 const void **waits,
2051 int num_waits
2052)
2053{
2054 OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
2055 bool ret = true;
2056
2057 for (int i = 0; i < num_waits; i++) {
2058
2059 OffloadDescriptor *task = m_device.find_signal(waits[i], true);
2060 if (task == 0) {
2061 LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
2062 waits[i]);
2063 LIBOFFLOAD_ABORT;
2064 }
2065
2066 if (!task->offload_finish()) {
2067 ret = false;
2068 }
2069
2070 task->cleanup();
2071 delete task;
2072 }
2073
2074 return ret;
2075}
2076
2077bool OffloadDescriptor::offload(
2078 const char *name,
2079 bool is_empty,
2080 VarDesc *vars,
2081 VarDesc2 *vars2,
2082 int vars_total,
2083 const void **waits,
2084 int num_waits,
2085 const void **signal,
2086 int entry_id,
2087 const void *stack_addr
2088)
2089{
2090 if (signal == 0) {
2091 OFFLOAD_DEBUG_TRACE_1(1,
2092 GET_OFFLOAD_NUMBER(get_timer_data()),
2093 c_offload_init_func,
2094 "Offload function %s, is_empty=%d, #varDescs=%d, "
2095 "#waits=%d, signal=none\n",
2096 name, is_empty, vars_total, num_waits);
2097 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2098 c_offload_sent_pointer_data,
2099 "#Wait : %d \n", num_waits);
2100 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2101 c_offload_signal,
2102 "none %d\n", 0);
2103 }
2104 else {
2105 OFFLOAD_DEBUG_TRACE_1(1,
2106 GET_OFFLOAD_NUMBER(get_timer_data()),
2107 c_offload_init_func,
2108 "Offload function %s, is_empty=%d, #varDescs=%d, "
2109 "#waits=%d, signal=%p\n",
2110 name, is_empty, vars_total, num_waits,
2111 *signal);
2112
2113 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2114 c_offload_signal,
2115 "%d\n", signal);
2116 }
2117 OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
2118 c_offload_wait,
2119 "#Wait : %d %p\n", num_waits, waits);
2120
2121 if (m_status != 0) {
2122 m_status->result = OFFLOAD_SUCCESS;
2123 m_status->device_number = m_device.get_logical_index();
2124 }
2125
2126 m_need_runfunction = !is_empty;
2127
2128 // wait for dependencies to finish
2129 if (!wait_dependencies(waits, num_waits)) {
2130 cleanup();
2131 return false;
2132 }
2133
2134 // setup buffers
2135 if (!setup_descriptors(vars, vars2, vars_total, entry_id, stack_addr)) {
2136 cleanup();
2137 return false;
2138 }
2139
2140 // initiate send for pointers. Want to do it as early as possible.
2141 if (!send_pointer_data(signal != 0)) {
2142 cleanup();
2143 return false;
2144 }
2145
2146 // setup misc data for run function
2147 if (!setup_misc_data(name)) {
2148 cleanup();
2149 return false;
2150 }
2151
2152 // gather copyin data into buffer
2153 if (!gather_copyin_data()) {
2154 cleanup();
2155 return false;
2156 }
2157
2158 // Start the computation
2159 if (!compute()) {
2160 cleanup();
2161 return false;
2162 }
2163
2164 // initiate receive for pointers
2165 if (!receive_pointer_data(signal != 0)) {
2166 cleanup();
2167 return false;
2168 }
2169
2170 // if there is a signal save descriptor for the later use.
2171 if (signal != 0) {
2172 m_device.add_signal(*signal, this);
2173 return true;
2174 }
2175
2176 // wait for the offload to finish.
2177 if (!offload_finish()) {
2178 cleanup();
2179 return false;
2180 }
2181
2182 cleanup();
2183 return true;
2184}
2185
2186bool OffloadDescriptor::offload_finish()
2187{
2188 COIRESULT res;
2189
2190 // wait for compute dependencies to become signaled
2191 if (m_in_deps_total > 0) {
2192 OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);
2193
2194 if (__offload_active_wait) {
2195 // keep CPU busy
2196 do {
2197 res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
2198 }
2199 while (res == COI_TIME_OUT_REACHED);
2200 }
2201 else {
2202 res = COI::EventWait(m_in_deps_total, m_in_deps, -1, 1, 0, 0);
2203 }
2204
2205 if (res != COI_SUCCESS) {
2206 if (m_status != 0) {
2207 m_status->result = translate_coi_error(res);
2208 return false;
2209 }
2210 report_coi_error(c_event_wait, res);
2211 }
2212 }
2213
2214 // scatter copyout data received from target
2215 if (!scatter_copyout_data()) {
2216 return false;
2217 }
2218 // wait for receive dependencies to become signaled
2219 if (m_out_deps_total > 0) {
2220 OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);
2221
2222 if (__offload_active_wait) {
2223 // keep CPU busy
2224 do {
2225 res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
2226 }
2227 while (res == COI_TIME_OUT_REACHED);
2228 }
2229 else {
2230 res = COI::EventWait(m_out_deps_total, m_out_deps, -1, 1, 0, 0);
2231 }
2232
2233 if (res != COI_SUCCESS) {
2234 if (m_status != 0) {
2235 m_status->result = translate_coi_error(res);
2236 return false;
2237 }
2238 report_coi_error(c_event_wait, res);
2239 }
2240 }
2241
2242 // destroy buffers
2243 {
2244 OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);
2245
2246 for (BufferList::const_iterator it = m_destroy_buffers.begin();
2247 it != m_destroy_buffers.end(); it++) {
2248 res = COI::BufferDestroy(*it);
2249 if (res != COI_SUCCESS) {
2250 if (m_status != 0) {
2251 m_status->result = translate_coi_error(res);
2252 return false;
2253 }
2254 report_coi_error(c_buf_destroy, res);
2255 }
2256 }
2257 }
2258
2259 return true;
2260}
2261
2262void OffloadDescriptor::cleanup()
2263{
2264 // release device in orsl
2265 ORSL::release(m_device.get_logical_index());
2266
2267 OFFLOAD_TIMER_STOP(get_timer_data(), c_offload_host_total_offload);
2268
2269 // report stuff
2270 Offload_Report_Epilog(get_timer_data());
2271}
2272
2273bool OffloadDescriptor::is_signaled()
2274{
2275 bool signaled = true;
2276 COIRESULT res;
2277
2278 // check compute and receive dependencies
2279 if (m_in_deps_total > 0) {
2280 res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
2281 signaled = signaled && (res == COI_SUCCESS);
2282 }
2283 if (m_out_deps_total > 0) {
2284 res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
2285 signaled = signaled && (res == COI_SUCCESS);
2286 }
2287
2288 return signaled;
2289}
2290
2291// Send pointer data if source or destination or both of them are
2292// noncontiguous. There is guarantee that length of destination enough for
Alp Tokerc2d5e612014-06-01 18:28:36 +00002293// transferred data.
Jim Cownie33f7b242014-04-09 15:40:23 +00002294bool OffloadDescriptor::send_noncontiguous_pointer_data(
2295 int i,
2296 PtrData* src_data,
2297 PtrData* dst_data,
2298 COIEVENT *event
2299 )
2300{
2301 int64_t offset_src, offset_dst;
2302 int64_t length_src, length_dst;
2303 int64_t length_src_cur, length_dst_cur;
2304 int64_t send_size, data_sent = 0;
2305 COIRESULT res;
2306 bool dst_is_empty = true;
2307 bool src_is_empty = true;
2308
2309 // Set length_src and length_dst
2310 length_src = (m_vars_extra[i].read_rng_src) ?
2311 m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
2312 length_dst = !m_vars[i].into ? length_src :
2313 (m_vars_extra[i].read_rng_dst) ?
2314 m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
2315 send_size = (length_src < length_dst) ? length_src : length_dst;
2316
2317 // consequently get contiguous ranges,
2318 // define corresponded destination offset and send data
2319 do {
2320 if (src_is_empty) {
2321 if (m_vars_extra[i].read_rng_src) {
2322 if (!get_next_range(m_vars_extra[i].read_rng_src,
2323 &offset_src)) {
2324 // source ranges are over - nothing to send
2325 break;
2326 }
2327 }
2328 else if (data_sent == 0) {
2329 offset_src = m_vars_extra[i].cpu_disp;
2330 }
2331 else {
2332 break;
2333 }
2334 length_src_cur = length_src;
2335 }
2336 else {
2337 // if source is contiguous or its contiguous range is greater
2338 // than destination one
2339 offset_src += send_size;
2340 }
2341 length_src_cur -= send_size;
2342 src_is_empty = length_src_cur == 0;
2343
2344 if (dst_is_empty) {
2345 if (m_vars[i].into) {
2346 if (m_vars_extra[i].read_rng_dst) {
2347 if (!get_next_range(m_vars_extra[i].read_rng_dst,
2348 &offset_dst)) {
2349 // destination ranges are over
2350 LIBOFFLOAD_ERROR(c_destination_is_over);
2351 return false;
2352 }
2353 }
2354 // into is contiguous.
2355 else {
2356 offset_dst = m_vars[i].disp;
2357 }
2358 length_dst_cur = length_dst;
2359 }
2360 // same as source
2361 else {
2362 offset_dst = offset_src;
2363 length_dst_cur = length_src;
2364 }
2365 }
2366 else {
2367 // if destination is contiguous or its contiguous range is greater
2368 // than source one
2369 offset_dst += send_size;
2370 }
2371 length_dst_cur -= send_size;
2372 dst_is_empty = length_dst_cur == 0;
2373
2374 if (src_data != 0 && src_data->cpu_buf != 0) {
2375 res = COI::BufferCopy(
2376 dst_data->mic_buf,
2377 src_data->cpu_buf,
2378 m_vars[i].mic_offset - dst_data->alloc_disp +
2379 m_vars[i].offset + offset_dst,
2380 m_vars_extra[i].cpu_offset + offset_src,
2381 send_size,
2382 COI_COPY_UNSPECIFIED,
2383 0, 0,
2384 event);
2385 if (res != COI_SUCCESS) {
2386 if (m_status != 0) {
2387 m_status->result = translate_coi_error(res);
2388 return false;
2389 }
2390 report_coi_error(c_buf_copy, res);
2391 }
2392 }
2393 else {
2394 char *base = offload_get_src_base(m_vars[i].ptr,
2395 m_vars[i].type.src);
2396
2397 res = COI::BufferWrite(
2398 dst_data->mic_buf,
2399 m_vars[i].mic_offset - dst_data->alloc_disp +
2400 m_vars[i].offset + offset_dst,
2401 base + offset_src,
2402 send_size,
2403 COI_COPY_UNSPECIFIED,
2404 0, 0,
2405 event);
2406 if (res != COI_SUCCESS) {
2407 if (m_status != 0) {
2408 m_status->result = translate_coi_error(res);
2409 return false;
2410 }
2411 report_coi_error(c_buf_write, res);
2412 }
2413 }
2414 data_sent += length_src;
2415 }
2416 while (true);
2417 return true;
2418}
2419
2420bool OffloadDescriptor::send_pointer_data(bool is_async)
2421{
2422 OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);
2423
2424 uint64_t ptr_sent = 0;
2425 COIRESULT res;
2426
2427 // Initiate send for pointer data
2428 for (int i = 0; i < m_vars_total; i++) {
2429 switch (m_vars[i].type.dst) {
2430 case c_data_ptr_array:
2431 break;
2432 case c_data:
2433 case c_void_ptr:
2434 case c_cean_var:
2435 if (m_vars[i].direction.in &&
2436 m_vars[i].flags.is_static_dstn) {
2437 COIEVENT *event =
2438 (is_async ||
2439 m_vars[i].size >= __offload_use_async_buffer_write) ?
2440 &m_in_deps[m_in_deps_total++] : 0;
2441 PtrData* dst_data = m_vars[i].into ?
2442 m_vars_extra[i].dst_data :
2443 m_vars_extra[i].src_data;
2444 PtrData* src_data =
2445 VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
2446 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
2447 m_vars[i].flags.is_static ?
2448 m_vars_extra[i].src_data : 0;
2449
2450 if (m_vars[i].flags.is_noncont_src ||
2451 m_vars[i].flags.is_noncont_dst) {
2452 if (!send_noncontiguous_pointer_data(
2453 i, src_data, dst_data, event)) {
2454 return false;
2455 }
2456 }
2457 else if (src_data != 0 && src_data->cpu_buf != 0) {
2458 res = COI::BufferCopy(
2459 dst_data->mic_buf,
2460 src_data->cpu_buf,
2461 m_vars[i].mic_offset - dst_data->alloc_disp +
2462 m_vars[i].offset + m_vars[i].disp,
2463 m_vars_extra[i].cpu_offset +
2464 m_vars_extra[i].cpu_disp,
2465 m_vars[i].size,
2466 COI_COPY_UNSPECIFIED,
2467 0, 0,
2468 event);
2469 if (res != COI_SUCCESS) {
2470 if (m_status != 0) {
2471 m_status->result = translate_coi_error(res);
2472 return false;
2473 }
2474 report_coi_error(c_buf_copy, res);
2475 }
2476 }
2477 else {
2478 char *base = offload_get_src_base(m_vars[i].ptr,
2479 m_vars[i].type.src);
2480 res = COI::BufferWrite(
2481 dst_data->mic_buf,
2482 m_vars[i].mic_offset - dst_data->alloc_disp +
2483 m_vars[i].offset + m_vars[i].disp,
2484 base + m_vars_extra[i].cpu_disp,
2485 m_vars[i].size,
2486 COI_COPY_UNSPECIFIED,
2487 0, 0,
2488 event);
2489 if (res != COI_SUCCESS) {
2490 if (m_status != 0) {
2491 m_status->result = translate_coi_error(res);
2492 return false;
2493 }
2494 report_coi_error(c_buf_write, res);
2495 }
2496 }
2497 ptr_sent += m_vars[i].size;
2498 }
2499 break;
2500
2501 case c_string_ptr:
2502 case c_data_ptr:
2503 case c_cean_var_ptr:
2504 case c_dv_ptr:
2505 if (m_vars[i].direction.in && m_vars[i].size > 0) {
2506 COIEVENT *event =
2507 (is_async ||
2508 m_vars[i].size >= __offload_use_async_buffer_write) ?
2509 &m_in_deps[m_in_deps_total++] : 0;
2510 PtrData* dst_data = m_vars[i].into ?
2511 m_vars_extra[i].dst_data :
2512 m_vars_extra[i].src_data;
2513 PtrData* src_data =
2514 VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
2515 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
2516 m_vars[i].flags.is_static ?
2517 m_vars_extra[i].src_data : 0;
2518
2519 if (m_vars[i].flags.is_noncont_src ||
2520 m_vars[i].flags.is_noncont_dst) {
2521 send_noncontiguous_pointer_data(
2522 i, src_data, dst_data, event);
2523 }
2524 else if (src_data != 0 && src_data->cpu_buf != 0) {
2525 res = COI::BufferCopy(
2526 dst_data->mic_buf,
2527 src_data->cpu_buf,
2528 m_vars[i].mic_offset - dst_data->alloc_disp +
2529 m_vars[i].offset + m_vars[i].disp,
2530 m_vars_extra[i].cpu_offset +
2531 m_vars_extra[i].cpu_disp,
2532 m_vars[i].size,
2533 COI_COPY_UNSPECIFIED,
2534 0, 0,
2535 event);
2536 if (res != COI_SUCCESS) {
2537 if (m_status != 0) {
2538 m_status->result = translate_coi_error(res);
2539 return false;
2540 }
2541 report_coi_error(c_buf_copy, res);
2542 }
2543 }
2544 else {
2545 char *base = offload_get_src_base(m_vars[i].ptr,
2546 m_vars[i].type.src);
2547 res = COI::BufferWrite(
2548 dst_data->mic_buf,
2549 m_vars[i].mic_offset - dst_data->alloc_disp +
2550 m_vars[i].offset + m_vars[i].disp,
2551 base + m_vars_extra[i].cpu_disp,
2552 m_vars[i].size,
2553 COI_COPY_UNSPECIFIED,
2554 0, 0,
2555 event);
2556 if (res != COI_SUCCESS) {
2557 if (m_status != 0) {
2558 m_status->result = translate_coi_error(res);
2559 return false;
2560 }
2561 report_coi_error(c_buf_write, res);
2562 }
2563 }
2564
2565 ptr_sent += m_vars[i].size;
2566 }
2567 break;
2568
2569 case c_dv_data:
2570 case c_dv_ptr_data:
2571 if (m_vars[i].direction.in &&
2572 m_vars[i].size > 0) {
2573 PtrData *ptr_data = m_vars[i].into ?
2574 m_vars_extra[i].dst_data :
2575 m_vars_extra[i].src_data;
2576 PtrData* src_data = m_vars_extra[i].src_data;
2577
2578 COIEVENT *event =
2579 (is_async ||
2580 m_vars[i].size >= __offload_use_async_buffer_write) ?
2581 &m_in_deps[m_in_deps_total++] : 0;
2582
2583 if (m_vars[i].flags.is_noncont_src ||
2584 m_vars[i].flags.is_noncont_dst) {
2585 send_noncontiguous_pointer_data(
2586 i, src_data, ptr_data, event);
2587 }
2588 else if (src_data && src_data->cpu_buf != 0) {
2589 res = COI::BufferCopy(
2590 ptr_data->mic_buf,
2591 src_data->cpu_buf,
2592 m_vars[i].offset + ptr_data->mic_offset -
2593 ptr_data->alloc_disp +
2594 m_vars[i].disp,
2595 m_vars_extra[i].cpu_offset +
2596 m_vars_extra[i].cpu_disp,
2597 m_vars[i].size,
2598 COI_COPY_UNSPECIFIED,
2599 0, 0,
2600 event);
2601 if (res != COI_SUCCESS) {
2602 if (m_status != 0) {
2603 m_status->result = translate_coi_error(res);
2604 return false;
2605 }
2606 report_coi_error(c_buf_copy, res);
2607 }
2608 }
2609 else {
2610 char *base = offload_get_src_base(m_vars[i].ptr,
2611 m_vars[i].type.src);
2612 res = COI::BufferWrite(
2613 ptr_data->mic_buf,
2614 ptr_data->mic_offset - ptr_data->alloc_disp +
2615 m_vars[i].offset + m_vars[i].disp,
2616 base + m_vars_extra[i].cpu_disp,
2617 m_vars[i].size,
2618 COI_COPY_UNSPECIFIED,
2619 0, 0,
2620 event);
2621 if (res != COI_SUCCESS) {
2622 if (m_status != 0) {
2623 m_status->result = translate_coi_error(res);
2624 return false;
2625 }
2626 report_coi_error(c_buf_write, res);
2627 }
2628 }
2629 ptr_sent += m_vars[i].size;
2630 }
2631 break;
2632
2633 case c_dv_data_slice:
2634 case c_dv_ptr_data_slice:
2635 if (m_vars[i].direction.in &&
2636 m_vars[i].size > 0) {
2637 PtrData *dst_data = m_vars[i].into ?
2638 m_vars_extra[i].dst_data :
2639 m_vars_extra[i].src_data;
2640 PtrData* src_data =
2641 (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
2642 VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) ||
2643 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) ||
2644 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
2645 m_vars[i].flags.is_static) ?
2646 m_vars_extra[i].src_data : 0;
2647 COIEVENT *event =
2648 (is_async ||
2649 m_vars[i].size >= __offload_use_async_buffer_write) ?
2650 &m_in_deps[m_in_deps_total++] : 0;
2651 if (m_vars[i].flags.is_noncont_src ||
2652 m_vars[i].flags.is_noncont_dst) {
2653 send_noncontiguous_pointer_data(
2654 i, src_data, dst_data, event);
2655 }
2656 else if (src_data && src_data->cpu_buf != 0) {
2657 res = COI::BufferCopy(
2658 dst_data->mic_buf,
2659 src_data->cpu_buf,
2660 m_vars[i].offset - dst_data->alloc_disp +
2661 dst_data->mic_offset +
2662 m_vars[i].disp,
2663 m_vars_extra[i].cpu_offset +
2664 m_vars_extra[i].cpu_disp,
2665 m_vars[i].size,
2666 COI_COPY_UNSPECIFIED,
2667 0, 0,
2668 event);
2669 if (res != COI_SUCCESS) {
2670 if (m_status != 0) {
2671 m_status->result = translate_coi_error(res);
2672 return false;
2673 }
2674 report_coi_error(c_buf_copy, res);
2675 }
2676 }
2677 else {
2678 char *base = offload_get_src_base(m_vars[i].ptr,
2679 m_vars[i].type.src);
2680 res = COI::BufferWrite(
2681 dst_data->mic_buf,
2682 dst_data->mic_offset - dst_data->alloc_disp +
2683 m_vars[i].offset + m_vars[i].disp,
2684 base + m_vars_extra[i].cpu_disp,
2685 m_vars[i].size,
2686 COI_COPY_UNSPECIFIED,
2687 0, 0,
2688 event);
2689 if (res != COI_SUCCESS) {
2690 if (m_status != 0) {
2691 m_status->result = translate_coi_error(res);
2692 return false;
2693 }
2694 report_coi_error(c_buf_write, res);
2695 }
2696 }
2697
2698 ptr_sent += m_vars[i].size;
2699 }
2700 break;
2701
2702 default:
2703 break;
2704 }
2705
2706 // alloc field isn't used at target.
2707 // We can reuse it for offset of array pointers.
2708 if (m_vars_extra[i].is_arr_ptr_el) {
2709 m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset;
2710 }
2711 }
2712
2713 if (m_status) {
2714 m_status->data_sent += ptr_sent;
2715 }
2716
2717 OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), ptr_sent);
2718 OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
2719 c_offload_sent_pointer_data,
2720 "Total pointer data sent to target: [%lld] bytes\n",
2721 ptr_sent);
2722
2723 return true;
2724}
2725
2726bool OffloadDescriptor::gather_copyin_data()
2727{
2728 OffloadTimer timer(get_timer_data(), c_offload_host_gather_inputs);
2729
2730 if (m_need_runfunction && m_in_datalen > 0) {
2731 COIMAPINSTANCE map_inst;
2732 char *data;
2733
2734 // init marshaller
2735 if (m_inout_buf != 0) {
2736 OffloadTimer timer_map(get_timer_data(),
2737 c_offload_host_map_in_data_buffer);
2738
2739 COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_in_datalen,
2740 COI_MAP_WRITE_ENTIRE_BUFFER,
2741 0, 0, 0, &map_inst,
2742 reinterpret_cast<void**>(&data));
2743 if (res != COI_SUCCESS) {
2744 if (m_status != 0) {
2745 m_status->result = translate_coi_error(res);
2746 return false;
2747 }
2748 report_coi_error(c_buf_map, res);
2749 }
2750 }
2751 else {
2752 data = (char*) m_func_desc + m_func_desc->data_offset;
2753 }
2754
2755 // send variable descriptors
2756 memcpy(data, m_vars, m_vars_total * sizeof(VarDesc));
2757 data += m_vars_total * sizeof(VarDesc);
2758
2759 // init marshaller
2760 m_in.init_buffer(data, m_in_datalen);
2761
2762 // Gather copy data into buffer
2763 for (int i = 0; i < m_vars_total; i++) {
2764 bool src_is_for_mic = (m_vars[i].direction.out ||
2765 m_vars[i].into == NULL);
2766 PtrData* ptr_data = src_is_for_mic ?
2767 m_vars_extra[i].src_data :
2768 m_vars_extra[i].dst_data;
2769 if (m_vars[i].flags.alloc_disp) {
2770 m_in.send_data(&ptr_data->alloc_disp,
2771 sizeof(ptr_data->alloc_disp));
2772 }
2773
2774 // send sink address to the target
2775 if (m_vars[i].flags.sink_addr) {
2776 m_in.send_data(&ptr_data->mic_addr,
2777 sizeof(ptr_data->mic_addr));
2778 }
2779
2780 switch (m_vars[i].type.dst) {
2781 case c_data_ptr_array:
2782 break;
2783 case c_data:
2784 case c_void_ptr:
2785 case c_cean_var:
2786 if (m_vars[i].direction.in &&
2787 !m_vars[i].flags.is_static_dstn) {
2788
2789 char *ptr = offload_get_src_base(m_vars[i].ptr,
2790 m_vars[i].type.src);
2791 if (m_vars[i].type.dst == c_cean_var) {
2792 // offset and length are derived from the array
2793 // descriptor
2794 int64_t size = m_vars[i].size;
2795 int64_t disp = m_vars[i].disp;
2796 m_in.send_data(reinterpret_cast<char*>(&size),
2797 sizeof(int64_t));
2798 m_in.send_data(reinterpret_cast<char*>(&disp),
2799 sizeof(int64_t));
2800 }
2801
2802 m_in.send_data(ptr + m_vars_extra[i].cpu_disp,
2803 m_vars[i].size);
2804 }
2805 break;
2806
2807 case c_dv:
2808 if (m_vars[i].direction.bits ||
2809 m_vars[i].alloc_if ||
2810 m_vars[i].free_if) {
2811 // send dope vector excluding base
2812 char *ptr = static_cast<char*>(m_vars[i].ptr);
2813 m_in.send_data(ptr + sizeof(uint64_t),
2814 m_vars[i].size - sizeof(uint64_t));
2815 }
2816 break;
2817
2818 case c_data_ptr:
2819 // send to target addresses of obsolete
2820 // stacks to be released
2821 if (m_vars[i].flags.is_stack_buf &&
2822 !m_vars[i].direction.bits &&
2823 m_vars[i].alloc_if &&
2824 m_vars[i].size != 0) {
2825 for (PtrDataList::iterator it =
2826 m_destroy_stack.begin();
2827 it != m_destroy_stack.end(); it++) {
2828 PtrData * ptr_data = *it;
2829 m_in.send_data(&(ptr_data->mic_addr),
2830 sizeof(ptr_data->mic_addr));
2831 }
2832 }
2833 break;
2834 case c_func_ptr:
2835 if (m_vars[i].direction.in) {
2836 m_in.send_func_ptr(*((const void**) m_vars[i].ptr));
2837 }
2838 break;
2839
2840 default:
2841 break;
2842 }
2843 }
2844
2845 if (m_status) {
2846 m_status->data_sent += m_in.get_tfr_size();
2847 }
2848
2849 if (m_func_desc->data_offset == 0) {
2850 OffloadTimer timer_unmap(get_timer_data(),
2851 c_offload_host_unmap_in_data_buffer);
2852 COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
2853 if (res != COI_SUCCESS) {
2854 if (m_status != 0) {
2855 m_status->result = translate_coi_error(res);
2856 return false;
2857 }
2858 report_coi_error(c_buf_unmap, res);
2859 }
2860 }
2861 }
2862
2863 OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), m_in.get_tfr_size());
2864 OFFLOAD_DEBUG_TRACE_1(1,
2865 GET_OFFLOAD_NUMBER(get_timer_data()), c_offload_copyin_data,
2866 "Total copyin data sent to target: [%lld] bytes\n",
2867 m_in.get_tfr_size());
2868
2869 return true;
2870}
2871
2872bool OffloadDescriptor::compute()
2873{
2874 OffloadTimer timer(get_timer_data(), c_offload_host_start_compute);
2875
2876 if (m_need_runfunction) {
2877 OFFLOAD_DEBUG_TRACE_1(2, GET_OFFLOAD_NUMBER(get_timer_data()),
2878 c_offload_compute, "Compute task on MIC\n");
2879
2880 void* misc = m_func_desc;
2881 int misc_len = m_func_desc_size;
2882 void* ret = 0;
2883 int ret_len = 0;
2884
2885 if (m_func_desc->data_offset != 0) {
2886 misc_len += m_in_datalen;
2887
2888 if (m_out_datalen > 0) {
2889 ret = (char*) m_func_desc + m_func_desc->data_offset;
2890 ret_len = m_out_datalen;
2891 }
2892 }
2893
2894 // dispatch task
2895 COIRESULT res;
2896 COIEVENT event;
2897 res = m_device.compute(m_compute_buffers,
2898 misc, misc_len,
2899 ret, ret_len,
2900 m_in_deps_total,
2901 m_in_deps_total > 0 ? m_in_deps : 0,
2902 &event);
2903 if (res != COI_SUCCESS) {
2904 if (m_status != 0) {
2905 m_status->result = translate_coi_error(res);
2906 return false;
2907 }
2908 report_coi_error(c_pipeline_run_func, res);
2909 }
2910
2911 m_in_deps_total = 1;
2912 m_in_deps[0] = event;
2913 }
2914
2915 return true;
2916}
2917
Alp Tokerc2d5e612014-06-01 18:28:36 +00002918// receive pointer data if source or destination or both of them are
Jim Cownie33f7b242014-04-09 15:40:23 +00002919// noncontiguous. There is guarantee that length of destination enough for
Alp Tokerc2d5e612014-06-01 18:28:36 +00002920// transferred data.
2921bool OffloadDescriptor::receive_noncontiguous_pointer_data(
Jim Cownie33f7b242014-04-09 15:40:23 +00002922 int i,
2923 char* base,
2924 COIBUFFER dst_buf,
2925 COIEVENT *event
2926)
2927{
2928 int64_t offset_src, offset_dst;
2929 int64_t length_src, length_dst;
2930 int64_t length_src_cur, length_dst_cur;
Alp Tokerc2d5e612014-06-01 18:28:36 +00002931 int64_t receive_size, data_received = 0;
Jim Cownie33f7b242014-04-09 15:40:23 +00002932 COIRESULT res;
2933 bool dst_is_empty = true;
2934 bool src_is_empty = true;
2935
2936 // Set length_src and length_dst
2937 length_src = (m_vars_extra[i].read_rng_src) ?
2938 m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
2939 length_dst = !m_vars[i].into ? length_src :
2940 (m_vars_extra[i].read_rng_dst) ?
2941 m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
Alp Tokerc2d5e612014-06-01 18:28:36 +00002942 receive_size = (length_src < length_dst) ? length_src : length_dst;
Jim Cownie33f7b242014-04-09 15:40:23 +00002943
2944 // consequently get contiguous ranges,
Alp Tokerc2d5e612014-06-01 18:28:36 +00002945 // define corresponded destination offset and receive data
Jim Cownie33f7b242014-04-09 15:40:23 +00002946 do {
2947 // get sorce offset
2948 if (src_is_empty) {
2949 if (m_vars_extra[i].read_rng_src) {
2950 if (!get_next_range(m_vars_extra[i].read_rng_src,
2951 &offset_src)) {
2952 // source ranges are over - nothing to send
2953 break;
2954 }
2955 }
Alp Tokerc2d5e612014-06-01 18:28:36 +00002956 else if (data_received == 0) {
Jim Cownie33f7b242014-04-09 15:40:23 +00002957 offset_src = 0;
2958 }
2959 else {
2960 break;
2961 }
2962 length_src_cur = length_src;
2963 }
2964 else {
2965 // if source is contiguous or its contiguous range is greater
2966 // than destination one
Alp Tokerc2d5e612014-06-01 18:28:36 +00002967 offset_src += receive_size;
Jim Cownie33f7b242014-04-09 15:40:23 +00002968 }
Alp Tokerc2d5e612014-06-01 18:28:36 +00002969 length_src_cur -= receive_size;
Jim Cownie33f7b242014-04-09 15:40:23 +00002970 src_is_empty = length_src_cur == 0;
2971
2972 // get destination offset
2973 if (dst_is_empty) {
2974 if (m_vars[i].into) {
2975 if (m_vars_extra[i].read_rng_dst) {
2976 if (!get_next_range(m_vars_extra[i].read_rng_dst,
2977 &offset_dst)) {
2978 // destination ranges are over
2979 LIBOFFLOAD_ERROR(c_destination_is_over);
2980 return false;
2981 }
2982 }
2983 // destination is contiguous.
2984 else {
2985 offset_dst = m_vars_extra[i].cpu_disp;
2986 }
2987 length_dst_cur = length_dst;
2988 }
2989 // same as source
2990 else {
2991 offset_dst = offset_src;
2992 length_dst_cur = length_src;
2993 }
2994 }
2995 else {
2996 // if destination is contiguous or its contiguous range is greater
2997 // than source one
Alp Tokerc2d5e612014-06-01 18:28:36 +00002998 offset_dst += receive_size;
Jim Cownie33f7b242014-04-09 15:40:23 +00002999 }
Alp Tokerc2d5e612014-06-01 18:28:36 +00003000 length_dst_cur -= receive_size;
Jim Cownie33f7b242014-04-09 15:40:23 +00003001 dst_is_empty = length_dst_cur == 0;
3002
3003 if (dst_buf != 0) {
3004 res = COI::BufferCopy(
3005 dst_buf,
3006 m_vars_extra[i].src_data->mic_buf,
3007 m_vars_extra[i].cpu_offset + offset_dst,
3008 m_vars[i].offset + offset_src +
3009 m_vars[i].mic_offset -
3010 m_vars_extra[i].src_data->alloc_disp,
Alp Tokerc2d5e612014-06-01 18:28:36 +00003011 receive_size,
Jim Cownie33f7b242014-04-09 15:40:23 +00003012 COI_COPY_UNSPECIFIED,
3013 m_in_deps_total,
3014 m_in_deps_total > 0 ? m_in_deps : 0,
3015 event);
3016 if (res != COI_SUCCESS) {
3017 if (m_status != 0) {
3018 m_status->result = translate_coi_error(res);
3019 return false;
3020 }
3021 report_coi_error(c_buf_copy, res);
3022 }
3023 }
3024 else {
3025 res = COI::BufferRead(
3026 m_vars_extra[i].src_data->mic_buf,
3027 m_vars[i].offset + offset_src +
3028 m_vars[i].mic_offset -
3029 m_vars_extra[i].src_data->alloc_disp,
3030 base + offset_dst,
Alp Tokerc2d5e612014-06-01 18:28:36 +00003031 receive_size,
Jim Cownie33f7b242014-04-09 15:40:23 +00003032 COI_COPY_UNSPECIFIED,
3033 m_in_deps_total,
3034 m_in_deps_total > 0 ? m_in_deps : 0,
3035 event);
3036 if (res != COI_SUCCESS) {
3037 if (m_status != 0) {
3038 m_status->result = translate_coi_error(res);
3039 return false;
3040 }
3041 report_coi_error(c_buf_read, res);
3042 }
3043 }
Alp Tokerc2d5e612014-06-01 18:28:36 +00003044 data_received += receive_size;
Jim Cownie33f7b242014-04-09 15:40:23 +00003045 }
3046 while (true);
3047 return true;
3048}
3049
3050bool OffloadDescriptor::receive_pointer_data(bool is_async)
3051{
3052 OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads);
3053
3054 uint64_t ptr_received = 0;
3055 COIRESULT res;
3056
3057 for (int i = 0; i < m_vars_total; i++) {
3058 switch (m_vars[i].type.src) {
3059 case c_data_ptr_array:
3060 break;
3061 case c_data:
3062 case c_void_ptr:
3063 case c_cean_var:
3064 if (m_vars[i].direction.out &&
3065 m_vars[i].flags.is_static) {
3066 COIEVENT *event =
3067 (is_async ||
3068 m_in_deps_total > 0 ||
3069 m_vars[i].size >= __offload_use_async_buffer_read) ?
3070 &m_out_deps[m_out_deps_total++] : 0;
3071 PtrData *ptr_data = NULL;
3072 COIBUFFER dst_buf = NULL; // buffer at host
3073 char *base;
3074
3075 if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
3076 ptr_data = m_vars[i].into ?
3077 m_vars_extra[i].dst_data :
3078 m_vars_extra[i].src_data;
3079 }
3080 else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
3081 if (m_vars[i].flags.is_static_dstn) {
3082 ptr_data = m_vars[i].into ?
3083 m_vars_extra[i].dst_data :
3084 m_vars_extra[i].src_data;
3085 }
3086 }
3087 dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
3088 if (dst_buf == NULL) {
3089 base = offload_get_src_base(
3090 m_vars[i].into ?
3091 static_cast<char*>(m_vars[i].into) :
3092 static_cast<char*>(m_vars[i].ptr),
3093 m_vars[i].type.dst);
3094 }
3095
3096 if (m_vars[i].flags.is_noncont_src ||
3097 m_vars[i].flags.is_noncont_dst) {
Alp Tokerc2d5e612014-06-01 18:28:36 +00003098 receive_noncontiguous_pointer_data(
Jim Cownie33f7b242014-04-09 15:40:23 +00003099 i, base, dst_buf, event);
3100 }
3101 else if (dst_buf != 0) {
3102 res = COI::BufferCopy(
3103 dst_buf,
3104 m_vars_extra[i].src_data->mic_buf,
3105 m_vars_extra[i].cpu_offset +
3106 m_vars_extra[i].cpu_disp,
3107 m_vars[i].offset + m_vars[i].disp,
3108 m_vars[i].size,
3109 COI_COPY_UNSPECIFIED,
3110 m_in_deps_total,
3111 m_in_deps_total > 0 ? m_in_deps : 0,
3112 event);
3113 if (res != COI_SUCCESS) {
3114 if (m_status != 0) {
3115 m_status->result = translate_coi_error(res);
3116 return false;
3117 }
3118 report_coi_error(c_buf_copy, res);
3119 }
3120 }
3121 else {
3122 res = COI::BufferRead(
3123 m_vars_extra[i].src_data->mic_buf,
3124 m_vars[i].offset + m_vars[i].disp,
3125 base + m_vars_extra[i].cpu_offset +
3126 m_vars_extra[i].cpu_disp,
3127 m_vars[i].size,
3128 COI_COPY_UNSPECIFIED,
3129 m_in_deps_total,
3130 m_in_deps_total > 0 ? m_in_deps : 0,
3131 event);
3132 if (res != COI_SUCCESS) {
3133 if (m_status != 0) {
3134 m_status->result = translate_coi_error(res);
3135 return false;
3136 }
3137 report_coi_error(c_buf_read, res);
3138 }
3139 }
3140 ptr_received += m_vars[i].size;
3141 }
3142 break;
3143
3144 case c_string_ptr:
3145 case c_data_ptr:
3146 case c_cean_var_ptr:
3147 case c_dv_data:
3148 case c_dv_ptr_data:
3149 case c_dv_data_slice:
3150 case c_dv_ptr_data_slice:
3151 case c_dv_ptr: {
3152 COIBUFFER dst_buf = NULL; // buffer on host
3153 if (m_vars[i].direction.out && m_vars[i].size > 0) {
3154 COIEVENT *event =
3155 (is_async ||
3156 m_in_deps_total > 0 ||
3157 m_vars[i].size >= __offload_use_async_buffer_read) ?
3158 &m_out_deps[m_out_deps_total++] : 0;
3159
3160 uint64_t dst_offset = 0;
3161 char *base = static_cast<char*>(m_vars[i].ptr);
3162
3163 if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
3164 PtrData *ptr_data = m_vars[i].into ?
3165 m_vars_extra[i].dst_data :
3166 m_vars_extra[i].src_data;
3167 dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
3168 if (dst_buf == NULL) {
3169 base = m_vars[i].into ?
3170 *static_cast<char**>(m_vars[i].into) :
3171 *static_cast<char**>(m_vars[i].ptr);
3172 }
3173 dst_offset = m_vars_extra[i].cpu_offset +
3174 m_vars_extra[i].cpu_disp;
3175 }
3176 else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
3177 if (m_vars[i].flags.is_static_dstn) {
3178 dst_buf = m_vars[i].into ?
3179 m_vars_extra[i].dst_data->cpu_buf :
3180 m_vars_extra[i].src_data->cpu_buf;
3181 }
3182 if (dst_buf == NULL) {
3183 base = offload_get_src_base(
3184 m_vars[i].into ?
3185 static_cast<char*>(m_vars[i].into) :
3186 static_cast<char*>(m_vars[i].ptr),
3187 m_vars[i].type.dst);
3188 }
3189 dst_offset = m_vars_extra[i].cpu_offset +
3190 m_vars_extra[i].cpu_disp;
3191 }
3192 else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) ||
3193 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
3194 PtrData *ptr_data = m_vars[i].into != 0 ?
3195 m_vars_extra[i].dst_data :
3196 m_vars_extra[i].src_data;
3197 dst_buf = ptr_data != 0 ? ptr_data->cpu_buf : 0;
3198 if (dst_buf == NULL) {
3199 base = offload_get_src_base(
3200 m_vars[i].into ?
3201 static_cast<char*>(m_vars[i].into) :
3202 static_cast<char*>(m_vars[i].ptr),
3203 m_vars[i].type.dst);
3204
3205 }
3206 dst_offset = m_vars_extra[i].cpu_offset +
3207 m_vars_extra[i].cpu_disp;
3208 }
3209
3210 if (m_vars[i].flags.is_noncont_src ||
3211 m_vars[i].flags.is_noncont_dst) {
Alp Tokerc2d5e612014-06-01 18:28:36 +00003212 receive_noncontiguous_pointer_data(
Jim Cownie33f7b242014-04-09 15:40:23 +00003213 i, base, dst_buf, event);
3214 }
3215 else if (dst_buf != 0) {
3216 res = COI::BufferCopy(
3217 dst_buf,
3218 m_vars_extra[i].src_data->mic_buf,
3219 dst_offset,
3220 m_vars[i].offset + m_vars[i].disp +
3221 m_vars[i].mic_offset -
3222 m_vars_extra[i].src_data->alloc_disp,
3223 m_vars[i].size,
3224 COI_COPY_UNSPECIFIED,
3225 m_in_deps_total,
3226 m_in_deps_total > 0 ? m_in_deps : 0,
3227 event);
3228 if (res != COI_SUCCESS) {
3229 if (m_status != 0) {
3230 m_status->result = translate_coi_error(res);
3231 return false;
3232 }
3233 report_coi_error(c_buf_copy, res);
3234 }
3235 }
3236 else {
3237 res = COI::BufferRead(
3238 m_vars_extra[i].src_data->mic_buf,
3239 m_vars[i].offset + m_vars[i].disp +
3240 m_vars[i].mic_offset -
3241 m_vars_extra[i].src_data->alloc_disp,
3242 base + dst_offset,
3243 m_vars[i].size,
3244 COI_COPY_UNSPECIFIED,
3245 m_in_deps_total,
3246 m_in_deps_total > 0 ? m_in_deps : 0,
3247 event);
3248 if (res != COI_SUCCESS) {
3249 if (m_status != 0) {
3250 m_status->result = translate_coi_error(res);
3251 return false;
3252 }
3253 report_coi_error(c_buf_read, res);
3254 }
3255 }
3256 ptr_received += m_vars[i].size;
3257 }
3258 break;
3259 }
3260
3261 default:
3262 break;
3263 }
3264
3265 // destroy buffers for obsolete stacks
3266 if (m_destroy_stack.size() != 0) {
3267 for (PtrDataList::iterator it = m_destroy_stack.begin();
3268 it != m_destroy_stack.end(); it++) {
3269 PtrData *ptr_data = *it;
3270 m_destroy_buffers.push_back(ptr_data->mic_buf);
3271 OFFLOAD_TRACE(3, "Removing stack buffer with addr %p\n",
3272 ptr_data->mic_addr);
3273 }
3274 m_destroy_stack.clear();
3275 }
3276 if (m_vars[i].free_if) {
3277 // remove association for automatic variables
3278 if (m_is_openmp && !m_vars[i].flags.is_static &&
3279 (m_vars[i].type.src == c_data ||
3280 m_vars[i].type.src == c_void_ptr ||
3281 m_vars[i].type.src == c_cean_var)) {
3282 AutoData *auto_data = m_vars_extra[i].auto_data;
3283 if (auto_data != 0 && auto_data->remove_reference() == 0) {
3284 m_device.remove_auto_data(auto_data->cpu_addr.start());
3285 }
3286 }
3287
3288 // destroy buffers
3289 if (m_vars[i].direction.out || m_vars[i].into == NULL) {
3290 if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) &&
3291 !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) &&
3292 !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) {
3293 continue;
3294 }
3295
3296 PtrData *ptr_data = m_vars_extra[i].src_data;
3297 if (ptr_data->remove_reference() == 0) {
3298 // destroy buffers
3299 if (ptr_data->cpu_buf != 0) {
3300 m_destroy_buffers.push_back(ptr_data->cpu_buf);
3301 }
3302 if (ptr_data->mic_buf != 0) {
3303 m_destroy_buffers.push_back(ptr_data->mic_buf);
3304 }
3305 OFFLOAD_TRACE(3, "Removing association for addr %p\n",
3306 ptr_data->cpu_addr.start());
3307
3308 // remove association from map
3309 m_device.remove_ptr_data(ptr_data->cpu_addr.start());
3310 }
3311 }
3312 else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) ||
3313 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) ||
3314 VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) {
3315 PtrData *ptr_data = m_vars_extra[i].dst_data;
3316 if (ptr_data->remove_reference() == 0) {
3317 // destroy buffers
3318 if (ptr_data->cpu_buf != 0) {
3319 m_destroy_buffers.push_back(ptr_data->cpu_buf);
3320 }
3321 if (ptr_data->mic_buf != 0) {
3322 m_destroy_buffers.push_back(ptr_data->mic_buf);
3323 }
3324 OFFLOAD_TRACE(3, "Removing association for addr %p\n",
3325 ptr_data->cpu_addr.start());
3326
3327 // remove association from map
3328 m_device.remove_ptr_data(ptr_data->cpu_addr.start());
3329 }
3330 }
3331 }
3332 }
3333
3334 if (m_status) {
3335 m_status->data_received += ptr_received;
3336 }
3337
3338 OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received);
3339 OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
3340 c_offload_received_pointer_data,
3341 "Total pointer data received from target: [%lld] bytes\n",
3342 ptr_received);
3343
3344 return true;
3345}
3346
3347bool OffloadDescriptor::scatter_copyout_data()
3348{
3349 OffloadTimer timer(get_timer_data(), c_offload_host_scatter_outputs);
3350
3351 if (m_need_runfunction && m_out_datalen > 0) {
3352
3353 // total size that need to be transferred from target to host
3354 COIMAPINSTANCE map_inst;
3355 COIRESULT res;
3356 char *data;
3357
3358 // output data buffer
3359 if (m_func_desc->data_offset == 0) {
3360 OffloadTimer timer_map(get_timer_data(),
3361 c_offload_host_map_out_data_buffer);
3362
3363 COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_out_datalen,
3364 COI_MAP_READ_ONLY, 0, 0, 0,
3365 &map_inst,
3366 reinterpret_cast<void**>(&data));
3367 if (res != COI_SUCCESS) {
3368 if (m_status != 0) {
3369 m_status->result = translate_coi_error(res);
3370 return false;
3371 }
3372 report_coi_error(c_buf_map, res);
3373 }
3374 }
3375 else {
3376 data = (char*) m_func_desc + m_func_desc->data_offset;
3377 }
3378
3379 // get timing data
3380 OFFLOAD_TIMER_TARGET_DATA(get_timer_data(), data);
3381 data += OFFLOAD_TIMER_DATALEN();
3382
3383 // initialize output marshaller
3384 m_out.init_buffer(data, m_out_datalen);
3385
3386 for (int i = 0; i < m_vars_total; i++) {
3387 switch (m_vars[i].type.src) {
3388 case c_data_ptr_array:
3389 break;
3390 case c_data:
3391 case c_void_ptr:
3392 case c_cean_var:
3393 if (m_vars[i].direction.out &&
3394 !m_vars[i].flags.is_static) {
3395
3396 if (m_vars[i].into) {
3397 char *ptr = offload_get_src_base(
3398 static_cast<char*>(m_vars[i].into),
3399 m_vars[i].type.dst);
3400 m_out.receive_data(ptr + m_vars_extra[i].cpu_disp,
3401 m_vars[i].size);
3402 }
3403 else {
3404 m_out.receive_data(
3405 static_cast<char*>(m_vars[i].ptr) +
3406 m_vars_extra[i].cpu_disp,
3407 m_vars[i].size);
3408 }
3409 }
3410 break;
3411
3412 case c_func_ptr:
3413 if (m_vars[i].direction.out) {
3414 m_out.receive_func_ptr((const void**) m_vars[i].ptr);
3415 }
3416 break;
3417
3418 default:
3419 break;
3420 }
3421 }
3422
3423 if (m_status) {
3424 m_status->data_received += m_out.get_tfr_size();
3425 }
3426
3427 if (m_func_desc->data_offset == 0) {
3428 OffloadTimer timer_unmap(get_timer_data(),
3429 c_offload_host_unmap_out_data_buffer);
3430
3431 COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
3432 if (res != COI_SUCCESS) {
3433 if (m_status != 0) {
3434 m_status->result = translate_coi_error(res);
3435 return false;
3436 }
3437 report_coi_error(c_buf_unmap, res);
3438 }
3439 }
3440 }
3441
3442 OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), m_out.get_tfr_size());
3443 OFFLOAD_TRACE(1, "Total copyout data received from target: [%lld] bytes\n",
3444 m_out.get_tfr_size());
3445
3446 return true;
3447}
3448
3449void get_arr_desc_numbers(
3450 const arr_desc *ap,
3451 int64_t el_size,
3452 int64_t &offset,
3453 int64_t &size,
3454 int &el_number,
3455 CeanReadRanges* &ptr_ranges
3456)
3457{
3458 if (is_arr_desc_contiguous(ap)) {
3459 ptr_ranges = NULL;
3460 __arr_data_offset_and_length(ap, offset, size);
3461 el_number = size / el_size;
3462 }
3463 else {
3464 ptr_ranges = init_read_ranges_arr_desc(ap);
3465 el_number = (ptr_ranges->range_size / el_size) *
3466 ptr_ranges->range_max_number;
3467 size = ptr_ranges->range_size;
3468 }
3469}
3470
3471arr_desc * make_arr_desc(
3472 void* ptr_val,
3473 int64_t extent_start_val,
3474 int64_t extent_elements_val,
3475 int64_t size
3476)
3477{
3478 arr_desc *res;
3479 res = (arr_desc *)malloc(sizeof(arr_desc));
3480 res->base = reinterpret_cast<int64_t>(ptr_val);
3481 res->rank = 1;
3482 res->dim[0].size = size;
3483 res->dim[0].lindex = 0;
3484 res->dim[0].lower = extent_start_val;
3485 res->dim[0].upper = extent_elements_val + extent_start_val - 1;
3486 res->dim[0].stride = 1;
3487 return res;
3488}
3489
3490bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
3491{
3492 int pointers_number;
3493 int tmp_val;
3494 int new_index = m_vars_total;
3495 const arr_desc *ap;
3496 const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr);
3497 int flags = vd3->array_fields;
3498 bool src_is_for_mic = (m_vars[i].direction.out ||
3499 m_vars[i].into == NULL);
3500
3501 ReadArrElements<void *> ptr;
3502 ReadArrElements<void *> into;
3503 ReadArrElements<int64_t> ext_start;
3504 ReadArrElements<int64_t> ext_elements;
3505 ReadArrElements<int64_t> align;
3506 ReadArrElements<int64_t> alloc_if;
3507 ReadArrElements<int64_t> free_if;
3508 ReadArrElements<int64_t> into_start;
3509 ReadArrElements<int64_t> into_elem;
3510 ReadArrElements<int64_t> alloc_start;
3511 ReadArrElements<int64_t> alloc_elem;
3512
3513
3514 ap = static_cast<const arr_desc*>(vd3->ptr_array);
3515
Alp Tokerc2d5e612014-06-01 18:28:36 +00003516 // "pointers_number" for total number of transferred pointers.
Jim Cownie33f7b242014-04-09 15:40:23 +00003517 // For each of them we create new var_desc and put it at the bottom
3518 // of the var_desc's array
3519 get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size,
3520 pointers_number, ptr.ranges);
3521 ptr.base = reinterpret_cast<char*>(ap->base);
3522
3523 // 2. prepare memory for new var_descs
3524 m_vars_total += pointers_number;
3525 m_vars = (VarDesc*)realloc(m_vars, m_vars_total * sizeof(VarDesc));
3526 m_vars_extra =
3527 (VarExtra*)realloc(m_vars_extra, m_vars_total * sizeof(VarExtra));
3528 m_in_deps =
3529 (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * (m_vars_total + 1));
3530 m_out_deps =
3531 (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_vars_total);
3532
3533 // 3. Prepare for reading new var_desc's fields
3534 // EXTENT START
3535 if ((flags & (1<<flag_extent_start_is_array)) != 0) {
3536 ap = static_cast<const arr_desc*>(vd3->extent_start);
3537 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset,
3538 ext_start.size, tmp_val, ext_start.ranges);
3539 ext_start.base = reinterpret_cast<char*>(ap->base);
3540 ext_start.el_size = ap->dim[ap->rank - 1].size;
3541
3542 if (tmp_val < pointers_number) {
3543 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
3544 return false;
3545 }
3546 }
3547 else if ((flags & (1<<flag_extent_start_is_scalar)) != 0) {
3548 ext_start.val = (int64_t)vd3->extent_start;
3549 }
3550 else {
3551 ext_start.val = 0;
3552 }
3553
3554 // EXTENT ELEMENTS NUMBER
3555 if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
3556 ap = static_cast<const arr_desc*>(vd3->extent_elements);
3557 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
3558 ext_elements.offset, ext_elements.size,
3559 tmp_val, ext_elements.ranges);
3560 ext_elements.base = reinterpret_cast<char*>(ap->base);
3561 ext_elements.el_size = ap->dim[ap->rank - 1].size;
3562
3563 if (tmp_val < pointers_number) {
3564 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
3565 return false;
3566 }
3567 }
3568 else if ((flags & (1<<flag_extent_elements_is_scalar)) != 0) {
3569 ext_elements.val = (int64_t)vd3->extent_elements;
3570 }
3571 else {
3572 ext_elements.val = m_vars[i].count;
3573 }
3574
3575 // ALLOC_IF
3576 if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
3577 ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
3578 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset,
3579 alloc_if.size, tmp_val, alloc_if.ranges);
3580 alloc_if.base = reinterpret_cast<char*>(ap->base);
3581 alloc_if.el_size = ap->dim[ap->rank - 1].size;
3582
3583 if (tmp_val < pointers_number) {
3584 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
3585 return false;
3586 }
3587 }
3588 else {
3589 alloc_if.val = m_vars[i].count;
3590 }
3591
3592 // FREE_IF
3593 if ((flags & (1<<flag_free_if_is_array)) != 0) {
3594 ap = static_cast<const arr_desc*>(vd3->free_if_array);
3595 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset,
3596 free_if.size, tmp_val, free_if.ranges);
3597 free_if.base = reinterpret_cast<char*>(ap->base);
3598 free_if.el_size = ap->dim[ap->rank - 1].size;
3599
3600 if (tmp_val < pointers_number) {
3601 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
3602 return false;
3603 }
3604 }
3605 else {
3606 free_if.val = m_vars[i].count;
3607 }
3608
3609 // ALIGN
3610
3611 if ((flags & (1<<flag_align_is_array)) != 0) {
3612 ap = static_cast<const arr_desc*>(vd3->align_array);
3613 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset,
3614 align.size, tmp_val, align.ranges);
3615 align.base = reinterpret_cast<char*>(ap->base);
3616 align.el_size = ap->dim[ap->rank - 1].size;
3617
3618 if (tmp_val < pointers_number) {
3619 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
3620 return false;
3621 }
3622 }
3623 else {
3624 align.val = m_vars[i].align;
3625 }
3626
3627 // 3.1 INTO
3628
3629 if (m_vars[i].into) {
3630 ap = static_cast<const arr_desc*>(m_vars[i].into);
3631 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset,
3632 into.size, tmp_val, into.ranges);
3633 into.base = reinterpret_cast<char*>(ap->base);
3634
3635 if (tmp_val < pointers_number) {
3636 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
3637 return false;
3638 }
3639 }
3640
3641 // 3.2 INTO_START
3642
3643 if ((flags & (1<<flag_into_start_is_array)) != 0) {
3644 ap = static_cast<const arr_desc*>(vd3->into_start);
3645 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset,
3646 into_start.size, tmp_val, into_start.ranges);
3647 into_start.base = reinterpret_cast<char*>(ap->base);
3648 into_start.el_size = ap->dim[ap->rank - 1].size;
3649
3650 if (tmp_val < pointers_number) {
3651 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
3652 return false;
3653 }
3654 }
3655 else if ((flags & (1<<flag_into_start_is_scalar)) != 0) {
3656 into_start.val = (int64_t)vd3->into_start;
3657 }
3658 else {
3659 into_start.val = 0;
3660 }
3661
3662 // 3.3 INTO_ELEMENTS
3663
3664 if ((flags & (1<<flag_into_elements_is_array)) != 0) {
3665 ap = static_cast<const arr_desc*>(vd3->into_elements);
3666 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset,
3667 into_elem.size, tmp_val, into_elem.ranges);
3668 into_elem.base = reinterpret_cast<char*>(ap->base);
3669 into_elem.el_size = ap->dim[ap->rank - 1].size;
3670
3671 if (tmp_val < pointers_number) {
3672 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
3673 return false;
3674 }
3675 }
3676 else if ((flags & (1<<flag_into_elements_is_scalar)) != 0) {
3677 into_elem.val = (int64_t)vd3->into_elements;
3678 }
3679 else {
3680 into_elem.val = m_vars[i].count;
3681 }
3682
3683 // alloc_start
3684
3685 if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
3686 ap = static_cast<const arr_desc*>(vd3->alloc_start);
3687 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
3688 alloc_start.offset, alloc_start.size, tmp_val,
3689 alloc_start.ranges);
3690 alloc_start.base = reinterpret_cast<char*>(ap->base);
3691 alloc_start.el_size = ap->dim[ap->rank - 1].size;
3692
3693 if (tmp_val < pointers_number) {
3694 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
3695 return false;
3696 }
3697 }
3698 else if ((flags & (1<<flag_alloc_start_is_scalar)) != 0) {
3699 alloc_start.val = (int64_t)vd3->alloc_start;
3700 }
3701 else {
3702 alloc_start.val = 0;
3703 }
3704
3705 // alloc_elem
3706
3707 if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
3708 ap = static_cast<const arr_desc*>(vd3->alloc_elements);
3709 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset,
3710 alloc_elem.size, tmp_val, alloc_elem.ranges);
3711 alloc_elem.base = reinterpret_cast<char*>(ap->base);
3712 alloc_elem.el_size = ap->dim[ap->rank - 1].size;
3713 if (tmp_val < pointers_number) {
3714 LIBOFFLOAD_ERROR(c_pointer_array_mismatch,
3715 "alloc_extent elements");
3716 return false;
3717 }
3718 }
3719 else if ((flags & (1<<flag_alloc_elements_is_scalar)) != 0) {
3720 alloc_elem.val = (int64_t)vd3->alloc_elements;
3721 }
3722 else {
3723 alloc_elem.val = 0;
3724 }
3725
3726 for (int k = 0; k < pointers_number; k++) {
3727 int type = flags & 0x3f;
3728 int type_src, type_dst;
3729 // Get new values
3730 // type_src, type_dst
3731 type_src = type_dst = (type == c_data_ptr_array) ?
3732 c_data_ptr : (type == c_func_ptr_array) ?
3733 c_func_ptr : (type == c_void_ptr_array) ?
3734 c_void_ptr : (type == c_string_ptr_array) ?
3735 c_string_ptr : 0;
3736
3737 // Get ptr val
3738 if (!ptr.read_next(true)) {
3739 break;
3740 }
3741 else {
3742 ptr.val = (void*)(ptr.base + ptr.offset);
3743 }
3744
3745 // !!! If we got error at phase of reading - it's an internal
3746 // !!! error, as we must detect mismatch before
3747
3748 // Get into val
3749 if (m_vars[i].into) {
3750 if (!into.read_next(true)) {
3751 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
3752 LIBOFFLOAD_ABORT;
3753 }
3754 else {
3755 into.val = (void*)(into.base + into.offset);
3756 }
3757 }
3758
3759 // Get other components of the clause
3760 if (!ext_start.read_next(flags & (1<<flag_extent_start_is_array))) {
3761 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
3762 LIBOFFLOAD_ABORT;
3763 }
3764 if (!ext_elements.read_next(
3765 flags & (1<<flag_extent_elements_is_array))) {
3766 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
3767 LIBOFFLOAD_ABORT;
3768 }
3769 if (!alloc_if.read_next(flags & (1<<flag_alloc_if_is_array))) {
3770 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
3771 LIBOFFLOAD_ABORT;
3772 }
3773 if (!free_if.read_next(flags & (1<<flag_free_if_is_array))) {
3774 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
3775 LIBOFFLOAD_ABORT;
3776 }
3777 if (!align.read_next(flags & (1<<flag_align_is_array))) {
3778 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
3779 LIBOFFLOAD_ABORT;
3780 }
3781 if (!into_start.read_next(flags & (1<<flag_into_start_is_array))) {
3782 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
3783 LIBOFFLOAD_ABORT;
3784 }
3785 if (!into_elem.read_next(flags & (1<<flag_into_elements_is_array))) {
3786 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
3787 LIBOFFLOAD_ABORT;
3788 }
3789 if (!alloc_start.read_next(flags & (1<<flag_alloc_start_is_array))) {
3790 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
3791 LIBOFFLOAD_ABORT;
3792 }
3793 if (!alloc_elem.read_next(
3794 flags & (1<<flag_alloc_elements_is_array))) {
3795 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent elements");
3796 LIBOFFLOAD_ABORT;
3797 }
3798
3799 m_vars[new_index + k].direction.bits = m_vars[i].direction.bits;
3800 m_vars[new_index + k].alloc_if = alloc_if.val;
3801 m_vars[new_index + k].free_if = free_if.val;
3802 m_vars[new_index + k].align = align.val;
3803 m_vars[new_index + k].mic_offset = 0;
3804 m_vars[new_index + k].flags.bits = m_vars[i].flags.bits;
3805 m_vars[new_index + k].offset = 0;
3806 m_vars[new_index + k].size = m_vars[i].size;
3807
3808 if (ext_start.val == 0) {
3809 m_vars[new_index + k].count = ext_elements.val;
3810 m_vars[new_index + k].ptr = ptr.val;
3811 if (type_src == c_string_ptr) {
3812 m_vars[new_index + k].size = 0;
3813 }
3814 }
3815 else {
3816 m_vars[new_index + k].count = 0;
3817 m_vars[new_index + k].ptr =
3818 static_cast<void*>(make_arr_desc(
3819 ptr.val,
3820 ext_start.val,
3821 ext_elements.val,
3822 m_vars[i].size));
3823
3824 type_src = type_src == c_data_ptr ? c_cean_var_ptr :
3825 c_string_ptr ? c_cean_var_ptr :
3826 type_src;
3827 if (!m_vars[i].into) {
3828 type_dst = type_src;
3829 }
3830 }
3831
3832 if (m_vars[i].into && into_elem.val != 0) {
3833 m_vars[new_index + k].into =
3834 static_cast<void*>(make_arr_desc(
3835 into.val,
3836 into_start.val,
3837 into_elem.val,
3838 m_vars[i].size));
3839 type_dst = (type == c_data_ptr_array) ? c_cean_var_ptr :
3840 (type == c_string_ptr_array) ? c_cean_var_ptr :
3841 type_src;
3842 }
3843 else {
3844 m_vars[new_index + k].into = NULL;
3845 }
3846
3847 if (alloc_elem.val != 0) {
3848 m_vars[new_index + k].alloc =
3849 static_cast<void*>(make_arr_desc(
3850 ptr.val,
3851 alloc_start.val,
3852 alloc_elem.val,
3853 m_vars[i].size));
3854 }
3855 else {
3856 m_vars[new_index + k].alloc = NULL;
3857 }
3858
3859 m_vars[new_index + k].type.src = type_src;
3860 m_vars[new_index + k].type.dst = type_dst;
3861
3862 m_vars_extra[new_index + k].is_arr_ptr_el = 1;
3863 m_vars_extra[new_index + k].ptr_arr_offset =
3864 src_is_for_mic ? ptr.offset : into.offset;
3865 }
3866 // count and alloc fields are useless at target. They can be reused
3867 // for pointer arrays.
3868 m_vars[i].count = pointers_number;
3869 m_vars[i].ptr_arr_offset = new_index;
3870 return true;
3871}
3872
3873static void __offload_fini_library(void)
3874{
3875 OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n");
3876 if (mic_engines_total > 0) {
3877 delete[] mic_engines;
3878
3879 if (mic_proxy_fs_root != 0) {
3880 free(mic_proxy_fs_root);
3881 mic_proxy_fs_root = 0;
3882 }
3883
3884 if (mic_library_path != 0) {
3885 free(mic_library_path);
3886 mic_library_path = 0;
3887 }
3888
3889 // destroy thread key
3890 thread_key_delete(mic_thread_key);
3891 }
3892
3893 // unload COI library
3894 if (COI::is_available) {
3895 COI::fini();
3896 }
3897
3898 OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n");
3899}
3900
3901static void __offload_init_library_once(void)
3902{
3903 COIRESULT res;
3904 uint32_t num_devices;
3905 std::bitset<MIC_ENGINES_MAX> devices;
3906
3907 prefix = report_get_message_str(c_report_host);
3908
3909 // initialize trace
3910 const char *env_var = getenv(htrace_envname);
3911 if (env_var != 0 && *env_var != '\0') {
3912 int64_t new_val;
3913 if (__offload_parse_int_string(env_var, new_val)) {
3914 console_enabled = new_val & 0x0f;
3915 }
3916 }
3917
3918 env_var = getenv(offload_report_envname);
3919 if (env_var != 0 && *env_var != '\0') {
3920 int64_t env_val;
3921 if (__offload_parse_int_string(env_var, env_val)) {
3922 if (env_val == OFFLOAD_REPORT_1 ||
3923 env_val == OFFLOAD_REPORT_2 ||
3924 env_val == OFFLOAD_REPORT_3) {
3925 offload_report_level = env_val;
3926 }
3927 else {
3928 LIBOFFLOAD_ERROR(c_invalid_env_report_value,
3929 offload_report_envname);
3930 }
3931 }
3932 else {
3933 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
3934 offload_report_envname);
3935 }
3936 }
3937 else if (!offload_report_level) {
3938 env_var = getenv(timer_envname);
3939 if (env_var != 0 && *env_var != '\0') {
3940 timer_enabled = atoi(env_var);
3941 }
3942 }
3943
3944 // initialize COI
3945 if (!COI::init()) {
3946 return;
3947 }
3948
3949 // get number of devices installed in the system
3950 res = COI::EngineGetCount(COI_ISA_KNC, &num_devices);
3951 if (res != COI_SUCCESS) {
3952 return;
3953 }
3954
3955 if (num_devices > MIC_ENGINES_MAX) {
3956 num_devices = MIC_ENGINES_MAX;
3957 }
3958
3959 // fill in the list of devices that can be used for offloading
3960 env_var = getenv("OFFLOAD_DEVICES");
3961 if (env_var != 0) {
3962 if (strcasecmp(env_var, "none") != 0) {
3963 // value is composed of comma separated physical device indexes
3964 char *buf = strdup(env_var);
3965 char *str, *ptr;
3966 for (str = strtok_r(buf, ",", &ptr); str != 0;
3967 str = strtok_r(0, ",", &ptr)) {
3968 // convert string to an int
3969 int64_t num;
3970 if (!__offload_parse_int_string(str, num)) {
3971 LIBOFFLOAD_ERROR(c_mic_init5);
3972
3973 // fallback to using all installed devices
3974 devices.reset();
3975 for (int i = 0; i < num_devices; i++) {
3976 devices.set(i);
3977 }
3978 break;
3979 }
3980 if (num < 0 || num >= num_devices) {
3981 LIBOFFLOAD_ERROR(c_mic_init6, num);
3982 continue;
3983 }
3984 devices.set(num);
3985 }
3986 free(buf);
3987 }
3988 }
3989 else {
3990 // use all available devices
3991 for (int i = 0; i < num_devices; i++) {
3992 COIENGINE engine;
3993 res = COI::EngineGetHandle(COI_ISA_KNC, i, &engine);
3994 if (res == COI_SUCCESS) {
3995 devices.set(i);
3996 }
3997 }
3998 }
3999
4000 mic_engines_total = devices.count();
4001
4002 // no need to continue if there are no devices to offload to
4003 if (mic_engines_total <= 0) {
4004 return;
4005 }
4006
4007 // initialize indexes for available devices
4008 mic_engines = new Engine[mic_engines_total];
4009 for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) {
4010 if (devices[p_idx]) {
4011 mic_engines[l_idx].set_indexes(l_idx, p_idx);
4012 l_idx++;
4013 }
4014 }
4015
4016 // library search path for device binaries
4017 env_var = getenv("MIC_LD_LIBRARY_PATH");
4018 if (env_var != 0) {
4019 mic_library_path = strdup(env_var);
4020 }
4021
4022 // memory size reserved for COI buffers
4023 env_var = getenv("MIC_BUFFERSIZE");
4024 if (env_var != 0) {
4025 uint64_t new_size;
4026 if (__offload_parse_size_string(env_var, new_size)) {
4027 mic_buffer_size = new_size;
4028 }
4029 else {
4030 LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE");
4031 }
4032 }
4033
4034 // determine stacksize for the pipeline on the device
4035 env_var = getenv("MIC_STACKSIZE");
4036 if (env_var != 0 && *env_var != '\0') {
4037 uint64_t new_size;
4038 if (__offload_parse_size_string(env_var, new_size) &&
4039 (new_size >= 16384) && ((new_size & 4095) == 0)) {
4040 mic_stack_size = new_size;
4041 }
4042 else {
4043 LIBOFFLOAD_ERROR(c_mic_init3);
4044 }
4045 }
4046
4047 // proxy I/O
4048 env_var = getenv("MIC_PROXY_IO");
4049 if (env_var != 0 && *env_var != '\0') {
4050 int64_t new_val;
4051 if (__offload_parse_int_string(env_var, new_val)) {
4052 mic_proxy_io = new_val;
4053 }
4054 else {
4055 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO");
4056 }
4057 }
4058 env_var = getenv("MIC_PROXY_FS_ROOT");
4059 if (env_var != 0 && *env_var != '\0') {
4060 mic_proxy_fs_root = strdup(env_var);
4061 }
4062
4063 // Prepare environment for the target process using the following
4064 // rules
4065 // - If MIC_ENV_PREFIX is set then any environment variable on the
4066 // host which has that prefix are copied to the device without
4067 // the prefix.
4068 // All other host environment variables are ignored.
4069 // - If MIC_ENV_PREFIX is not set or if MIC_ENV_PREFIX="" then host
4070 // environment is duplicated.
4071 env_var = getenv("MIC_ENV_PREFIX");
4072 if (env_var != 0 && *env_var != '\0') {
4073 mic_env_vars.set_prefix(env_var);
4074
4075 int len = strlen(env_var);
4076 for (int i = 0; environ[i] != 0; i++) {
4077 if (strncmp(environ[i], env_var, len) == 0 &&
4078 strncmp(environ[i], "MIC_LD_LIBRARY_PATH", 19) != 0 &&
4079 environ[i][len] != '=') {
4080 mic_env_vars.analyze_env_var(environ[i]);
4081 }
4082 }
4083 }
4084
4085 // create key for thread data
4086 if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) {
4087 LIBOFFLOAD_ERROR(c_mic_init4, errno);
4088 return;
4089 }
4090
4091 // cpu frequency
4092 cpu_frequency = COI::PerfGetCycleFrequency();
4093
4094 env_var = getenv(mic_use_2mb_buffers_envname);
4095 if (env_var != 0 && *env_var != '\0') {
4096 uint64_t new_size;
4097 if (__offload_parse_size_string(env_var, new_size)) {
4098 __offload_use_2mb_buffers = new_size;
4099 }
4100 else {
4101 LIBOFFLOAD_ERROR(c_invalid_env_var_value,
4102 mic_use_2mb_buffers_envname);
4103 }
4104 }
4105
4106 env_var = getenv(mic_use_async_buffer_write_envname);
4107 if (env_var != 0 && *env_var != '\0') {
4108 uint64_t new_size;
4109 if (__offload_parse_size_string(env_var, new_size)) {
4110 __offload_use_async_buffer_write = new_size;
4111 }
4112 }
4113
4114 env_var = getenv(mic_use_async_buffer_read_envname);
4115 if (env_var != 0 && *env_var != '\0') {
4116 uint64_t new_size;
4117 if (__offload_parse_size_string(env_var, new_size)) {
4118 __offload_use_async_buffer_read = new_size;
4119 }
4120 }
4121
4122 // mic initialization type
4123 env_var = getenv(offload_init_envname);
4124 if (env_var != 0 && *env_var != '\0') {
4125 if (strcmp(env_var, "on_offload") == 0) {
4126 __offload_init_type = c_init_on_offload;
4127 }
4128 else if (strcmp(env_var, "on_offload_all") == 0) {
4129 __offload_init_type = c_init_on_offload_all;
4130 }
4131#ifndef TARGET_WINNT
4132 else if (strcmp(env_var, "on_start") == 0) {
4133 __offload_init_type = c_init_on_start;
4134 }
4135#endif // TARGET_WINNT
4136 else {
4137 LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
4138 }
4139 }
4140
4141 // active wait
4142 env_var = getenv(offload_active_wait_envname);
4143 if (env_var != 0 && *env_var != '\0') {
4144 int64_t new_val;
4145 if (__offload_parse_int_string(env_var, new_val)) {
4146 __offload_active_wait = new_val;
4147 }
4148 else {
4149 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
4150 offload_active_wait_envname);
4151 }
4152 }
4153
4154 // omp device num
4155 env_var = getenv(omp_device_num_envname);
4156 if (env_var != 0 && *env_var != '\0') {
4157 int64_t new_val;
4158 if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
4159 __omp_device_num = new_val;
4160 }
4161 else {
4162 LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env,
4163 omp_device_num_envname);
4164 }
4165 }
4166
4167 // init ORSL
4168 ORSL::init();
4169}
4170
4171extern int __offload_init_library(void)
4172{
4173 // do one time intialization
4174 static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
4175 __offload_run_once(&ctrl, __offload_init_library_once);
4176
4177 // offload is available if COI is available and the number of devices > 0
4178 bool is_available = COI::is_available && (mic_engines_total > 0);
4179
4180 // register pending libraries if there are any
4181 if (is_available && __target_libs) {
4182 mutex_locker_t locker(__target_libs_lock);
4183
4184 for (TargetImageList::iterator it = __target_libs_list.begin();
4185 it != __target_libs_list.end(); it++) {
4186 // Register library in COI
4187 COI::ProcessRegisterLibraries(1, &it->data, &it->size,
4188 &it->origin, &it->offset);
4189
4190 // add lib to all engines
4191 for (int i = 0; i < mic_engines_total; i++) {
4192 mic_engines[i].add_lib(*it);
4193 }
4194 }
4195
4196 __target_libs = false;
4197 __target_libs_list.clear();
4198 }
4199
4200 return is_available;
4201}
4202
4203extern "C" void __offload_register_image(const void *target_image)
4204{
4205 const struct Image *image = static_cast<const struct Image*>(target_image);
4206
4207 // decode image
4208 const char *name = image->data;
4209 const void *data = image->data + strlen(image->data) + 1;
4210 uint64_t size = image->size;
4211 const char *origin = 0;
4212 uint64_t offset = 0;
4213
4214 // our actions depend on the image type
4215 const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
4216 switch (hdr->e_type) {
4217 case ET_EXEC:
4218 // Each offload application is supposed to have only one target
4219 // image representing target executable.
4220 // No thread synchronization is required here as the initialization
4221 // code is always executed in a single thread.
4222 if (__target_exe != 0) {
4223 LIBOFFLOAD_ERROR(c_multiple_target_exes);
4224 exit(1);
4225 }
4226 __target_exe = new TargetImage(name, data, size, origin, offset);
4227
4228 // Registration code for execs is always called from the context
4229 // of main and thus we can safely call any function here,
4230 // including LoadLibrary API on windows. This is the place where
4231 // we do the offload library initialization.
4232 if (__offload_init_library()) {
4233 // initialize engine if init_type is on_start
4234 if (__offload_init_type == c_init_on_start) {
4235 for (int i = 0; i < mic_engines_total; i++) {
4236 mic_engines[i].init();
4237 }
4238 }
4239 }
4240 break;
4241
4242 case ET_DYN:
4243 // Registration code for libraries is called from the DllMain
Alp Tokerc2d5e612014-06-01 18:28:36 +00004244 // context (on windows) and thus we cannot do anything useful
Jim Cownie33f7b242014-04-09 15:40:23 +00004245 // here. So we just add it to the list of pending libraries for
4246 // the later use.
4247 __target_libs_lock.lock();
4248 __target_libs = true;
4249 __target_libs_list.push_back(TargetImage(name, data, size,
4250 origin, offset));
4251 __target_libs_lock.unlock();
4252 break;
4253
4254 default:
4255 // something is definitely wrong, issue an error and exit
4256 LIBOFFLOAD_ERROR(c_unknown_binary_type);
4257 exit(1);
4258 }
4259}
4260
4261extern "C" void __offload_unregister_image(const void *target_image)
4262{
4263 // Target image is packed as follows:
4264 // 8 bytes - size of the target binary
4265 // null-terminated string - binary name
4266 // <size> bytes - binary contents
4267 const struct Image {
4268 int64_t size;
4269 char data[];
4270 } *image = static_cast<const struct Image*>(target_image);
4271
4272 // decode image
4273 const char *name = image->data;
4274 const void *data = image->data + strlen(image->data) + 1;
4275
4276 // our actions depend on the image type
4277 const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
4278 if (hdr->e_type == ET_EXEC) {
4279 // We are executing exec's desctructors.
4280 // It is time to do a library cleanup.
4281 if (timer_enabled) {
4282 Offload_Timer_Print();
4283 }
4284
4285#ifdef MYO_SUPPORT
4286 __offload_myoFini();
4287#endif // MYO_SUPPORT
4288
4289 __offload_fini_library();
4290 }
4291}
4292
4293// Runtime trace interface for user programs
4294
4295void __offload_console_trace(int level)
4296{
4297 console_enabled = level;
4298}
4299
4300// User-visible offload API
4301
4302int _Offload_number_of_devices(void)
4303{
4304 __offload_init_library();
4305 return mic_engines_total;
4306}
4307
// Always returns -1 in this implementation.
// NOTE(review): presumably -1 means "not running on an offload device",
// since this file is the host side of the runtime -- confirm.
int _Offload_get_device_number(void)
{
    const int not_a_device = -1;
    return not_a_device;
}
4312
// Always returns -1 in this implementation.
// NOTE(review): presumably -1 means "no physical device index applies
// here", since this file is the host side of the runtime -- confirm.
int _Offload_get_physical_device_number(void)
{
    const int not_a_device = -1;
    return not_a_device;
}
4317
4318int _Offload_signaled(int index, void *signal)
4319{
4320 __offload_init_library();
4321
4322 // check index value
4323 if (index < 0 || mic_engines_total <= 0) {
4324 LIBOFFLOAD_ERROR(c_offload_signaled1, index);
4325 LIBOFFLOAD_ABORT;
4326 }
4327
4328 // find associated async task
4329 OffloadDescriptor *task =
4330 mic_engines[index % mic_engines_total].find_signal(signal, false);
4331 if (task == 0) {
4332 LIBOFFLOAD_ERROR(c_offload_signaled2, signal);
4333 LIBOFFLOAD_ABORT;
4334 }
4335
4336 return task->is_signaled();
4337}
4338
4339void _Offload_report(int val)
4340{
4341 if (val == OFFLOAD_REPORT_ON ||
4342 val == OFFLOAD_REPORT_OFF) {
4343 offload_report_enabled = val;
4344 }
4345}
4346
4347// IDB support
4348int __dbg_is_attached = 0;
4349int __dbg_target_id = -1;
4350pid_t __dbg_target_so_pid = -1;
4351char __dbg_target_exe_name[MAX_TARGET_NAME] = {0};
4352const int __dbg_api_major_version = 1;
4353const int __dbg_api_minor_version = 0;
4354
// Intentionally empty.
// NOTE(review): presumably a hook on which a debugger sets a breakpoint to
// learn that a target shared object was loaded -- confirm.
void __dbg_target_so_loaded(void)
{
}
// Intentionally empty.
// NOTE(review): presumably a hook on which a debugger sets a breakpoint to
// learn that a target shared object was unloaded -- confirm.
void __dbg_target_so_unloaded(void)
{
}