blob: 67bc7c1e4bb923c43344aa9c2c19a590308d0031 [file] [log] [blame]
//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
//
// Implementation of the interface to be used by Clang during the codegen of a
// target region.
//
//===----------------------------------------------------------------------===//
14
#include <omptarget.h>

#include "device.h"
#include "private.h"
#include "rtl.h"

#include <algorithm>
#include <cassert>
#include <cinttypes>
#include <vector>
23
#ifdef OMPTARGET_DEBUG
// Global debug level, only defined in debug builds; starts at 0.
// NOTE(review): presumably consulted by the DP() debug macro - confirm in
// private.h.
int DebugLevel = 0;
#endif // OMPTARGET_DEBUG
27
George Rokos2467df62017-01-25 21:27:24 +000028////////////////////////////////////////////////////////////////////////////////
George Rokos2467df62017-01-25 21:27:24 +000029/// adds a target shared library to the target execution image
30EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
Jonas Hahnfeld43322802017-12-06 21:59:07 +000031 RTLs.RegisterLib(desc);
George Rokos2467df62017-01-25 21:27:24 +000032}
33
34////////////////////////////////////////////////////////////////////////////////
35/// unloads a target shared library
36EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
Jonas Hahnfeld43322802017-12-06 21:59:07 +000037 RTLs.UnregisterLib(desc);
George Rokos2467df62017-01-25 21:27:24 +000038}
39
40/// Map global data and execute pending ctors
41static int InitLibrary(DeviceTy& Device) {
42 /*
43 * Map global data
44 */
45 int32_t device_id = Device.DeviceID;
46 int rc = OFFLOAD_SUCCESS;
47
48 Device.PendingGlobalsMtx.lock();
49 TrlTblMtx.lock();
50 for (HostEntriesBeginToTransTableTy::iterator
51 ii = HostEntriesBeginToTransTable.begin();
52 ii != HostEntriesBeginToTransTable.end(); ++ii) {
53 TranslationTable *TransTable = &ii->second;
54 if (TransTable->TargetsTable[device_id] != 0) {
55 // Library entries have already been processed
56 continue;
57 }
58
59 // 1) get image.
60 assert(TransTable->TargetsImages.size() > (size_t)device_id &&
61 "Not expecting a device ID outside the table's bounds!");
62 __tgt_device_image *img = TransTable->TargetsImages[device_id];
63 if (!img) {
64 DP("No image loaded for device id %d.\n", device_id);
65 rc = OFFLOAD_FAIL;
66 break;
67 }
68 // 2) load image into the target table.
69 __tgt_target_table *TargetTable =
70 TransTable->TargetsTable[device_id] = Device.load_binary(img);
71 // Unable to get table for this image: invalidate image and fail.
72 if (!TargetTable) {
73 DP("Unable to generate entries table for device id %d.\n", device_id);
74 TransTable->TargetsImages[device_id] = 0;
75 rc = OFFLOAD_FAIL;
76 break;
77 }
78
79 // Verify whether the two table sizes match.
80 size_t hsize =
81 TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
82 size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
83
84 // Invalid image for these host entries!
85 if (hsize != tsize) {
86 DP("Host and Target tables mismatch for device id %d [%zx != %zx].\n",
87 device_id, hsize, tsize);
88 TransTable->TargetsImages[device_id] = 0;
89 TransTable->TargetsTable[device_id] = 0;
90 rc = OFFLOAD_FAIL;
91 break;
92 }
93
94 // process global data that needs to be mapped.
George Rokosd57681b2017-04-22 11:45:03 +000095 Device.DataMapMtx.lock();
George Rokos2467df62017-01-25 21:27:24 +000096 __tgt_target_table *HostTable = &TransTable->HostTable;
97 for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
98 *CurrHostEntry = HostTable->EntriesBegin,
99 *EntryDeviceEnd = TargetTable->EntriesEnd;
100 CurrDeviceEntry != EntryDeviceEnd;
101 CurrDeviceEntry++, CurrHostEntry++) {
102 if (CurrDeviceEntry->size != 0) {
103 // has data.
104 assert(CurrDeviceEntry->size == CurrHostEntry->size &&
105 "data size mismatch");
George Rokosba7380b2017-03-22 16:43:40 +0000106
107 // Fortran may use multiple weak declarations for the same symbol,
108 // therefore we must allow for multiple weak symbols to be loaded from
109 // the fat binary. Treat these mappings as any other "regular" mapping.
110 // Add entry to map.
George Rokosd57681b2017-04-22 11:45:03 +0000111 if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size))
112 continue;
George Rokos2467df62017-01-25 21:27:24 +0000113 DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
114 "\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
115 CurrDeviceEntry->size);
George Rokosd57681b2017-04-22 11:45:03 +0000116 Device.HostDataToTargetMap.push_front(HostDataToTargetTy(
117 (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
118 (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
119 (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/,
120 (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
121 INF_REF_CNT /*RefCount*/));
George Rokos2467df62017-01-25 21:27:24 +0000122 }
123 }
George Rokosd57681b2017-04-22 11:45:03 +0000124 Device.DataMapMtx.unlock();
George Rokos2467df62017-01-25 21:27:24 +0000125 }
126 TrlTblMtx.unlock();
127
128 if (rc != OFFLOAD_SUCCESS) {
129 Device.PendingGlobalsMtx.unlock();
130 return rc;
131 }
132
133 /*
134 * Run ctors for static objects
135 */
136 if (!Device.PendingCtorsDtors.empty()) {
137 // Call all ctors for all libraries registered so far
138 for (auto &lib : Device.PendingCtorsDtors) {
139 if (!lib.second.PendingCtors.empty()) {
140 DP("Has pending ctors... call now\n");
141 for (auto &entry : lib.second.PendingCtors) {
142 void *ctor = entry;
143 int rc = target(device_id, ctor, 0, NULL, NULL, NULL,
144 NULL, 1, 1, true /*team*/);
145 if (rc != OFFLOAD_SUCCESS) {
146 DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
147 Device.PendingGlobalsMtx.unlock();
148 return OFFLOAD_FAIL;
149 }
150 }
151 // Clear the list to indicate that this device has been used
152 lib.second.PendingCtors.clear();
153 DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(lib.first));
154 }
155 }
156 }
157 Device.HasPendingGlobals = false;
158 Device.PendingGlobalsMtx.unlock();
159
160 return OFFLOAD_SUCCESS;
161}
162
163// Check whether a device has been initialized, global ctors have been
164// executed and global data has been mapped; do so if not already done.
165static int CheckDevice(int32_t device_id) {
166 // Is device ready?
167 if (!device_is_ready(device_id)) {
168 DP("Device %d is not ready.\n", device_id);
169 return OFFLOAD_FAIL;
170 }
171
172 // Get device info.
173 DeviceTy &Device = Devices[device_id];
174
175 // Check whether global data has been mapped for this device
176 Device.PendingGlobalsMtx.lock();
177 bool hasPendingGlobals = Device.HasPendingGlobals;
178 Device.PendingGlobalsMtx.unlock();
179 if (hasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) {
180 DP("Failed to init globals on device %d\n", device_id);
181 return OFFLOAD_FAIL;
182 }
183
184 return OFFLOAD_SUCCESS;
185}
186
// Following datatypes and functions (tgt_oldmap_type, combined_entry_t,
// translate_map, cleanup_map) will be removed once the compiler starts using
// the new map types.
190
// Map-type flags of the old (pre-translation) interface. Each enumerator is a
// single bit, so the flags of one map entry can be OR-ed together.
enum tgt_oldmap_type {
  // copy data from host to device
  OMP_TGT_OLDMAPTYPE_TO          = 0x001,
  // copy data from device to host
  OMP_TGT_OLDMAPTYPE_FROM        = 0x002,
  // copy regardless of the ref. count
  OMP_TGT_OLDMAPTYPE_ALWAYS      = 0x004,
  // force unmapping of data
  OMP_TGT_OLDMAPTYPE_DELETE      = 0x008,
  // map pointer as well as pointee
  OMP_TGT_OLDMAPTYPE_MAP_PTR     = 0x010,
  // first occurrence of mapped variable
  OMP_TGT_OLDMAPTYPE_FIRST_MAP   = 0x020,
  // return TgtBase addr of mapped data
  OMP_TGT_OLDMAPTYPE_RETURN_PTR  = 0x040,
  // private variable - not mapped
  OMP_TGT_OLDMAPTYPE_PRIVATE_PTR = 0x080,
  // copy by value - not mapped
  OMP_TGT_OLDMAPTYPE_PRIVATE_VAL = 0x100
};
203
// Temporary functions for map translation and cleanup

// Describes one "combined" entry built by translate_map: the address range
// that covers all old map entries which were grouped together.
struct combined_entry_t {
  int num_members; // number of members in combined entry
  void *base_addr; // base address of combined entry
  void *begin_addr; // begin address of combined entry
  void *end_addr; // end address (one past the last byte) of combined entry
};
211
212static void translate_map(int32_t arg_num, void **args_base, void **args,
George Rokosb92dbb42017-11-21 18:26:41 +0000213 int64_t *arg_sizes, int64_t *arg_types, int32_t &new_arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000214 void **&new_args_base, void **&new_args, int64_t *&new_arg_sizes,
215 int64_t *&new_arg_types, bool is_target_construct) {
216 if (arg_num <= 0) {
217 DP("Nothing to translate\n");
218 new_arg_num = 0;
219 return;
220 }
221
222 // array of combined entries
223 combined_entry_t *cmb_entries =
224 (combined_entry_t *) alloca(arg_num * sizeof(combined_entry_t));
225 // number of combined entries
226 long num_combined = 0;
227 // old entry is MAP_PTR?
228 bool *is_ptr_old = (bool *) alloca(arg_num * sizeof(bool));
229 // old entry is member of member_of[old] cmb_entry
230 int *member_of = (int *) alloca(arg_num * sizeof(int));
George Rokos15a6e7d2017-02-15 20:45:37 +0000231 // temporary storage for modifications of the original arg_types
George Rokosb92dbb42017-11-21 18:26:41 +0000232 int64_t *mod_arg_types = (int64_t *) alloca(arg_num *sizeof(int64_t));
George Rokos2467df62017-01-25 21:27:24 +0000233
234 DP("Translating %d map entries\n", arg_num);
235 for (int i = 0; i < arg_num; ++i) {
236 member_of[i] = -1;
237 is_ptr_old[i] = false;
George Rokos15a6e7d2017-02-15 20:45:37 +0000238 mod_arg_types[i] = arg_types[i];
George Rokos2467df62017-01-25 21:27:24 +0000239 // Scan previous entries to see whether this entry shares the same base
240 for (int j = 0; j < i; ++j) {
241 void *new_begin_addr = NULL;
242 void *new_end_addr = NULL;
243
George Rokos15a6e7d2017-02-15 20:45:37 +0000244 if (mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_MAP_PTR) {
George Rokos2467df62017-01-25 21:27:24 +0000245 if (args_base[i] == args[j]) {
George Rokos15a6e7d2017-02-15 20:45:37 +0000246 if (!(mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR)) {
George Rokos2467df62017-01-25 21:27:24 +0000247 DP("Entry %d has the same base as entry %d's begin address\n", i,
248 j);
249 new_begin_addr = args_base[i];
250 new_end_addr = (char *)args_base[i] + sizeof(void *);
251 assert(arg_sizes[j] == sizeof(void *));
252 is_ptr_old[j] = true;
253 } else {
254 DP("Entry %d has the same base as entry %d's begin address, but "
255 "%d's base was a MAP_PTR too\n", i, j, j);
George Rokos15a6e7d2017-02-15 20:45:37 +0000256 int32_t to_from_always_delete =
257 OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM |
258 OMP_TGT_OLDMAPTYPE_ALWAYS | OMP_TGT_OLDMAPTYPE_DELETE;
259 if (mod_arg_types[j] & to_from_always_delete) {
260 DP("Resetting to/from/always/delete flags for entry %d because "
261 "it is only a pointer to pointer\n", j);
262 mod_arg_types[j] &= ~to_from_always_delete;
263 }
George Rokos2467df62017-01-25 21:27:24 +0000264 }
265 }
266 } else {
George Rokos15a6e7d2017-02-15 20:45:37 +0000267 if (!(mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_FIRST_MAP) &&
George Rokos2467df62017-01-25 21:27:24 +0000268 args_base[i] == args_base[j]) {
269 DP("Entry %d has the same base address as entry %d\n", i, j);
270 new_begin_addr = args[i];
271 new_end_addr = (char *)args[i] + arg_sizes[i];
272 }
273 }
274
275 // If we have combined the entry with a previous one
276 if (new_begin_addr) {
277 int id;
278 if(member_of[j] == -1) {
279 // We have a new entry
280 id = num_combined++;
281 DP("Creating new combined entry %d for old entry %d\n", id, j);
282 // Initialize new entry
283 cmb_entries[id].num_members = 1;
284 cmb_entries[id].base_addr = args_base[j];
George Rokos15a6e7d2017-02-15 20:45:37 +0000285 if (mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR) {
George Rokos2467df62017-01-25 21:27:24 +0000286 cmb_entries[id].begin_addr = args_base[j];
287 cmb_entries[id].end_addr = (char *)args_base[j] + arg_sizes[j];
288 } else {
289 cmb_entries[id].begin_addr = args[j];
290 cmb_entries[id].end_addr = (char *)args[j] + arg_sizes[j];
291 }
292 member_of[j] = id;
293 } else {
294 // Reuse existing combined entry
295 DP("Reusing existing combined entry %d\n", member_of[j]);
296 id = member_of[j];
297 }
298
299 // Update combined entry
300 DP("Adding entry %d to combined entry %d\n", i, id);
301 cmb_entries[id].num_members++;
302 // base_addr stays the same
303 cmb_entries[id].begin_addr =
304 std::min(cmb_entries[id].begin_addr, new_begin_addr);
305 cmb_entries[id].end_addr =
306 std::max(cmb_entries[id].end_addr, new_end_addr);
307 member_of[i] = id;
308 break;
309 }
310 }
311 }
312
313 DP("New entries: %ld combined + %d original\n", num_combined, arg_num);
314 new_arg_num = arg_num + num_combined;
315 new_args_base = (void **) malloc(new_arg_num * sizeof(void *));
316 new_args = (void **) malloc(new_arg_num * sizeof(void *));
317 new_arg_sizes = (int64_t *) malloc(new_arg_num * sizeof(int64_t));
318 new_arg_types = (int64_t *) malloc(new_arg_num * sizeof(int64_t));
319
320 const int64_t alignment = 8;
321
322 int next_id = 0; // next ID
323 int next_cid = 0; // next combined ID
324 int *combined_to_new_id = (int *) alloca(num_combined * sizeof(int));
325 for (int i = 0; i < arg_num; ++i) {
326 // It is member_of
327 if (member_of[i] == next_cid) {
328 int cid = next_cid++; // ID of this combined entry
329 int nid = next_id++; // ID of the new (global) entry
330 combined_to_new_id[cid] = nid;
331 DP("Combined entry %3d will become new entry %3d\n", cid, nid);
332
333 int64_t padding = (int64_t)cmb_entries[cid].begin_addr % alignment;
334 if (padding) {
335 DP("Using a padding of %" PRId64 " for begin address " DPxMOD "\n",
336 padding, DPxPTR(cmb_entries[cid].begin_addr));
337 cmb_entries[cid].begin_addr =
338 (char *)cmb_entries[cid].begin_addr - padding;
339 }
340
341 new_args_base[nid] = cmb_entries[cid].base_addr;
342 new_args[nid] = cmb_entries[cid].begin_addr;
343 new_arg_sizes[nid] = (int64_t) ((char *)cmb_entries[cid].end_addr -
344 (char *)cmb_entries[cid].begin_addr);
345 new_arg_types[nid] = OMP_TGT_MAPTYPE_TARGET_PARAM;
346 DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", "
347 "size %" PRId64 ", type 0x%" PRIx64 "\n", nid,
348 DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
349 new_arg_types[nid]);
350 } else if (member_of[i] != -1) {
351 DP("Combined entry %3d has been encountered before, do nothing\n",
352 member_of[i]);
353 }
354
355 // Now that the combined entry (the one the old entry was a member of) has
356 // been inserted into the new arguments list, proceed with the old entry.
357 int nid = next_id++;
358 DP("Old entry %3d will become new entry %3d\n", i, nid);
359
360 new_args_base[nid] = args_base[i];
361 new_args[nid] = args[i];
362 new_arg_sizes[nid] = arg_sizes[i];
George Rokos15a6e7d2017-02-15 20:45:37 +0000363 int64_t old_type = mod_arg_types[i];
George Rokos2467df62017-01-25 21:27:24 +0000364
365 if (is_ptr_old[i]) {
366 // Reset TO and FROM flags
367 old_type &= ~(OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM);
368 }
369
370 if (member_of[i] == -1) {
371 if (!is_target_construct)
372 old_type &= ~OMP_TGT_MAPTYPE_TARGET_PARAM;
373 new_arg_types[nid] = old_type;
374 DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64
375 ", type 0x%" PRIx64 " (old entry %d not MEMBER_OF)\n", nid,
376 DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
377 new_arg_types[nid], i);
378 } else {
379 // Old entry is not FIRST_MAP
380 old_type &= ~OMP_TGT_OLDMAPTYPE_FIRST_MAP;
381 // Add MEMBER_OF
382 int new_member_of = combined_to_new_id[member_of[i]];
383 old_type |= ((int64_t)new_member_of + 1) << 48;
384 new_arg_types[nid] = old_type;
385 DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64
386 ", type 0x%" PRIx64 " (old entry %d MEMBER_OF %d)\n", nid,
387 DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid],
388 new_arg_types[nid], i, new_member_of);
389 }
390 }
391}
392
/// Copies the base addresses of the last \p arg_num translated entries back
/// into \p args_base and frees the four arrays of the translated list. Does
/// nothing when \p new_arg_num is not positive.
///
/// NOTE(review): the copy-back assumes that old entry i ended up at index
/// i + (new_arg_num - arg_num) of the translated list. translate_map inserts a
/// combined entry right before its first member, so this looks exact only when
/// all combined entries precede all old entries - confirm.
static void cleanup_map(int32_t new_arg_num, void **new_args_base,
    void **new_args, int64_t *new_arg_sizes, int64_t *new_arg_types,
    int32_t arg_num, void **args_base) {
  if (new_arg_num <= 0)
    return;

  // Restore old base addresses from the tail of the translated list.
  const int tail_start = new_arg_num - arg_num;
  int32_t old_idx = 0;
  while (old_idx < arg_num) {
    args_base[old_idx] = new_args_base[tail_start + old_idx];
    ++old_idx;
  }

  free(new_arg_types);
  free(new_arg_sizes);
  free(new_args);
  free(new_args_base);
}
408
409static short member_of(int64_t type) {
410 return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
411}
412
413/// Internal function to do the mapping and transfer the data to the device
414static int target_data_begin(DeviceTy &Device, int32_t arg_num,
415 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
416 // process each input.
417 int rc = OFFLOAD_SUCCESS;
418 for (int32_t i = 0; i < arg_num; ++i) {
419 // Ignore private variables and arrays - there is no mapping for them.
420 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
421 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
422 continue;
423
424 void *HstPtrBegin = args[i];
425 void *HstPtrBase = args_base[i];
426 // Address of pointer on the host and device, respectively.
427 void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
428 bool IsNew, Pointer_IsNew;
429 bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
430 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF);
431 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
432 DP("Has a pointer entry: \n");
433 // base is address of pointer.
434 Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
435 sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef);
436 if (!Pointer_TgtPtrBegin) {
437 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
438 "illegal mapping).\n");
439 }
440 DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
441 "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin),
442 (Pointer_IsNew ? "" : " not"));
443 Pointer_HstPtrBegin = HstPtrBase;
444 // modify current entry.
445 HstPtrBase = *(void **)HstPtrBase;
446 UpdateRef = true; // subsequently update ref count of pointee
447 }
448
449 void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
450 arg_sizes[i], IsNew, IsImplicit, UpdateRef);
451 if (!TgtPtrBegin && arg_sizes[i]) {
452 // If arg_sizes[i]==0, then the argument is a pointer to NULL, so
453 // getOrAlloc() returning NULL is not an error.
454 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
455 "illegal mapping).\n");
456 }
457 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
458 " - is%s new\n", arg_sizes[i], DPxPTR(TgtPtrBegin),
459 (IsNew ? "" : " not"));
460
461 if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
462 void *ret_ptr;
463 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
464 ret_ptr = Pointer_TgtPtrBegin;
465 else {
466 bool IsLast; // not used
467 ret_ptr = Device.getTgtPtrBegin(HstPtrBegin, 0, IsLast, false);
468 }
469
470 DP("Returning device pointer " DPxMOD "\n", DPxPTR(ret_ptr));
471 args_base[i] = ret_ptr;
472 }
473
474 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
475 bool copy = false;
476 if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
477 copy = true;
478 } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
479 // Copy data only if the "parent" struct has RefCount==1.
480 short parent_idx = member_of(arg_types[i]);
481 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
482 assert(parent_rc > 0 && "parent struct not found");
483 if (parent_rc == 1) {
484 copy = true;
485 }
486 }
487
488 if (copy) {
489 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
490 arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
491 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]);
492 if (rt != OFFLOAD_SUCCESS) {
493 DP("Copying data to device failed.\n");
494 rc = OFFLOAD_FAIL;
495 }
496 }
497 }
498
499 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
500 DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
501 DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
502 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
503 void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
504 int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase,
505 sizeof(void *));
506 if (rt != OFFLOAD_SUCCESS) {
507 DP("Copying data to device failed.\n");
508 rc = OFFLOAD_FAIL;
509 }
510 // create shadow pointers for this entry
511 Device.ShadowMtx.lock();
512 Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase,
513 Pointer_TgtPtrBegin, TgtPtrBase};
514 Device.ShadowMtx.unlock();
515 }
516 }
517
518 return rc;
519}
520
George Rokosb92dbb42017-11-21 18:26:41 +0000521EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
522 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
George Rokos2467df62017-01-25 21:27:24 +0000523 int32_t depNum, void *depList, int32_t noAliasDepNum,
524 void *noAliasDepList) {
525 if (depNum + noAliasDepNum > 0)
526 __kmpc_omp_taskwait(NULL, 0);
527
528 __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes,
529 arg_types);
530}
531
532/// creates host-to-target data mapping, stores it in the
533/// libomptarget.so internal structure (an entry in a stack of data maps)
534/// and passes the data to the device.
George Rokosb92dbb42017-11-21 18:26:41 +0000535EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
536 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
537 DP("Entering data begin region for device %ld with %d mappings\n", device_id,
George Rokos2467df62017-01-25 21:27:24 +0000538 arg_num);
539
540 // No devices available?
541 if (device_id == OFFLOAD_DEVICE_DEFAULT) {
542 device_id = omp_get_default_device();
George Rokosb92dbb42017-11-21 18:26:41 +0000543 DP("Use default device id %ld\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000544 }
545
546 if (CheckDevice(device_id) != OFFLOAD_SUCCESS) {
George Rokosb92dbb42017-11-21 18:26:41 +0000547 DP("Failed to get device %ld ready\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000548 return;
549 }
550
551 DeviceTy& Device = Devices[device_id];
552
553 // Translate maps
554 int32_t new_arg_num;
555 void **new_args_base;
556 void **new_args;
557 int64_t *new_arg_sizes;
558 int64_t *new_arg_types;
559 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
560 new_args_base, new_args, new_arg_sizes, new_arg_types, false);
561
562 //target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types);
563 target_data_begin(Device, new_arg_num, new_args_base, new_args, new_arg_sizes,
564 new_arg_types);
565
566 // Cleanup translation memory
567 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
568 new_arg_types, arg_num, args_base);
569}
570
571/// Internal function to undo the mapping and retrieve the data from the device.
572static int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
573 void **args, int64_t *arg_sizes, int64_t *arg_types) {
574 int rc = OFFLOAD_SUCCESS;
575 // process each input.
576 for (int32_t i = arg_num - 1; i >= 0; --i) {
577 // Ignore private variables and arrays - there is no mapping for them.
578 // Also, ignore the use_device_ptr directive, it has no effect here.
579 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
580 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
581 continue;
582
583 void *HstPtrBegin = args[i];
584 bool IsLast;
585 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
586 (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
587 bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
588
589 // If PTR_AND_OBJ, HstPtrBegin is address of pointee
590 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
591 UpdateRef);
592 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
593 " - is%s last\n", arg_sizes[i], DPxPTR(TgtPtrBegin),
594 (IsLast ? "" : " not"));
595
George Rokos15a6e7d2017-02-15 20:45:37 +0000596 bool DelEntry = IsLast || ForceDelete;
597
George Rokos2467df62017-01-25 21:27:24 +0000598 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
599 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
George Rokos15a6e7d2017-02-15 20:45:37 +0000600 DelEntry = false; // protect parent struct from being deallocated
George Rokos2467df62017-01-25 21:27:24 +0000601 }
602
George Rokos2467df62017-01-25 21:27:24 +0000603 if ((arg_types[i] & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
604 // Move data back to the host
605 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
606 bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
607 bool CopyMember = false;
608 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
609 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
610 // Copy data only if the "parent" struct has RefCount==1.
611 short parent_idx = member_of(arg_types[i]);
612 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
613 assert(parent_rc > 0 && "parent struct not found");
614 if (parent_rc == 1) {
615 CopyMember = true;
616 }
617 }
618
619 if (DelEntry || Always || CopyMember) {
620 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
621 arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
622 int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, arg_sizes[i]);
623 if (rt != OFFLOAD_SUCCESS) {
624 DP("Copying data from device failed.\n");
625 rc = OFFLOAD_FAIL;
626 }
627 }
628 }
629
630 // If we copied back to the host a struct/array containing pointers, we
631 // need to restore the original host pointer values from their shadow
632 // copies. If the struct is going to be deallocated, remove any remaining
633 // shadow pointer entries for this struct.
634 uintptr_t lb = (uintptr_t) HstPtrBegin;
635 uintptr_t ub = (uintptr_t) HstPtrBegin + arg_sizes[i];
636 Device.ShadowMtx.lock();
637 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
638 it != Device.ShadowPtrMap.end(); ++it) {
639 void **ShadowHstPtrAddr = (void**) it->first;
640
641 // An STL map is sorted on its keys; use this property
642 // to quickly determine when to break out of the loop.
643 if ((uintptr_t) ShadowHstPtrAddr < lb)
644 continue;
645 if ((uintptr_t) ShadowHstPtrAddr >= ub)
646 break;
647
648 // If we copied the struct to the host, we need to restore the pointer.
649 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
650 DP("Restoring original host pointer value " DPxMOD " for host "
651 "pointer " DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
652 DPxPTR(ShadowHstPtrAddr));
653 *ShadowHstPtrAddr = it->second.HstPtrVal;
654 }
655 // If the struct is to be deallocated, remove the shadow entry.
656 if (DelEntry) {
657 DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));
658 Device.ShadowPtrMap.erase(it);
659 }
660 }
661 Device.ShadowMtx.unlock();
662
663 // Deallocate map
664 if (DelEntry) {
665 int rt = Device.deallocTgtPtr(HstPtrBegin, arg_sizes[i], ForceDelete);
666 if (rt != OFFLOAD_SUCCESS) {
667 DP("Deallocating data from device failed.\n");
668 rc = OFFLOAD_FAIL;
669 }
670 }
671 }
672 }
673
674 return rc;
675}
676
677/// passes data from the target, releases target memory and destroys
678/// the host-target mapping (top entry from the stack of data maps)
679/// created by the last __tgt_target_data_begin.
George Rokosb92dbb42017-11-21 18:26:41 +0000680EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
681 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
George Rokos2467df62017-01-25 21:27:24 +0000682 DP("Entering data end region with %d mappings\n", arg_num);
683
684 // No devices available?
685 if (device_id == OFFLOAD_DEVICE_DEFAULT) {
686 device_id = omp_get_default_device();
687 }
688
689 RTLsMtx.lock();
690 size_t Devices_size = Devices.size();
691 RTLsMtx.unlock();
692 if (Devices_size <= (size_t)device_id) {
George Rokosb92dbb42017-11-21 18:26:41 +0000693 DP("Device ID %ld does not have a matching RTL.\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000694 return;
695 }
696
697 DeviceTy &Device = Devices[device_id];
698 if (!Device.IsInit) {
699 DP("uninit device: ignore");
700 return;
701 }
702
703 // Translate maps
704 int32_t new_arg_num;
705 void **new_args_base;
706 void **new_args;
707 int64_t *new_arg_sizes;
708 int64_t *new_arg_types;
709 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
710 new_args_base, new_args, new_arg_sizes, new_arg_types, false);
711
712 //target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types);
713 target_data_end(Device, new_arg_num, new_args_base, new_args, new_arg_sizes,
714 new_arg_types);
715
716 // Cleanup translation memory
717 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
718 new_arg_types, arg_num, args_base);
719}
720
George Rokosb92dbb42017-11-21 18:26:41 +0000721EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
722 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
George Rokos2467df62017-01-25 21:27:24 +0000723 int32_t depNum, void *depList, int32_t noAliasDepNum,
724 void *noAliasDepList) {
725 if (depNum + noAliasDepNum > 0)
726 __kmpc_omp_taskwait(NULL, 0);
727
728 __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes,
729 arg_types);
730}
731
732/// passes data to/from the target.
George Rokosb92dbb42017-11-21 18:26:41 +0000733EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
734 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
George Rokos2467df62017-01-25 21:27:24 +0000735 DP("Entering data update with %d mappings\n", arg_num);
736
737 // No devices available?
738 if (device_id == OFFLOAD_DEVICE_DEFAULT) {
739 device_id = omp_get_default_device();
740 }
741
742 if (CheckDevice(device_id) != OFFLOAD_SUCCESS) {
George Rokosb92dbb42017-11-21 18:26:41 +0000743 DP("Failed to get device %ld ready\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000744 return;
745 }
746
747 DeviceTy& Device = Devices[device_id];
748
749 // process each input.
750 for (int32_t i = 0; i < arg_num; ++i) {
751 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
752 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
753 continue;
754
755 void *HstPtrBegin = args[i];
756 int64_t MapSize = arg_sizes[i];
757 bool IsLast;
758 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
759 false);
760
761 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
762 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
763 arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
764 Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize);
765
766 uintptr_t lb = (uintptr_t) HstPtrBegin;
767 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
768 Device.ShadowMtx.lock();
769 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
770 it != Device.ShadowPtrMap.end(); ++it) {
771 void **ShadowHstPtrAddr = (void**) it->first;
772 if ((uintptr_t) ShadowHstPtrAddr < lb)
773 continue;
774 if ((uintptr_t) ShadowHstPtrAddr >= ub)
775 break;
776 DP("Restoring original host pointer value " DPxMOD " for host pointer "
777 DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
778 DPxPTR(ShadowHstPtrAddr));
779 *ShadowHstPtrAddr = it->second.HstPtrVal;
780 }
781 Device.ShadowMtx.unlock();
782 }
783
784 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
785 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
786 arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
787 Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize);
788
789 uintptr_t lb = (uintptr_t) HstPtrBegin;
790 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
791 Device.ShadowMtx.lock();
792 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
793 it != Device.ShadowPtrMap.end(); ++it) {
794 void **ShadowHstPtrAddr = (void**) it->first;
795 if ((uintptr_t) ShadowHstPtrAddr < lb)
796 continue;
797 if ((uintptr_t) ShadowHstPtrAddr >= ub)
798 break;
799 DP("Restoring original target pointer value " DPxMOD " for target "
800 "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal),
801 DPxPTR(it->second.TgtPtrAddr));
802 Device.data_submit(it->second.TgtPtrAddr,
803 &it->second.TgtPtrVal, sizeof(void *));
804 }
805 Device.ShadowMtx.unlock();
806 }
807 }
808}
809
810EXTERN void __tgt_target_data_update_nowait(
George Rokosb92dbb42017-11-21 18:26:41 +0000811 int64_t device_id, int32_t arg_num, void **args_base, void **args,
812 int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
George Rokos2467df62017-01-25 21:27:24 +0000813 int32_t noAliasDepNum, void *noAliasDepList) {
814 if (depNum + noAliasDepNum > 0)
815 __kmpc_omp_taskwait(NULL, 0);
816
817 __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes,
818 arg_types);
819}
820
821/// performs the same actions as data_begin in case arg_num is
822/// non-zero and initiates run of the offloaded region on the target platform;
823/// if arg_num is non-zero after the region execution is done it also
824/// performs the same action as data_update and data_end above. This function
825/// returns 0 if it was able to transfer the execution to a target and an
826/// integer different from zero otherwise.
Jonas Hahnfeld43322802017-12-06 21:59:07 +0000827int target(int64_t device_id, void *host_ptr, int32_t arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000828 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
829 int32_t team_num, int32_t thread_limit, int IsTeamConstruct) {
830 DeviceTy &Device = Devices[device_id];
831
832 // Find the table information in the map or look it up in the translation
833 // tables.
834 TableMap *TM = 0;
835 TblMapMtx.lock();
836 HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr);
837 if (TableMapIt == HostPtrToTableMap.end()) {
838 // We don't have a map. So search all the registered libraries.
839 TrlTblMtx.lock();
840 for (HostEntriesBeginToTransTableTy::iterator
841 ii = HostEntriesBeginToTransTable.begin(),
842 ie = HostEntriesBeginToTransTable.end();
843 !TM && ii != ie; ++ii) {
844 // get the translation table (which contains all the good info).
845 TranslationTable *TransTable = &ii->second;
846 // iterate over all the host table entries to see if we can locate the
847 // host_ptr.
848 __tgt_offload_entry *begin = TransTable->HostTable.EntriesBegin;
849 __tgt_offload_entry *end = TransTable->HostTable.EntriesEnd;
850 __tgt_offload_entry *cur = begin;
851 for (uint32_t i = 0; cur < end; ++cur, ++i) {
852 if (cur->addr != host_ptr)
853 continue;
854 // we got a match, now fill the HostPtrToTableMap so that we
855 // may avoid this search next time.
856 TM = &HostPtrToTableMap[host_ptr];
857 TM->Table = TransTable;
858 TM->Index = i;
859 break;
860 }
861 }
862 TrlTblMtx.unlock();
863 } else {
864 TM = &TableMapIt->second;
865 }
866 TblMapMtx.unlock();
867
868 // No map for this host pointer found!
869 if (!TM) {
870 DP("Host ptr " DPxMOD " does not have a matching target pointer.\n",
871 DPxPTR(host_ptr));
872 return OFFLOAD_FAIL;
873 }
874
875 // get target table.
876 TrlTblMtx.lock();
877 assert(TM->Table->TargetsTable.size() > (size_t)device_id &&
878 "Not expecting a device ID outside the table's bounds!");
879 __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id];
880 TrlTblMtx.unlock();
881 assert(TargetTable && "Global data has not been mapped\n");
882
883 // Move data to device.
884 int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
885 arg_types);
886
887 if (rc != OFFLOAD_SUCCESS) {
888 DP("Call to target_data_begin failed, skipping target execution.\n");
889 // Call target_data_end to dealloc whatever target_data_begin allocated
890 // and return OFFLOAD_FAIL.
891 target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types);
892 return OFFLOAD_FAIL;
893 }
894
895 std::vector<void *> tgt_args;
George Rokos1546d312017-05-10 14:12:36 +0000896 std::vector<ptrdiff_t> tgt_offsets;
George Rokos2467df62017-01-25 21:27:24 +0000897
898 // List of (first-)private arrays allocated for this target region
899 std::vector<void *> fpArrays;
900
901 for (int32_t i = 0; i < arg_num; ++i) {
902 if (!(arg_types[i] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
903 // This is not a target parameter, do not push it into tgt_args.
904 continue;
905 }
906 void *HstPtrBegin = args[i];
907 void *HstPtrBase = args_base[i];
George Rokos1546d312017-05-10 14:12:36 +0000908 void *TgtPtrBegin;
909 ptrdiff_t TgtBaseOffset;
George Rokos2467df62017-01-25 21:27:24 +0000910 bool IsLast; // unused.
911 if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
912 DP("Forwarding first-private value " DPxMOD " to the target construct\n",
913 DPxPTR(HstPtrBase));
George Rokos1546d312017-05-10 14:12:36 +0000914 TgtPtrBegin = HstPtrBase;
915 TgtBaseOffset = 0;
George Rokos2467df62017-01-25 21:27:24 +0000916 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
917 // Allocate memory for (first-)private array
George Rokos1546d312017-05-10 14:12:36 +0000918 TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
919 arg_sizes[i], HstPtrBegin);
George Rokos2467df62017-01-25 21:27:24 +0000920 if (!TgtPtrBegin) {
921 DP ("Data allocation for %sprivate array " DPxMOD " failed\n",
922 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
923 DPxPTR(HstPtrBegin));
924 rc = OFFLOAD_FAIL;
925 break;
926 } else {
927 fpArrays.push_back(TgtPtrBegin);
George Rokos1546d312017-05-10 14:12:36 +0000928 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000929#ifdef OMPTARGET_DEBUG
George Rokos1546d312017-05-10 14:12:36 +0000930 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000931 DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for "
932 "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n",
933 arg_sizes[i], DPxPTR(TgtPtrBegin),
934 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
935 DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000936#endif
George Rokos2467df62017-01-25 21:27:24 +0000937 // If first-private, copy data from host
938 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
939 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]);
940 if (rt != OFFLOAD_SUCCESS) {
941 DP ("Copying data to device failed.\n");
942 rc = OFFLOAD_FAIL;
943 break;
944 }
945 }
946 }
947 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
George Rokos1546d312017-05-10 14:12:36 +0000948 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
949 false);
950 TgtBaseOffset = 0; // no offset for ptrs.
George Rokos2467df62017-01-25 21:27:24 +0000951 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
952 "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
953 DPxPTR(HstPtrBase));
954 } else {
George Rokos1546d312017-05-10 14:12:36 +0000955 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
956 false);
957 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000958#ifdef OMPTARGET_DEBUG
George Rokos1546d312017-05-10 14:12:36 +0000959 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000960 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
961 DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000962#endif
George Rokos2467df62017-01-25 21:27:24 +0000963 }
George Rokos1546d312017-05-10 14:12:36 +0000964 tgt_args.push_back(TgtPtrBegin);
965 tgt_offsets.push_back(TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000966 }
George Rokos1546d312017-05-10 14:12:36 +0000967
968 assert(tgt_args.size() == tgt_offsets.size() &&
969 "Size mismatch in arguments and offsets");
George Rokos2467df62017-01-25 21:27:24 +0000970
971 // Pop loop trip count
972 uint64_t ltc = Device.loopTripCnt;
973 Device.loopTripCnt = 0;
974
975 // Launch device execution.
976 if (rc == OFFLOAD_SUCCESS) {
977 DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
978 TargetTable->EntriesBegin[TM->Index].name,
979 DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index);
980 if (IsTeamConstruct) {
981 rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr,
George Rokos1546d312017-05-10 14:12:36 +0000982 &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num,
983 thread_limit, ltc);
George Rokos2467df62017-01-25 21:27:24 +0000984 } else {
985 rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr,
George Rokos1546d312017-05-10 14:12:36 +0000986 &tgt_args[0], &tgt_offsets[0], tgt_args.size());
George Rokos2467df62017-01-25 21:27:24 +0000987 }
988 } else {
989 DP("Errors occurred while obtaining target arguments, skipping kernel "
990 "execution\n");
991 }
992
993 // Deallocate (first-)private arrays
994 for (auto it : fpArrays) {
995 int rt = Device.RTL->data_delete(Device.RTLDeviceID, it);
996 if (rt != OFFLOAD_SUCCESS) {
997 DP("Deallocation of (first-)private arrays failed.\n");
998 rc = OFFLOAD_FAIL;
999 }
1000 }
1001
1002 // Move data from device.
1003 int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes,
1004 arg_types);
1005
1006 if (rt != OFFLOAD_SUCCESS) {
1007 DP("Call to target_data_end failed.\n");
1008 rc = OFFLOAD_FAIL;
1009 }
1010
1011 return rc;
1012}
1013
George Rokosb92dbb42017-11-21 18:26:41 +00001014EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
1015 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
1016 DP("Entering target region with entry point " DPxMOD " and device Id %ld\n",
George Rokos2467df62017-01-25 21:27:24 +00001017 DPxPTR(host_ptr), device_id);
1018
1019 if (device_id == OFFLOAD_DEVICE_DEFAULT) {
1020 device_id = omp_get_default_device();
1021 }
1022
1023 if (CheckDevice(device_id) != OFFLOAD_SUCCESS) {
George Rokosb92dbb42017-11-21 18:26:41 +00001024 DP("Failed to get device %ld ready\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +00001025 return OFFLOAD_FAIL;
1026 }
1027
1028 // Translate maps
1029 int32_t new_arg_num;
1030 void **new_args_base;
1031 void **new_args;
1032 int64_t *new_arg_sizes;
1033 int64_t *new_arg_types;
1034 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
1035 new_args_base, new_args, new_arg_sizes, new_arg_types, true);
1036
1037 //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
1038 // arg_types, 0, 0, false /*team*/, false /*recursive*/);
1039 int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args,
1040 new_arg_sizes, new_arg_types, 0, 0, false /*team*/);
1041
1042 // Cleanup translation memory
1043 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
1044 new_arg_types, arg_num, args_base);
1045
1046 return rc;
1047}
1048
George Rokosb92dbb42017-11-21 18:26:41 +00001049EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
George Rokos2467df62017-01-25 21:27:24 +00001050 int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
George Rokosb92dbb42017-11-21 18:26:41 +00001051 int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
George Rokos2467df62017-01-25 21:27:24 +00001052 void *noAliasDepList) {
1053 if (depNum + noAliasDepNum > 0)
1054 __kmpc_omp_taskwait(NULL, 0);
1055
1056 return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
1057 arg_types);
1058}
1059
George Rokosb92dbb42017-11-21 18:26:41 +00001060EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
George Rokos2467df62017-01-25 21:27:24 +00001061 int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
George Rokosb92dbb42017-11-21 18:26:41 +00001062 int64_t *arg_types, int32_t team_num, int32_t thread_limit) {
1063 DP("Entering target region with entry point " DPxMOD " and device Id %ld\n",
George Rokos2467df62017-01-25 21:27:24 +00001064 DPxPTR(host_ptr), device_id);
1065
1066 if (device_id == OFFLOAD_DEVICE_DEFAULT) {
1067 device_id = omp_get_default_device();
1068 }
1069
1070 if (CheckDevice(device_id) != OFFLOAD_SUCCESS) {
George Rokosb92dbb42017-11-21 18:26:41 +00001071 DP("Failed to get device %ld ready\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +00001072 return OFFLOAD_FAIL;
1073 }
1074
1075 // Translate maps
1076 int32_t new_arg_num;
1077 void **new_args_base;
1078 void **new_args;
1079 int64_t *new_arg_sizes;
1080 int64_t *new_arg_types;
1081 translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num,
1082 new_args_base, new_args, new_arg_sizes, new_arg_types, true);
1083
1084 //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
1085 // arg_types, team_num, thread_limit, true /*team*/,
1086 // false /*recursive*/);
1087 int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args,
1088 new_arg_sizes, new_arg_types, team_num, thread_limit, true /*team*/);
1089
1090 // Cleanup translation memory
1091 cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
1092 new_arg_types, arg_num, args_base);
1093
1094 return rc;
1095}
1096
George Rokosb92dbb42017-11-21 18:26:41 +00001097EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
George Rokos2467df62017-01-25 21:27:24 +00001098 int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
George Rokosb92dbb42017-11-21 18:26:41 +00001099 int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
George Rokos2467df62017-01-25 21:27:24 +00001100 void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
1101 if (depNum + noAliasDepNum > 0)
1102 __kmpc_omp_taskwait(NULL, 0);
1103
1104 return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args,
1105 arg_sizes, arg_types, team_num, thread_limit);
1106}
1107
1108
1109// The trip count mechanism will be revised - this scheme is not thread-safe.
George Rokosb92dbb42017-11-21 18:26:41 +00001110EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
George Rokos2467df62017-01-25 21:27:24 +00001111 uint64_t loop_tripcount) {
1112 if (device_id == OFFLOAD_DEVICE_DEFAULT) {
1113 device_id = omp_get_default_device();
1114 }
1115
1116 if (CheckDevice(device_id) != OFFLOAD_SUCCESS) {
George Rokosb92dbb42017-11-21 18:26:41 +00001117 DP("Failed to get device %ld ready\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +00001118 return;
1119 }
1120
George Rokosb92dbb42017-11-21 18:26:41 +00001121 DP("__kmpc_push_target_tripcount(%ld, %" PRIu64 ")\n", device_id,
George Rokos2467df62017-01-25 21:27:24 +00001122 loop_tripcount);
1123 Devices[device_id].loopTripCnt = loop_tripcount;
1124}
1125