blob: d5574fbb291a9e95d8362831fc7170a4829d58e9 [file] [log] [blame]
George Rokos2467df62017-01-25 21:27:24 +00001//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is dual licensed under the MIT and the University of Illinois Open
6// Source Licenses. See LICENSE.txt for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation of the interface to be used by Clang during the codegen of a
11// target region.
12//
13//===----------------------------------------------------------------------===//
14
Jonas Hahnfeld43322802017-12-06 21:59:07 +000015#include <omptarget.h>
16
17#include "device.h"
18#include "private.h"
19#include "rtl.h"
20
George Rokos2467df62017-01-25 21:27:24 +000021#include <cassert>
George Rokos2467df62017-01-25 21:27:24 +000022#include <vector>
23
Sergey Dmitrievb305d262017-08-14 15:09:59 +000024#ifdef OMPTARGET_DEBUG
Jonas Hahnfeld43322802017-12-06 21:59:07 +000025int DebugLevel = 0;
Sergey Dmitrievb305d262017-08-14 15:09:59 +000026#endif // OMPTARGET_DEBUG
27
George Rokos2467df62017-01-25 21:27:24 +000028/// Map global data and execute pending ctors
29static int InitLibrary(DeviceTy& Device) {
30 /*
31 * Map global data
32 */
33 int32_t device_id = Device.DeviceID;
34 int rc = OFFLOAD_SUCCESS;
35
36 Device.PendingGlobalsMtx.lock();
37 TrlTblMtx.lock();
38 for (HostEntriesBeginToTransTableTy::iterator
39 ii = HostEntriesBeginToTransTable.begin();
40 ii != HostEntriesBeginToTransTable.end(); ++ii) {
41 TranslationTable *TransTable = &ii->second;
42 if (TransTable->TargetsTable[device_id] != 0) {
43 // Library entries have already been processed
44 continue;
45 }
46
47 // 1) get image.
48 assert(TransTable->TargetsImages.size() > (size_t)device_id &&
49 "Not expecting a device ID outside the table's bounds!");
50 __tgt_device_image *img = TransTable->TargetsImages[device_id];
51 if (!img) {
52 DP("No image loaded for device id %d.\n", device_id);
53 rc = OFFLOAD_FAIL;
54 break;
55 }
56 // 2) load image into the target table.
57 __tgt_target_table *TargetTable =
58 TransTable->TargetsTable[device_id] = Device.load_binary(img);
59 // Unable to get table for this image: invalidate image and fail.
60 if (!TargetTable) {
61 DP("Unable to generate entries table for device id %d.\n", device_id);
62 TransTable->TargetsImages[device_id] = 0;
63 rc = OFFLOAD_FAIL;
64 break;
65 }
66
67 // Verify whether the two table sizes match.
68 size_t hsize =
69 TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
70 size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
71
72 // Invalid image for these host entries!
73 if (hsize != tsize) {
74 DP("Host and Target tables mismatch for device id %d [%zx != %zx].\n",
75 device_id, hsize, tsize);
76 TransTable->TargetsImages[device_id] = 0;
77 TransTable->TargetsTable[device_id] = 0;
78 rc = OFFLOAD_FAIL;
79 break;
80 }
81
82 // process global data that needs to be mapped.
George Rokosd57681b2017-04-22 11:45:03 +000083 Device.DataMapMtx.lock();
George Rokos2467df62017-01-25 21:27:24 +000084 __tgt_target_table *HostTable = &TransTable->HostTable;
85 for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
86 *CurrHostEntry = HostTable->EntriesBegin,
87 *EntryDeviceEnd = TargetTable->EntriesEnd;
88 CurrDeviceEntry != EntryDeviceEnd;
89 CurrDeviceEntry++, CurrHostEntry++) {
90 if (CurrDeviceEntry->size != 0) {
91 // has data.
92 assert(CurrDeviceEntry->size == CurrHostEntry->size &&
93 "data size mismatch");
George Rokosba7380b2017-03-22 16:43:40 +000094
95 // Fortran may use multiple weak declarations for the same symbol,
96 // therefore we must allow for multiple weak symbols to be loaded from
97 // the fat binary. Treat these mappings as any other "regular" mapping.
98 // Add entry to map.
George Rokosd57681b2017-04-22 11:45:03 +000099 if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size))
100 continue;
George Rokos2467df62017-01-25 21:27:24 +0000101 DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
102 "\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
103 CurrDeviceEntry->size);
George Rokosd57681b2017-04-22 11:45:03 +0000104 Device.HostDataToTargetMap.push_front(HostDataToTargetTy(
105 (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
106 (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
107 (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/,
108 (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
109 INF_REF_CNT /*RefCount*/));
George Rokos2467df62017-01-25 21:27:24 +0000110 }
111 }
George Rokosd57681b2017-04-22 11:45:03 +0000112 Device.DataMapMtx.unlock();
George Rokos2467df62017-01-25 21:27:24 +0000113 }
114 TrlTblMtx.unlock();
115
116 if (rc != OFFLOAD_SUCCESS) {
117 Device.PendingGlobalsMtx.unlock();
118 return rc;
119 }
120
121 /*
122 * Run ctors for static objects
123 */
124 if (!Device.PendingCtorsDtors.empty()) {
125 // Call all ctors for all libraries registered so far
126 for (auto &lib : Device.PendingCtorsDtors) {
127 if (!lib.second.PendingCtors.empty()) {
128 DP("Has pending ctors... call now\n");
129 for (auto &entry : lib.second.PendingCtors) {
130 void *ctor = entry;
131 int rc = target(device_id, ctor, 0, NULL, NULL, NULL,
132 NULL, 1, 1, true /*team*/);
133 if (rc != OFFLOAD_SUCCESS) {
134 DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
135 Device.PendingGlobalsMtx.unlock();
136 return OFFLOAD_FAIL;
137 }
138 }
139 // Clear the list to indicate that this device has been used
140 lib.second.PendingCtors.clear();
141 DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(lib.first));
142 }
143 }
144 }
145 Device.HasPendingGlobals = false;
146 Device.PendingGlobalsMtx.unlock();
147
148 return OFFLOAD_SUCCESS;
149}
150
151// Check whether a device has been initialized, global ctors have been
152// executed and global data has been mapped; do so if not already done.
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000153int CheckDeviceAndCtors(int64_t device_id) {
George Rokos2467df62017-01-25 21:27:24 +0000154 // Is device ready?
155 if (!device_is_ready(device_id)) {
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000156 DP("Device %" PRId64 " is not ready.\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000157 return OFFLOAD_FAIL;
158 }
159
160 // Get device info.
161 DeviceTy &Device = Devices[device_id];
162
163 // Check whether global data has been mapped for this device
164 Device.PendingGlobalsMtx.lock();
165 bool hasPendingGlobals = Device.HasPendingGlobals;
166 Device.PendingGlobalsMtx.unlock();
167 if (hasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) {
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000168 DP("Failed to init globals on device %" PRId64 "\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000169 return OFFLOAD_FAIL;
170 }
171
172 return OFFLOAD_SUCCESS;
173}
174
George Rokos2467df62017-01-25 21:27:24 +0000175static short member_of(int64_t type) {
176 return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
177}
178
179/// Internal function to do the mapping and transfer the data to the device
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000180int target_data_begin(DeviceTy &Device, int32_t arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000181 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
182 // process each input.
183 int rc = OFFLOAD_SUCCESS;
184 for (int32_t i = 0; i < arg_num; ++i) {
185 // Ignore private variables and arrays - there is no mapping for them.
186 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
187 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
188 continue;
189
190 void *HstPtrBegin = args[i];
191 void *HstPtrBase = args_base[i];
192 // Address of pointer on the host and device, respectively.
193 void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
194 bool IsNew, Pointer_IsNew;
195 bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
196 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF);
197 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
198 DP("Has a pointer entry: \n");
199 // base is address of pointer.
200 Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
201 sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef);
202 if (!Pointer_TgtPtrBegin) {
203 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
204 "illegal mapping).\n");
205 }
206 DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
207 "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin),
208 (Pointer_IsNew ? "" : " not"));
209 Pointer_HstPtrBegin = HstPtrBase;
210 // modify current entry.
211 HstPtrBase = *(void **)HstPtrBase;
212 UpdateRef = true; // subsequently update ref count of pointee
213 }
214
215 void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
216 arg_sizes[i], IsNew, IsImplicit, UpdateRef);
217 if (!TgtPtrBegin && arg_sizes[i]) {
218 // If arg_sizes[i]==0, then the argument is a pointer to NULL, so
219 // getOrAlloc() returning NULL is not an error.
220 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
221 "illegal mapping).\n");
222 }
223 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
224 " - is%s new\n", arg_sizes[i], DPxPTR(TgtPtrBegin),
225 (IsNew ? "" : " not"));
226
227 if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
228 void *ret_ptr;
229 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
230 ret_ptr = Pointer_TgtPtrBegin;
231 else {
232 bool IsLast; // not used
233 ret_ptr = Device.getTgtPtrBegin(HstPtrBegin, 0, IsLast, false);
234 }
235
236 DP("Returning device pointer " DPxMOD "\n", DPxPTR(ret_ptr));
237 args_base[i] = ret_ptr;
238 }
239
240 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
241 bool copy = false;
242 if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
243 copy = true;
244 } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
245 // Copy data only if the "parent" struct has RefCount==1.
246 short parent_idx = member_of(arg_types[i]);
247 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
248 assert(parent_rc > 0 && "parent struct not found");
249 if (parent_rc == 1) {
250 copy = true;
251 }
252 }
253
254 if (copy) {
255 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
256 arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
257 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]);
258 if (rt != OFFLOAD_SUCCESS) {
259 DP("Copying data to device failed.\n");
260 rc = OFFLOAD_FAIL;
261 }
262 }
263 }
264
265 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
266 DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
267 DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
268 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
269 void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
270 int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase,
271 sizeof(void *));
272 if (rt != OFFLOAD_SUCCESS) {
273 DP("Copying data to device failed.\n");
274 rc = OFFLOAD_FAIL;
275 }
276 // create shadow pointers for this entry
277 Device.ShadowMtx.lock();
278 Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase,
279 Pointer_TgtPtrBegin, TgtPtrBase};
280 Device.ShadowMtx.unlock();
281 }
282 }
283
284 return rc;
285}
286
George Rokos2467df62017-01-25 21:27:24 +0000287/// Internal function to undo the mapping and retrieve the data from the device.
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000288int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
George Rokos2467df62017-01-25 21:27:24 +0000289 void **args, int64_t *arg_sizes, int64_t *arg_types) {
290 int rc = OFFLOAD_SUCCESS;
291 // process each input.
292 for (int32_t i = arg_num - 1; i >= 0; --i) {
293 // Ignore private variables and arrays - there is no mapping for them.
294 // Also, ignore the use_device_ptr directive, it has no effect here.
295 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
296 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
297 continue;
298
299 void *HstPtrBegin = args[i];
300 bool IsLast;
301 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
302 (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
303 bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
304
305 // If PTR_AND_OBJ, HstPtrBegin is address of pointee
306 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
307 UpdateRef);
308 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
309 " - is%s last\n", arg_sizes[i], DPxPTR(TgtPtrBegin),
310 (IsLast ? "" : " not"));
311
George Rokos15a6e7d2017-02-15 20:45:37 +0000312 bool DelEntry = IsLast || ForceDelete;
313
George Rokos2467df62017-01-25 21:27:24 +0000314 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
315 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
George Rokos15a6e7d2017-02-15 20:45:37 +0000316 DelEntry = false; // protect parent struct from being deallocated
George Rokos2467df62017-01-25 21:27:24 +0000317 }
318
George Rokos2467df62017-01-25 21:27:24 +0000319 if ((arg_types[i] & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
320 // Move data back to the host
321 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
322 bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
323 bool CopyMember = false;
324 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
325 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
326 // Copy data only if the "parent" struct has RefCount==1.
327 short parent_idx = member_of(arg_types[i]);
328 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
329 assert(parent_rc > 0 && "parent struct not found");
330 if (parent_rc == 1) {
331 CopyMember = true;
332 }
333 }
334
335 if (DelEntry || Always || CopyMember) {
336 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
337 arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
338 int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, arg_sizes[i]);
339 if (rt != OFFLOAD_SUCCESS) {
340 DP("Copying data from device failed.\n");
341 rc = OFFLOAD_FAIL;
342 }
343 }
344 }
345
346 // If we copied back to the host a struct/array containing pointers, we
347 // need to restore the original host pointer values from their shadow
348 // copies. If the struct is going to be deallocated, remove any remaining
349 // shadow pointer entries for this struct.
350 uintptr_t lb = (uintptr_t) HstPtrBegin;
351 uintptr_t ub = (uintptr_t) HstPtrBegin + arg_sizes[i];
352 Device.ShadowMtx.lock();
353 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
354 it != Device.ShadowPtrMap.end(); ++it) {
355 void **ShadowHstPtrAddr = (void**) it->first;
356
357 // An STL map is sorted on its keys; use this property
358 // to quickly determine when to break out of the loop.
359 if ((uintptr_t) ShadowHstPtrAddr < lb)
360 continue;
361 if ((uintptr_t) ShadowHstPtrAddr >= ub)
362 break;
363
364 // If we copied the struct to the host, we need to restore the pointer.
365 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
366 DP("Restoring original host pointer value " DPxMOD " for host "
367 "pointer " DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
368 DPxPTR(ShadowHstPtrAddr));
369 *ShadowHstPtrAddr = it->second.HstPtrVal;
370 }
371 // If the struct is to be deallocated, remove the shadow entry.
372 if (DelEntry) {
373 DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));
374 Device.ShadowPtrMap.erase(it);
375 }
376 }
377 Device.ShadowMtx.unlock();
378
379 // Deallocate map
380 if (DelEntry) {
381 int rt = Device.deallocTgtPtr(HstPtrBegin, arg_sizes[i], ForceDelete);
382 if (rt != OFFLOAD_SUCCESS) {
383 DP("Deallocating data from device failed.\n");
384 rc = OFFLOAD_FAIL;
385 }
386 }
387 }
388 }
389
390 return rc;
391}
392
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000393/// Internal function to pass data to/from the target.
394void target_data_update(DeviceTy &Device, int32_t arg_num,
George Rokosb92dbb42017-11-21 18:26:41 +0000395 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
George Rokos2467df62017-01-25 21:27:24 +0000396 // process each input.
397 for (int32_t i = 0; i < arg_num; ++i) {
398 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
399 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
400 continue;
401
402 void *HstPtrBegin = args[i];
403 int64_t MapSize = arg_sizes[i];
404 bool IsLast;
405 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
406 false);
407
408 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
409 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
410 arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
411 Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize);
412
413 uintptr_t lb = (uintptr_t) HstPtrBegin;
414 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
415 Device.ShadowMtx.lock();
416 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
417 it != Device.ShadowPtrMap.end(); ++it) {
418 void **ShadowHstPtrAddr = (void**) it->first;
419 if ((uintptr_t) ShadowHstPtrAddr < lb)
420 continue;
421 if ((uintptr_t) ShadowHstPtrAddr >= ub)
422 break;
423 DP("Restoring original host pointer value " DPxMOD " for host pointer "
424 DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
425 DPxPTR(ShadowHstPtrAddr));
426 *ShadowHstPtrAddr = it->second.HstPtrVal;
427 }
428 Device.ShadowMtx.unlock();
429 }
430
431 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
432 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
433 arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
434 Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize);
435
436 uintptr_t lb = (uintptr_t) HstPtrBegin;
437 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
438 Device.ShadowMtx.lock();
439 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
440 it != Device.ShadowPtrMap.end(); ++it) {
441 void **ShadowHstPtrAddr = (void**) it->first;
442 if ((uintptr_t) ShadowHstPtrAddr < lb)
443 continue;
444 if ((uintptr_t) ShadowHstPtrAddr >= ub)
445 break;
446 DP("Restoring original target pointer value " DPxMOD " for target "
447 "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal),
448 DPxPTR(it->second.TgtPtrAddr));
449 Device.data_submit(it->second.TgtPtrAddr,
450 &it->second.TgtPtrVal, sizeof(void *));
451 }
452 Device.ShadowMtx.unlock();
453 }
454 }
455}
456
George Rokos2467df62017-01-25 21:27:24 +0000457/// performs the same actions as data_begin in case arg_num is
458/// non-zero and initiates run of the offloaded region on the target platform;
459/// if arg_num is non-zero after the region execution is done it also
460/// performs the same action as data_update and data_end above. This function
461/// returns 0 if it was able to transfer the execution to a target and an
462/// integer different from zero otherwise.
Jonas Hahnfeld43322802017-12-06 21:59:07 +0000463int target(int64_t device_id, void *host_ptr, int32_t arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000464 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
465 int32_t team_num, int32_t thread_limit, int IsTeamConstruct) {
466 DeviceTy &Device = Devices[device_id];
467
468 // Find the table information in the map or look it up in the translation
469 // tables.
470 TableMap *TM = 0;
471 TblMapMtx.lock();
472 HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr);
473 if (TableMapIt == HostPtrToTableMap.end()) {
474 // We don't have a map. So search all the registered libraries.
475 TrlTblMtx.lock();
476 for (HostEntriesBeginToTransTableTy::iterator
477 ii = HostEntriesBeginToTransTable.begin(),
478 ie = HostEntriesBeginToTransTable.end();
479 !TM && ii != ie; ++ii) {
480 // get the translation table (which contains all the good info).
481 TranslationTable *TransTable = &ii->second;
482 // iterate over all the host table entries to see if we can locate the
483 // host_ptr.
484 __tgt_offload_entry *begin = TransTable->HostTable.EntriesBegin;
485 __tgt_offload_entry *end = TransTable->HostTable.EntriesEnd;
486 __tgt_offload_entry *cur = begin;
487 for (uint32_t i = 0; cur < end; ++cur, ++i) {
488 if (cur->addr != host_ptr)
489 continue;
490 // we got a match, now fill the HostPtrToTableMap so that we
491 // may avoid this search next time.
492 TM = &HostPtrToTableMap[host_ptr];
493 TM->Table = TransTable;
494 TM->Index = i;
495 break;
496 }
497 }
498 TrlTblMtx.unlock();
499 } else {
500 TM = &TableMapIt->second;
501 }
502 TblMapMtx.unlock();
503
504 // No map for this host pointer found!
505 if (!TM) {
506 DP("Host ptr " DPxMOD " does not have a matching target pointer.\n",
507 DPxPTR(host_ptr));
508 return OFFLOAD_FAIL;
509 }
510
511 // get target table.
512 TrlTblMtx.lock();
513 assert(TM->Table->TargetsTable.size() > (size_t)device_id &&
514 "Not expecting a device ID outside the table's bounds!");
515 __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id];
516 TrlTblMtx.unlock();
517 assert(TargetTable && "Global data has not been mapped\n");
518
519 // Move data to device.
520 int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
521 arg_types);
522
523 if (rc != OFFLOAD_SUCCESS) {
524 DP("Call to target_data_begin failed, skipping target execution.\n");
525 // Call target_data_end to dealloc whatever target_data_begin allocated
526 // and return OFFLOAD_FAIL.
527 target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types);
528 return OFFLOAD_FAIL;
529 }
530
531 std::vector<void *> tgt_args;
George Rokos1546d312017-05-10 14:12:36 +0000532 std::vector<ptrdiff_t> tgt_offsets;
George Rokos2467df62017-01-25 21:27:24 +0000533
534 // List of (first-)private arrays allocated for this target region
535 std::vector<void *> fpArrays;
536
537 for (int32_t i = 0; i < arg_num; ++i) {
538 if (!(arg_types[i] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
539 // This is not a target parameter, do not push it into tgt_args.
540 continue;
541 }
542 void *HstPtrBegin = args[i];
543 void *HstPtrBase = args_base[i];
George Rokos1546d312017-05-10 14:12:36 +0000544 void *TgtPtrBegin;
545 ptrdiff_t TgtBaseOffset;
George Rokos2467df62017-01-25 21:27:24 +0000546 bool IsLast; // unused.
547 if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
548 DP("Forwarding first-private value " DPxMOD " to the target construct\n",
549 DPxPTR(HstPtrBase));
George Rokos1546d312017-05-10 14:12:36 +0000550 TgtPtrBegin = HstPtrBase;
551 TgtBaseOffset = 0;
George Rokos2467df62017-01-25 21:27:24 +0000552 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
553 // Allocate memory for (first-)private array
George Rokos1546d312017-05-10 14:12:36 +0000554 TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
555 arg_sizes[i], HstPtrBegin);
George Rokos2467df62017-01-25 21:27:24 +0000556 if (!TgtPtrBegin) {
557 DP ("Data allocation for %sprivate array " DPxMOD " failed\n",
558 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
559 DPxPTR(HstPtrBegin));
560 rc = OFFLOAD_FAIL;
561 break;
562 } else {
563 fpArrays.push_back(TgtPtrBegin);
George Rokos1546d312017-05-10 14:12:36 +0000564 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000565#ifdef OMPTARGET_DEBUG
George Rokos1546d312017-05-10 14:12:36 +0000566 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000567 DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for "
568 "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n",
569 arg_sizes[i], DPxPTR(TgtPtrBegin),
570 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
571 DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000572#endif
George Rokos2467df62017-01-25 21:27:24 +0000573 // If first-private, copy data from host
574 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
575 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]);
576 if (rt != OFFLOAD_SUCCESS) {
577 DP ("Copying data to device failed.\n");
578 rc = OFFLOAD_FAIL;
579 break;
580 }
581 }
582 }
583 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
George Rokos1546d312017-05-10 14:12:36 +0000584 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
585 false);
586 TgtBaseOffset = 0; // no offset for ptrs.
George Rokos2467df62017-01-25 21:27:24 +0000587 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
588 "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
589 DPxPTR(HstPtrBase));
590 } else {
George Rokos1546d312017-05-10 14:12:36 +0000591 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
592 false);
593 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000594#ifdef OMPTARGET_DEBUG
George Rokos1546d312017-05-10 14:12:36 +0000595 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000596 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
597 DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000598#endif
George Rokos2467df62017-01-25 21:27:24 +0000599 }
George Rokos1546d312017-05-10 14:12:36 +0000600 tgt_args.push_back(TgtPtrBegin);
601 tgt_offsets.push_back(TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000602 }
George Rokos1546d312017-05-10 14:12:36 +0000603
604 assert(tgt_args.size() == tgt_offsets.size() &&
605 "Size mismatch in arguments and offsets");
George Rokos2467df62017-01-25 21:27:24 +0000606
607 // Pop loop trip count
608 uint64_t ltc = Device.loopTripCnt;
609 Device.loopTripCnt = 0;
610
611 // Launch device execution.
612 if (rc == OFFLOAD_SUCCESS) {
613 DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
614 TargetTable->EntriesBegin[TM->Index].name,
615 DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index);
616 if (IsTeamConstruct) {
617 rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr,
George Rokos1546d312017-05-10 14:12:36 +0000618 &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num,
619 thread_limit, ltc);
George Rokos2467df62017-01-25 21:27:24 +0000620 } else {
621 rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr,
George Rokos1546d312017-05-10 14:12:36 +0000622 &tgt_args[0], &tgt_offsets[0], tgt_args.size());
George Rokos2467df62017-01-25 21:27:24 +0000623 }
624 } else {
625 DP("Errors occurred while obtaining target arguments, skipping kernel "
626 "execution\n");
627 }
628
629 // Deallocate (first-)private arrays
630 for (auto it : fpArrays) {
631 int rt = Device.RTL->data_delete(Device.RTLDeviceID, it);
632 if (rt != OFFLOAD_SUCCESS) {
633 DP("Deallocation of (first-)private arrays failed.\n");
634 rc = OFFLOAD_FAIL;
635 }
636 }
637
638 // Move data from device.
639 int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes,
640 arg_types);
641
642 if (rt != OFFLOAD_SUCCESS) {
643 DP("Call to target_data_end failed.\n");
644 rc = OFFLOAD_FAIL;
645 }
646
647 return rc;
648}