blob: 3cc09b4578584570cb996a59e8ea30661c8acb01 [file] [log] [blame]
George Rokos2467df62017-01-25 21:27:24 +00001//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is dual licensed under the MIT and the University of Illinois Open
6// Source Licenses. See LICENSE.txt for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation of the interface to be used by Clang during the codegen of a
11// target region.
12//
13//===----------------------------------------------------------------------===//
14
Jonas Hahnfeld43322802017-12-06 21:59:07 +000015#include <omptarget.h>
16
17#include "device.h"
18#include "private.h"
19#include "rtl.h"
20
George Rokos2467df62017-01-25 21:27:24 +000021#include <cassert>
George Rokos2467df62017-01-25 21:27:24 +000022#include <vector>
23
Sergey Dmitrievb305d262017-08-14 15:09:59 +000024#ifdef OMPTARGET_DEBUG
Jonas Hahnfeld43322802017-12-06 21:59:07 +000025int DebugLevel = 0;
Sergey Dmitrievb305d262017-08-14 15:09:59 +000026#endif // OMPTARGET_DEBUG
27
George Rokosa0da2462018-07-19 13:41:03 +000028/* All begin addresses for partially mapped structs must be 8-aligned in order
29 * to ensure proper alignment of members. E.g.
30 *
31 * struct S {
32 * int a; // 4-aligned
33 * int b; // 4-aligned
34 * int *p; // 8-aligned
35 * } s1;
36 * ...
37 * #pragma omp target map(tofrom: s1.b, s1.p[0:N])
38 * {
39 * s1.b = 5;
40 * for (int i...) s1.p[i] = ...;
41 * }
42 *
43 * Here we are mapping s1 starting from member b, so BaseAddress=&s1=&s1.a and
44 * BeginAddress=&s1.b. Let's assume that the struct begins at address 0x100,
45 * then &s1.a=0x100, &s1.b=0x104, &s1.p=0x108. Each member obeys the alignment
46 * requirements for its type. Now, when we allocate memory on the device, in
47 * CUDA's case cuMemAlloc() returns an address which is at least 256-aligned.
48 * This means that the chunk of the struct on the device will start at a
49 * 256-aligned address, let's say 0x200. Then the address of b will be 0x200 and
50 * address of p will be a misaligned 0x204 (on the host there was no need to add
51 * padding between b and p, so p comes exactly 4 bytes after b). If the device
52 * kernel tries to access s1.p, a misaligned address error occurs (as reported
53 * by the CUDA plugin). By padding the begin address down to a multiple of 8 and
54 * extending the size of the allocated chuck accordingly, the chuck on the
55 * device will start at 0x200 with the padding (4 bytes), then &s1.b=0x204 and
56 * &s1.p=0x208, as they should be to satisfy the alignment requirements.
57 */
58static const int64_t alignment = 8;
59
George Rokos2467df62017-01-25 21:27:24 +000060/// Map global data and execute pending ctors
61static int InitLibrary(DeviceTy& Device) {
62 /*
63 * Map global data
64 */
65 int32_t device_id = Device.DeviceID;
66 int rc = OFFLOAD_SUCCESS;
67
68 Device.PendingGlobalsMtx.lock();
69 TrlTblMtx.lock();
70 for (HostEntriesBeginToTransTableTy::iterator
71 ii = HostEntriesBeginToTransTable.begin();
72 ii != HostEntriesBeginToTransTable.end(); ++ii) {
73 TranslationTable *TransTable = &ii->second;
74 if (TransTable->TargetsTable[device_id] != 0) {
75 // Library entries have already been processed
76 continue;
77 }
78
79 // 1) get image.
80 assert(TransTable->TargetsImages.size() > (size_t)device_id &&
81 "Not expecting a device ID outside the table's bounds!");
82 __tgt_device_image *img = TransTable->TargetsImages[device_id];
83 if (!img) {
84 DP("No image loaded for device id %d.\n", device_id);
85 rc = OFFLOAD_FAIL;
86 break;
87 }
88 // 2) load image into the target table.
89 __tgt_target_table *TargetTable =
90 TransTable->TargetsTable[device_id] = Device.load_binary(img);
91 // Unable to get table for this image: invalidate image and fail.
92 if (!TargetTable) {
93 DP("Unable to generate entries table for device id %d.\n", device_id);
94 TransTable->TargetsImages[device_id] = 0;
95 rc = OFFLOAD_FAIL;
96 break;
97 }
98
99 // Verify whether the two table sizes match.
100 size_t hsize =
101 TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
102 size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
103
104 // Invalid image for these host entries!
105 if (hsize != tsize) {
106 DP("Host and Target tables mismatch for device id %d [%zx != %zx].\n",
107 device_id, hsize, tsize);
108 TransTable->TargetsImages[device_id] = 0;
109 TransTable->TargetsTable[device_id] = 0;
110 rc = OFFLOAD_FAIL;
111 break;
112 }
113
114 // process global data that needs to be mapped.
George Rokosd57681b2017-04-22 11:45:03 +0000115 Device.DataMapMtx.lock();
George Rokos2467df62017-01-25 21:27:24 +0000116 __tgt_target_table *HostTable = &TransTable->HostTable;
117 for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
118 *CurrHostEntry = HostTable->EntriesBegin,
119 *EntryDeviceEnd = TargetTable->EntriesEnd;
120 CurrDeviceEntry != EntryDeviceEnd;
121 CurrDeviceEntry++, CurrHostEntry++) {
122 if (CurrDeviceEntry->size != 0) {
123 // has data.
124 assert(CurrDeviceEntry->size == CurrHostEntry->size &&
125 "data size mismatch");
George Rokosba7380b2017-03-22 16:43:40 +0000126
127 // Fortran may use multiple weak declarations for the same symbol,
128 // therefore we must allow for multiple weak symbols to be loaded from
129 // the fat binary. Treat these mappings as any other "regular" mapping.
130 // Add entry to map.
George Rokosd57681b2017-04-22 11:45:03 +0000131 if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size))
132 continue;
George Rokos2467df62017-01-25 21:27:24 +0000133 DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
134 "\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
135 CurrDeviceEntry->size);
George Rokosd57681b2017-04-22 11:45:03 +0000136 Device.HostDataToTargetMap.push_front(HostDataToTargetTy(
137 (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
138 (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
139 (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/,
140 (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
141 INF_REF_CNT /*RefCount*/));
George Rokos2467df62017-01-25 21:27:24 +0000142 }
143 }
George Rokosd57681b2017-04-22 11:45:03 +0000144 Device.DataMapMtx.unlock();
George Rokos2467df62017-01-25 21:27:24 +0000145 }
146 TrlTblMtx.unlock();
147
148 if (rc != OFFLOAD_SUCCESS) {
149 Device.PendingGlobalsMtx.unlock();
150 return rc;
151 }
152
153 /*
154 * Run ctors for static objects
155 */
156 if (!Device.PendingCtorsDtors.empty()) {
157 // Call all ctors for all libraries registered so far
158 for (auto &lib : Device.PendingCtorsDtors) {
159 if (!lib.second.PendingCtors.empty()) {
160 DP("Has pending ctors... call now\n");
161 for (auto &entry : lib.second.PendingCtors) {
162 void *ctor = entry;
163 int rc = target(device_id, ctor, 0, NULL, NULL, NULL,
164 NULL, 1, 1, true /*team*/);
165 if (rc != OFFLOAD_SUCCESS) {
166 DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
167 Device.PendingGlobalsMtx.unlock();
168 return OFFLOAD_FAIL;
169 }
170 }
171 // Clear the list to indicate that this device has been used
172 lib.second.PendingCtors.clear();
173 DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(lib.first));
174 }
175 }
176 }
177 Device.HasPendingGlobals = false;
178 Device.PendingGlobalsMtx.unlock();
179
180 return OFFLOAD_SUCCESS;
181}
182
183// Check whether a device has been initialized, global ctors have been
184// executed and global data has been mapped; do so if not already done.
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000185int CheckDeviceAndCtors(int64_t device_id) {
George Rokos2467df62017-01-25 21:27:24 +0000186 // Is device ready?
187 if (!device_is_ready(device_id)) {
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000188 DP("Device %" PRId64 " is not ready.\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000189 return OFFLOAD_FAIL;
190 }
191
192 // Get device info.
193 DeviceTy &Device = Devices[device_id];
194
195 // Check whether global data has been mapped for this device
196 Device.PendingGlobalsMtx.lock();
197 bool hasPendingGlobals = Device.HasPendingGlobals;
198 Device.PendingGlobalsMtx.unlock();
199 if (hasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) {
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000200 DP("Failed to init globals on device %" PRId64 "\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000201 return OFFLOAD_FAIL;
202 }
203
204 return OFFLOAD_SUCCESS;
205}
206
George Rokosa0da2462018-07-19 13:41:03 +0000207static int32_t member_of(int64_t type) {
George Rokos2467df62017-01-25 21:27:24 +0000208 return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
209}
210
211/// Internal function to do the mapping and transfer the data to the device
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000212int target_data_begin(DeviceTy &Device, int32_t arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000213 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
214 // process each input.
215 int rc = OFFLOAD_SUCCESS;
216 for (int32_t i = 0; i < arg_num; ++i) {
217 // Ignore private variables and arrays - there is no mapping for them.
218 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
219 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
220 continue;
221
222 void *HstPtrBegin = args[i];
223 void *HstPtrBase = args_base[i];
George Rokosa0da2462018-07-19 13:41:03 +0000224 int64_t data_size = arg_sizes[i];
225
226 // Adjust for proper alignment if this is a combined entry (for structs).
227 // Look at the next argument - if that is MEMBER_OF this one, then this one
228 // is a combined entry.
229 int64_t padding = 0;
230 const int next_i = i+1;
231 if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
232 member_of(arg_types[next_i]) == i) {
233 padding = (int64_t)HstPtrBegin % alignment;
234 if (padding) {
235 DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
236 "\n", padding, DPxPTR(HstPtrBegin));
237 HstPtrBegin = (char *) HstPtrBegin - padding;
238 data_size += padding;
239 }
240 }
241
George Rokos2467df62017-01-25 21:27:24 +0000242 // Address of pointer on the host and device, respectively.
243 void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
244 bool IsNew, Pointer_IsNew;
245 bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
George Rokosa0da2462018-07-19 13:41:03 +0000246 // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
247 // have reached this point via __tgt_target_data_begin and not __tgt_target
248 // then no argument is marked as TARGET_PARAM ("omp target data map" is not
249 // associated with a target region, so there are no target parameters). This
250 // may be considered a hack, we could revise the scheme in the future.
George Rokos2467df62017-01-25 21:27:24 +0000251 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF);
252 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
253 DP("Has a pointer entry: \n");
254 // base is address of pointer.
255 Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
256 sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef);
257 if (!Pointer_TgtPtrBegin) {
258 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
259 "illegal mapping).\n");
260 }
261 DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
262 "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin),
263 (Pointer_IsNew ? "" : " not"));
264 Pointer_HstPtrBegin = HstPtrBase;
265 // modify current entry.
266 HstPtrBase = *(void **)HstPtrBase;
267 UpdateRef = true; // subsequently update ref count of pointee
268 }
269
270 void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
George Rokosa0da2462018-07-19 13:41:03 +0000271 data_size, IsNew, IsImplicit, UpdateRef);
272 if (!TgtPtrBegin && data_size) {
273 // If data_size==0, then the argument could be a zero-length pointer to
274 // NULL, so getOrAlloc() returning NULL is not an error.
George Rokos2467df62017-01-25 21:27:24 +0000275 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
276 "illegal mapping).\n");
277 }
278 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
George Rokosa0da2462018-07-19 13:41:03 +0000279 " - is%s new\n", data_size, DPxPTR(TgtPtrBegin),
George Rokos2467df62017-01-25 21:27:24 +0000280 (IsNew ? "" : " not"));
281
282 if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
George Rokosa0da2462018-07-19 13:41:03 +0000283 uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
284 void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
285 DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase));
286 args_base[i] = TgtPtrBase;
George Rokos2467df62017-01-25 21:27:24 +0000287 }
288
289 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
290 bool copy = false;
291 if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
292 copy = true;
293 } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
294 // Copy data only if the "parent" struct has RefCount==1.
George Rokosa0da2462018-07-19 13:41:03 +0000295 int32_t parent_idx = member_of(arg_types[i]);
George Rokos2467df62017-01-25 21:27:24 +0000296 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
297 assert(parent_rc > 0 && "parent struct not found");
298 if (parent_rc == 1) {
299 copy = true;
300 }
301 }
302
303 if (copy) {
304 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
George Rokosa0da2462018-07-19 13:41:03 +0000305 data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
306 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size);
George Rokos2467df62017-01-25 21:27:24 +0000307 if (rt != OFFLOAD_SUCCESS) {
308 DP("Copying data to device failed.\n");
309 rc = OFFLOAD_FAIL;
310 }
311 }
312 }
313
314 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
315 DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
316 DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
317 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
318 void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
319 int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase,
320 sizeof(void *));
321 if (rt != OFFLOAD_SUCCESS) {
322 DP("Copying data to device failed.\n");
323 rc = OFFLOAD_FAIL;
324 }
325 // create shadow pointers for this entry
326 Device.ShadowMtx.lock();
327 Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase,
328 Pointer_TgtPtrBegin, TgtPtrBase};
329 Device.ShadowMtx.unlock();
330 }
331 }
332
333 return rc;
334}
335
George Rokos2467df62017-01-25 21:27:24 +0000336/// Internal function to undo the mapping and retrieve the data from the device.
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000337int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
George Rokos2467df62017-01-25 21:27:24 +0000338 void **args, int64_t *arg_sizes, int64_t *arg_types) {
339 int rc = OFFLOAD_SUCCESS;
340 // process each input.
341 for (int32_t i = arg_num - 1; i >= 0; --i) {
342 // Ignore private variables and arrays - there is no mapping for them.
343 // Also, ignore the use_device_ptr directive, it has no effect here.
344 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
345 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
346 continue;
347
348 void *HstPtrBegin = args[i];
George Rokosa0da2462018-07-19 13:41:03 +0000349 int64_t data_size = arg_sizes[i];
350 // Adjust for proper alignment if this is a combined entry (for structs).
351 // Look at the next argument - if that is MEMBER_OF this one, then this one
352 // is a combined entry.
353 int64_t padding = 0;
354 const int next_i = i+1;
355 if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
356 member_of(arg_types[next_i]) == i) {
357 padding = (int64_t)HstPtrBegin % alignment;
358 if (padding) {
359 DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
360 "\n", padding, DPxPTR(HstPtrBegin));
361 HstPtrBegin = (char *) HstPtrBegin - padding;
362 data_size += padding;
363 }
364 }
365
George Rokos2467df62017-01-25 21:27:24 +0000366 bool IsLast;
367 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
368 (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
369 bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
370
371 // If PTR_AND_OBJ, HstPtrBegin is address of pointee
George Rokosa0da2462018-07-19 13:41:03 +0000372 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
George Rokos2467df62017-01-25 21:27:24 +0000373 UpdateRef);
374 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
George Rokosa0da2462018-07-19 13:41:03 +0000375 " - is%s last\n", data_size, DPxPTR(TgtPtrBegin),
George Rokos2467df62017-01-25 21:27:24 +0000376 (IsLast ? "" : " not"));
377
George Rokos15a6e7d2017-02-15 20:45:37 +0000378 bool DelEntry = IsLast || ForceDelete;
379
George Rokos2467df62017-01-25 21:27:24 +0000380 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
381 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
George Rokos15a6e7d2017-02-15 20:45:37 +0000382 DelEntry = false; // protect parent struct from being deallocated
George Rokos2467df62017-01-25 21:27:24 +0000383 }
384
George Rokos2467df62017-01-25 21:27:24 +0000385 if ((arg_types[i] & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
386 // Move data back to the host
387 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
388 bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
389 bool CopyMember = false;
390 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
391 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
392 // Copy data only if the "parent" struct has RefCount==1.
George Rokosa0da2462018-07-19 13:41:03 +0000393 int32_t parent_idx = member_of(arg_types[i]);
George Rokos2467df62017-01-25 21:27:24 +0000394 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
395 assert(parent_rc > 0 && "parent struct not found");
396 if (parent_rc == 1) {
397 CopyMember = true;
398 }
399 }
400
401 if (DelEntry || Always || CopyMember) {
402 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
George Rokosa0da2462018-07-19 13:41:03 +0000403 data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
404 int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size);
George Rokos2467df62017-01-25 21:27:24 +0000405 if (rt != OFFLOAD_SUCCESS) {
406 DP("Copying data from device failed.\n");
407 rc = OFFLOAD_FAIL;
408 }
409 }
410 }
411
412 // If we copied back to the host a struct/array containing pointers, we
413 // need to restore the original host pointer values from their shadow
414 // copies. If the struct is going to be deallocated, remove any remaining
415 // shadow pointer entries for this struct.
416 uintptr_t lb = (uintptr_t) HstPtrBegin;
George Rokosa0da2462018-07-19 13:41:03 +0000417 uintptr_t ub = (uintptr_t) HstPtrBegin + data_size;
George Rokos2467df62017-01-25 21:27:24 +0000418 Device.ShadowMtx.lock();
419 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
420 it != Device.ShadowPtrMap.end(); ++it) {
421 void **ShadowHstPtrAddr = (void**) it->first;
422
423 // An STL map is sorted on its keys; use this property
424 // to quickly determine when to break out of the loop.
425 if ((uintptr_t) ShadowHstPtrAddr < lb)
426 continue;
427 if ((uintptr_t) ShadowHstPtrAddr >= ub)
428 break;
429
430 // If we copied the struct to the host, we need to restore the pointer.
431 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
432 DP("Restoring original host pointer value " DPxMOD " for host "
433 "pointer " DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
434 DPxPTR(ShadowHstPtrAddr));
435 *ShadowHstPtrAddr = it->second.HstPtrVal;
436 }
437 // If the struct is to be deallocated, remove the shadow entry.
438 if (DelEntry) {
439 DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));
440 Device.ShadowPtrMap.erase(it);
441 }
442 }
443 Device.ShadowMtx.unlock();
444
445 // Deallocate map
446 if (DelEntry) {
George Rokosa0da2462018-07-19 13:41:03 +0000447 int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete);
George Rokos2467df62017-01-25 21:27:24 +0000448 if (rt != OFFLOAD_SUCCESS) {
449 DP("Deallocating data from device failed.\n");
450 rc = OFFLOAD_FAIL;
451 }
452 }
453 }
454 }
455
456 return rc;
457}
458
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000459/// Internal function to pass data to/from the target.
460void target_data_update(DeviceTy &Device, int32_t arg_num,
George Rokosb92dbb42017-11-21 18:26:41 +0000461 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
George Rokos2467df62017-01-25 21:27:24 +0000462 // process each input.
463 for (int32_t i = 0; i < arg_num; ++i) {
464 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
465 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
466 continue;
467
468 void *HstPtrBegin = args[i];
469 int64_t MapSize = arg_sizes[i];
470 bool IsLast;
471 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
472 false);
473
474 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
475 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
476 arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
477 Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize);
478
479 uintptr_t lb = (uintptr_t) HstPtrBegin;
480 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
481 Device.ShadowMtx.lock();
482 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
483 it != Device.ShadowPtrMap.end(); ++it) {
484 void **ShadowHstPtrAddr = (void**) it->first;
485 if ((uintptr_t) ShadowHstPtrAddr < lb)
486 continue;
487 if ((uintptr_t) ShadowHstPtrAddr >= ub)
488 break;
489 DP("Restoring original host pointer value " DPxMOD " for host pointer "
490 DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
491 DPxPTR(ShadowHstPtrAddr));
492 *ShadowHstPtrAddr = it->second.HstPtrVal;
493 }
494 Device.ShadowMtx.unlock();
495 }
496
497 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
498 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
499 arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
500 Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize);
501
502 uintptr_t lb = (uintptr_t) HstPtrBegin;
503 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
504 Device.ShadowMtx.lock();
505 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
506 it != Device.ShadowPtrMap.end(); ++it) {
507 void **ShadowHstPtrAddr = (void**) it->first;
508 if ((uintptr_t) ShadowHstPtrAddr < lb)
509 continue;
510 if ((uintptr_t) ShadowHstPtrAddr >= ub)
511 break;
512 DP("Restoring original target pointer value " DPxMOD " for target "
513 "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal),
514 DPxPTR(it->second.TgtPtrAddr));
515 Device.data_submit(it->second.TgtPtrAddr,
516 &it->second.TgtPtrVal, sizeof(void *));
517 }
518 Device.ShadowMtx.unlock();
519 }
520 }
521}
522
George Rokos2467df62017-01-25 21:27:24 +0000523/// performs the same actions as data_begin in case arg_num is
524/// non-zero and initiates run of the offloaded region on the target platform;
525/// if arg_num is non-zero after the region execution is done it also
526/// performs the same action as data_update and data_end above. This function
527/// returns 0 if it was able to transfer the execution to a target and an
528/// integer different from zero otherwise.
Jonas Hahnfeld43322802017-12-06 21:59:07 +0000529int target(int64_t device_id, void *host_ptr, int32_t arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000530 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
531 int32_t team_num, int32_t thread_limit, int IsTeamConstruct) {
532 DeviceTy &Device = Devices[device_id];
533
534 // Find the table information in the map or look it up in the translation
535 // tables.
536 TableMap *TM = 0;
537 TblMapMtx.lock();
538 HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr);
539 if (TableMapIt == HostPtrToTableMap.end()) {
540 // We don't have a map. So search all the registered libraries.
541 TrlTblMtx.lock();
542 for (HostEntriesBeginToTransTableTy::iterator
543 ii = HostEntriesBeginToTransTable.begin(),
544 ie = HostEntriesBeginToTransTable.end();
545 !TM && ii != ie; ++ii) {
546 // get the translation table (which contains all the good info).
547 TranslationTable *TransTable = &ii->second;
548 // iterate over all the host table entries to see if we can locate the
549 // host_ptr.
550 __tgt_offload_entry *begin = TransTable->HostTable.EntriesBegin;
551 __tgt_offload_entry *end = TransTable->HostTable.EntriesEnd;
552 __tgt_offload_entry *cur = begin;
553 for (uint32_t i = 0; cur < end; ++cur, ++i) {
554 if (cur->addr != host_ptr)
555 continue;
556 // we got a match, now fill the HostPtrToTableMap so that we
557 // may avoid this search next time.
558 TM = &HostPtrToTableMap[host_ptr];
559 TM->Table = TransTable;
560 TM->Index = i;
561 break;
562 }
563 }
564 TrlTblMtx.unlock();
565 } else {
566 TM = &TableMapIt->second;
567 }
568 TblMapMtx.unlock();
569
570 // No map for this host pointer found!
571 if (!TM) {
572 DP("Host ptr " DPxMOD " does not have a matching target pointer.\n",
573 DPxPTR(host_ptr));
574 return OFFLOAD_FAIL;
575 }
576
577 // get target table.
578 TrlTblMtx.lock();
579 assert(TM->Table->TargetsTable.size() > (size_t)device_id &&
580 "Not expecting a device ID outside the table's bounds!");
581 __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id];
582 TrlTblMtx.unlock();
583 assert(TargetTable && "Global data has not been mapped\n");
584
585 // Move data to device.
586 int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
587 arg_types);
588
589 if (rc != OFFLOAD_SUCCESS) {
590 DP("Call to target_data_begin failed, skipping target execution.\n");
591 // Call target_data_end to dealloc whatever target_data_begin allocated
592 // and return OFFLOAD_FAIL.
593 target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types);
594 return OFFLOAD_FAIL;
595 }
596
597 std::vector<void *> tgt_args;
George Rokos1546d312017-05-10 14:12:36 +0000598 std::vector<ptrdiff_t> tgt_offsets;
George Rokos2467df62017-01-25 21:27:24 +0000599
600 // List of (first-)private arrays allocated for this target region
601 std::vector<void *> fpArrays;
602
603 for (int32_t i = 0; i < arg_num; ++i) {
604 if (!(arg_types[i] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
605 // This is not a target parameter, do not push it into tgt_args.
606 continue;
607 }
608 void *HstPtrBegin = args[i];
609 void *HstPtrBase = args_base[i];
George Rokos1546d312017-05-10 14:12:36 +0000610 void *TgtPtrBegin;
611 ptrdiff_t TgtBaseOffset;
George Rokos2467df62017-01-25 21:27:24 +0000612 bool IsLast; // unused.
613 if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
614 DP("Forwarding first-private value " DPxMOD " to the target construct\n",
615 DPxPTR(HstPtrBase));
George Rokos1546d312017-05-10 14:12:36 +0000616 TgtPtrBegin = HstPtrBase;
617 TgtBaseOffset = 0;
George Rokos2467df62017-01-25 21:27:24 +0000618 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
619 // Allocate memory for (first-)private array
George Rokos1546d312017-05-10 14:12:36 +0000620 TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
621 arg_sizes[i], HstPtrBegin);
George Rokos2467df62017-01-25 21:27:24 +0000622 if (!TgtPtrBegin) {
623 DP ("Data allocation for %sprivate array " DPxMOD " failed\n",
624 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
625 DPxPTR(HstPtrBegin));
626 rc = OFFLOAD_FAIL;
627 break;
628 } else {
629 fpArrays.push_back(TgtPtrBegin);
George Rokos1546d312017-05-10 14:12:36 +0000630 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000631#ifdef OMPTARGET_DEBUG
George Rokos1546d312017-05-10 14:12:36 +0000632 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000633 DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for "
634 "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n",
635 arg_sizes[i], DPxPTR(TgtPtrBegin),
636 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
637 DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000638#endif
George Rokos2467df62017-01-25 21:27:24 +0000639 // If first-private, copy data from host
640 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
641 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]);
642 if (rt != OFFLOAD_SUCCESS) {
643 DP ("Copying data to device failed.\n");
644 rc = OFFLOAD_FAIL;
645 break;
646 }
647 }
648 }
649 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
George Rokos1546d312017-05-10 14:12:36 +0000650 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
651 false);
652 TgtBaseOffset = 0; // no offset for ptrs.
George Rokos2467df62017-01-25 21:27:24 +0000653 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
654 "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
655 DPxPTR(HstPtrBase));
656 } else {
George Rokos1546d312017-05-10 14:12:36 +0000657 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
658 false);
659 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000660#ifdef OMPTARGET_DEBUG
George Rokos1546d312017-05-10 14:12:36 +0000661 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000662 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
663 DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000664#endif
George Rokos2467df62017-01-25 21:27:24 +0000665 }
George Rokos1546d312017-05-10 14:12:36 +0000666 tgt_args.push_back(TgtPtrBegin);
667 tgt_offsets.push_back(TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000668 }
George Rokos1546d312017-05-10 14:12:36 +0000669
670 assert(tgt_args.size() == tgt_offsets.size() &&
671 "Size mismatch in arguments and offsets");
George Rokos2467df62017-01-25 21:27:24 +0000672
673 // Pop loop trip count
674 uint64_t ltc = Device.loopTripCnt;
675 Device.loopTripCnt = 0;
676
677 // Launch device execution.
678 if (rc == OFFLOAD_SUCCESS) {
679 DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
680 TargetTable->EntriesBegin[TM->Index].name,
681 DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index);
682 if (IsTeamConstruct) {
683 rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr,
George Rokos1546d312017-05-10 14:12:36 +0000684 &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num,
685 thread_limit, ltc);
George Rokos2467df62017-01-25 21:27:24 +0000686 } else {
687 rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr,
George Rokos1546d312017-05-10 14:12:36 +0000688 &tgt_args[0], &tgt_offsets[0], tgt_args.size());
George Rokos2467df62017-01-25 21:27:24 +0000689 }
690 } else {
691 DP("Errors occurred while obtaining target arguments, skipping kernel "
692 "execution\n");
693 }
694
695 // Deallocate (first-)private arrays
696 for (auto it : fpArrays) {
697 int rt = Device.RTL->data_delete(Device.RTLDeviceID, it);
698 if (rt != OFFLOAD_SUCCESS) {
699 DP("Deallocation of (first-)private arrays failed.\n");
700 rc = OFFLOAD_FAIL;
701 }
702 }
703
704 // Move data from device.
705 int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes,
706 arg_types);
707
708 if (rt != OFFLOAD_SUCCESS) {
709 DP("Call to target_data_end failed.\n");
710 rc = OFFLOAD_FAIL;
711 }
712
713 return rc;
714}