//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
//
// Implementation of the interface to be used by Clang during the codegen of a
// target region.
//
//===----------------------------------------------------------------------===//

#include <omptarget.h>

#include "device.h"
#include "private.h"
#include "rtl.h"

#include <cassert>
#include <vector>

#ifdef OMPTARGET_DEBUG
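// Verbosity of the DP() debug macro. It stays at 0 (silent) unless raised at
// initialization time (commonly via the LIBOMPTARGET_DEBUG environment
// variable).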
int DebugLevel = 0;
#endif // OMPTARGET_DEBUG

/* All begin addresses for partially mapped structs must be 8-aligned in order
 * to ensure proper alignment of members. E.g.
 *
 * struct S {
 *   int a;   // 4-aligned
 *   int b;   // 4-aligned
 *   int *p;  // 8-aligned
 * } s1;
 * ...
 * #pragma omp target map(tofrom: s1.b, s1.p[0:N])
 * {
 *   s1.b = 5;
 *   for (int i...) s1.p[i] = ...;
 * }
 *
 * Here we are mapping s1 starting from member b, so BaseAddress=&s1=&s1.a and
 * BeginAddress=&s1.b. Let's assume that the struct begins at address 0x100,
 * then &s1.a=0x100, &s1.b=0x104, &s1.p=0x108. Each member obeys the alignment
 * requirements for its type. Now, when we allocate memory on the device, in
 * CUDA's case cuMemAlloc() returns an address which is at least 256-aligned.
 * This means that the chunk of the struct on the device will start at a
 * 256-aligned address, let's say 0x200. Then the address of b will be 0x200
 * and the address of p will be a misaligned 0x204 (on the host there was no
 * need to add padding between b and p, so p comes exactly 4 bytes after b).
 * If the device kernel tries to access s1.p, a misaligned address error occurs
 * (as reported by the CUDA plugin). By rounding the begin address down to a
 * multiple of 8 and extending the size of the allocated chunk accordingly, the
 * chunk on the device will start at 0x200 with the padding (4 bytes), then
 * &s1.b=0x204 and &s1.p=0x208, as they should be to satisfy the alignment
 * requirements.
 */
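// Tying the example above to the computation used below: for BeginAddress
// 0x104, padding = 0x104 % 8 = 4, so the mapped host range starts at 0x100 and
// the size grows by 4 bytes, which keeps the members 8-aligned on the device.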
static const int64_t alignment = 8;

/// Map global data and execute pending ctors
static int InitLibrary(DeviceTy& Device) {
  /*
   * Map global data
   */
  int32_t device_id = Device.DeviceID;
  int rc = OFFLOAD_SUCCESS;

  Device.PendingGlobalsMtx.lock();
  TrlTblMtx.lock();
  for (HostEntriesBeginToTransTableTy::iterator
           ii = HostEntriesBeginToTransTable.begin();
       ii != HostEntriesBeginToTransTable.end(); ++ii) {
    TranslationTable *TransTable = &ii->second;
    if (TransTable->TargetsTable[device_id] != 0) {
      // Library entries have already been processed
      continue;
    }

    // 1) get image.
    assert(TransTable->TargetsImages.size() > (size_t)device_id &&
           "Not expecting a device ID outside the table's bounds!");
    __tgt_device_image *img = TransTable->TargetsImages[device_id];
    if (!img) {
      DP("No image loaded for device id %d.\n", device_id);
      rc = OFFLOAD_FAIL;
      break;
    }
    // 2) load image into the target table.
    __tgt_target_table *TargetTable =
        TransTable->TargetsTable[device_id] = Device.load_binary(img);
    // Unable to get table for this image: invalidate image and fail.
    if (!TargetTable) {
      DP("Unable to generate entries table for device id %d.\n", device_id);
      TransTable->TargetsImages[device_id] = 0;
      rc = OFFLOAD_FAIL;
      break;
    }

    // Verify whether the two table sizes match.
    size_t hsize =
        TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
    size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;

    // Invalid image for these host entries!
    if (hsize != tsize) {
      DP("Host and Target tables mismatch for device id %d [%zx != %zx].\n",
         device_id, hsize, tsize);
      TransTable->TargetsImages[device_id] = 0;
      TransTable->TargetsTable[device_id] = 0;
      rc = OFFLOAD_FAIL;
      break;
    }

    // process global data that needs to be mapped.
    Device.DataMapMtx.lock();
    __tgt_target_table *HostTable = &TransTable->HostTable;
    for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
                             *CurrHostEntry = HostTable->EntriesBegin,
                             *EntryDeviceEnd = TargetTable->EntriesEnd;
         CurrDeviceEntry != EntryDeviceEnd;
         CurrDeviceEntry++, CurrHostEntry++) {
      if (CurrDeviceEntry->size != 0) {
        // has data.
        assert(CurrDeviceEntry->size == CurrHostEntry->size &&
               "data size mismatch");

        // Fortran may use multiple weak declarations for the same symbol,
        // therefore we must allow for multiple weak symbols to be loaded from
        // the fat binary. Treat these mappings as any other "regular" mapping.
        // Add entry to map.
        if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size))
          continue;
        DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
           "\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
           CurrDeviceEntry->size);
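        // Global symbols are registered with an "infinite" reference count so
        // that later data-end operations never unmap or deallocate them.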
        Device.HostDataToTargetMap.push_front(HostDataToTargetTy(
            (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
            (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
            (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/,
            (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
            INF_REF_CNT /*RefCount*/));
      }
    }
    Device.DataMapMtx.unlock();
  }
  TrlTblMtx.unlock();

  if (rc != OFFLOAD_SUCCESS) {
    Device.PendingGlobalsMtx.unlock();
    return rc;
  }

  /*
   * Run ctors for static objects
   */
  if (!Device.PendingCtorsDtors.empty()) {
    // Call all ctors for all libraries registered so far
    for (auto &lib : Device.PendingCtorsDtors) {
      if (!lib.second.PendingCtors.empty()) {
        DP("Has pending ctors... call now\n");
        for (auto &entry : lib.second.PendingCtors) {
          void *ctor = entry;
          int rc = target(device_id, ctor, 0, NULL, NULL, NULL,
                          NULL, 1, 1, true /*team*/);
          if (rc != OFFLOAD_SUCCESS) {
            DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
            Device.PendingGlobalsMtx.unlock();
            return OFFLOAD_FAIL;
          }
        }
        // Clear the list to indicate that this device has been used
        lib.second.PendingCtors.clear();
        DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(lib.first));
      }
    }
  }
  Device.HasPendingGlobals = false;
  Device.PendingGlobalsMtx.unlock();

  return OFFLOAD_SUCCESS;
}

// Check whether a device has been initialized, global ctors have been
// executed and global data has been mapped; do so if not already done.
int CheckDeviceAndCtors(int64_t device_id) {
  // Is device ready?
  if (!device_is_ready(device_id)) {
    DP("Device %" PRId64 " is not ready.\n", device_id);
    return OFFLOAD_FAIL;
  }

  // Get device info.
  DeviceTy &Device = Devices[device_id];

  // Check whether global data has been mapped for this device
  Device.PendingGlobalsMtx.lock();
  bool hasPendingGlobals = Device.HasPendingGlobals;
  Device.PendingGlobalsMtx.unlock();
  if (hasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) {
    DP("Failed to init globals on device %" PRId64 "\n", device_id);
    return OFFLOAD_FAIL;
  }

  return OFFLOAD_SUCCESS;
}

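// Decode the MEMBER_OF field of a map type (bits 48-63): a stored value of k
// means "member of the argument at index k-1", so this returns the parent's
// index, or a negative value if the argument is not a struct member.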
static int32_t member_of(int64_t type) {
  return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
}

/// Internal function to do the mapping and transfer the data to the device
int target_data_begin(DeviceTy &Device, int32_t arg_num,
    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
  // process each input.
  for (int32_t i = 0; i < arg_num; ++i) {
    // Ignore private variables and arrays - there is no mapping for them.
    if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
        (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
      continue;

    void *HstPtrBegin = args[i];
    void *HstPtrBase = args_base[i];
    int64_t data_size = arg_sizes[i];

    // Adjust for proper alignment if this is a combined entry (for structs).
    // Look at the next argument - if that is MEMBER_OF this one, then this one
    // is a combined entry.
    int64_t padding = 0;
    const int next_i = i+1;
    if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
        member_of(arg_types[next_i]) == i) {
      padding = (int64_t)HstPtrBegin % alignment;
      if (padding) {
        DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
            "\n", padding, DPxPTR(HstPtrBegin));
        HstPtrBegin = (char *) HstPtrBegin - padding;
        data_size += padding;
      }
    }

    // Address of pointer on the host and device, respectively.
    void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
    bool IsNew, Pointer_IsNew;
    bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
    // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
    // have reached this point via __tgt_target_data_begin and not __tgt_target
    // then no argument is marked as TARGET_PARAM ("omp target data map" is not
    // associated with a target region, so there are no target parameters).
    // This may be considered a hack; we could revise the scheme in the future.
    bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF);
    if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
      DP("Has a pointer entry: \n");
      // base is address of pointer.
      Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
          sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef);
      if (!Pointer_TgtPtrBegin) {
        DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
            "illegal mapping).\n");
        return OFFLOAD_FAIL;
      }
      DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
          "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin),
          (Pointer_IsNew ? "" : " not"));
      Pointer_HstPtrBegin = HstPtrBase;
      // modify current entry.
      HstPtrBase = *(void **)HstPtrBase;
      UpdateRef = true; // subsequently update ref count of pointee
    }

    void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
        data_size, IsNew, IsImplicit, UpdateRef);
    if (!TgtPtrBegin && data_size) {
      // If data_size==0, then the argument could be a zero-length pointer to
      // NULL, so getOrAlloc() returning NULL is not an error.
      DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
          "illegal mapping).\n");
    }
    DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
        " - is%s new\n", data_size, DPxPTR(TgtPtrBegin),
        (IsNew ? "" : " not"));

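    // RETURN_PARAM asks the runtime to hand the translated device address back
    // to the caller through args_base (used, e.g., for use_device_ptr).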
    if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
      uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
      void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
      DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase));
      args_base[i] = TgtPtrBase;
    }

    if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
      bool copy = false;
      if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
        copy = true;
      } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
        // Copy data only if the "parent" struct has RefCount==1.
        int32_t parent_idx = member_of(arg_types[i]);
        long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
        assert(parent_rc > 0 && "parent struct not found");
        if (parent_rc == 1) {
          copy = true;
        }
      }

      if (copy) {
        DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
            data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
        int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size);
        if (rt != OFFLOAD_SUCCESS) {
          DP("Copying data to device failed.\n");
          return OFFLOAD_FAIL;
        }
      }
    }

    if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
      DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
          DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
      uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
      void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
      int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase,
          sizeof(void *));
      if (rt != OFFLOAD_SUCCESS) {
        DP("Copying data to device failed.\n");
        return OFFLOAD_FAIL;
      }
      // create shadow pointers for this entry
      Device.ShadowMtx.lock();
      Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase,
          Pointer_TgtPtrBegin, TgtPtrBase};
      Device.ShadowMtx.unlock();
    }
  }

  return OFFLOAD_SUCCESS;
}

/// Internal function to undo the mapping and retrieve the data from the device.
int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
    void **args, int64_t *arg_sizes, int64_t *arg_types) {
  // process each input.
  for (int32_t i = arg_num - 1; i >= 0; --i) {
    // Ignore private variables and arrays - there is no mapping for them.
    // Also, ignore the use_device_ptr clause; it has no effect here.
    if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
        (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
      continue;

    void *HstPtrBegin = args[i];
    int64_t data_size = arg_sizes[i];
    // Adjust for proper alignment if this is a combined entry (for structs).
    // Look at the next argument - if that is MEMBER_OF this one, then this one
    // is a combined entry.
    int64_t padding = 0;
    const int next_i = i+1;
    if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
        member_of(arg_types[next_i]) == i) {
      padding = (int64_t)HstPtrBegin % alignment;
      if (padding) {
        DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
            "\n", padding, DPxPTR(HstPtrBegin));
        HstPtrBegin = (char *) HstPtrBegin - padding;
        data_size += padding;
      }
    }

    bool IsLast;
    bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
        (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
    bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;

    // If PTR_AND_OBJ, HstPtrBegin is address of pointee
    void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
        UpdateRef);
    DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
        " - is%s last\n", data_size, DPxPTR(TgtPtrBegin),
        (IsLast ? "" : " not"));

    bool DelEntry = IsLast || ForceDelete;

    if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
        !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
      DelEntry = false; // protect parent struct from being deallocated
    }

    if ((arg_types[i] & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
      // Move data back to the host
      if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
        bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
        bool CopyMember = false;
        if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
            !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
          // Copy data only if the "parent" struct has RefCount==1.
          int32_t parent_idx = member_of(arg_types[i]);
          long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
          assert(parent_rc > 0 && "parent struct not found");
          if (parent_rc == 1) {
            CopyMember = true;
          }
        }

        if (DelEntry || Always || CopyMember) {
          DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
              data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
          int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size);
          if (rt != OFFLOAD_SUCCESS) {
            DP("Copying data from device failed.\n");
            return OFFLOAD_FAIL;
          }
        }
      }

      // If we copied back to the host a struct/array containing pointers, we
      // need to restore the original host pointer values from their shadow
      // copies. If the struct is going to be deallocated, remove any remaining
      // shadow pointer entries for this struct.
      uintptr_t lb = (uintptr_t) HstPtrBegin;
      uintptr_t ub = (uintptr_t) HstPtrBegin + data_size;
      Device.ShadowMtx.lock();
      for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
          it != Device.ShadowPtrMap.end();) {
        void **ShadowHstPtrAddr = (void**) it->first;

        // An STL map is sorted on its keys; use this property
        // to quickly determine when to break out of the loop.
        if ((uintptr_t) ShadowHstPtrAddr < lb) {
          ++it;
          continue;
        }
        if ((uintptr_t) ShadowHstPtrAddr >= ub)
          break;

        // If we copied the struct to the host, we need to restore the pointer.
        if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
          DP("Restoring original host pointer value " DPxMOD " for host "
              "pointer " DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
              DPxPTR(ShadowHstPtrAddr));
          *ShadowHstPtrAddr = it->second.HstPtrVal;
        }
        // If the struct is to be deallocated, remove the shadow entry.
        if (DelEntry) {
          DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));
          it = Device.ShadowPtrMap.erase(it);
        } else {
          ++it;
        }
      }
      Device.ShadowMtx.unlock();

      // Deallocate map
      if (DelEntry) {
        int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete);
        if (rt != OFFLOAD_SUCCESS) {
          DP("Deallocating data from device failed.\n");
          return OFFLOAD_FAIL;
        }
      }
    }
  }

  return OFFLOAD_SUCCESS;
}

/// Internal function to pass data to/from the target.
int target_data_update(DeviceTy &Device, int32_t arg_num,
    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
  // process each input.
  for (int32_t i = 0; i < arg_num; ++i) {
    if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
        (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
      continue;

    void *HstPtrBegin = args[i];
    int64_t MapSize = arg_sizes[i];
    bool IsLast;
    void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
        false);
    if (!TgtPtrBegin) {
      DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
      continue;
    }

    if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
      DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
          arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
      int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize);
      if (rt != OFFLOAD_SUCCESS) {
        DP("Copying data from device failed.\n");
        return OFFLOAD_FAIL;
      }

      uintptr_t lb = (uintptr_t) HstPtrBegin;
      uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
      Device.ShadowMtx.lock();
      for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
          it != Device.ShadowPtrMap.end(); ++it) {
        void **ShadowHstPtrAddr = (void**) it->first;
        if ((uintptr_t) ShadowHstPtrAddr < lb)
          continue;
        if ((uintptr_t) ShadowHstPtrAddr >= ub)
          break;
        DP("Restoring original host pointer value " DPxMOD " for host pointer "
            DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
            DPxPTR(ShadowHstPtrAddr));
        *ShadowHstPtrAddr = it->second.HstPtrVal;
      }
      Device.ShadowMtx.unlock();
    }

    if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
      DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
          arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
      int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize);
      if (rt != OFFLOAD_SUCCESS) {
        DP("Copying data to device failed.\n");
        return OFFLOAD_FAIL;
      }
      uintptr_t lb = (uintptr_t) HstPtrBegin;
      uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
      Device.ShadowMtx.lock();
      for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
          it != Device.ShadowPtrMap.end(); ++it) {
        void **ShadowHstPtrAddr = (void**) it->first;
        if ((uintptr_t) ShadowHstPtrAddr < lb)
          continue;
        if ((uintptr_t) ShadowHstPtrAddr >= ub)
          break;
        DP("Restoring original target pointer value " DPxMOD " for target "
            "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal),
            DPxPTR(it->second.TgtPtrAddr));
        rt = Device.data_submit(it->second.TgtPtrAddr,
            &it->second.TgtPtrVal, sizeof(void *));
        if (rt != OFFLOAD_SUCCESS) {
          DP("Copying data to device failed.\n");
          Device.ShadowMtx.unlock();
          return OFFLOAD_FAIL;
        }
      }
      Device.ShadowMtx.unlock();
    }
  }
  return OFFLOAD_SUCCESS;
}

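// A lambda's capture block is mapped implicitly as a pointer plus object; the
// flag combination below identifies such entries so that target() can patch
// the captured references with device addresses before launching the region.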
static const unsigned LambdaMapping = OMP_TGT_MAPTYPE_PTR_AND_OBJ |
                                      OMP_TGT_MAPTYPE_LITERAL |
                                      OMP_TGT_MAPTYPE_IMPLICIT;
static bool isLambdaMapping(int64_t Mapping) {
  return (Mapping & LambdaMapping) == LambdaMapping;
}

/// Performs the same actions as data_begin in case arg_num is non-zero and
/// initiates the run of the offloaded region on the target platform; if
/// arg_num is non-zero, after the region execution is done it also performs
/// the same actions as data_update and data_end above. This function returns 0
/// if it was able to transfer the execution to a target and an integer
/// different from zero otherwise.
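// Illustrative only: for a construct such as
//   #pragma omp target map(tofrom: a[0:N])
// the compiler emits a call to the corresponding __tgt_target* entry point,
// which checks the device with CheckDeviceAndCtors() and then calls this
// function to map the arguments, launch the region, and undo the mapping.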
int target(int64_t device_id, void *host_ptr, int32_t arg_num,
    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
    int32_t team_num, int32_t thread_limit, int IsTeamConstruct) {
  DeviceTy &Device = Devices[device_id];

  // Find the table information in the map or look it up in the translation
  // tables.
  TableMap *TM = 0;
  TblMapMtx.lock();
  HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr);
  if (TableMapIt == HostPtrToTableMap.end()) {
    // We don't have a map. So search all the registered libraries.
    TrlTblMtx.lock();
    for (HostEntriesBeginToTransTableTy::iterator
             ii = HostEntriesBeginToTransTable.begin(),
             ie = HostEntriesBeginToTransTable.end();
         !TM && ii != ie; ++ii) {
      // get the translation table (which contains all the good info).
      TranslationTable *TransTable = &ii->second;
      // iterate over all the host table entries to see if we can locate the
      // host_ptr.
      __tgt_offload_entry *begin = TransTable->HostTable.EntriesBegin;
      __tgt_offload_entry *end = TransTable->HostTable.EntriesEnd;
      __tgt_offload_entry *cur = begin;
      for (uint32_t i = 0; cur < end; ++cur, ++i) {
        if (cur->addr != host_ptr)
          continue;
        // we got a match, now fill the HostPtrToTableMap so that we
        // may avoid this search next time.
        TM = &HostPtrToTableMap[host_ptr];
        TM->Table = TransTable;
        TM->Index = i;
        break;
      }
    }
    TrlTblMtx.unlock();
  } else {
    TM = &TableMapIt->second;
  }
  TblMapMtx.unlock();

  // No map for this host pointer found!
  if (!TM) {
    DP("Host ptr " DPxMOD " does not have a matching target pointer.\n",
       DPxPTR(host_ptr));
    return OFFLOAD_FAIL;
  }

  // get target table.
  TrlTblMtx.lock();
  assert(TM->Table->TargetsTable.size() > (size_t)device_id &&
         "Not expecting a device ID outside the table's bounds!");
  __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id];
  TrlTblMtx.unlock();
  assert(TargetTable && "Global data has not been mapped\n");

  // Move data to device.
  int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
      arg_types);
  if (rc != OFFLOAD_SUCCESS) {
    DP("Call to target_data_begin failed, abort target.\n");
    return OFFLOAD_FAIL;
  }

  std::vector<void *> tgt_args;
  std::vector<ptrdiff_t> tgt_offsets;

  // List of (first-)private arrays allocated for this target region
  std::vector<void *> fpArrays;
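  // For each host argument, remember its position in tgt_args so that later
  // MEMBER_OF entries (e.g. lambda captures) can locate the translated device
  // address of their parent argument.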
  std::vector<int> tgtArgsPositions(arg_num, -1);

  for (int32_t i = 0; i < arg_num; ++i) {
    if (!(arg_types[i] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
      // This is not a target parameter, do not push it into tgt_args.
      // Check for lambda mapping.
      if (isLambdaMapping(arg_types[i])) {
        assert((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
               "PTR_AND_OBJ must be also MEMBER_OF.");
        unsigned idx = member_of(arg_types[i]);
        int tgtIdx = tgtArgsPositions[idx];
        assert(tgtIdx != -1 && "Base address must be translated already.");
        // The parent lambda must be processed already and it must be the last
        // in tgt_args and tgt_offsets arrays.
        void *HstPtrVal = args[i];
        void *HstPtrBegin = args_base[i];
        void *HstPtrBase = args[idx];
        bool IsLast; // unused.
        void *TgtPtrBase =
            (void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]);
        DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
        uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
        void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
        void *Pointer_TgtPtrBegin =
            Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false);
        if (!Pointer_TgtPtrBegin) {
          DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
             DPxPTR(HstPtrVal));
          continue;
        }
        DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
           DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
        int rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin,
                                    sizeof(void *));
        if (rt != OFFLOAD_SUCCESS) {
          DP("Copying data to device failed.\n");
          return OFFLOAD_FAIL;
        }
      }
      continue;
    }
    void *HstPtrBegin = args[i];
    void *HstPtrBase = args_base[i];
    void *TgtPtrBegin;
    ptrdiff_t TgtBaseOffset;
    bool IsLast; // unused.
    if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
      DP("Forwarding first-private value " DPxMOD " to the target construct\n",
          DPxPTR(HstPtrBase));
      TgtPtrBegin = HstPtrBase;
      TgtBaseOffset = 0;
    } else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
      // Allocate memory for (first-)private array
      TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
          arg_sizes[i], HstPtrBegin);
      if (!TgtPtrBegin) {
        DP("Data allocation for %sprivate array " DPxMOD " failed, "
            "abort target.\n",
            (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
            DPxPTR(HstPtrBegin));
        return OFFLOAD_FAIL;
      }
      fpArrays.push_back(TgtPtrBegin);
      TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
#ifdef OMPTARGET_DEBUG
      void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
      DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for "
          "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n",
          arg_sizes[i], DPxPTR(TgtPtrBegin),
          (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
          DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase));
#endif
      // If first-private, copy data from host
      if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
        int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]);
        if (rt != OFFLOAD_SUCCESS) {
          DP("Copying data to device failed.\n");
          return OFFLOAD_FAIL;
        }
      }
    } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
      TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
          false);
      TgtBaseOffset = 0; // no offset for ptrs.
      DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
          "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
          DPxPTR(HstPtrBase));
    } else {
      TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
          false);
      TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
#ifdef OMPTARGET_DEBUG
      void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
      DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
          DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
#endif
    }
    tgtArgsPositions[i] = tgt_args.size();
    tgt_args.push_back(TgtPtrBegin);
    tgt_offsets.push_back(TgtBaseOffset);
  }

  assert(tgt_args.size() == tgt_offsets.size() &&
         "Size mismatch in arguments and offsets");

  // Pop loop trip count
  uint64_t ltc = Device.loopTripCnt;
  Device.loopTripCnt = 0;

  // Launch device execution.
  DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
      TargetTable->EntriesBegin[TM->Index].name,
      DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index);
  if (IsTeamConstruct) {
    rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr,
        &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num,
        thread_limit, ltc);
  } else {
    rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr,
        &tgt_args[0], &tgt_offsets[0], tgt_args.size());
  }
  if (rc != OFFLOAD_SUCCESS) {
    DP("Executing target region failed, abort target.\n");
    return OFFLOAD_FAIL;
  }

  // Deallocate (first-)private arrays
  for (auto it : fpArrays) {
    int rt = Device.RTL->data_delete(Device.RTLDeviceID, it);
    if (rt != OFFLOAD_SUCCESS) {
      DP("Deallocation of (first-)private arrays failed.\n");
      return OFFLOAD_FAIL;
    }
  }

  // Move data from device.
  int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes,
      arg_types);
  if (rt != OFFLOAD_SUCCESS) {
    DP("Call to target_data_end failed, abort target.\n");
    return OFFLOAD_FAIL;
  }

  return OFFLOAD_SUCCESS;
}