blob: 14f45312030911c2667b8653060bcecd7d8344bf [file] [log] [blame]
George Rokos2467df62017-01-25 21:27:24 +00001//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is dual licensed under the MIT and the University of Illinois Open
6// Source Licenses. See LICENSE.txt for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation of the interface to be used by Clang during the codegen of a
11// target region.
12//
13//===----------------------------------------------------------------------===//
14
Jonas Hahnfeld43322802017-12-06 21:59:07 +000015#include <omptarget.h>
16
17#include "device.h"
18#include "private.h"
19#include "rtl.h"
20
George Rokos2467df62017-01-25 21:27:24 +000021#include <cassert>
George Rokos2467df62017-01-25 21:27:24 +000022#include <vector>
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +000023#include <mutex>
George Rokos2467df62017-01-25 21:27:24 +000024
Sergey Dmitrievb305d262017-08-14 15:09:59 +000025#ifdef OMPTARGET_DEBUG
Jonas Hahnfeld43322802017-12-06 21:59:07 +000026int DebugLevel = 0;
Sergey Dmitrievb305d262017-08-14 15:09:59 +000027#endif // OMPTARGET_DEBUG
28
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +000029
30
George Rokosa0da2462018-07-19 13:41:03 +000031/* All begin addresses for partially mapped structs must be 8-aligned in order
32 * to ensure proper alignment of members. E.g.
33 *
34 * struct S {
35 * int a; // 4-aligned
36 * int b; // 4-aligned
37 * int *p; // 8-aligned
38 * } s1;
39 * ...
40 * #pragma omp target map(tofrom: s1.b, s1.p[0:N])
41 * {
42 * s1.b = 5;
43 * for (int i...) s1.p[i] = ...;
44 * }
45 *
46 * Here we are mapping s1 starting from member b, so BaseAddress=&s1=&s1.a and
47 * BeginAddress=&s1.b. Let's assume that the struct begins at address 0x100,
48 * then &s1.a=0x100, &s1.b=0x104, &s1.p=0x108. Each member obeys the alignment
49 * requirements for its type. Now, when we allocate memory on the device, in
50 * CUDA's case cuMemAlloc() returns an address which is at least 256-aligned.
51 * This means that the chunk of the struct on the device will start at a
52 * 256-aligned address, let's say 0x200. Then the address of b will be 0x200 and
53 * address of p will be a misaligned 0x204 (on the host there was no need to add
54 * padding between b and p, so p comes exactly 4 bytes after b). If the device
55 * kernel tries to access s1.p, a misaligned address error occurs (as reported
56 * by the CUDA plugin). By padding the begin address down to a multiple of 8 and
57 * extending the size of the allocated chuck accordingly, the chuck on the
58 * device will start at 0x200 with the padding (4 bytes), then &s1.b=0x204 and
59 * &s1.p=0x208, as they should be to satisfy the alignment requirements.
60 */
61static const int64_t alignment = 8;
62
George Rokos2467df62017-01-25 21:27:24 +000063/// Map global data and execute pending ctors
64static int InitLibrary(DeviceTy& Device) {
65 /*
66 * Map global data
67 */
68 int32_t device_id = Device.DeviceID;
69 int rc = OFFLOAD_SUCCESS;
70
71 Device.PendingGlobalsMtx.lock();
72 TrlTblMtx.lock();
73 for (HostEntriesBeginToTransTableTy::iterator
74 ii = HostEntriesBeginToTransTable.begin();
75 ii != HostEntriesBeginToTransTable.end(); ++ii) {
76 TranslationTable *TransTable = &ii->second;
77 if (TransTable->TargetsTable[device_id] != 0) {
78 // Library entries have already been processed
79 continue;
80 }
81
82 // 1) get image.
83 assert(TransTable->TargetsImages.size() > (size_t)device_id &&
84 "Not expecting a device ID outside the table's bounds!");
85 __tgt_device_image *img = TransTable->TargetsImages[device_id];
86 if (!img) {
87 DP("No image loaded for device id %d.\n", device_id);
88 rc = OFFLOAD_FAIL;
89 break;
90 }
91 // 2) load image into the target table.
92 __tgt_target_table *TargetTable =
93 TransTable->TargetsTable[device_id] = Device.load_binary(img);
94 // Unable to get table for this image: invalidate image and fail.
95 if (!TargetTable) {
96 DP("Unable to generate entries table for device id %d.\n", device_id);
97 TransTable->TargetsImages[device_id] = 0;
98 rc = OFFLOAD_FAIL;
99 break;
100 }
101
102 // Verify whether the two table sizes match.
103 size_t hsize =
104 TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
105 size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
106
107 // Invalid image for these host entries!
108 if (hsize != tsize) {
109 DP("Host and Target tables mismatch for device id %d [%zx != %zx].\n",
110 device_id, hsize, tsize);
111 TransTable->TargetsImages[device_id] = 0;
112 TransTable->TargetsTable[device_id] = 0;
113 rc = OFFLOAD_FAIL;
114 break;
115 }
116
117 // process global data that needs to be mapped.
George Rokosd57681b2017-04-22 11:45:03 +0000118 Device.DataMapMtx.lock();
George Rokos2467df62017-01-25 21:27:24 +0000119 __tgt_target_table *HostTable = &TransTable->HostTable;
120 for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
121 *CurrHostEntry = HostTable->EntriesBegin,
122 *EntryDeviceEnd = TargetTable->EntriesEnd;
123 CurrDeviceEntry != EntryDeviceEnd;
124 CurrDeviceEntry++, CurrHostEntry++) {
125 if (CurrDeviceEntry->size != 0) {
126 // has data.
127 assert(CurrDeviceEntry->size == CurrHostEntry->size &&
128 "data size mismatch");
George Rokosba7380b2017-03-22 16:43:40 +0000129
130 // Fortran may use multiple weak declarations for the same symbol,
131 // therefore we must allow for multiple weak symbols to be loaded from
132 // the fat binary. Treat these mappings as any other "regular" mapping.
133 // Add entry to map.
George Rokosd57681b2017-04-22 11:45:03 +0000134 if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size))
135 continue;
George Rokos2467df62017-01-25 21:27:24 +0000136 DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
137 "\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
138 CurrDeviceEntry->size);
George Rokosd57681b2017-04-22 11:45:03 +0000139 Device.HostDataToTargetMap.push_front(HostDataToTargetTy(
140 (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
141 (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
142 (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/,
143 (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
144 INF_REF_CNT /*RefCount*/));
George Rokos2467df62017-01-25 21:27:24 +0000145 }
146 }
George Rokosd57681b2017-04-22 11:45:03 +0000147 Device.DataMapMtx.unlock();
George Rokos2467df62017-01-25 21:27:24 +0000148 }
149 TrlTblMtx.unlock();
150
151 if (rc != OFFLOAD_SUCCESS) {
152 Device.PendingGlobalsMtx.unlock();
153 return rc;
154 }
155
156 /*
157 * Run ctors for static objects
158 */
159 if (!Device.PendingCtorsDtors.empty()) {
160 // Call all ctors for all libraries registered so far
161 for (auto &lib : Device.PendingCtorsDtors) {
162 if (!lib.second.PendingCtors.empty()) {
163 DP("Has pending ctors... call now\n");
164 for (auto &entry : lib.second.PendingCtors) {
165 void *ctor = entry;
166 int rc = target(device_id, ctor, 0, NULL, NULL, NULL,
167 NULL, 1, 1, true /*team*/);
168 if (rc != OFFLOAD_SUCCESS) {
169 DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
170 Device.PendingGlobalsMtx.unlock();
171 return OFFLOAD_FAIL;
172 }
173 }
174 // Clear the list to indicate that this device has been used
175 lib.second.PendingCtors.clear();
176 DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(lib.first));
177 }
178 }
179 }
180 Device.HasPendingGlobals = false;
181 Device.PendingGlobalsMtx.unlock();
182
183 return OFFLOAD_SUCCESS;
184}
185
186// Check whether a device has been initialized, global ctors have been
187// executed and global data has been mapped; do so if not already done.
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000188int CheckDeviceAndCtors(int64_t device_id) {
George Rokos2467df62017-01-25 21:27:24 +0000189 // Is device ready?
190 if (!device_is_ready(device_id)) {
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000191 DP("Device %" PRId64 " is not ready.\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000192 return OFFLOAD_FAIL;
193 }
194
195 // Get device info.
196 DeviceTy &Device = Devices[device_id];
197
198 // Check whether global data has been mapped for this device
199 Device.PendingGlobalsMtx.lock();
200 bool hasPendingGlobals = Device.HasPendingGlobals;
201 Device.PendingGlobalsMtx.unlock();
202 if (hasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) {
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000203 DP("Failed to init globals on device %" PRId64 "\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000204 return OFFLOAD_FAIL;
205 }
206
207 return OFFLOAD_SUCCESS;
208}
209
George Rokosa0da2462018-07-19 13:41:03 +0000210static int32_t member_of(int64_t type) {
George Rokos2467df62017-01-25 21:27:24 +0000211 return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
212}
213
214/// Internal function to do the mapping and transfer the data to the device
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000215int target_data_begin(DeviceTy &Device, int32_t arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000216 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
217 // process each input.
George Rokos2467df62017-01-25 21:27:24 +0000218 for (int32_t i = 0; i < arg_num; ++i) {
219 // Ignore private variables and arrays - there is no mapping for them.
220 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
221 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
222 continue;
223
224 void *HstPtrBegin = args[i];
225 void *HstPtrBase = args_base[i];
George Rokosa0da2462018-07-19 13:41:03 +0000226 int64_t data_size = arg_sizes[i];
227
228 // Adjust for proper alignment if this is a combined entry (for structs).
229 // Look at the next argument - if that is MEMBER_OF this one, then this one
230 // is a combined entry.
231 int64_t padding = 0;
232 const int next_i = i+1;
233 if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
234 member_of(arg_types[next_i]) == i) {
235 padding = (int64_t)HstPtrBegin % alignment;
236 if (padding) {
237 DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
238 "\n", padding, DPxPTR(HstPtrBegin));
239 HstPtrBegin = (char *) HstPtrBegin - padding;
240 data_size += padding;
241 }
242 }
243
George Rokos2467df62017-01-25 21:27:24 +0000244 // Address of pointer on the host and device, respectively.
245 void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
246 bool IsNew, Pointer_IsNew;
247 bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
George Rokosa0da2462018-07-19 13:41:03 +0000248 // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
249 // have reached this point via __tgt_target_data_begin and not __tgt_target
250 // then no argument is marked as TARGET_PARAM ("omp target data map" is not
251 // associated with a target region, so there are no target parameters). This
252 // may be considered a hack, we could revise the scheme in the future.
George Rokos2467df62017-01-25 21:27:24 +0000253 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF);
254 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
255 DP("Has a pointer entry: \n");
256 // base is address of pointer.
257 Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
258 sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef);
259 if (!Pointer_TgtPtrBegin) {
260 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
261 "illegal mapping).\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000262 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000263 }
264 DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
265 "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin),
266 (Pointer_IsNew ? "" : " not"));
267 Pointer_HstPtrBegin = HstPtrBase;
268 // modify current entry.
269 HstPtrBase = *(void **)HstPtrBase;
270 UpdateRef = true; // subsequently update ref count of pointee
271 }
272
273 void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
George Rokosa0da2462018-07-19 13:41:03 +0000274 data_size, IsNew, IsImplicit, UpdateRef);
275 if (!TgtPtrBegin && data_size) {
276 // If data_size==0, then the argument could be a zero-length pointer to
277 // NULL, so getOrAlloc() returning NULL is not an error.
George Rokos2467df62017-01-25 21:27:24 +0000278 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
279 "illegal mapping).\n");
280 }
281 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
George Rokosa0da2462018-07-19 13:41:03 +0000282 " - is%s new\n", data_size, DPxPTR(TgtPtrBegin),
George Rokos2467df62017-01-25 21:27:24 +0000283 (IsNew ? "" : " not"));
284
285 if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
George Rokosa0da2462018-07-19 13:41:03 +0000286 uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
287 void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
288 DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase));
289 args_base[i] = TgtPtrBase;
George Rokos2467df62017-01-25 21:27:24 +0000290 }
291
292 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
293 bool copy = false;
294 if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
295 copy = true;
296 } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
297 // Copy data only if the "parent" struct has RefCount==1.
George Rokosa0da2462018-07-19 13:41:03 +0000298 int32_t parent_idx = member_of(arg_types[i]);
George Rokos2467df62017-01-25 21:27:24 +0000299 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
300 assert(parent_rc > 0 && "parent struct not found");
301 if (parent_rc == 1) {
302 copy = true;
303 }
304 }
305
306 if (copy) {
307 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
George Rokosa0da2462018-07-19 13:41:03 +0000308 data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
309 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size);
George Rokos2467df62017-01-25 21:27:24 +0000310 if (rt != OFFLOAD_SUCCESS) {
311 DP("Copying data to device failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000312 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000313 }
314 }
315 }
316
317 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
318 DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
319 DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
320 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
321 void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
322 int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase,
323 sizeof(void *));
324 if (rt != OFFLOAD_SUCCESS) {
325 DP("Copying data to device failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000326 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000327 }
328 // create shadow pointers for this entry
329 Device.ShadowMtx.lock();
330 Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase,
331 Pointer_TgtPtrBegin, TgtPtrBase};
332 Device.ShadowMtx.unlock();
333 }
334 }
335
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000336 return OFFLOAD_SUCCESS;
George Rokos2467df62017-01-25 21:27:24 +0000337}
338
George Rokos2467df62017-01-25 21:27:24 +0000339/// Internal function to undo the mapping and retrieve the data from the device.
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000340int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
George Rokos2467df62017-01-25 21:27:24 +0000341 void **args, int64_t *arg_sizes, int64_t *arg_types) {
George Rokos2467df62017-01-25 21:27:24 +0000342 // process each input.
343 for (int32_t i = arg_num - 1; i >= 0; --i) {
344 // Ignore private variables and arrays - there is no mapping for them.
345 // Also, ignore the use_device_ptr directive, it has no effect here.
346 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
347 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
348 continue;
349
350 void *HstPtrBegin = args[i];
George Rokosa0da2462018-07-19 13:41:03 +0000351 int64_t data_size = arg_sizes[i];
352 // Adjust for proper alignment if this is a combined entry (for structs).
353 // Look at the next argument - if that is MEMBER_OF this one, then this one
354 // is a combined entry.
355 int64_t padding = 0;
356 const int next_i = i+1;
357 if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
358 member_of(arg_types[next_i]) == i) {
359 padding = (int64_t)HstPtrBegin % alignment;
360 if (padding) {
361 DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
362 "\n", padding, DPxPTR(HstPtrBegin));
363 HstPtrBegin = (char *) HstPtrBegin - padding;
364 data_size += padding;
365 }
366 }
367
George Rokos2467df62017-01-25 21:27:24 +0000368 bool IsLast;
369 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
370 (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
371 bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
372
373 // If PTR_AND_OBJ, HstPtrBegin is address of pointee
George Rokosa0da2462018-07-19 13:41:03 +0000374 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
George Rokos2467df62017-01-25 21:27:24 +0000375 UpdateRef);
376 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
George Rokosa0da2462018-07-19 13:41:03 +0000377 " - is%s last\n", data_size, DPxPTR(TgtPtrBegin),
George Rokos2467df62017-01-25 21:27:24 +0000378 (IsLast ? "" : " not"));
379
George Rokos15a6e7d2017-02-15 20:45:37 +0000380 bool DelEntry = IsLast || ForceDelete;
381
George Rokos2467df62017-01-25 21:27:24 +0000382 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
383 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
George Rokos15a6e7d2017-02-15 20:45:37 +0000384 DelEntry = false; // protect parent struct from being deallocated
George Rokos2467df62017-01-25 21:27:24 +0000385 }
386
George Rokos2467df62017-01-25 21:27:24 +0000387 if ((arg_types[i] & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
388 // Move data back to the host
389 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
390 bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
391 bool CopyMember = false;
392 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
393 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
394 // Copy data only if the "parent" struct has RefCount==1.
George Rokosa0da2462018-07-19 13:41:03 +0000395 int32_t parent_idx = member_of(arg_types[i]);
George Rokos2467df62017-01-25 21:27:24 +0000396 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
397 assert(parent_rc > 0 && "parent struct not found");
398 if (parent_rc == 1) {
399 CopyMember = true;
400 }
401 }
402
403 if (DelEntry || Always || CopyMember) {
404 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
George Rokosa0da2462018-07-19 13:41:03 +0000405 data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
406 int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size);
George Rokos2467df62017-01-25 21:27:24 +0000407 if (rt != OFFLOAD_SUCCESS) {
408 DP("Copying data from device failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000409 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000410 }
411 }
412 }
413
414 // If we copied back to the host a struct/array containing pointers, we
415 // need to restore the original host pointer values from their shadow
416 // copies. If the struct is going to be deallocated, remove any remaining
417 // shadow pointer entries for this struct.
418 uintptr_t lb = (uintptr_t) HstPtrBegin;
George Rokosa0da2462018-07-19 13:41:03 +0000419 uintptr_t ub = (uintptr_t) HstPtrBegin + data_size;
George Rokos2467df62017-01-25 21:27:24 +0000420 Device.ShadowMtx.lock();
421 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
422 it != Device.ShadowPtrMap.end(); ++it) {
423 void **ShadowHstPtrAddr = (void**) it->first;
424
425 // An STL map is sorted on its keys; use this property
426 // to quickly determine when to break out of the loop.
427 if ((uintptr_t) ShadowHstPtrAddr < lb)
428 continue;
429 if ((uintptr_t) ShadowHstPtrAddr >= ub)
430 break;
431
432 // If we copied the struct to the host, we need to restore the pointer.
433 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
434 DP("Restoring original host pointer value " DPxMOD " for host "
435 "pointer " DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
436 DPxPTR(ShadowHstPtrAddr));
437 *ShadowHstPtrAddr = it->second.HstPtrVal;
438 }
439 // If the struct is to be deallocated, remove the shadow entry.
440 if (DelEntry) {
441 DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));
442 Device.ShadowPtrMap.erase(it);
443 }
444 }
445 Device.ShadowMtx.unlock();
446
447 // Deallocate map
448 if (DelEntry) {
George Rokosa0da2462018-07-19 13:41:03 +0000449 int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete);
George Rokos2467df62017-01-25 21:27:24 +0000450 if (rt != OFFLOAD_SUCCESS) {
451 DP("Deallocating data from device failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000452 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000453 }
454 }
455 }
456 }
457
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000458 return OFFLOAD_SUCCESS;
George Rokos2467df62017-01-25 21:27:24 +0000459}
460
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000461/// Internal function to pass data to/from the target.
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000462int target_data_update(DeviceTy &Device, int32_t arg_num,
George Rokosb92dbb42017-11-21 18:26:41 +0000463 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
George Rokos2467df62017-01-25 21:27:24 +0000464 // process each input.
465 for (int32_t i = 0; i < arg_num; ++i) {
466 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
467 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
468 continue;
469
470 void *HstPtrBegin = args[i];
471 int64_t MapSize = arg_sizes[i];
472 bool IsLast;
473 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
474 false);
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000475 if (!TgtPtrBegin) {
476 DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
477 continue;
478 }
George Rokos2467df62017-01-25 21:27:24 +0000479
480 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
481 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
482 arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000483 int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize);
484 if (rt != OFFLOAD_SUCCESS) {
485 DP("Copying data from device failed.\n");
486 return OFFLOAD_FAIL;
487 }
George Rokos2467df62017-01-25 21:27:24 +0000488
489 uintptr_t lb = (uintptr_t) HstPtrBegin;
490 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
491 Device.ShadowMtx.lock();
492 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
493 it != Device.ShadowPtrMap.end(); ++it) {
494 void **ShadowHstPtrAddr = (void**) it->first;
495 if ((uintptr_t) ShadowHstPtrAddr < lb)
496 continue;
497 if ((uintptr_t) ShadowHstPtrAddr >= ub)
498 break;
499 DP("Restoring original host pointer value " DPxMOD " for host pointer "
500 DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
501 DPxPTR(ShadowHstPtrAddr));
502 *ShadowHstPtrAddr = it->second.HstPtrVal;
503 }
504 Device.ShadowMtx.unlock();
505 }
506
507 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
508 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
509 arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000510 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize);
511 if (rt != OFFLOAD_SUCCESS) {
512 DP("Copying data to device failed.\n");
513 return OFFLOAD_FAIL;
514 }
George Rokos2467df62017-01-25 21:27:24 +0000515 uintptr_t lb = (uintptr_t) HstPtrBegin;
516 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
517 Device.ShadowMtx.lock();
518 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
519 it != Device.ShadowPtrMap.end(); ++it) {
520 void **ShadowHstPtrAddr = (void**) it->first;
521 if ((uintptr_t) ShadowHstPtrAddr < lb)
522 continue;
523 if ((uintptr_t) ShadowHstPtrAddr >= ub)
524 break;
525 DP("Restoring original target pointer value " DPxMOD " for target "
526 "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal),
527 DPxPTR(it->second.TgtPtrAddr));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000528 rt = Device.data_submit(it->second.TgtPtrAddr,
George Rokos2467df62017-01-25 21:27:24 +0000529 &it->second.TgtPtrVal, sizeof(void *));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000530 if (rt != OFFLOAD_SUCCESS) {
531 DP("Copying data to device failed.\n");
532 Device.ShadowMtx.unlock();
533 return OFFLOAD_FAIL;
534 }
George Rokos2467df62017-01-25 21:27:24 +0000535 }
536 Device.ShadowMtx.unlock();
537 }
538 }
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000539 return OFFLOAD_SUCCESS;
George Rokos2467df62017-01-25 21:27:24 +0000540}
541
George Rokos2467df62017-01-25 21:27:24 +0000542/// performs the same actions as data_begin in case arg_num is
543/// non-zero and initiates run of the offloaded region on the target platform;
544/// if arg_num is non-zero after the region execution is done it also
545/// performs the same action as data_update and data_end above. This function
546/// returns 0 if it was able to transfer the execution to a target and an
547/// integer different from zero otherwise.
Jonas Hahnfeld43322802017-12-06 21:59:07 +0000548int target(int64_t device_id, void *host_ptr, int32_t arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000549 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
550 int32_t team_num, int32_t thread_limit, int IsTeamConstruct) {
551 DeviceTy &Device = Devices[device_id];
552
553 // Find the table information in the map or look it up in the translation
554 // tables.
555 TableMap *TM = 0;
556 TblMapMtx.lock();
557 HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr);
558 if (TableMapIt == HostPtrToTableMap.end()) {
559 // We don't have a map. So search all the registered libraries.
560 TrlTblMtx.lock();
561 for (HostEntriesBeginToTransTableTy::iterator
562 ii = HostEntriesBeginToTransTable.begin(),
563 ie = HostEntriesBeginToTransTable.end();
564 !TM && ii != ie; ++ii) {
565 // get the translation table (which contains all the good info).
566 TranslationTable *TransTable = &ii->second;
567 // iterate over all the host table entries to see if we can locate the
568 // host_ptr.
569 __tgt_offload_entry *begin = TransTable->HostTable.EntriesBegin;
570 __tgt_offload_entry *end = TransTable->HostTable.EntriesEnd;
571 __tgt_offload_entry *cur = begin;
572 for (uint32_t i = 0; cur < end; ++cur, ++i) {
573 if (cur->addr != host_ptr)
574 continue;
575 // we got a match, now fill the HostPtrToTableMap so that we
576 // may avoid this search next time.
577 TM = &HostPtrToTableMap[host_ptr];
578 TM->Table = TransTable;
579 TM->Index = i;
580 break;
581 }
582 }
583 TrlTblMtx.unlock();
584 } else {
585 TM = &TableMapIt->second;
586 }
587 TblMapMtx.unlock();
588
589 // No map for this host pointer found!
590 if (!TM) {
591 DP("Host ptr " DPxMOD " does not have a matching target pointer.\n",
592 DPxPTR(host_ptr));
593 return OFFLOAD_FAIL;
594 }
595
596 // get target table.
597 TrlTblMtx.lock();
598 assert(TM->Table->TargetsTable.size() > (size_t)device_id &&
599 "Not expecting a device ID outside the table's bounds!");
600 __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id];
601 TrlTblMtx.unlock();
602 assert(TargetTable && "Global data has not been mapped\n");
603
604 // Move data to device.
605 int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
606 arg_types);
George Rokos2467df62017-01-25 21:27:24 +0000607 if (rc != OFFLOAD_SUCCESS) {
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000608 DP("Call to target_data_begin failed, abort target.\n");
George Rokos2467df62017-01-25 21:27:24 +0000609 return OFFLOAD_FAIL;
610 }
611
612 std::vector<void *> tgt_args;
George Rokos1546d312017-05-10 14:12:36 +0000613 std::vector<ptrdiff_t> tgt_offsets;
George Rokos2467df62017-01-25 21:27:24 +0000614
615 // List of (first-)private arrays allocated for this target region
616 std::vector<void *> fpArrays;
617
618 for (int32_t i = 0; i < arg_num; ++i) {
619 if (!(arg_types[i] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
620 // This is not a target parameter, do not push it into tgt_args.
621 continue;
622 }
623 void *HstPtrBegin = args[i];
624 void *HstPtrBase = args_base[i];
George Rokos1546d312017-05-10 14:12:36 +0000625 void *TgtPtrBegin;
626 ptrdiff_t TgtBaseOffset;
George Rokos2467df62017-01-25 21:27:24 +0000627 bool IsLast; // unused.
628 if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
629 DP("Forwarding first-private value " DPxMOD " to the target construct\n",
630 DPxPTR(HstPtrBase));
George Rokos1546d312017-05-10 14:12:36 +0000631 TgtPtrBegin = HstPtrBase;
632 TgtBaseOffset = 0;
George Rokos2467df62017-01-25 21:27:24 +0000633 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
634 // Allocate memory for (first-)private array
George Rokos1546d312017-05-10 14:12:36 +0000635 TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
636 arg_sizes[i], HstPtrBegin);
George Rokos2467df62017-01-25 21:27:24 +0000637 if (!TgtPtrBegin) {
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000638 DP ("Data allocation for %sprivate array " DPxMOD " failed, "
639 "abort target.\n",
George Rokos2467df62017-01-25 21:27:24 +0000640 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
641 DPxPTR(HstPtrBegin));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000642 return OFFLOAD_FAIL;
643 }
644 fpArrays.push_back(TgtPtrBegin);
645 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000646#ifdef OMPTARGET_DEBUG
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000647 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
648 DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for "
649 "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n",
650 arg_sizes[i], DPxPTR(TgtPtrBegin),
651 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
652 DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000653#endif
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000654 // If first-private, copy data from host
655 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
656 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]);
657 if (rt != OFFLOAD_SUCCESS) {
658 DP ("Copying data to device failed, failed.\n");
659 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000660 }
661 }
662 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
George Rokos1546d312017-05-10 14:12:36 +0000663 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
664 false);
665 TgtBaseOffset = 0; // no offset for ptrs.
George Rokos2467df62017-01-25 21:27:24 +0000666 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
667 "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
668 DPxPTR(HstPtrBase));
669 } else {
George Rokos1546d312017-05-10 14:12:36 +0000670 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
671 false);
672 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000673#ifdef OMPTARGET_DEBUG
George Rokos1546d312017-05-10 14:12:36 +0000674 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000675 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
676 DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000677#endif
George Rokos2467df62017-01-25 21:27:24 +0000678 }
George Rokos1546d312017-05-10 14:12:36 +0000679 tgt_args.push_back(TgtPtrBegin);
680 tgt_offsets.push_back(TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000681 }
George Rokos1546d312017-05-10 14:12:36 +0000682
683 assert(tgt_args.size() == tgt_offsets.size() &&
684 "Size mismatch in arguments and offsets");
George Rokos2467df62017-01-25 21:27:24 +0000685
686 // Pop loop trip count
687 uint64_t ltc = Device.loopTripCnt;
688 Device.loopTripCnt = 0;
689
690 // Launch device execution.
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000691 DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
692 TargetTable->EntriesBegin[TM->Index].name,
693 DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index);
694 if (IsTeamConstruct) {
695 rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr,
696 &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num,
697 thread_limit, ltc);
George Rokos2467df62017-01-25 21:27:24 +0000698 } else {
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000699 rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr,
700 &tgt_args[0], &tgt_offsets[0], tgt_args.size());
701 }
702 if (rc != OFFLOAD_SUCCESS) {
703 DP ("Executing target region abort target.\n");
704 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000705 }
706
707 // Deallocate (first-)private arrays
708 for (auto it : fpArrays) {
709 int rt = Device.RTL->data_delete(Device.RTLDeviceID, it);
710 if (rt != OFFLOAD_SUCCESS) {
711 DP("Deallocation of (first-)private arrays failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000712 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000713 }
714 }
715
716 // Move data from device.
717 int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes,
718 arg_types);
George Rokos2467df62017-01-25 21:27:24 +0000719 if (rt != OFFLOAD_SUCCESS) {
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000720 DP("Call to target_data_end failed, abort targe.\n");
721 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000722 }
723
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000724 return OFFLOAD_SUCCESS;
George Rokos2467df62017-01-25 21:27:24 +0000725}