blob: d522c72abc28f615f04d3e467e1c29370aeba838 [file] [log] [blame]
George Rokos2467df62017-01-25 21:27:24 +00001//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is dual licensed under the MIT and the University of Illinois Open
6// Source Licenses. See LICENSE.txt for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation of the interface to be used by Clang during the codegen of a
11// target region.
12//
13//===----------------------------------------------------------------------===//
14
Jonas Hahnfeld43322802017-12-06 21:59:07 +000015#include <omptarget.h>
16
17#include "device.h"
18#include "private.h"
19#include "rtl.h"
20
George Rokos2467df62017-01-25 21:27:24 +000021#include <cassert>
George Rokos2467df62017-01-25 21:27:24 +000022#include <vector>
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +000023#include <mutex>
George Rokos2467df62017-01-25 21:27:24 +000024
Sergey Dmitrievb305d262017-08-14 15:09:59 +000025#ifdef OMPTARGET_DEBUG
Jonas Hahnfeld43322802017-12-06 21:59:07 +000026int DebugLevel = 0;
Sergey Dmitrievb305d262017-08-14 15:09:59 +000027#endif // OMPTARGET_DEBUG
28
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +000029////////////////////////////////////////////////////////////////////////////////
30/// support for fatal messages
31
32// mutex
33std::mutex LibomptargetPrintMtx;
34
35void FatalMessage(const int errorNum, const char *fmt, ...) {
36 va_list args;
37 va_start(args, fmt);
38 LibomptargetPrintMtx.lock();
39 fprintf(stderr, "Libomptarget error %d:", errorNum);
40 vfprintf(stderr, fmt, args);
41 fprintf(stderr, "\n");
42 LibomptargetPrintMtx.unlock();
43 va_end(args);
44 exit(1);
45}
46
47
George Rokosa0da2462018-07-19 13:41:03 +000048/* All begin addresses for partially mapped structs must be 8-aligned in order
49 * to ensure proper alignment of members. E.g.
50 *
51 * struct S {
52 * int a; // 4-aligned
53 * int b; // 4-aligned
54 * int *p; // 8-aligned
55 * } s1;
56 * ...
57 * #pragma omp target map(tofrom: s1.b, s1.p[0:N])
58 * {
59 * s1.b = 5;
60 * for (int i...) s1.p[i] = ...;
61 * }
62 *
63 * Here we are mapping s1 starting from member b, so BaseAddress=&s1=&s1.a and
64 * BeginAddress=&s1.b. Let's assume that the struct begins at address 0x100,
65 * then &s1.a=0x100, &s1.b=0x104, &s1.p=0x108. Each member obeys the alignment
66 * requirements for its type. Now, when we allocate memory on the device, in
67 * CUDA's case cuMemAlloc() returns an address which is at least 256-aligned.
68 * This means that the chunk of the struct on the device will start at a
69 * 256-aligned address, let's say 0x200. Then the address of b will be 0x200 and
70 * address of p will be a misaligned 0x204 (on the host there was no need to add
71 * padding between b and p, so p comes exactly 4 bytes after b). If the device
72 * kernel tries to access s1.p, a misaligned address error occurs (as reported
73 * by the CUDA plugin). By padding the begin address down to a multiple of 8 and
74 * extending the size of the allocated chuck accordingly, the chuck on the
75 * device will start at 0x200 with the padding (4 bytes), then &s1.b=0x204 and
76 * &s1.p=0x208, as they should be to satisfy the alignment requirements.
77 */
78static const int64_t alignment = 8;
79
George Rokos2467df62017-01-25 21:27:24 +000080/// Map global data and execute pending ctors
81static int InitLibrary(DeviceTy& Device) {
82 /*
83 * Map global data
84 */
85 int32_t device_id = Device.DeviceID;
86 int rc = OFFLOAD_SUCCESS;
87
88 Device.PendingGlobalsMtx.lock();
89 TrlTblMtx.lock();
90 for (HostEntriesBeginToTransTableTy::iterator
91 ii = HostEntriesBeginToTransTable.begin();
92 ii != HostEntriesBeginToTransTable.end(); ++ii) {
93 TranslationTable *TransTable = &ii->second;
94 if (TransTable->TargetsTable[device_id] != 0) {
95 // Library entries have already been processed
96 continue;
97 }
98
99 // 1) get image.
100 assert(TransTable->TargetsImages.size() > (size_t)device_id &&
101 "Not expecting a device ID outside the table's bounds!");
102 __tgt_device_image *img = TransTable->TargetsImages[device_id];
103 if (!img) {
104 DP("No image loaded for device id %d.\n", device_id);
105 rc = OFFLOAD_FAIL;
106 break;
107 }
108 // 2) load image into the target table.
109 __tgt_target_table *TargetTable =
110 TransTable->TargetsTable[device_id] = Device.load_binary(img);
111 // Unable to get table for this image: invalidate image and fail.
112 if (!TargetTable) {
113 DP("Unable to generate entries table for device id %d.\n", device_id);
114 TransTable->TargetsImages[device_id] = 0;
115 rc = OFFLOAD_FAIL;
116 break;
117 }
118
119 // Verify whether the two table sizes match.
120 size_t hsize =
121 TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
122 size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
123
124 // Invalid image for these host entries!
125 if (hsize != tsize) {
126 DP("Host and Target tables mismatch for device id %d [%zx != %zx].\n",
127 device_id, hsize, tsize);
128 TransTable->TargetsImages[device_id] = 0;
129 TransTable->TargetsTable[device_id] = 0;
130 rc = OFFLOAD_FAIL;
131 break;
132 }
133
134 // process global data that needs to be mapped.
George Rokosd57681b2017-04-22 11:45:03 +0000135 Device.DataMapMtx.lock();
George Rokos2467df62017-01-25 21:27:24 +0000136 __tgt_target_table *HostTable = &TransTable->HostTable;
137 for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
138 *CurrHostEntry = HostTable->EntriesBegin,
139 *EntryDeviceEnd = TargetTable->EntriesEnd;
140 CurrDeviceEntry != EntryDeviceEnd;
141 CurrDeviceEntry++, CurrHostEntry++) {
142 if (CurrDeviceEntry->size != 0) {
143 // has data.
144 assert(CurrDeviceEntry->size == CurrHostEntry->size &&
145 "data size mismatch");
George Rokosba7380b2017-03-22 16:43:40 +0000146
147 // Fortran may use multiple weak declarations for the same symbol,
148 // therefore we must allow for multiple weak symbols to be loaded from
149 // the fat binary. Treat these mappings as any other "regular" mapping.
150 // Add entry to map.
George Rokosd57681b2017-04-22 11:45:03 +0000151 if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size))
152 continue;
George Rokos2467df62017-01-25 21:27:24 +0000153 DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
154 "\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
155 CurrDeviceEntry->size);
George Rokosd57681b2017-04-22 11:45:03 +0000156 Device.HostDataToTargetMap.push_front(HostDataToTargetTy(
157 (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
158 (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
159 (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/,
160 (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
161 INF_REF_CNT /*RefCount*/));
George Rokos2467df62017-01-25 21:27:24 +0000162 }
163 }
George Rokosd57681b2017-04-22 11:45:03 +0000164 Device.DataMapMtx.unlock();
George Rokos2467df62017-01-25 21:27:24 +0000165 }
166 TrlTblMtx.unlock();
167
168 if (rc != OFFLOAD_SUCCESS) {
169 Device.PendingGlobalsMtx.unlock();
170 return rc;
171 }
172
173 /*
174 * Run ctors for static objects
175 */
176 if (!Device.PendingCtorsDtors.empty()) {
177 // Call all ctors for all libraries registered so far
178 for (auto &lib : Device.PendingCtorsDtors) {
179 if (!lib.second.PendingCtors.empty()) {
180 DP("Has pending ctors... call now\n");
181 for (auto &entry : lib.second.PendingCtors) {
182 void *ctor = entry;
183 int rc = target(device_id, ctor, 0, NULL, NULL, NULL,
184 NULL, 1, 1, true /*team*/);
185 if (rc != OFFLOAD_SUCCESS) {
186 DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
187 Device.PendingGlobalsMtx.unlock();
188 return OFFLOAD_FAIL;
189 }
190 }
191 // Clear the list to indicate that this device has been used
192 lib.second.PendingCtors.clear();
193 DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(lib.first));
194 }
195 }
196 }
197 Device.HasPendingGlobals = false;
198 Device.PendingGlobalsMtx.unlock();
199
200 return OFFLOAD_SUCCESS;
201}
202
203// Check whether a device has been initialized, global ctors have been
204// executed and global data has been mapped; do so if not already done.
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000205int CheckDeviceAndCtors(int64_t device_id) {
George Rokos2467df62017-01-25 21:27:24 +0000206 // Is device ready?
207 if (!device_is_ready(device_id)) {
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000208 DP("Device %" PRId64 " is not ready.\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000209 return OFFLOAD_FAIL;
210 }
211
212 // Get device info.
213 DeviceTy &Device = Devices[device_id];
214
215 // Check whether global data has been mapped for this device
216 Device.PendingGlobalsMtx.lock();
217 bool hasPendingGlobals = Device.HasPendingGlobals;
218 Device.PendingGlobalsMtx.unlock();
219 if (hasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) {
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000220 DP("Failed to init globals on device %" PRId64 "\n", device_id);
George Rokos2467df62017-01-25 21:27:24 +0000221 return OFFLOAD_FAIL;
222 }
223
224 return OFFLOAD_SUCCESS;
225}
226
George Rokosa0da2462018-07-19 13:41:03 +0000227static int32_t member_of(int64_t type) {
George Rokos2467df62017-01-25 21:27:24 +0000228 return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
229}
230
231/// Internal function to do the mapping and transfer the data to the device
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000232int target_data_begin(DeviceTy &Device, int32_t arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000233 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
234 // process each input.
George Rokos2467df62017-01-25 21:27:24 +0000235 for (int32_t i = 0; i < arg_num; ++i) {
236 // Ignore private variables and arrays - there is no mapping for them.
237 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
238 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
239 continue;
240
241 void *HstPtrBegin = args[i];
242 void *HstPtrBase = args_base[i];
George Rokosa0da2462018-07-19 13:41:03 +0000243 int64_t data_size = arg_sizes[i];
244
245 // Adjust for proper alignment if this is a combined entry (for structs).
246 // Look at the next argument - if that is MEMBER_OF this one, then this one
247 // is a combined entry.
248 int64_t padding = 0;
249 const int next_i = i+1;
250 if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
251 member_of(arg_types[next_i]) == i) {
252 padding = (int64_t)HstPtrBegin % alignment;
253 if (padding) {
254 DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
255 "\n", padding, DPxPTR(HstPtrBegin));
256 HstPtrBegin = (char *) HstPtrBegin - padding;
257 data_size += padding;
258 }
259 }
260
George Rokos2467df62017-01-25 21:27:24 +0000261 // Address of pointer on the host and device, respectively.
262 void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
263 bool IsNew, Pointer_IsNew;
264 bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
George Rokosa0da2462018-07-19 13:41:03 +0000265 // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
266 // have reached this point via __tgt_target_data_begin and not __tgt_target
267 // then no argument is marked as TARGET_PARAM ("omp target data map" is not
268 // associated with a target region, so there are no target parameters). This
269 // may be considered a hack, we could revise the scheme in the future.
George Rokos2467df62017-01-25 21:27:24 +0000270 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF);
271 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
272 DP("Has a pointer entry: \n");
273 // base is address of pointer.
274 Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
275 sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef);
276 if (!Pointer_TgtPtrBegin) {
277 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
278 "illegal mapping).\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000279 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000280 }
281 DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
282 "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin),
283 (Pointer_IsNew ? "" : " not"));
284 Pointer_HstPtrBegin = HstPtrBase;
285 // modify current entry.
286 HstPtrBase = *(void **)HstPtrBase;
287 UpdateRef = true; // subsequently update ref count of pointee
288 }
289
290 void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
George Rokosa0da2462018-07-19 13:41:03 +0000291 data_size, IsNew, IsImplicit, UpdateRef);
292 if (!TgtPtrBegin && data_size) {
293 // If data_size==0, then the argument could be a zero-length pointer to
294 // NULL, so getOrAlloc() returning NULL is not an error.
George Rokos2467df62017-01-25 21:27:24 +0000295 DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
296 "illegal mapping).\n");
297 }
298 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
George Rokosa0da2462018-07-19 13:41:03 +0000299 " - is%s new\n", data_size, DPxPTR(TgtPtrBegin),
George Rokos2467df62017-01-25 21:27:24 +0000300 (IsNew ? "" : " not"));
301
302 if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
George Rokosa0da2462018-07-19 13:41:03 +0000303 uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
304 void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
305 DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase));
306 args_base[i] = TgtPtrBase;
George Rokos2467df62017-01-25 21:27:24 +0000307 }
308
309 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
310 bool copy = false;
311 if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
312 copy = true;
313 } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
314 // Copy data only if the "parent" struct has RefCount==1.
George Rokosa0da2462018-07-19 13:41:03 +0000315 int32_t parent_idx = member_of(arg_types[i]);
George Rokos2467df62017-01-25 21:27:24 +0000316 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
317 assert(parent_rc > 0 && "parent struct not found");
318 if (parent_rc == 1) {
319 copy = true;
320 }
321 }
322
323 if (copy) {
324 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
George Rokosa0da2462018-07-19 13:41:03 +0000325 data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
326 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size);
George Rokos2467df62017-01-25 21:27:24 +0000327 if (rt != OFFLOAD_SUCCESS) {
328 DP("Copying data to device failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000329 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000330 }
331 }
332 }
333
334 if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
335 DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
336 DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
337 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
338 void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
339 int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase,
340 sizeof(void *));
341 if (rt != OFFLOAD_SUCCESS) {
342 DP("Copying data to device failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000343 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000344 }
345 // create shadow pointers for this entry
346 Device.ShadowMtx.lock();
347 Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase,
348 Pointer_TgtPtrBegin, TgtPtrBase};
349 Device.ShadowMtx.unlock();
350 }
351 }
352
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000353 return OFFLOAD_SUCCESS;
George Rokos2467df62017-01-25 21:27:24 +0000354}
355
George Rokos2467df62017-01-25 21:27:24 +0000356/// Internal function to undo the mapping and retrieve the data from the device.
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000357int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
George Rokos2467df62017-01-25 21:27:24 +0000358 void **args, int64_t *arg_sizes, int64_t *arg_types) {
George Rokos2467df62017-01-25 21:27:24 +0000359 // process each input.
360 for (int32_t i = arg_num - 1; i >= 0; --i) {
361 // Ignore private variables and arrays - there is no mapping for them.
362 // Also, ignore the use_device_ptr directive, it has no effect here.
363 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
364 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
365 continue;
366
367 void *HstPtrBegin = args[i];
George Rokosa0da2462018-07-19 13:41:03 +0000368 int64_t data_size = arg_sizes[i];
369 // Adjust for proper alignment if this is a combined entry (for structs).
370 // Look at the next argument - if that is MEMBER_OF this one, then this one
371 // is a combined entry.
372 int64_t padding = 0;
373 const int next_i = i+1;
374 if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
375 member_of(arg_types[next_i]) == i) {
376 padding = (int64_t)HstPtrBegin % alignment;
377 if (padding) {
378 DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
379 "\n", padding, DPxPTR(HstPtrBegin));
380 HstPtrBegin = (char *) HstPtrBegin - padding;
381 data_size += padding;
382 }
383 }
384
George Rokos2467df62017-01-25 21:27:24 +0000385 bool IsLast;
386 bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
387 (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
388 bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
389
390 // If PTR_AND_OBJ, HstPtrBegin is address of pointee
George Rokosa0da2462018-07-19 13:41:03 +0000391 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
George Rokos2467df62017-01-25 21:27:24 +0000392 UpdateRef);
393 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
George Rokosa0da2462018-07-19 13:41:03 +0000394 " - is%s last\n", data_size, DPxPTR(TgtPtrBegin),
George Rokos2467df62017-01-25 21:27:24 +0000395 (IsLast ? "" : " not"));
396
George Rokos15a6e7d2017-02-15 20:45:37 +0000397 bool DelEntry = IsLast || ForceDelete;
398
George Rokos2467df62017-01-25 21:27:24 +0000399 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
400 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
George Rokos15a6e7d2017-02-15 20:45:37 +0000401 DelEntry = false; // protect parent struct from being deallocated
George Rokos2467df62017-01-25 21:27:24 +0000402 }
403
George Rokos2467df62017-01-25 21:27:24 +0000404 if ((arg_types[i] & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
405 // Move data back to the host
406 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
407 bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
408 bool CopyMember = false;
409 if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
410 !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
411 // Copy data only if the "parent" struct has RefCount==1.
George Rokosa0da2462018-07-19 13:41:03 +0000412 int32_t parent_idx = member_of(arg_types[i]);
George Rokos2467df62017-01-25 21:27:24 +0000413 long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
414 assert(parent_rc > 0 && "parent struct not found");
415 if (parent_rc == 1) {
416 CopyMember = true;
417 }
418 }
419
420 if (DelEntry || Always || CopyMember) {
421 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
George Rokosa0da2462018-07-19 13:41:03 +0000422 data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
423 int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size);
George Rokos2467df62017-01-25 21:27:24 +0000424 if (rt != OFFLOAD_SUCCESS) {
425 DP("Copying data from device failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000426 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000427 }
428 }
429 }
430
431 // If we copied back to the host a struct/array containing pointers, we
432 // need to restore the original host pointer values from their shadow
433 // copies. If the struct is going to be deallocated, remove any remaining
434 // shadow pointer entries for this struct.
435 uintptr_t lb = (uintptr_t) HstPtrBegin;
George Rokosa0da2462018-07-19 13:41:03 +0000436 uintptr_t ub = (uintptr_t) HstPtrBegin + data_size;
George Rokos2467df62017-01-25 21:27:24 +0000437 Device.ShadowMtx.lock();
438 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
439 it != Device.ShadowPtrMap.end(); ++it) {
440 void **ShadowHstPtrAddr = (void**) it->first;
441
442 // An STL map is sorted on its keys; use this property
443 // to quickly determine when to break out of the loop.
444 if ((uintptr_t) ShadowHstPtrAddr < lb)
445 continue;
446 if ((uintptr_t) ShadowHstPtrAddr >= ub)
447 break;
448
449 // If we copied the struct to the host, we need to restore the pointer.
450 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
451 DP("Restoring original host pointer value " DPxMOD " for host "
452 "pointer " DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
453 DPxPTR(ShadowHstPtrAddr));
454 *ShadowHstPtrAddr = it->second.HstPtrVal;
455 }
456 // If the struct is to be deallocated, remove the shadow entry.
457 if (DelEntry) {
458 DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));
459 Device.ShadowPtrMap.erase(it);
460 }
461 }
462 Device.ShadowMtx.unlock();
463
464 // Deallocate map
465 if (DelEntry) {
George Rokosa0da2462018-07-19 13:41:03 +0000466 int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete);
George Rokos2467df62017-01-25 21:27:24 +0000467 if (rt != OFFLOAD_SUCCESS) {
468 DP("Deallocating data from device failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000469 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000470 }
471 }
472 }
473 }
474
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000475 return OFFLOAD_SUCCESS;
George Rokos2467df62017-01-25 21:27:24 +0000476}
477
Jonas Hahnfelda7c4f322017-12-06 21:59:15 +0000478/// Internal function to pass data to/from the target.
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000479int target_data_update(DeviceTy &Device, int32_t arg_num,
George Rokosb92dbb42017-11-21 18:26:41 +0000480 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
George Rokos2467df62017-01-25 21:27:24 +0000481 // process each input.
482 for (int32_t i = 0; i < arg_num; ++i) {
483 if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
484 (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
485 continue;
486
487 void *HstPtrBegin = args[i];
488 int64_t MapSize = arg_sizes[i];
489 bool IsLast;
490 void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
491 false);
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000492 if (!TgtPtrBegin) {
493 DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
494 continue;
495 }
George Rokos2467df62017-01-25 21:27:24 +0000496
497 if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
498 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
499 arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000500 int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize);
501 if (rt != OFFLOAD_SUCCESS) {
502 DP("Copying data from device failed.\n");
503 return OFFLOAD_FAIL;
504 }
George Rokos2467df62017-01-25 21:27:24 +0000505
506 uintptr_t lb = (uintptr_t) HstPtrBegin;
507 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
508 Device.ShadowMtx.lock();
509 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
510 it != Device.ShadowPtrMap.end(); ++it) {
511 void **ShadowHstPtrAddr = (void**) it->first;
512 if ((uintptr_t) ShadowHstPtrAddr < lb)
513 continue;
514 if ((uintptr_t) ShadowHstPtrAddr >= ub)
515 break;
516 DP("Restoring original host pointer value " DPxMOD " for host pointer "
517 DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
518 DPxPTR(ShadowHstPtrAddr));
519 *ShadowHstPtrAddr = it->second.HstPtrVal;
520 }
521 Device.ShadowMtx.unlock();
522 }
523
524 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
525 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
526 arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000527 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize);
528 if (rt != OFFLOAD_SUCCESS) {
529 DP("Copying data to device failed.\n");
530 return OFFLOAD_FAIL;
531 }
George Rokos2467df62017-01-25 21:27:24 +0000532 uintptr_t lb = (uintptr_t) HstPtrBegin;
533 uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
534 Device.ShadowMtx.lock();
535 for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
536 it != Device.ShadowPtrMap.end(); ++it) {
537 void **ShadowHstPtrAddr = (void**) it->first;
538 if ((uintptr_t) ShadowHstPtrAddr < lb)
539 continue;
540 if ((uintptr_t) ShadowHstPtrAddr >= ub)
541 break;
542 DP("Restoring original target pointer value " DPxMOD " for target "
543 "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal),
544 DPxPTR(it->second.TgtPtrAddr));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000545 rt = Device.data_submit(it->second.TgtPtrAddr,
George Rokos2467df62017-01-25 21:27:24 +0000546 &it->second.TgtPtrVal, sizeof(void *));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000547 if (rt != OFFLOAD_SUCCESS) {
548 DP("Copying data to device failed.\n");
549 Device.ShadowMtx.unlock();
550 return OFFLOAD_FAIL;
551 }
George Rokos2467df62017-01-25 21:27:24 +0000552 }
553 Device.ShadowMtx.unlock();
554 }
555 }
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000556 return OFFLOAD_SUCCESS;
George Rokos2467df62017-01-25 21:27:24 +0000557}
558
George Rokos2467df62017-01-25 21:27:24 +0000559/// performs the same actions as data_begin in case arg_num is
560/// non-zero and initiates run of the offloaded region on the target platform;
561/// if arg_num is non-zero after the region execution is done it also
562/// performs the same action as data_update and data_end above. This function
563/// returns 0 if it was able to transfer the execution to a target and an
564/// integer different from zero otherwise.
Jonas Hahnfeld43322802017-12-06 21:59:07 +0000565int target(int64_t device_id, void *host_ptr, int32_t arg_num,
George Rokos2467df62017-01-25 21:27:24 +0000566 void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
567 int32_t team_num, int32_t thread_limit, int IsTeamConstruct) {
568 DeviceTy &Device = Devices[device_id];
569
570 // Find the table information in the map or look it up in the translation
571 // tables.
572 TableMap *TM = 0;
573 TblMapMtx.lock();
574 HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr);
575 if (TableMapIt == HostPtrToTableMap.end()) {
576 // We don't have a map. So search all the registered libraries.
577 TrlTblMtx.lock();
578 for (HostEntriesBeginToTransTableTy::iterator
579 ii = HostEntriesBeginToTransTable.begin(),
580 ie = HostEntriesBeginToTransTable.end();
581 !TM && ii != ie; ++ii) {
582 // get the translation table (which contains all the good info).
583 TranslationTable *TransTable = &ii->second;
584 // iterate over all the host table entries to see if we can locate the
585 // host_ptr.
586 __tgt_offload_entry *begin = TransTable->HostTable.EntriesBegin;
587 __tgt_offload_entry *end = TransTable->HostTable.EntriesEnd;
588 __tgt_offload_entry *cur = begin;
589 for (uint32_t i = 0; cur < end; ++cur, ++i) {
590 if (cur->addr != host_ptr)
591 continue;
592 // we got a match, now fill the HostPtrToTableMap so that we
593 // may avoid this search next time.
594 TM = &HostPtrToTableMap[host_ptr];
595 TM->Table = TransTable;
596 TM->Index = i;
597 break;
598 }
599 }
600 TrlTblMtx.unlock();
601 } else {
602 TM = &TableMapIt->second;
603 }
604 TblMapMtx.unlock();
605
606 // No map for this host pointer found!
607 if (!TM) {
608 DP("Host ptr " DPxMOD " does not have a matching target pointer.\n",
609 DPxPTR(host_ptr));
610 return OFFLOAD_FAIL;
611 }
612
613 // get target table.
614 TrlTblMtx.lock();
615 assert(TM->Table->TargetsTable.size() > (size_t)device_id &&
616 "Not expecting a device ID outside the table's bounds!");
617 __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id];
618 TrlTblMtx.unlock();
619 assert(TargetTable && "Global data has not been mapped\n");
620
621 // Move data to device.
622 int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
623 arg_types);
George Rokos2467df62017-01-25 21:27:24 +0000624 if (rc != OFFLOAD_SUCCESS) {
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000625 DP("Call to target_data_begin failed, abort target.\n");
George Rokos2467df62017-01-25 21:27:24 +0000626 return OFFLOAD_FAIL;
627 }
628
629 std::vector<void *> tgt_args;
George Rokos1546d312017-05-10 14:12:36 +0000630 std::vector<ptrdiff_t> tgt_offsets;
George Rokos2467df62017-01-25 21:27:24 +0000631
632 // List of (first-)private arrays allocated for this target region
633 std::vector<void *> fpArrays;
634
635 for (int32_t i = 0; i < arg_num; ++i) {
636 if (!(arg_types[i] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
637 // This is not a target parameter, do not push it into tgt_args.
638 continue;
639 }
640 void *HstPtrBegin = args[i];
641 void *HstPtrBase = args_base[i];
George Rokos1546d312017-05-10 14:12:36 +0000642 void *TgtPtrBegin;
643 ptrdiff_t TgtBaseOffset;
George Rokos2467df62017-01-25 21:27:24 +0000644 bool IsLast; // unused.
645 if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
646 DP("Forwarding first-private value " DPxMOD " to the target construct\n",
647 DPxPTR(HstPtrBase));
George Rokos1546d312017-05-10 14:12:36 +0000648 TgtPtrBegin = HstPtrBase;
649 TgtBaseOffset = 0;
George Rokos2467df62017-01-25 21:27:24 +0000650 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
651 // Allocate memory for (first-)private array
George Rokos1546d312017-05-10 14:12:36 +0000652 TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
653 arg_sizes[i], HstPtrBegin);
George Rokos2467df62017-01-25 21:27:24 +0000654 if (!TgtPtrBegin) {
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000655 DP ("Data allocation for %sprivate array " DPxMOD " failed, "
656 "abort target.\n",
George Rokos2467df62017-01-25 21:27:24 +0000657 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
658 DPxPTR(HstPtrBegin));
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000659 return OFFLOAD_FAIL;
660 }
661 fpArrays.push_back(TgtPtrBegin);
662 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000663#ifdef OMPTARGET_DEBUG
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000664 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
665 DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for "
666 "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n",
667 arg_sizes[i], DPxPTR(TgtPtrBegin),
668 (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""),
669 DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000670#endif
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000671 // If first-private, copy data from host
672 if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
673 int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]);
674 if (rt != OFFLOAD_SUCCESS) {
675 DP ("Copying data to device failed, failed.\n");
676 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000677 }
678 }
679 } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
George Rokos1546d312017-05-10 14:12:36 +0000680 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
681 false);
682 TgtBaseOffset = 0; // no offset for ptrs.
George Rokos2467df62017-01-25 21:27:24 +0000683 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
684 "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
685 DPxPTR(HstPtrBase));
686 } else {
George Rokos1546d312017-05-10 14:12:36 +0000687 TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
688 false);
689 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
Samuel Antao8933ffb2017-06-09 16:46:07 +0000690#ifdef OMPTARGET_DEBUG
George Rokos1546d312017-05-10 14:12:36 +0000691 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000692 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
693 DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
Samuel Antao8933ffb2017-06-09 16:46:07 +0000694#endif
George Rokos2467df62017-01-25 21:27:24 +0000695 }
George Rokos1546d312017-05-10 14:12:36 +0000696 tgt_args.push_back(TgtPtrBegin);
697 tgt_offsets.push_back(TgtBaseOffset);
George Rokos2467df62017-01-25 21:27:24 +0000698 }
George Rokos1546d312017-05-10 14:12:36 +0000699
700 assert(tgt_args.size() == tgt_offsets.size() &&
701 "Size mismatch in arguments and offsets");
George Rokos2467df62017-01-25 21:27:24 +0000702
703 // Pop loop trip count
704 uint64_t ltc = Device.loopTripCnt;
705 Device.loopTripCnt = 0;
706
707 // Launch device execution.
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000708 DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
709 TargetTable->EntriesBegin[TM->Index].name,
710 DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index);
711 if (IsTeamConstruct) {
712 rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr,
713 &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num,
714 thread_limit, ltc);
George Rokos2467df62017-01-25 21:27:24 +0000715 } else {
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000716 rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr,
717 &tgt_args[0], &tgt_offsets[0], tgt_args.size());
718 }
719 if (rc != OFFLOAD_SUCCESS) {
720 DP ("Executing target region abort target.\n");
721 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000722 }
723
724 // Deallocate (first-)private arrays
725 for (auto it : fpArrays) {
726 int rt = Device.RTL->data_delete(Device.RTLDeviceID, it);
727 if (rt != OFFLOAD_SUCCESS) {
728 DP("Deallocation of (first-)private arrays failed.\n");
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000729 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000730 }
731 }
732
733 // Move data from device.
734 int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes,
735 arg_types);
George Rokos2467df62017-01-25 21:27:24 +0000736 if (rt != OFFLOAD_SUCCESS) {
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000737 DP("Call to target_data_end failed, abort targe.\n");
738 return OFFLOAD_FAIL;
George Rokos2467df62017-01-25 21:27:24 +0000739 }
740
Alexandre Eichenberger1b4a6662018-08-23 16:22:42 +0000741 return OFFLOAD_SUCCESS;
George Rokos2467df62017-01-25 21:27:24 +0000742}