George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1 | //===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is dual licensed under the MIT and the University of Illinois Open |
| 6 | // Source Licenses. See LICENSE.txt for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // Implementation of the interface to be used by Clang during the codegen of a |
| 11 | // target region. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
Jonas Hahnfeld | 4332280 | 2017-12-06 21:59:07 +0000 | [diff] [blame] | 15 | #include <omptarget.h> |
| 16 | |
| 17 | #include "device.h" |
| 18 | #include "private.h" |
| 19 | #include "rtl.h" |
| 20 | |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 21 | #include <cassert> |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 22 | #include <vector> |
| 23 | |
// Debug verbosity level. This definition is compiled only when
// OMPTARGET_DEBUG is defined, and the level starts at 0.
// NOTE(review): presumably read by the DP() debug-print macro and raised
// elsewhere during start-up - confirm against the macro's definition.
Sergey Dmitriev | b305d26 | 2017-08-14 15:09:59 +0000 | [diff] [blame] | 24 | #ifdef OMPTARGET_DEBUG |
Jonas Hahnfeld | 4332280 | 2017-12-06 21:59:07 +0000 | [diff] [blame] | 25 | int DebugLevel = 0; |
Sergey Dmitriev | b305d26 | 2017-08-14 15:09:59 +0000 | [diff] [blame] | 26 | #endif // OMPTARGET_DEBUG |
| 27 | |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 28 | //////////////////////////////////////////////////////////////////////////////// |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 29 | /// adds a target shared library to the target execution image |
// Exported entry point: forwards the binary descriptor `desc` unchanged to
// RTLs.RegisterLib(). No validation of `desc` is performed here.
| 30 | EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { |
Jonas Hahnfeld | 4332280 | 2017-12-06 21:59:07 +0000 | [diff] [blame] | 31 | RTLs.RegisterLib(desc); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 32 | } |
| 33 | |
| 34 | //////////////////////////////////////////////////////////////////////////////// |
| 35 | /// unloads a target shared library |
// Exported entry point: forwards the binary descriptor `desc` unchanged to
// RTLs.UnregisterLib(). No validation of `desc` is performed here.
| 36 | EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { |
Jonas Hahnfeld | 4332280 | 2017-12-06 21:59:07 +0000 | [diff] [blame] | 37 | RTLs.UnregisterLib(desc); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 38 | } |
| 39 | |
| 40 | /// Map global data and execute pending ctors |
| 41 | static int InitLibrary(DeviceTy& Device) { |
| 42 | /* |
| 43 | * Map global data |
| 44 | */ |
| 45 | int32_t device_id = Device.DeviceID; |
| 46 | int rc = OFFLOAD_SUCCESS; |
| 47 | |
| 48 | Device.PendingGlobalsMtx.lock(); |
| 49 | TrlTblMtx.lock(); |
| 50 | for (HostEntriesBeginToTransTableTy::iterator |
| 51 | ii = HostEntriesBeginToTransTable.begin(); |
| 52 | ii != HostEntriesBeginToTransTable.end(); ++ii) { |
| 53 | TranslationTable *TransTable = &ii->second; |
| 54 | if (TransTable->TargetsTable[device_id] != 0) { |
| 55 | // Library entries have already been processed |
| 56 | continue; |
| 57 | } |
| 58 | |
| 59 | // 1) get image. |
| 60 | assert(TransTable->TargetsImages.size() > (size_t)device_id && |
| 61 | "Not expecting a device ID outside the table's bounds!"); |
| 62 | __tgt_device_image *img = TransTable->TargetsImages[device_id]; |
| 63 | if (!img) { |
| 64 | DP("No image loaded for device id %d.\n", device_id); |
| 65 | rc = OFFLOAD_FAIL; |
| 66 | break; |
| 67 | } |
| 68 | // 2) load image into the target table. |
| 69 | __tgt_target_table *TargetTable = |
| 70 | TransTable->TargetsTable[device_id] = Device.load_binary(img); |
| 71 | // Unable to get table for this image: invalidate image and fail. |
| 72 | if (!TargetTable) { |
| 73 | DP("Unable to generate entries table for device id %d.\n", device_id); |
| 74 | TransTable->TargetsImages[device_id] = 0; |
| 75 | rc = OFFLOAD_FAIL; |
| 76 | break; |
| 77 | } |
| 78 | |
| 79 | // Verify whether the two table sizes match. |
| 80 | size_t hsize = |
| 81 | TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin; |
| 82 | size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin; |
| 83 | |
| 84 | // Invalid image for these host entries! |
| 85 | if (hsize != tsize) { |
| 86 | DP("Host and Target tables mismatch for device id %d [%zx != %zx].\n", |
| 87 | device_id, hsize, tsize); |
| 88 | TransTable->TargetsImages[device_id] = 0; |
| 89 | TransTable->TargetsTable[device_id] = 0; |
| 90 | rc = OFFLOAD_FAIL; |
| 91 | break; |
| 92 | } |
| 93 | |
| 94 | // process global data that needs to be mapped. |
George Rokos | d57681b | 2017-04-22 11:45:03 +0000 | [diff] [blame] | 95 | Device.DataMapMtx.lock(); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 96 | __tgt_target_table *HostTable = &TransTable->HostTable; |
| 97 | for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin, |
| 98 | *CurrHostEntry = HostTable->EntriesBegin, |
| 99 | *EntryDeviceEnd = TargetTable->EntriesEnd; |
| 100 | CurrDeviceEntry != EntryDeviceEnd; |
| 101 | CurrDeviceEntry++, CurrHostEntry++) { |
| 102 | if (CurrDeviceEntry->size != 0) { |
| 103 | // has data. |
| 104 | assert(CurrDeviceEntry->size == CurrHostEntry->size && |
| 105 | "data size mismatch"); |
George Rokos | ba7380b | 2017-03-22 16:43:40 +0000 | [diff] [blame] | 106 | |
| 107 | // Fortran may use multiple weak declarations for the same symbol, |
| 108 | // therefore we must allow for multiple weak symbols to be loaded from |
| 109 | // the fat binary. Treat these mappings as any other "regular" mapping. |
| 110 | // Add entry to map. |
George Rokos | d57681b | 2017-04-22 11:45:03 +0000 | [diff] [blame] | 111 | if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size)) |
| 112 | continue; |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 113 | DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu" |
| 114 | "\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr), |
| 115 | CurrDeviceEntry->size); |
George Rokos | d57681b | 2017-04-22 11:45:03 +0000 | [diff] [blame] | 116 | Device.HostDataToTargetMap.push_front(HostDataToTargetTy( |
| 117 | (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/, |
| 118 | (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/, |
| 119 | (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/, |
| 120 | (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/, |
| 121 | INF_REF_CNT /*RefCount*/)); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 122 | } |
| 123 | } |
George Rokos | d57681b | 2017-04-22 11:45:03 +0000 | [diff] [blame] | 124 | Device.DataMapMtx.unlock(); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 125 | } |
| 126 | TrlTblMtx.unlock(); |
| 127 | |
| 128 | if (rc != OFFLOAD_SUCCESS) { |
| 129 | Device.PendingGlobalsMtx.unlock(); |
| 130 | return rc; |
| 131 | } |
| 132 | |
| 133 | /* |
| 134 | * Run ctors for static objects |
| 135 | */ |
| 136 | if (!Device.PendingCtorsDtors.empty()) { |
| 137 | // Call all ctors for all libraries registered so far |
| 138 | for (auto &lib : Device.PendingCtorsDtors) { |
| 139 | if (!lib.second.PendingCtors.empty()) { |
| 140 | DP("Has pending ctors... call now\n"); |
| 141 | for (auto &entry : lib.second.PendingCtors) { |
| 142 | void *ctor = entry; |
| 143 | int rc = target(device_id, ctor, 0, NULL, NULL, NULL, |
| 144 | NULL, 1, 1, true /*team*/); |
| 145 | if (rc != OFFLOAD_SUCCESS) { |
| 146 | DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor)); |
| 147 | Device.PendingGlobalsMtx.unlock(); |
| 148 | return OFFLOAD_FAIL; |
| 149 | } |
| 150 | } |
| 151 | // Clear the list to indicate that this device has been used |
| 152 | lib.second.PendingCtors.clear(); |
| 153 | DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(lib.first)); |
| 154 | } |
| 155 | } |
| 156 | } |
| 157 | Device.HasPendingGlobals = false; |
| 158 | Device.PendingGlobalsMtx.unlock(); |
| 159 | |
| 160 | return OFFLOAD_SUCCESS; |
| 161 | } |
| 162 | |
| 163 | // Check whether a device has been initialized, global ctors have been |
| 164 | // executed and global data has been mapped; do so if not already done. |
| 165 | static int CheckDevice(int32_t device_id) { |
| 166 | // Is device ready? |
| 167 | if (!device_is_ready(device_id)) { |
| 168 | DP("Device %d is not ready.\n", device_id); |
| 169 | return OFFLOAD_FAIL; |
| 170 | } |
| 171 | |
| 172 | // Get device info. |
| 173 | DeviceTy &Device = Devices[device_id]; |
| 174 | |
| 175 | // Check whether global data has been mapped for this device |
| 176 | Device.PendingGlobalsMtx.lock(); |
| 177 | bool hasPendingGlobals = Device.HasPendingGlobals; |
| 178 | Device.PendingGlobalsMtx.unlock(); |
| 179 | if (hasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) { |
| 180 | DP("Failed to init globals on device %d\n", device_id); |
| 181 | return OFFLOAD_FAIL; |
| 182 | } |
| 183 | |
| 184 | return OFFLOAD_SUCCESS; |
| 185 | } |
| 186 | |
| 187 | // Following datatypes and functions (tgt_oldmap_type, combined_entry_t, |
| 188 | // translate_map, cleanup_map) will be removed once the compiler starts using |
| 189 | // the new map types. |
| 190 | |
| 191 | // Old map types |
// Bit flags of the legacy ("old") map-type encoding. Each enumerator occupies
// its own bit so several can be OR-ed together in one arg_types[] element.
// translate_map() reads these flags when converting to the new encoding.
| 192 | enum tgt_oldmap_type { |
| 193 | OMP_TGT_OLDMAPTYPE_TO = 0x001, // copy data from host to device |
| 194 | OMP_TGT_OLDMAPTYPE_FROM = 0x002, // copy data from device to host |
| 195 | OMP_TGT_OLDMAPTYPE_ALWAYS = 0x004, // copy regardless of the ref. count |
| 196 | OMP_TGT_OLDMAPTYPE_DELETE = 0x008, // force unmapping of data |
| 197 | OMP_TGT_OLDMAPTYPE_MAP_PTR = 0x010, // map pointer as well as pointee |
| 198 | OMP_TGT_OLDMAPTYPE_FIRST_MAP = 0x020, // first occurrence of mapped variable |
| 199 | OMP_TGT_OLDMAPTYPE_RETURN_PTR = 0x040, // return TgtBase addr of mapped data |
| 200 | OMP_TGT_OLDMAPTYPE_PRIVATE_PTR = 0x080, // private variable - not mapped |
| 201 | OMP_TGT_OLDMAPTYPE_PRIVATE_VAL = 0x100 // copy by value - not mapped |
| 202 | }; |
| 203 | |
| 204 | // Temporary functions for map translation and cleanup |
// Bookkeeping record used by translate_map() for a group of old map entries
// that share a base address. The [begin_addr, end_addr) range is widened as
// members are added so that it covers all of them.
| 205 | struct combined_entry_t { |
| 206 | int num_members; // number of members in combined entry |
| 207 | void *base_addr; // base address of combined entry |
| 208 | void *begin_addr; // begin address of combined entry |
| 209 | void *end_addr; // end address (begin + size, i.e. one past the last byte) of combined entry |
| 210 | }; |
| 211 | |
| 212 | static void translate_map(int32_t arg_num, void **args_base, void **args, |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 213 | int64_t *arg_sizes, int64_t *arg_types, int32_t &new_arg_num, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 214 | void **&new_args_base, void **&new_args, int64_t *&new_arg_sizes, |
| 215 | int64_t *&new_arg_types, bool is_target_construct) { |
| 216 | if (arg_num <= 0) { |
| 217 | DP("Nothing to translate\n"); |
| 218 | new_arg_num = 0; |
| 219 | return; |
| 220 | } |
| 221 | |
| 222 | // array of combined entries |
| 223 | combined_entry_t *cmb_entries = |
| 224 | (combined_entry_t *) alloca(arg_num * sizeof(combined_entry_t)); |
| 225 | // number of combined entries |
| 226 | long num_combined = 0; |
| 227 | // old entry is MAP_PTR? |
| 228 | bool *is_ptr_old = (bool *) alloca(arg_num * sizeof(bool)); |
| 229 | // old entry is member of member_of[old] cmb_entry |
| 230 | int *member_of = (int *) alloca(arg_num * sizeof(int)); |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 231 | // temporary storage for modifications of the original arg_types |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 232 | int64_t *mod_arg_types = (int64_t *) alloca(arg_num *sizeof(int64_t)); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 233 | |
| 234 | DP("Translating %d map entries\n", arg_num); |
| 235 | for (int i = 0; i < arg_num; ++i) { |
| 236 | member_of[i] = -1; |
| 237 | is_ptr_old[i] = false; |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 238 | mod_arg_types[i] = arg_types[i]; |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 239 | // Scan previous entries to see whether this entry shares the same base |
| 240 | for (int j = 0; j < i; ++j) { |
| 241 | void *new_begin_addr = NULL; |
| 242 | void *new_end_addr = NULL; |
| 243 | |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 244 | if (mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_MAP_PTR) { |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 245 | if (args_base[i] == args[j]) { |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 246 | if (!(mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR)) { |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 247 | DP("Entry %d has the same base as entry %d's begin address\n", i, |
| 248 | j); |
| 249 | new_begin_addr = args_base[i]; |
| 250 | new_end_addr = (char *)args_base[i] + sizeof(void *); |
| 251 | assert(arg_sizes[j] == sizeof(void *)); |
| 252 | is_ptr_old[j] = true; |
| 253 | } else { |
| 254 | DP("Entry %d has the same base as entry %d's begin address, but " |
| 255 | "%d's base was a MAP_PTR too\n", i, j, j); |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 256 | int32_t to_from_always_delete = |
| 257 | OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM | |
| 258 | OMP_TGT_OLDMAPTYPE_ALWAYS | OMP_TGT_OLDMAPTYPE_DELETE; |
| 259 | if (mod_arg_types[j] & to_from_always_delete) { |
| 260 | DP("Resetting to/from/always/delete flags for entry %d because " |
| 261 | "it is only a pointer to pointer\n", j); |
| 262 | mod_arg_types[j] &= ~to_from_always_delete; |
| 263 | } |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 264 | } |
| 265 | } |
| 266 | } else { |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 267 | if (!(mod_arg_types[i] & OMP_TGT_OLDMAPTYPE_FIRST_MAP) && |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 268 | args_base[i] == args_base[j]) { |
| 269 | DP("Entry %d has the same base address as entry %d\n", i, j); |
| 270 | new_begin_addr = args[i]; |
| 271 | new_end_addr = (char *)args[i] + arg_sizes[i]; |
| 272 | } |
| 273 | } |
| 274 | |
| 275 | // If we have combined the entry with a previous one |
| 276 | if (new_begin_addr) { |
| 277 | int id; |
| 278 | if(member_of[j] == -1) { |
| 279 | // We have a new entry |
| 280 | id = num_combined++; |
| 281 | DP("Creating new combined entry %d for old entry %d\n", id, j); |
| 282 | // Initialize new entry |
| 283 | cmb_entries[id].num_members = 1; |
| 284 | cmb_entries[id].base_addr = args_base[j]; |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 285 | if (mod_arg_types[j] & OMP_TGT_OLDMAPTYPE_MAP_PTR) { |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 286 | cmb_entries[id].begin_addr = args_base[j]; |
| 287 | cmb_entries[id].end_addr = (char *)args_base[j] + arg_sizes[j]; |
| 288 | } else { |
| 289 | cmb_entries[id].begin_addr = args[j]; |
| 290 | cmb_entries[id].end_addr = (char *)args[j] + arg_sizes[j]; |
| 291 | } |
| 292 | member_of[j] = id; |
| 293 | } else { |
| 294 | // Reuse existing combined entry |
| 295 | DP("Reusing existing combined entry %d\n", member_of[j]); |
| 296 | id = member_of[j]; |
| 297 | } |
| 298 | |
| 299 | // Update combined entry |
| 300 | DP("Adding entry %d to combined entry %d\n", i, id); |
| 301 | cmb_entries[id].num_members++; |
| 302 | // base_addr stays the same |
| 303 | cmb_entries[id].begin_addr = |
| 304 | std::min(cmb_entries[id].begin_addr, new_begin_addr); |
| 305 | cmb_entries[id].end_addr = |
| 306 | std::max(cmb_entries[id].end_addr, new_end_addr); |
| 307 | member_of[i] = id; |
| 308 | break; |
| 309 | } |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | DP("New entries: %ld combined + %d original\n", num_combined, arg_num); |
| 314 | new_arg_num = arg_num + num_combined; |
| 315 | new_args_base = (void **) malloc(new_arg_num * sizeof(void *)); |
| 316 | new_args = (void **) malloc(new_arg_num * sizeof(void *)); |
| 317 | new_arg_sizes = (int64_t *) malloc(new_arg_num * sizeof(int64_t)); |
| 318 | new_arg_types = (int64_t *) malloc(new_arg_num * sizeof(int64_t)); |
| 319 | |
| 320 | const int64_t alignment = 8; |
| 321 | |
| 322 | int next_id = 0; // next ID |
| 323 | int next_cid = 0; // next combined ID |
| 324 | int *combined_to_new_id = (int *) alloca(num_combined * sizeof(int)); |
| 325 | for (int i = 0; i < arg_num; ++i) { |
| 326 | // It is member_of |
| 327 | if (member_of[i] == next_cid) { |
| 328 | int cid = next_cid++; // ID of this combined entry |
| 329 | int nid = next_id++; // ID of the new (global) entry |
| 330 | combined_to_new_id[cid] = nid; |
| 331 | DP("Combined entry %3d will become new entry %3d\n", cid, nid); |
| 332 | |
| 333 | int64_t padding = (int64_t)cmb_entries[cid].begin_addr % alignment; |
| 334 | if (padding) { |
| 335 | DP("Using a padding of %" PRId64 " for begin address " DPxMOD "\n", |
| 336 | padding, DPxPTR(cmb_entries[cid].begin_addr)); |
| 337 | cmb_entries[cid].begin_addr = |
| 338 | (char *)cmb_entries[cid].begin_addr - padding; |
| 339 | } |
| 340 | |
| 341 | new_args_base[nid] = cmb_entries[cid].base_addr; |
| 342 | new_args[nid] = cmb_entries[cid].begin_addr; |
| 343 | new_arg_sizes[nid] = (int64_t) ((char *)cmb_entries[cid].end_addr - |
| 344 | (char *)cmb_entries[cid].begin_addr); |
| 345 | new_arg_types[nid] = OMP_TGT_MAPTYPE_TARGET_PARAM; |
| 346 | DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", " |
| 347 | "size %" PRId64 ", type 0x%" PRIx64 "\n", nid, |
| 348 | DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid], |
| 349 | new_arg_types[nid]); |
| 350 | } else if (member_of[i] != -1) { |
| 351 | DP("Combined entry %3d has been encountered before, do nothing\n", |
| 352 | member_of[i]); |
| 353 | } |
| 354 | |
| 355 | // Now that the combined entry (the one the old entry was a member of) has |
| 356 | // been inserted into the new arguments list, proceed with the old entry. |
| 357 | int nid = next_id++; |
| 358 | DP("Old entry %3d will become new entry %3d\n", i, nid); |
| 359 | |
| 360 | new_args_base[nid] = args_base[i]; |
| 361 | new_args[nid] = args[i]; |
| 362 | new_arg_sizes[nid] = arg_sizes[i]; |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 363 | int64_t old_type = mod_arg_types[i]; |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 364 | |
| 365 | if (is_ptr_old[i]) { |
| 366 | // Reset TO and FROM flags |
| 367 | old_type &= ~(OMP_TGT_OLDMAPTYPE_TO | OMP_TGT_OLDMAPTYPE_FROM); |
| 368 | } |
| 369 | |
| 370 | if (member_of[i] == -1) { |
| 371 | if (!is_target_construct) |
| 372 | old_type &= ~OMP_TGT_MAPTYPE_TARGET_PARAM; |
| 373 | new_arg_types[nid] = old_type; |
| 374 | DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64 |
| 375 | ", type 0x%" PRIx64 " (old entry %d not MEMBER_OF)\n", nid, |
| 376 | DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid], |
| 377 | new_arg_types[nid], i); |
| 378 | } else { |
| 379 | // Old entry is not FIRST_MAP |
| 380 | old_type &= ~OMP_TGT_OLDMAPTYPE_FIRST_MAP; |
| 381 | // Add MEMBER_OF |
| 382 | int new_member_of = combined_to_new_id[member_of[i]]; |
| 383 | old_type |= ((int64_t)new_member_of + 1) << 48; |
| 384 | new_arg_types[nid] = old_type; |
| 385 | DP("Entry %3d: base_addr " DPxMOD ", begin_addr " DPxMOD ", size %" PRId64 |
| 386 | ", type 0x%" PRIx64 " (old entry %d MEMBER_OF %d)\n", nid, |
| 387 | DPxPTR(new_args_base[nid]), DPxPTR(new_args[nid]), new_arg_sizes[nid], |
| 388 | new_arg_types[nid], i, new_member_of); |
| 389 | } |
| 390 | } |
| 391 | } |
| 392 | |
/// Undo translate_map(): copy base addresses from the translated list back
/// into the caller's args_base[] and free() the four translated arrays.
///
/// Old entry i is read from translated slot i + (new_arg_num - arg_num).
/// Nothing is copied or freed when new_arg_num is not positive.
static void cleanup_map(int32_t new_arg_num, void **new_args_base,
    void **new_args, int64_t *new_arg_sizes, int64_t *new_arg_types,
    int32_t arg_num, void **args_base) {
  // No translation took place, hence nothing was allocated.
  if (new_arg_num <= 0)
    return;

  // Number of extra entries the translation put in front of the old ones.
  const int shift = new_arg_num - arg_num;

  // Restore old base addresses
  int32_t idx = 0;
  while (idx < arg_num) {
    args_base[idx] = new_args_base[shift + idx];
    ++idx;
  }

  free(new_args_base);
  free(new_args);
  free(new_arg_sizes);
  free(new_arg_types);
}
| 408 | |
| 409 | static short member_of(int64_t type) { |
| 410 | return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1; |
| 411 | } |
| 412 | |
| 413 | /// Internal function to do the mapping and transfer the data to the device |
| 414 | static int target_data_begin(DeviceTy &Device, int32_t arg_num, |
| 415 | void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { |
| 416 | // process each input. |
| 417 | int rc = OFFLOAD_SUCCESS; |
| 418 | for (int32_t i = 0; i < arg_num; ++i) { |
| 419 | // Ignore private variables and arrays - there is no mapping for them. |
| 420 | if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || |
| 421 | (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) |
| 422 | continue; |
| 423 | |
| 424 | void *HstPtrBegin = args[i]; |
| 425 | void *HstPtrBase = args_base[i]; |
| 426 | // Address of pointer on the host and device, respectively. |
| 427 | void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin; |
| 428 | bool IsNew, Pointer_IsNew; |
| 429 | bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT; |
| 430 | bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF); |
| 431 | if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { |
| 432 | DP("Has a pointer entry: \n"); |
| 433 | // base is address of pointer. |
| 434 | Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase, |
| 435 | sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef); |
| 436 | if (!Pointer_TgtPtrBegin) { |
| 437 | DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " |
| 438 | "illegal mapping).\n"); |
| 439 | } |
| 440 | DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new" |
| 441 | "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin), |
| 442 | (Pointer_IsNew ? "" : " not")); |
| 443 | Pointer_HstPtrBegin = HstPtrBase; |
| 444 | // modify current entry. |
| 445 | HstPtrBase = *(void **)HstPtrBase; |
| 446 | UpdateRef = true; // subsequently update ref count of pointee |
| 447 | } |
| 448 | |
| 449 | void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, |
| 450 | arg_sizes[i], IsNew, IsImplicit, UpdateRef); |
| 451 | if (!TgtPtrBegin && arg_sizes[i]) { |
| 452 | // If arg_sizes[i]==0, then the argument is a pointer to NULL, so |
| 453 | // getOrAlloc() returning NULL is not an error. |
| 454 | DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " |
| 455 | "illegal mapping).\n"); |
| 456 | } |
| 457 | DP("There are %" PRId64 " bytes allocated at target address " DPxMOD |
| 458 | " - is%s new\n", arg_sizes[i], DPxPTR(TgtPtrBegin), |
| 459 | (IsNew ? "" : " not")); |
| 460 | |
| 461 | if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) { |
| 462 | void *ret_ptr; |
| 463 | if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) |
| 464 | ret_ptr = Pointer_TgtPtrBegin; |
| 465 | else { |
| 466 | bool IsLast; // not used |
| 467 | ret_ptr = Device.getTgtPtrBegin(HstPtrBegin, 0, IsLast, false); |
| 468 | } |
| 469 | |
| 470 | DP("Returning device pointer " DPxMOD "\n", DPxPTR(ret_ptr)); |
| 471 | args_base[i] = ret_ptr; |
| 472 | } |
| 473 | |
| 474 | if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { |
| 475 | bool copy = false; |
| 476 | if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { |
| 477 | copy = true; |
| 478 | } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) { |
| 479 | // Copy data only if the "parent" struct has RefCount==1. |
| 480 | short parent_idx = member_of(arg_types[i]); |
| 481 | long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); |
| 482 | assert(parent_rc > 0 && "parent struct not found"); |
| 483 | if (parent_rc == 1) { |
| 484 | copy = true; |
| 485 | } |
| 486 | } |
| 487 | |
| 488 | if (copy) { |
| 489 | DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", |
| 490 | arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); |
| 491 | int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]); |
| 492 | if (rt != OFFLOAD_SUCCESS) { |
| 493 | DP("Copying data to device failed.\n"); |
| 494 | rc = OFFLOAD_FAIL; |
| 495 | } |
| 496 | } |
| 497 | } |
| 498 | |
| 499 | if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { |
| 500 | DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", |
| 501 | DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); |
| 502 | uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; |
| 503 | void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); |
| 504 | int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase, |
| 505 | sizeof(void *)); |
| 506 | if (rt != OFFLOAD_SUCCESS) { |
| 507 | DP("Copying data to device failed.\n"); |
| 508 | rc = OFFLOAD_FAIL; |
| 509 | } |
| 510 | // create shadow pointers for this entry |
| 511 | Device.ShadowMtx.lock(); |
| 512 | Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase, |
| 513 | Pointer_TgtPtrBegin, TgtPtrBase}; |
| 514 | Device.ShadowMtx.unlock(); |
| 515 | } |
| 516 | } |
| 517 | |
| 518 | return rc; |
| 519 | } |
| 520 | |
// "nowait" flavour of __tgt_target_data_begin. If any dependences are
// announced (depNum + noAliasDepNum > 0) it first calls
// __kmpc_omp_taskwait(NULL, 0), then performs the regular
// __tgt_target_data_begin call with the same mapping arguments.
// depList and noAliasDepList themselves are not inspected here.
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 521 | EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, |
| 522 | void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 523 | int32_t depNum, void *depList, int32_t noAliasDepNum, |
| 524 | void *noAliasDepList) { |
| 525 | if (depNum + noAliasDepNum > 0) |
| 526 | __kmpc_omp_taskwait(NULL, 0); |
| 527 | |
| 528 | __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes, |
| 529 | arg_types); |
| 530 | } |
| 531 | |
| 532 | /// creates host-to-target data mapping, stores it in the |
| 533 | /// libomptarget.so internal structure (an entry in a stack of data maps) |
| 534 | /// and passes the data to the device. |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 535 | EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, |
| 536 | void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { |
| 537 | DP("Entering data begin region for device %ld with %d mappings\n", device_id, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 538 | arg_num); |
| 539 | |
| 540 | // No devices available? |
| 541 | if (device_id == OFFLOAD_DEVICE_DEFAULT) { |
| 542 | device_id = omp_get_default_device(); |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 543 | DP("Use default device id %ld\n", device_id); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 544 | } |
| 545 | |
| 546 | if (CheckDevice(device_id) != OFFLOAD_SUCCESS) { |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 547 | DP("Failed to get device %ld ready\n", device_id); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 548 | return; |
| 549 | } |
| 550 | |
| 551 | DeviceTy& Device = Devices[device_id]; |
| 552 | |
| 553 | // Translate maps |
| 554 | int32_t new_arg_num; |
| 555 | void **new_args_base; |
| 556 | void **new_args; |
| 557 | int64_t *new_arg_sizes; |
| 558 | int64_t *new_arg_types; |
| 559 | translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, |
| 560 | new_args_base, new_args, new_arg_sizes, new_arg_types, false); |
| 561 | |
| 562 | //target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); |
| 563 | target_data_begin(Device, new_arg_num, new_args_base, new_args, new_arg_sizes, |
| 564 | new_arg_types); |
| 565 | |
| 566 | // Cleanup translation memory |
| 567 | cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, |
| 568 | new_arg_types, arg_num, args_base); |
| 569 | } |
| 570 | |
| 571 | /// Internal function to undo the mapping and retrieve the data from the device. |
| 572 | static int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, |
| 573 | void **args, int64_t *arg_sizes, int64_t *arg_types) { |
| 574 | int rc = OFFLOAD_SUCCESS; |
| 575 | // process each input. |
| 576 | for (int32_t i = arg_num - 1; i >= 0; --i) { |
| 577 | // Ignore private variables and arrays - there is no mapping for them. |
| 578 | // Also, ignore the use_device_ptr directive, it has no effect here. |
| 579 | if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || |
| 580 | (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) |
| 581 | continue; |
| 582 | |
| 583 | void *HstPtrBegin = args[i]; |
| 584 | bool IsLast; |
| 585 | bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) || |
| 586 | (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ); |
| 587 | bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE; |
| 588 | |
| 589 | // If PTR_AND_OBJ, HstPtrBegin is address of pointee |
| 590 | void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast, |
| 591 | UpdateRef); |
| 592 | DP("There are %" PRId64 " bytes allocated at target address " DPxMOD |
| 593 | " - is%s last\n", arg_sizes[i], DPxPTR(TgtPtrBegin), |
| 594 | (IsLast ? "" : " not")); |
| 595 | |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 596 | bool DelEntry = IsLast || ForceDelete; |
| 597 | |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 598 | if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && |
| 599 | !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { |
George Rokos | 15a6e7d | 2017-02-15 20:45:37 +0000 | [diff] [blame] | 600 | DelEntry = false; // protect parent struct from being deallocated |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 601 | } |
| 602 | |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 603 | if ((arg_types[i] & OMP_TGT_MAPTYPE_FROM) || DelEntry) { |
| 604 | // Move data back to the host |
| 605 | if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { |
| 606 | bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS; |
| 607 | bool CopyMember = false; |
| 608 | if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && |
| 609 | !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { |
| 610 | // Copy data only if the "parent" struct has RefCount==1. |
| 611 | short parent_idx = member_of(arg_types[i]); |
| 612 | long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); |
| 613 | assert(parent_rc > 0 && "parent struct not found"); |
| 614 | if (parent_rc == 1) { |
| 615 | CopyMember = true; |
| 616 | } |
| 617 | } |
| 618 | |
| 619 | if (DelEntry || Always || CopyMember) { |
| 620 | DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", |
| 621 | arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); |
| 622 | int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, arg_sizes[i]); |
| 623 | if (rt != OFFLOAD_SUCCESS) { |
| 624 | DP("Copying data from device failed.\n"); |
| 625 | rc = OFFLOAD_FAIL; |
| 626 | } |
| 627 | } |
| 628 | } |
| 629 | |
| 630 | // If we copied back to the host a struct/array containing pointers, we |
| 631 | // need to restore the original host pointer values from their shadow |
| 632 | // copies. If the struct is going to be deallocated, remove any remaining |
| 633 | // shadow pointer entries for this struct. |
| 634 | uintptr_t lb = (uintptr_t) HstPtrBegin; |
| 635 | uintptr_t ub = (uintptr_t) HstPtrBegin + arg_sizes[i]; |
| 636 | Device.ShadowMtx.lock(); |
| 637 | for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); |
| 638 | it != Device.ShadowPtrMap.end(); ++it) { |
| 639 | void **ShadowHstPtrAddr = (void**) it->first; |
| 640 | |
| 641 | // An STL map is sorted on its keys; use this property |
| 642 | // to quickly determine when to break out of the loop. |
| 643 | if ((uintptr_t) ShadowHstPtrAddr < lb) |
| 644 | continue; |
| 645 | if ((uintptr_t) ShadowHstPtrAddr >= ub) |
| 646 | break; |
| 647 | |
| 648 | // If we copied the struct to the host, we need to restore the pointer. |
| 649 | if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { |
| 650 | DP("Restoring original host pointer value " DPxMOD " for host " |
| 651 | "pointer " DPxMOD "\n", DPxPTR(it->second.HstPtrVal), |
| 652 | DPxPTR(ShadowHstPtrAddr)); |
| 653 | *ShadowHstPtrAddr = it->second.HstPtrVal; |
| 654 | } |
| 655 | // If the struct is to be deallocated, remove the shadow entry. |
| 656 | if (DelEntry) { |
| 657 | DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr)); |
| 658 | Device.ShadowPtrMap.erase(it); |
| 659 | } |
| 660 | } |
| 661 | Device.ShadowMtx.unlock(); |
| 662 | |
| 663 | // Deallocate map |
| 664 | if (DelEntry) { |
| 665 | int rt = Device.deallocTgtPtr(HstPtrBegin, arg_sizes[i], ForceDelete); |
| 666 | if (rt != OFFLOAD_SUCCESS) { |
| 667 | DP("Deallocating data from device failed.\n"); |
| 668 | rc = OFFLOAD_FAIL; |
| 669 | } |
| 670 | } |
| 671 | } |
| 672 | } |
| 673 | |
| 674 | return rc; |
| 675 | } |
| 676 | |
| 677 | /// passes data from the target, releases target memory and destroys |
| 678 | /// the host-target mapping (top entry from the stack of data maps) |
| 679 | /// created by the last __tgt_target_data_begin. |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 680 | EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, |
| 681 | void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 682 | DP("Entering data end region with %d mappings\n", arg_num); |
| 683 | |
| 684 | // No devices available? |
| 685 | if (device_id == OFFLOAD_DEVICE_DEFAULT) { |
| 686 | device_id = omp_get_default_device(); |
| 687 | } |
| 688 | |
| 689 | RTLsMtx.lock(); |
| 690 | size_t Devices_size = Devices.size(); |
| 691 | RTLsMtx.unlock(); |
| 692 | if (Devices_size <= (size_t)device_id) { |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 693 | DP("Device ID %ld does not have a matching RTL.\n", device_id); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 694 | return; |
| 695 | } |
| 696 | |
| 697 | DeviceTy &Device = Devices[device_id]; |
| 698 | if (!Device.IsInit) { |
| 699 | DP("uninit device: ignore"); |
| 700 | return; |
| 701 | } |
| 702 | |
| 703 | // Translate maps |
| 704 | int32_t new_arg_num; |
| 705 | void **new_args_base; |
| 706 | void **new_args; |
| 707 | int64_t *new_arg_sizes; |
| 708 | int64_t *new_arg_types; |
| 709 | translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, |
| 710 | new_args_base, new_args, new_arg_sizes, new_arg_types, false); |
| 711 | |
| 712 | //target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); |
| 713 | target_data_end(Device, new_arg_num, new_args_base, new_args, new_arg_sizes, |
| 714 | new_arg_types); |
| 715 | |
| 716 | // Cleanup translation memory |
| 717 | cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, |
| 718 | new_arg_types, arg_num, args_base); |
| 719 | } |
| 720 | |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 721 | EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, |
| 722 | void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 723 | int32_t depNum, void *depList, int32_t noAliasDepNum, |
| 724 | void *noAliasDepList) { |
| 725 | if (depNum + noAliasDepNum > 0) |
| 726 | __kmpc_omp_taskwait(NULL, 0); |
| 727 | |
| 728 | __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes, |
| 729 | arg_types); |
| 730 | } |
| 731 | |
| 732 | /// passes data to/from the target. |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 733 | EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, |
| 734 | void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 735 | DP("Entering data update with %d mappings\n", arg_num); |
| 736 | |
| 737 | // No devices available? |
| 738 | if (device_id == OFFLOAD_DEVICE_DEFAULT) { |
| 739 | device_id = omp_get_default_device(); |
| 740 | } |
| 741 | |
| 742 | if (CheckDevice(device_id) != OFFLOAD_SUCCESS) { |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 743 | DP("Failed to get device %ld ready\n", device_id); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 744 | return; |
| 745 | } |
| 746 | |
| 747 | DeviceTy& Device = Devices[device_id]; |
| 748 | |
| 749 | // process each input. |
| 750 | for (int32_t i = 0; i < arg_num; ++i) { |
| 751 | if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || |
| 752 | (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) |
| 753 | continue; |
| 754 | |
| 755 | void *HstPtrBegin = args[i]; |
| 756 | int64_t MapSize = arg_sizes[i]; |
| 757 | bool IsLast; |
| 758 | void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast, |
| 759 | false); |
| 760 | |
| 761 | if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { |
| 762 | DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", |
| 763 | arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); |
| 764 | Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); |
| 765 | |
| 766 | uintptr_t lb = (uintptr_t) HstPtrBegin; |
| 767 | uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; |
| 768 | Device.ShadowMtx.lock(); |
| 769 | for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); |
| 770 | it != Device.ShadowPtrMap.end(); ++it) { |
| 771 | void **ShadowHstPtrAddr = (void**) it->first; |
| 772 | if ((uintptr_t) ShadowHstPtrAddr < lb) |
| 773 | continue; |
| 774 | if ((uintptr_t) ShadowHstPtrAddr >= ub) |
| 775 | break; |
| 776 | DP("Restoring original host pointer value " DPxMOD " for host pointer " |
| 777 | DPxMOD "\n", DPxPTR(it->second.HstPtrVal), |
| 778 | DPxPTR(ShadowHstPtrAddr)); |
| 779 | *ShadowHstPtrAddr = it->second.HstPtrVal; |
| 780 | } |
| 781 | Device.ShadowMtx.unlock(); |
| 782 | } |
| 783 | |
| 784 | if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { |
| 785 | DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", |
| 786 | arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); |
| 787 | Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); |
| 788 | |
| 789 | uintptr_t lb = (uintptr_t) HstPtrBegin; |
| 790 | uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; |
| 791 | Device.ShadowMtx.lock(); |
| 792 | for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); |
| 793 | it != Device.ShadowPtrMap.end(); ++it) { |
| 794 | void **ShadowHstPtrAddr = (void**) it->first; |
| 795 | if ((uintptr_t) ShadowHstPtrAddr < lb) |
| 796 | continue; |
| 797 | if ((uintptr_t) ShadowHstPtrAddr >= ub) |
| 798 | break; |
| 799 | DP("Restoring original target pointer value " DPxMOD " for target " |
| 800 | "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal), |
| 801 | DPxPTR(it->second.TgtPtrAddr)); |
| 802 | Device.data_submit(it->second.TgtPtrAddr, |
| 803 | &it->second.TgtPtrVal, sizeof(void *)); |
| 804 | } |
| 805 | Device.ShadowMtx.unlock(); |
| 806 | } |
| 807 | } |
| 808 | } |
| 809 | |
| 810 | EXTERN void __tgt_target_data_update_nowait( |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 811 | int64_t device_id, int32_t arg_num, void **args_base, void **args, |
| 812 | int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 813 | int32_t noAliasDepNum, void *noAliasDepList) { |
| 814 | if (depNum + noAliasDepNum > 0) |
| 815 | __kmpc_omp_taskwait(NULL, 0); |
| 816 | |
| 817 | __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes, |
| 818 | arg_types); |
| 819 | } |
| 820 | |
| 821 | /// performs the same actions as data_begin in case arg_num is |
| 822 | /// non-zero and initiates run of the offloaded region on the target platform; |
| 823 | /// if arg_num is non-zero after the region execution is done it also |
| 824 | /// performs the same action as data_update and data_end above. This function |
| 825 | /// returns 0 if it was able to transfer the execution to a target and an |
| 826 | /// integer different from zero otherwise. |
Jonas Hahnfeld | 4332280 | 2017-12-06 21:59:07 +0000 | [diff] [blame] | 827 | int target(int64_t device_id, void *host_ptr, int32_t arg_num, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 828 | void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, |
| 829 | int32_t team_num, int32_t thread_limit, int IsTeamConstruct) { |
| 830 | DeviceTy &Device = Devices[device_id]; |
| 831 | |
| 832 | // Find the table information in the map or look it up in the translation |
| 833 | // tables. |
| 834 | TableMap *TM = 0; |
| 835 | TblMapMtx.lock(); |
| 836 | HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr); |
| 837 | if (TableMapIt == HostPtrToTableMap.end()) { |
| 838 | // We don't have a map. So search all the registered libraries. |
| 839 | TrlTblMtx.lock(); |
| 840 | for (HostEntriesBeginToTransTableTy::iterator |
| 841 | ii = HostEntriesBeginToTransTable.begin(), |
| 842 | ie = HostEntriesBeginToTransTable.end(); |
| 843 | !TM && ii != ie; ++ii) { |
| 844 | // get the translation table (which contains all the good info). |
| 845 | TranslationTable *TransTable = &ii->second; |
| 846 | // iterate over all the host table entries to see if we can locate the |
| 847 | // host_ptr. |
| 848 | __tgt_offload_entry *begin = TransTable->HostTable.EntriesBegin; |
| 849 | __tgt_offload_entry *end = TransTable->HostTable.EntriesEnd; |
| 850 | __tgt_offload_entry *cur = begin; |
| 851 | for (uint32_t i = 0; cur < end; ++cur, ++i) { |
| 852 | if (cur->addr != host_ptr) |
| 853 | continue; |
| 854 | // we got a match, now fill the HostPtrToTableMap so that we |
| 855 | // may avoid this search next time. |
| 856 | TM = &HostPtrToTableMap[host_ptr]; |
| 857 | TM->Table = TransTable; |
| 858 | TM->Index = i; |
| 859 | break; |
| 860 | } |
| 861 | } |
| 862 | TrlTblMtx.unlock(); |
| 863 | } else { |
| 864 | TM = &TableMapIt->second; |
| 865 | } |
| 866 | TblMapMtx.unlock(); |
| 867 | |
| 868 | // No map for this host pointer found! |
| 869 | if (!TM) { |
| 870 | DP("Host ptr " DPxMOD " does not have a matching target pointer.\n", |
| 871 | DPxPTR(host_ptr)); |
| 872 | return OFFLOAD_FAIL; |
| 873 | } |
| 874 | |
| 875 | // get target table. |
| 876 | TrlTblMtx.lock(); |
| 877 | assert(TM->Table->TargetsTable.size() > (size_t)device_id && |
| 878 | "Not expecting a device ID outside the table's bounds!"); |
| 879 | __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id]; |
| 880 | TrlTblMtx.unlock(); |
| 881 | assert(TargetTable && "Global data has not been mapped\n"); |
| 882 | |
| 883 | // Move data to device. |
| 884 | int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, |
| 885 | arg_types); |
| 886 | |
| 887 | if (rc != OFFLOAD_SUCCESS) { |
| 888 | DP("Call to target_data_begin failed, skipping target execution.\n"); |
| 889 | // Call target_data_end to dealloc whatever target_data_begin allocated |
| 890 | // and return OFFLOAD_FAIL. |
| 891 | target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); |
| 892 | return OFFLOAD_FAIL; |
| 893 | } |
| 894 | |
| 895 | std::vector<void *> tgt_args; |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 896 | std::vector<ptrdiff_t> tgt_offsets; |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 897 | |
| 898 | // List of (first-)private arrays allocated for this target region |
| 899 | std::vector<void *> fpArrays; |
| 900 | |
| 901 | for (int32_t i = 0; i < arg_num; ++i) { |
| 902 | if (!(arg_types[i] & OMP_TGT_MAPTYPE_TARGET_PARAM)) { |
| 903 | // This is not a target parameter, do not push it into tgt_args. |
| 904 | continue; |
| 905 | } |
| 906 | void *HstPtrBegin = args[i]; |
| 907 | void *HstPtrBase = args_base[i]; |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 908 | void *TgtPtrBegin; |
| 909 | ptrdiff_t TgtBaseOffset; |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 910 | bool IsLast; // unused. |
| 911 | if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) { |
| 912 | DP("Forwarding first-private value " DPxMOD " to the target construct\n", |
| 913 | DPxPTR(HstPtrBase)); |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 914 | TgtPtrBegin = HstPtrBase; |
| 915 | TgtBaseOffset = 0; |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 916 | } else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) { |
| 917 | // Allocate memory for (first-)private array |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 918 | TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID, |
| 919 | arg_sizes[i], HstPtrBegin); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 920 | if (!TgtPtrBegin) { |
| 921 | DP ("Data allocation for %sprivate array " DPxMOD " failed\n", |
| 922 | (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""), |
| 923 | DPxPTR(HstPtrBegin)); |
| 924 | rc = OFFLOAD_FAIL; |
| 925 | break; |
| 926 | } else { |
| 927 | fpArrays.push_back(TgtPtrBegin); |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 928 | TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; |
Samuel Antao | 8933ffb | 2017-06-09 16:46:07 +0000 | [diff] [blame] | 929 | #ifdef OMPTARGET_DEBUG |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 930 | void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 931 | DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for " |
| 932 | "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n", |
| 933 | arg_sizes[i], DPxPTR(TgtPtrBegin), |
| 934 | (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""), |
| 935 | DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase)); |
Samuel Antao | 8933ffb | 2017-06-09 16:46:07 +0000 | [diff] [blame] | 936 | #endif |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 937 | // If first-private, copy data from host |
| 938 | if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { |
| 939 | int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]); |
| 940 | if (rt != OFFLOAD_SUCCESS) { |
| 941 | DP ("Copying data to device failed.\n"); |
| 942 | rc = OFFLOAD_FAIL; |
| 943 | break; |
| 944 | } |
| 945 | } |
| 946 | } |
| 947 | } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 948 | TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast, |
| 949 | false); |
| 950 | TgtBaseOffset = 0; // no offset for ptrs. |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 951 | DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to " |
| 952 | "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase), |
| 953 | DPxPTR(HstPtrBase)); |
| 954 | } else { |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 955 | TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast, |
| 956 | false); |
| 957 | TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; |
Samuel Antao | 8933ffb | 2017-06-09 16:46:07 +0000 | [diff] [blame] | 958 | #ifdef OMPTARGET_DEBUG |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 959 | void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 960 | DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n", |
| 961 | DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin)); |
Samuel Antao | 8933ffb | 2017-06-09 16:46:07 +0000 | [diff] [blame] | 962 | #endif |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 963 | } |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 964 | tgt_args.push_back(TgtPtrBegin); |
| 965 | tgt_offsets.push_back(TgtBaseOffset); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 966 | } |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 967 | |
| 968 | assert(tgt_args.size() == tgt_offsets.size() && |
| 969 | "Size mismatch in arguments and offsets"); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 970 | |
| 971 | // Pop loop trip count |
| 972 | uint64_t ltc = Device.loopTripCnt; |
| 973 | Device.loopTripCnt = 0; |
| 974 | |
| 975 | // Launch device execution. |
| 976 | if (rc == OFFLOAD_SUCCESS) { |
| 977 | DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", |
| 978 | TargetTable->EntriesBegin[TM->Index].name, |
| 979 | DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index); |
| 980 | if (IsTeamConstruct) { |
| 981 | rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr, |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 982 | &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num, |
| 983 | thread_limit, ltc); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 984 | } else { |
| 985 | rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr, |
George Rokos | 1546d31 | 2017-05-10 14:12:36 +0000 | [diff] [blame] | 986 | &tgt_args[0], &tgt_offsets[0], tgt_args.size()); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 987 | } |
| 988 | } else { |
| 989 | DP("Errors occurred while obtaining target arguments, skipping kernel " |
| 990 | "execution\n"); |
| 991 | } |
| 992 | |
| 993 | // Deallocate (first-)private arrays |
| 994 | for (auto it : fpArrays) { |
| 995 | int rt = Device.RTL->data_delete(Device.RTLDeviceID, it); |
| 996 | if (rt != OFFLOAD_SUCCESS) { |
| 997 | DP("Deallocation of (first-)private arrays failed.\n"); |
| 998 | rc = OFFLOAD_FAIL; |
| 999 | } |
| 1000 | } |
| 1001 | |
| 1002 | // Move data from device. |
| 1003 | int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes, |
| 1004 | arg_types); |
| 1005 | |
| 1006 | if (rt != OFFLOAD_SUCCESS) { |
| 1007 | DP("Call to target_data_end failed.\n"); |
| 1008 | rc = OFFLOAD_FAIL; |
| 1009 | } |
| 1010 | |
| 1011 | return rc; |
| 1012 | } |
| 1013 | |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1014 | EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, |
| 1015 | void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { |
| 1016 | DP("Entering target region with entry point " DPxMOD " and device Id %ld\n", |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1017 | DPxPTR(host_ptr), device_id); |
| 1018 | |
| 1019 | if (device_id == OFFLOAD_DEVICE_DEFAULT) { |
| 1020 | device_id = omp_get_default_device(); |
| 1021 | } |
| 1022 | |
| 1023 | if (CheckDevice(device_id) != OFFLOAD_SUCCESS) { |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1024 | DP("Failed to get device %ld ready\n", device_id); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1025 | return OFFLOAD_FAIL; |
| 1026 | } |
| 1027 | |
| 1028 | // Translate maps |
| 1029 | int32_t new_arg_num; |
| 1030 | void **new_args_base; |
| 1031 | void **new_args; |
| 1032 | int64_t *new_arg_sizes; |
| 1033 | int64_t *new_arg_types; |
| 1034 | translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, |
| 1035 | new_args_base, new_args, new_arg_sizes, new_arg_types, true); |
| 1036 | |
| 1037 | //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, |
| 1038 | // arg_types, 0, 0, false /*team*/, false /*recursive*/); |
| 1039 | int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args, |
| 1040 | new_arg_sizes, new_arg_types, 0, 0, false /*team*/); |
| 1041 | |
| 1042 | // Cleanup translation memory |
| 1043 | cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, |
| 1044 | new_arg_types, arg_num, args_base); |
| 1045 | |
| 1046 | return rc; |
| 1047 | } |
| 1048 | |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1049 | EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1050 | int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1051 | int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1052 | void *noAliasDepList) { |
| 1053 | if (depNum + noAliasDepNum > 0) |
| 1054 | __kmpc_omp_taskwait(NULL, 0); |
| 1055 | |
| 1056 | return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, |
| 1057 | arg_types); |
| 1058 | } |
| 1059 | |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1060 | EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1061 | int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1062 | int64_t *arg_types, int32_t team_num, int32_t thread_limit) { |
| 1063 | DP("Entering target region with entry point " DPxMOD " and device Id %ld\n", |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1064 | DPxPTR(host_ptr), device_id); |
| 1065 | |
| 1066 | if (device_id == OFFLOAD_DEVICE_DEFAULT) { |
| 1067 | device_id = omp_get_default_device(); |
| 1068 | } |
| 1069 | |
| 1070 | if (CheckDevice(device_id) != OFFLOAD_SUCCESS) { |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1071 | DP("Failed to get device %ld ready\n", device_id); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1072 | return OFFLOAD_FAIL; |
| 1073 | } |
| 1074 | |
| 1075 | // Translate maps |
| 1076 | int32_t new_arg_num; |
| 1077 | void **new_args_base; |
| 1078 | void **new_args; |
| 1079 | int64_t *new_arg_sizes; |
| 1080 | int64_t *new_arg_types; |
| 1081 | translate_map(arg_num, args_base, args, arg_sizes, arg_types, new_arg_num, |
| 1082 | new_args_base, new_args, new_arg_sizes, new_arg_types, true); |
| 1083 | |
| 1084 | //return target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, |
| 1085 | // arg_types, team_num, thread_limit, true /*team*/, |
| 1086 | // false /*recursive*/); |
| 1087 | int rc = target(device_id, host_ptr, new_arg_num, new_args_base, new_args, |
| 1088 | new_arg_sizes, new_arg_types, team_num, thread_limit, true /*team*/); |
| 1089 | |
| 1090 | // Cleanup translation memory |
| 1091 | cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, |
| 1092 | new_arg_types, arg_num, args_base); |
| 1093 | |
| 1094 | return rc; |
| 1095 | } |
| 1096 | |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1097 | EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1098 | int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1099 | int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1100 | void *depList, int32_t noAliasDepNum, void *noAliasDepList) { |
| 1101 | if (depNum + noAliasDepNum > 0) |
| 1102 | __kmpc_omp_taskwait(NULL, 0); |
| 1103 | |
| 1104 | return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args, |
| 1105 | arg_sizes, arg_types, team_num, thread_limit); |
| 1106 | } |
| 1107 | |
| 1108 | |
| 1109 | // The trip count mechanism will be revised - this scheme is not thread-safe. |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1110 | EXTERN void __kmpc_push_target_tripcount(int64_t device_id, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1111 | uint64_t loop_tripcount) { |
| 1112 | if (device_id == OFFLOAD_DEVICE_DEFAULT) { |
| 1113 | device_id = omp_get_default_device(); |
| 1114 | } |
| 1115 | |
| 1116 | if (CheckDevice(device_id) != OFFLOAD_SUCCESS) { |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1117 | DP("Failed to get device %ld ready\n", device_id); |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1118 | return; |
| 1119 | } |
| 1120 | |
George Rokos | b92dbb4 | 2017-11-21 18:26:41 +0000 | [diff] [blame] | 1121 | DP("__kmpc_push_target_tripcount(%ld, %" PRIu64 ")\n", device_id, |
George Rokos | 2467df6 | 2017-01-25 21:27:24 +0000 | [diff] [blame] | 1122 | loop_tripcount); |
| 1123 | Devices[device_id].loopTripCnt = loop_tripcount; |
| 1124 | } |
| 1125 | |