//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
| 9 | |
| 10 | |
| 11 | #include "offload_myo_host.h" |
| 12 | #include <errno.h> |
| 13 | #include <malloc.h> |
| 14 | #include "offload_host.h" |
| 15 | |
| 16 | #if defined(LINUX) || defined(FREEBSD) |
| 17 | #include <mm_malloc.h> |
| 18 | #endif |
| 19 | |
// Symbol version passed to DL_sym() when resolving MYO client entry points.
#define MYO_VERSION1    "MYO_1.0"

// Cilk runtime entry points; called to run a cilk_for on the host when the
// loop is not offloaded.
extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t);
extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);

#ifdef TARGET_WINNT

// MYO tables end at the entry whose name equals this marker; entries with a
// null name are skipped by the table walkers in this file.
#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(-1)

#else // TARGET_WINNT

// The Cilk entry points are referenced weakly on this platform.
#pragma weak __cilkrts_cilk_for_32
#pragma weak __cilkrts_cilk_for_64

// MYO tables end at the entry whose name is null.
#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(0)

#endif // TARGET_WINNT
| 35 | |
| 36 | class MyoWrapper { |
| 37 | public: |
| 38 | MyoWrapper() : m_lib_handle(0), m_is_available(false) |
| 39 | {} |
| 40 | |
| 41 | bool is_available() const { |
| 42 | return m_is_available; |
| 43 | } |
| 44 | |
| 45 | bool LoadLibrary(void); |
| 46 | |
| 47 | // unloads the library |
| 48 | void UnloadLibrary(void) { |
| 49 | // if (m_lib_handle != 0) { |
| 50 | // DL_close(m_lib_handle); |
| 51 | // m_lib_handle = 0; |
| 52 | // } |
| 53 | } |
| 54 | |
| 55 | // Wrappers for MYO client functions |
| 56 | void LibInit(void *arg, void *func) const { |
| 57 | OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoinit, |
| 58 | "%s(%p, %p)\n", __func__, arg, func); |
| 59 | CheckResult(__func__, m_lib_init(arg, func)); |
| 60 | } |
| 61 | |
| 62 | void LibFini(void) const { |
| 63 | OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myofini, "%s()\n", __func__); |
| 64 | m_lib_fini(); |
| 65 | } |
| 66 | |
| 67 | void* SharedMalloc(size_t size) const { |
| 68 | OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedmalloc, |
| 69 | "%s(%lld)\n", __func__, size); |
| 70 | return m_shared_malloc(size); |
| 71 | } |
| 72 | |
| 73 | void SharedFree(void *ptr) const { |
| 74 | OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedfree, |
| 75 | "%s(%p)\n", __func__, ptr); |
| 76 | m_shared_free(ptr); |
| 77 | } |
| 78 | |
| 79 | void* SharedAlignedMalloc(size_t size, size_t align) const { |
| 80 | OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedmalloc, |
| 81 | "%s(%lld, %lld)\n", __func__, size, align); |
| 82 | return m_shared_aligned_malloc(size, align); |
| 83 | } |
| 84 | |
| 85 | void SharedAlignedFree(void *ptr) const { |
| 86 | OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedfree, |
| 87 | "%s(%p)\n", __func__, ptr); |
| 88 | m_shared_aligned_free(ptr); |
| 89 | } |
| 90 | |
| 91 | void Acquire(void) const { |
| 92 | OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoacquire, |
| 93 | "%s()\n", __func__); |
| 94 | CheckResult(__func__, m_acquire()); |
| 95 | } |
| 96 | |
| 97 | void Release(void) const { |
| 98 | OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myorelease, |
| 99 | "%s()\n", __func__); |
| 100 | CheckResult(__func__, m_release()); |
| 101 | } |
| 102 | |
| 103 | void HostVarTablePropagate(void *table, int num_entries) const { |
| 104 | OFFLOAD_DEBUG_TRACE(4, "%s(%p, %d)\n", __func__, table, num_entries); |
| 105 | CheckResult(__func__, m_host_var_table_propagate(table, num_entries)); |
| 106 | } |
| 107 | |
| 108 | void HostFptrTableRegister(void *table, int num_entries, |
| 109 | int ordered) const { |
| 110 | OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoregister, |
| 111 | "%s(%p, %d, %d)\n", __func__, table, |
| 112 | num_entries, ordered); |
| 113 | CheckResult(__func__, |
| 114 | m_host_fptr_table_register(table, num_entries, ordered)); |
| 115 | } |
| 116 | |
| 117 | void RemoteThunkCall(void *thunk, void *args, int device) { |
| 118 | OFFLOAD_DEBUG_TRACE(4, "%s(%p, %p, %d)\n", __func__, thunk, args, |
| 119 | device); |
| 120 | CheckResult(__func__, m_remote_thunk_call(thunk, args, device)); |
| 121 | } |
| 122 | |
| 123 | MyoiRFuncCallHandle RemoteCall(char *func, void *args, int device) const { |
| 124 | OFFLOAD_DEBUG_TRACE(4, "%s(%s, %p, %d)\n", __func__, func, args, |
| 125 | device); |
| 126 | return m_remote_call(func, args, device); |
| 127 | } |
| 128 | |
| 129 | void GetResult(MyoiRFuncCallHandle handle) const { |
| 130 | OFFLOAD_DEBUG_TRACE(4, "%s(%p)\n", __func__, handle); |
| 131 | CheckResult(__func__, m_get_result(handle)); |
| 132 | } |
| 133 | |
| 134 | private: |
| 135 | void CheckResult(const char *func, MyoError error) const { |
| 136 | if (error != MYO_SUCCESS) { |
| 137 | LIBOFFLOAD_ERROR(c_myowrapper_checkresult, func, error); |
| 138 | exit(1); |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | private: |
| 143 | void* m_lib_handle; |
| 144 | bool m_is_available; |
| 145 | |
| 146 | // pointers to functions from myo library |
| 147 | MyoError (*m_lib_init)(void*, void*); |
| 148 | void (*m_lib_fini)(void); |
| 149 | void* (*m_shared_malloc)(size_t); |
| 150 | void (*m_shared_free)(void*); |
| 151 | void* (*m_shared_aligned_malloc)(size_t, size_t); |
| 152 | void (*m_shared_aligned_free)(void*); |
| 153 | MyoError (*m_acquire)(void); |
| 154 | MyoError (*m_release)(void); |
| 155 | MyoError (*m_host_var_table_propagate)(void*, int); |
| 156 | MyoError (*m_host_fptr_table_register)(void*, int, int); |
| 157 | MyoError (*m_remote_thunk_call)(void*, void*, int); |
| 158 | MyoiRFuncCallHandle (*m_remote_call)(char*, void*, int); |
| 159 | MyoError (*m_get_result)(MyoiRFuncCallHandle); |
| 160 | }; |
| 161 | |
| 162 | bool MyoWrapper::LoadLibrary(void) |
| 163 | { |
| 164 | #ifndef TARGET_WINNT |
| 165 | const char *lib_name = "libmyo-client.so"; |
| 166 | #else // TARGET_WINNT |
| 167 | const char *lib_name = "myo-client.dll"; |
| 168 | #endif // TARGET_WINNT |
| 169 | |
| 170 | OFFLOAD_DEBUG_TRACE(2, "Loading MYO library %s ...\n", lib_name); |
| 171 | |
| 172 | m_lib_handle = DL_open(lib_name); |
| 173 | if (m_lib_handle == 0) { |
| 174 | OFFLOAD_DEBUG_TRACE(2, "Failed to load the library. errno = %d\n", |
| 175 | errno); |
| 176 | return false; |
| 177 | } |
| 178 | |
| 179 | m_lib_init = (MyoError (*)(void*, void*)) |
| 180 | DL_sym(m_lib_handle, "myoiLibInit", MYO_VERSION1); |
| 181 | if (m_lib_init == 0) { |
| 182 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 183 | "myoiLibInit"); |
| 184 | UnloadLibrary(); |
| 185 | return false; |
| 186 | } |
| 187 | |
| 188 | m_lib_fini = (void (*)(void)) |
| 189 | DL_sym(m_lib_handle, "myoiLibFini", MYO_VERSION1); |
| 190 | if (m_lib_fini == 0) { |
| 191 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 192 | "myoiLibFini"); |
| 193 | UnloadLibrary(); |
| 194 | return false; |
| 195 | } |
| 196 | |
| 197 | m_shared_malloc = (void* (*)(size_t)) |
| 198 | DL_sym(m_lib_handle, "myoSharedMalloc", MYO_VERSION1); |
| 199 | if (m_shared_malloc == 0) { |
| 200 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 201 | "myoSharedMalloc"); |
| 202 | UnloadLibrary(); |
| 203 | return false; |
| 204 | } |
| 205 | |
| 206 | m_shared_free = (void (*)(void*)) |
| 207 | DL_sym(m_lib_handle, "myoSharedFree", MYO_VERSION1); |
| 208 | if (m_shared_free == 0) { |
| 209 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 210 | "myoSharedFree"); |
| 211 | UnloadLibrary(); |
| 212 | return false; |
| 213 | } |
| 214 | |
| 215 | m_shared_aligned_malloc = (void* (*)(size_t, size_t)) |
| 216 | DL_sym(m_lib_handle, "myoSharedAlignedMalloc", MYO_VERSION1); |
| 217 | if (m_shared_aligned_malloc == 0) { |
| 218 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 219 | "myoSharedAlignedMalloc"); |
| 220 | UnloadLibrary(); |
| 221 | return false; |
| 222 | } |
| 223 | |
| 224 | m_shared_aligned_free = (void (*)(void*)) |
| 225 | DL_sym(m_lib_handle, "myoSharedAlignedFree", MYO_VERSION1); |
| 226 | if (m_shared_aligned_free == 0) { |
| 227 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 228 | "myoSharedAlignedFree"); |
| 229 | UnloadLibrary(); |
| 230 | return false; |
| 231 | } |
| 232 | |
| 233 | m_acquire = (MyoError (*)(void)) |
| 234 | DL_sym(m_lib_handle, "myoAcquire", MYO_VERSION1); |
| 235 | if (m_acquire == 0) { |
| 236 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 237 | "myoAcquire"); |
| 238 | UnloadLibrary(); |
| 239 | return false; |
| 240 | } |
| 241 | |
| 242 | m_release = (MyoError (*)(void)) |
| 243 | DL_sym(m_lib_handle, "myoRelease", MYO_VERSION1); |
| 244 | if (m_release == 0) { |
| 245 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 246 | "myoRelease"); |
| 247 | UnloadLibrary(); |
| 248 | return false; |
| 249 | } |
| 250 | |
| 251 | m_host_var_table_propagate = (MyoError (*)(void*, int)) |
| 252 | DL_sym(m_lib_handle, "myoiHostVarTablePropagate", MYO_VERSION1); |
| 253 | if (m_host_var_table_propagate == 0) { |
| 254 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 255 | "myoiHostVarTablePropagate"); |
| 256 | UnloadLibrary(); |
| 257 | return false; |
| 258 | } |
| 259 | |
| 260 | m_host_fptr_table_register = (MyoError (*)(void*, int, int)) |
| 261 | DL_sym(m_lib_handle, "myoiHostFptrTableRegister", MYO_VERSION1); |
| 262 | if (m_host_fptr_table_register == 0) { |
| 263 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 264 | "myoiHostFptrTableRegister"); |
| 265 | UnloadLibrary(); |
| 266 | return false; |
| 267 | } |
| 268 | |
| 269 | m_remote_thunk_call = (MyoError (*)(void*, void*, int)) |
| 270 | DL_sym(m_lib_handle, "myoiRemoteThunkCall", MYO_VERSION1); |
| 271 | if (m_remote_thunk_call == 0) { |
| 272 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 273 | "myoiRemoteThunkCall"); |
| 274 | UnloadLibrary(); |
| 275 | return false; |
| 276 | } |
| 277 | |
| 278 | m_remote_call = (MyoiRFuncCallHandle (*)(char*, void*, int)) |
| 279 | DL_sym(m_lib_handle, "myoiRemoteCall", MYO_VERSION1); |
| 280 | if (m_remote_call == 0) { |
| 281 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 282 | "myoiRemoteCall"); |
| 283 | UnloadLibrary(); |
| 284 | return false; |
| 285 | } |
| 286 | |
| 287 | m_get_result = (MyoError (*)(MyoiRFuncCallHandle)) |
| 288 | DL_sym(m_lib_handle, "myoiGetResult", MYO_VERSION1); |
| 289 | if (m_get_result == 0) { |
| 290 | OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n", |
| 291 | "myoiGetResult"); |
| 292 | UnloadLibrary(); |
| 293 | return false; |
| 294 | } |
| 295 | |
| 296 | OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n"); |
| 297 | |
| 298 | m_is_available = true; |
| 299 | |
| 300 | return true; |
| 301 | } |
| 302 | |
| 303 | static bool myo_is_available; |
| 304 | static MyoWrapper myo_wrapper; |
| 305 | |
| 306 | struct MyoTable |
| 307 | { |
| 308 | MyoTable(SharedTableEntry *tab, int len) : var_tab(tab), var_tab_len(len) |
| 309 | {} |
| 310 | |
| 311 | SharedTableEntry* var_tab; |
| 312 | int var_tab_len; |
| 313 | }; |
| 314 | |
| 315 | typedef std::list<MyoTable> MyoTableList; |
| 316 | static MyoTableList __myo_table_list; |
| 317 | static mutex_t __myo_table_lock; |
| 318 | static bool __myo_tables = false; |
| 319 | |
| 320 | static void __offload_myo_shared_table_register(SharedTableEntry *entry); |
| 321 | static void __offload_myo_shared_init_table_register(InitTableEntry* entry); |
| 322 | static void __offload_myo_fptr_table_register(FptrTableEntry *entry); |
| 323 | |
| 324 | static void __offload_myoLoadLibrary_once(void) |
| 325 | { |
| 326 | if (__offload_init_library()) { |
| 327 | myo_wrapper.LoadLibrary(); |
| 328 | } |
| 329 | } |
| 330 | |
| 331 | static bool __offload_myoLoadLibrary(void) |
| 332 | { |
| 333 | static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT; |
| 334 | __offload_run_once(&ctrl, __offload_myoLoadLibrary_once); |
| 335 | |
| 336 | return myo_wrapper.is_available(); |
| 337 | } |
| 338 | |
| 339 | static void __offload_myoInit_once(void) |
| 340 | { |
| 341 | if (!__offload_myoLoadLibrary()) { |
| 342 | return; |
| 343 | } |
| 344 | |
| 345 | // initialize all devices |
| 346 | for (int i = 0; i < mic_engines_total; i++) { |
| 347 | mic_engines[i].init(); |
| 348 | } |
| 349 | |
| 350 | // load and initialize MYO library |
| 351 | OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n"); |
| 352 | |
| 353 | COIEVENT events[MIC_ENGINES_MAX]; |
| 354 | MyoiUserParams params[MIC_ENGINES_MAX+1]; |
| 355 | |
| 356 | // load target library to all devices |
| 357 | for (int i = 0; i < mic_engines_total; i++) { |
| 358 | mic_engines[i].init_myo(&events[i]); |
| 359 | |
| 360 | params[i].type = MYOI_USERPARAMS_DEVID; |
| 361 | params[i].nodeid = mic_engines[i].get_physical_index() + 1; |
| 362 | } |
| 363 | |
| 364 | params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG; |
| 365 | |
| 366 | // initialize myo runtime on host |
| 367 | myo_wrapper.LibInit(params, 0); |
| 368 | |
| 369 | // wait for the target init calls to finish |
| 370 | COIRESULT res; |
| 371 | res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0); |
| 372 | if (res != COI_SUCCESS) { |
| 373 | LIBOFFLOAD_ERROR(c_event_wait, res); |
| 374 | exit(1); |
| 375 | } |
| 376 | |
| 377 | myo_is_available = true; |
| 378 | |
| 379 | OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n"); |
| 380 | } |
| 381 | |
| 382 | static bool __offload_myoInit(void) |
| 383 | { |
| 384 | static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT; |
| 385 | __offload_run_once(&ctrl, __offload_myoInit_once); |
| 386 | |
| 387 | // register pending shared var tables |
| 388 | if (myo_is_available && __myo_tables) { |
| 389 | mutex_locker_t locker(__myo_table_lock); |
| 390 | |
| 391 | if (__myo_tables) { |
| 392 | // Register tables with MYO so it can propagate to target. |
| 393 | for(MyoTableList::const_iterator it = __myo_table_list.begin(); |
| 394 | it != __myo_table_list.end(); ++it) { |
| 395 | #ifdef TARGET_WINNT |
| 396 | for (SharedTableEntry *entry = it->var_tab; |
| 397 | entry->varName != MYO_TABLE_END_MARKER(); entry++) { |
| 398 | if (entry->varName == 0) { |
| 399 | continue; |
| 400 | } |
| 401 | myo_wrapper.HostVarTablePropagate(entry, 1); |
| 402 | } |
| 403 | #else // TARGET_WINNT |
| 404 | myo_wrapper.HostVarTablePropagate(it->var_tab, |
| 405 | it->var_tab_len); |
| 406 | #endif // TARGET_WINNT |
| 407 | } |
| 408 | |
| 409 | __myo_table_list.clear(); |
| 410 | __myo_tables = false; |
| 411 | } |
| 412 | } |
| 413 | |
| 414 | return myo_is_available; |
| 415 | } |
| 416 | |
| 417 | static bool shared_table_entries( |
| 418 | SharedTableEntry *entry |
| 419 | ) |
| 420 | { |
| 421 | OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); |
| 422 | |
| 423 | for (; entry->varName != MYO_TABLE_END_MARKER(); entry++) { |
| 424 | #ifdef TARGET_WINNT |
| 425 | if (entry->varName == 0) { |
| 426 | continue; |
| 427 | } |
| 428 | #endif // TARGET_WINNT |
| 429 | |
| 430 | return true; |
| 431 | } |
| 432 | |
| 433 | return false; |
| 434 | } |
| 435 | |
| 436 | static bool fptr_table_entries( |
| 437 | FptrTableEntry *entry |
| 438 | ) |
| 439 | { |
| 440 | OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); |
| 441 | |
| 442 | for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) { |
| 443 | #ifdef TARGET_WINNT |
| 444 | if (entry->funcName == 0) { |
| 445 | continue; |
| 446 | } |
| 447 | #endif // TARGET_WINNT |
| 448 | |
| 449 | return true; |
| 450 | } |
| 451 | |
| 452 | return false; |
| 453 | } |
| 454 | |
| 455 | extern "C" void __offload_myoRegisterTables( |
| 456 | InitTableEntry* init_table, |
| 457 | SharedTableEntry *shared_table, |
| 458 | FptrTableEntry *fptr_table |
| 459 | ) |
| 460 | { |
| 461 | // check whether we need to initialize MYO library. It is |
| 462 | // initialized only if at least one myo table is not empty |
| 463 | if (shared_table_entries(shared_table) || fptr_table_entries(fptr_table)) { |
| 464 | // make sure myo library is loaded |
| 465 | __offload_myoLoadLibrary(); |
| 466 | |
| 467 | // register tables |
| 468 | __offload_myo_shared_table_register(shared_table); |
| 469 | __offload_myo_fptr_table_register(fptr_table); |
| 470 | __offload_myo_shared_init_table_register(init_table); |
| 471 | } |
| 472 | } |
| 473 | |
| 474 | void __offload_myoFini(void) |
| 475 | { |
| 476 | if (myo_is_available) { |
| 477 | OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); |
| 478 | |
| 479 | COIEVENT events[MIC_ENGINES_MAX]; |
| 480 | |
| 481 | // kick off myoiLibFini calls on all devices |
| 482 | for (int i = 0; i < mic_engines_total; i++) { |
| 483 | mic_engines[i].fini_myo(&events[i]); |
| 484 | } |
| 485 | |
| 486 | // cleanup myo runtime on host |
| 487 | myo_wrapper.LibFini(); |
| 488 | |
| 489 | // wait for the target fini calls to finish |
| 490 | COIRESULT res; |
| 491 | res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0); |
| 492 | if (res != COI_SUCCESS) { |
| 493 | LIBOFFLOAD_ERROR(c_event_wait, res); |
| 494 | exit(1); |
| 495 | } |
| 496 | } |
| 497 | } |
| 498 | |
| 499 | static void __offload_myo_shared_table_register( |
| 500 | SharedTableEntry *entry |
| 501 | ) |
| 502 | { |
| 503 | OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); |
| 504 | |
| 505 | SharedTableEntry *start = entry; |
| 506 | int entries = 0; |
| 507 | |
| 508 | // allocate shared memory for vars |
| 509 | for (; entry->varName != MYO_TABLE_END_MARKER(); entry++) { |
| 510 | #ifdef TARGET_WINNT |
| 511 | if (entry->varName == 0) { |
| 512 | OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedTable entry\n"); |
| 513 | continue; |
| 514 | } |
| 515 | #endif // TARGET_WINNT |
| 516 | |
| 517 | OFFLOAD_DEBUG_TRACE(4, "registering MyoSharedTable entry for %s @%p\n", |
| 518 | entry->varName, entry); |
| 519 | |
| 520 | // Invoke the function to create shared memory |
| 521 | reinterpret_cast<void(*)(void)>(entry->sharedAddr)(); |
| 522 | entries++; |
| 523 | } |
| 524 | |
| 525 | // and table to the list if it is not empty |
| 526 | if (entries > 0) { |
| 527 | mutex_locker_t locker(__myo_table_lock); |
| 528 | __myo_table_list.push_back(MyoTable(start, entries)); |
| 529 | __myo_tables = true; |
| 530 | } |
| 531 | } |
| 532 | |
| 533 | static void __offload_myo_shared_init_table_register(InitTableEntry* entry) |
| 534 | { |
| 535 | OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); |
| 536 | |
| 537 | #ifdef TARGET_WINNT |
| 538 | for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) { |
| 539 | if (entry->funcName == 0) { |
| 540 | OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedInit entry\n"); |
| 541 | continue; |
| 542 | } |
| 543 | |
| 544 | // Invoke the function to init the shared memory |
| 545 | entry->func(); |
| 546 | } |
| 547 | #else // TARGET_WINNT |
| 548 | for (; entry->func != 0; entry++) { |
| 549 | // Invoke the function to init the shared memory |
| 550 | entry->func(); |
| 551 | } |
| 552 | #endif // TARGET_WINNT |
| 553 | } |
| 554 | |
| 555 | static void __offload_myo_fptr_table_register( |
| 556 | FptrTableEntry *entry |
| 557 | ) |
| 558 | { |
| 559 | OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry); |
| 560 | |
| 561 | FptrTableEntry *start = entry; |
| 562 | int entries = 0; |
| 563 | |
| 564 | for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) { |
| 565 | #ifdef TARGET_WINNT |
| 566 | if (entry->funcName == 0) { |
| 567 | OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoFptrTable entry\n"); |
| 568 | continue; |
| 569 | } |
| 570 | #endif // TARGET_WINNT |
| 571 | |
| 572 | if (!myo_wrapper.is_available()) { |
| 573 | *(static_cast<void**>(entry->localThunkAddr)) = entry->funcAddr; |
| 574 | } |
| 575 | |
| 576 | OFFLOAD_DEBUG_TRACE(4, "registering MyoFptrTable entry for %s @%p\n", |
| 577 | entry->funcName, entry); |
| 578 | |
| 579 | #ifdef TARGET_WINNT |
| 580 | if (myo_wrapper.is_available()) { |
| 581 | myo_wrapper.HostFptrTableRegister(entry, 1, false); |
| 582 | } |
| 583 | #endif // TARGET_WINNT |
| 584 | |
| 585 | entries++; |
| 586 | } |
| 587 | |
| 588 | #ifndef TARGET_WINNT |
| 589 | if (myo_wrapper.is_available() && entries > 0) { |
| 590 | myo_wrapper.HostFptrTableRegister(start, entries, false); |
| 591 | } |
| 592 | #endif // TARGET_WINNT |
| 593 | } |
| 594 | |
| 595 | extern "C" int __offload_myoIsAvailable(int target_number) |
| 596 | { |
| 597 | OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__, target_number); |
| 598 | |
| 599 | if (target_number >= -2) { |
| 600 | bool is_default_number = (target_number == -2); |
| 601 | |
| 602 | if (__offload_myoInit()) { |
| 603 | if (target_number >= 0) { |
| 604 | // User provided the device number |
| 605 | int num = target_number % mic_engines_total; |
| 606 | |
| 607 | // reserve device in ORSL |
| 608 | target_number = ORSL::reserve(num) ? num : -1; |
| 609 | } |
| 610 | else { |
| 611 | // try to use device 0 |
| 612 | target_number = ORSL::reserve(0) ? 0 : -1; |
| 613 | } |
| 614 | |
| 615 | // make sure device is initialized |
| 616 | if (target_number >= 0) { |
| 617 | mic_engines[target_number].init(); |
| 618 | } |
| 619 | } |
| 620 | else { |
| 621 | // fallback to CPU |
| 622 | target_number = -1; |
| 623 | } |
| 624 | |
| 625 | if (target_number < 0 && !is_default_number) { |
| 626 | LIBOFFLOAD_ERROR(c_device_is_not_available); |
| 627 | exit(1); |
| 628 | } |
| 629 | } |
| 630 | else { |
| 631 | LIBOFFLOAD_ERROR(c_invalid_device_number); |
| 632 | exit(1); |
| 633 | } |
| 634 | |
| 635 | return target_number; |
| 636 | } |
| 637 | |
| 638 | extern "C" void __offload_myoiRemoteIThunkCall( |
| 639 | void *thunk, |
| 640 | void *arg, |
| 641 | int target_number |
| 642 | ) |
| 643 | { |
| 644 | OFFLOAD_DEBUG_TRACE(3, "%s(%p, %p, %d)\n", __func__, thunk, arg, |
| 645 | target_number); |
| 646 | |
| 647 | myo_wrapper.Release(); |
| 648 | myo_wrapper.RemoteThunkCall(thunk, arg, target_number); |
| 649 | myo_wrapper.Acquire(); |
| 650 | |
| 651 | ORSL::release(target_number); |
| 652 | } |
| 653 | |
| 654 | extern "C" void* _Offload_shared_malloc(size_t size) |
| 655 | { |
| 656 | OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__, size); |
| 657 | |
| 658 | if (__offload_myoLoadLibrary()) { |
| 659 | return myo_wrapper.SharedMalloc(size); |
| 660 | } |
| 661 | else { |
| 662 | return malloc(size); |
| 663 | } |
| 664 | } |
| 665 | |
| 666 | extern "C" void _Offload_shared_free(void *ptr) |
| 667 | { |
| 668 | OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr); |
| 669 | |
| 670 | if (__offload_myoLoadLibrary()) { |
| 671 | myo_wrapper.SharedFree(ptr); |
| 672 | } |
| 673 | else { |
| 674 | free(ptr); |
| 675 | } |
| 676 | } |
| 677 | |
| 678 | extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align) |
| 679 | { |
| 680 | OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__, size, align); |
| 681 | |
| 682 | if (__offload_myoLoadLibrary()) { |
| 683 | return myo_wrapper.SharedAlignedMalloc(size, align); |
| 684 | } |
| 685 | else { |
| 686 | if (align < sizeof(void*)) { |
| 687 | align = sizeof(void*); |
| 688 | } |
| 689 | return _mm_malloc(size, align); |
| 690 | } |
| 691 | } |
| 692 | |
| 693 | extern "C" void _Offload_shared_aligned_free(void *ptr) |
| 694 | { |
| 695 | OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr); |
| 696 | |
| 697 | if (__offload_myoLoadLibrary()) { |
| 698 | myo_wrapper.SharedAlignedFree(ptr); |
| 699 | } |
| 700 | else { |
| 701 | _mm_free(ptr); |
| 702 | } |
| 703 | } |
| 704 | |
| 705 | extern "C" void __intel_cilk_for_32_offload( |
| 706 | int size, |
| 707 | void (*copy_constructor)(void*, void*), |
| 708 | int target_number, |
| 709 | void *raddr, |
| 710 | void *closure_object, |
| 711 | unsigned int iters, |
| 712 | unsigned int grain_size) |
| 713 | { |
| 714 | OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); |
| 715 | |
| 716 | target_number = __offload_myoIsAvailable(target_number); |
| 717 | if (target_number >= 0) { |
| 718 | struct S { |
| 719 | void *M1; |
| 720 | unsigned int M2; |
| 721 | unsigned int M3; |
| 722 | char closure[]; |
| 723 | } *args; |
| 724 | |
| 725 | args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size); |
| 726 | args->M1 = raddr; |
| 727 | args->M2 = iters; |
| 728 | args->M3 = grain_size; |
| 729 | |
| 730 | if (copy_constructor == 0) { |
| 731 | memcpy(args->closure, closure_object, size); |
| 732 | } |
| 733 | else { |
| 734 | copy_constructor(args->closure, closure_object); |
| 735 | } |
| 736 | |
| 737 | myo_wrapper.Release(); |
| 738 | myo_wrapper.GetResult( |
| 739 | myo_wrapper.RemoteCall("__intel_cilk_for_32_offload", |
| 740 | args, target_number) |
| 741 | ); |
| 742 | myo_wrapper.Acquire(); |
| 743 | |
| 744 | _Offload_shared_free(args); |
| 745 | |
| 746 | ORSL::release(target_number); |
| 747 | } |
| 748 | else { |
| 749 | __cilkrts_cilk_for_32(raddr, |
| 750 | closure_object, |
| 751 | iters, |
| 752 | grain_size); |
| 753 | } |
| 754 | } |
| 755 | |
| 756 | extern "C" void __intel_cilk_for_64_offload( |
| 757 | int size, |
| 758 | void (*copy_constructor)(void*, void*), |
| 759 | int target_number, |
| 760 | void *raddr, |
| 761 | void *closure_object, |
| 762 | uint64_t iters, |
| 763 | uint64_t grain_size) |
| 764 | { |
| 765 | OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__); |
| 766 | |
| 767 | target_number = __offload_myoIsAvailable(target_number); |
| 768 | if (target_number >= 0) { |
| 769 | struct S { |
| 770 | void *M1; |
| 771 | uint64_t M2; |
| 772 | uint64_t M3; |
| 773 | char closure[]; |
| 774 | } *args; |
| 775 | |
| 776 | args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size); |
| 777 | args->M1 = raddr; |
| 778 | args->M2 = iters; |
| 779 | args->M3 = grain_size; |
| 780 | |
| 781 | if (copy_constructor == 0) { |
| 782 | memcpy(args->closure, closure_object, size); |
| 783 | } |
| 784 | else { |
| 785 | copy_constructor(args->closure, closure_object); |
| 786 | } |
| 787 | |
| 788 | myo_wrapper.Release(); |
| 789 | myo_wrapper.GetResult( |
| 790 | myo_wrapper.RemoteCall("__intel_cilk_for_64_offload", args, |
| 791 | target_number) |
| 792 | ); |
| 793 | myo_wrapper.Acquire(); |
| 794 | |
| 795 | _Offload_shared_free(args); |
| 796 | |
| 797 | ORSL::release(target_number); |
| 798 | } |
| 799 | else { |
| 800 | __cilkrts_cilk_for_64(raddr, |
| 801 | closure_object, |
| 802 | iters, |
| 803 | grain_size); |
| 804 | } |
| 805 | } |