Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 1 | //===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// This file defines the standard OpenCL implementation of the Acxxel API. |
| 11 | /// |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "acxxel.h" |
| 15 | |
| 16 | #include "CL/cl.h" |
| 17 | |
| 18 | #include <mutex> |
| 19 | #include <sstream> |
| 20 | #include <utility> |
| 21 | #include <vector> |
| 22 | |
| 23 | namespace acxxel { |
| 24 | |
| 25 | namespace { |
| 26 | |
| 27 | /// An ID containing the platform ID and the device ID within the platform. |
| 28 | struct FullDeviceID { |
| 29 | cl_platform_id PlatformID; |
| 30 | cl_device_id DeviceID; |
| 31 | |
| 32 | FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID) |
| 33 | : PlatformID(PlatformID), DeviceID(DeviceID) {} |
| 34 | }; |
| 35 | |
| 36 | thread_local int ActiveDeviceIndex = 0; |
| 37 | |
| 38 | static std::string getOpenCLErrorMessage(cl_int Result) { |
| 39 | if (!Result) |
| 40 | return "success"; |
| 41 | std::ostringstream OutStream; |
| 42 | OutStream << "OpenCL error: code = " << Result; |
| 43 | return OutStream.str(); |
| 44 | } |
| 45 | |
| 46 | static Status getOpenCLError(cl_int Result, const std::string &Message) { |
| 47 | if (!Result) |
| 48 | return Status(); |
| 49 | std::ostringstream OutStream; |
| 50 | OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message; |
| 51 | return Status(OutStream.str()); |
| 52 | } |
| 53 | |
| 54 | static void logOpenCLWarning(cl_int Result, const std::string &Message) { |
| 55 | if (Result) { |
| 56 | std::ostringstream OutStream; |
| 57 | OutStream << Message << ": " << getOpenCLErrorMessage(Result); |
| 58 | logWarning(OutStream.str()); |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | class OpenCLPlatform : public Platform { |
| 63 | public: |
| 64 | ~OpenCLPlatform() override = default; |
| 65 | |
| 66 | static Expected<OpenCLPlatform> create(); |
| 67 | |
| 68 | Expected<int> getDeviceCount() override; |
| 69 | |
| 70 | Status setActiveDeviceForThread(int DeviceIndex) override; |
| 71 | |
| 72 | int getActiveDeviceForThread() override; |
| 73 | |
| 74 | Expected<Stream> createStream() override; |
| 75 | |
| 76 | Expected<Event> createEvent() override; |
| 77 | |
| 78 | Expected<Program> createProgramFromSource(Span<const char> Source) override; |
| 79 | |
| 80 | protected: |
| 81 | Status streamSync(void *Stream) override; |
| 82 | |
| 83 | Status streamWaitOnEvent(void *Stream, void *Event) override; |
| 84 | |
| 85 | Expected<void *> rawMallocD(ptrdiff_t ByteCount) override; |
| 86 | HandleDestructor getDeviceMemoryHandleDestructor() override; |
| 87 | void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize, |
| 88 | size_t ByteOffset) override; |
| 89 | void rawDestroyDeviceMemorySpanHandle(void *Handle) override; |
| 90 | |
| 91 | Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol) override; |
| 92 | Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol) override; |
| 93 | |
| 94 | Status rawCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, |
| 95 | void *DeviceDst, ptrdiff_t DeviceDstByteOffset, |
| 96 | ptrdiff_t ByteCount) override; |
| 97 | Status rawCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, |
| 98 | void *HostDst, ptrdiff_t ByteCount) override; |
| 99 | Status rawCopyHToD(const void *HostSrc, void *DeviceDst, |
| 100 | ptrdiff_t DeviceDstByteOffset, |
| 101 | ptrdiff_t ByteCount) override; |
| 102 | |
| 103 | Status rawMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, ptrdiff_t ByteCount, |
| 104 | char ByteValue) override; |
| 105 | |
| 106 | Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override; |
| 107 | HandleDestructor getUnregisterHostMemoryHandleDestructor() override; |
| 108 | |
| 109 | Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override; |
| 110 | HandleDestructor getFreeHostMemoryHandleDestructor() override; |
| 111 | |
| 112 | Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, |
| 113 | void *DeviceDst, ptrdiff_t DeviceDstByteOffset, |
| 114 | ptrdiff_t ByteCount, void *Stream) override; |
| 115 | Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, |
| 116 | void *HostDst, ptrdiff_t ByteCount, |
| 117 | void *Stream) override; |
| 118 | Status asyncCopyHToD(const void *HostSrc, void *DeviceDst, |
| 119 | ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount, |
| 120 | void *Stream) override; |
| 121 | |
| 122 | Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, |
| 123 | ptrdiff_t ByteCount, char ByteValue, |
| 124 | void *Stream) override; |
| 125 | |
| 126 | Status addStreamCallback(Stream &Stream, StreamCallback Callback) override; |
| 127 | |
| 128 | Status enqueueEvent(void *Event, void *Stream) override; |
| 129 | bool eventIsDone(void *Event) override; |
| 130 | Status eventSync(void *Event) override; |
| 131 | Expected<float> getSecondsBetweenEvents(void *StartEvent, |
| 132 | void *EndEvent) override; |
| 133 | |
| 134 | Expected<void *> rawCreateKernel(void *Program, |
| 135 | const std::string &Name) override; |
| 136 | HandleDestructor getKernelHandleDestructor() override; |
| 137 | |
| 138 | Status rawEnqueueKernelLaunch(void *Stream, void *Kernel, |
| 139 | KernelLaunchDimensions LaunchDimensions, |
| 140 | Span<void *> Arguments, |
| 141 | Span<size_t> ArgumentSizes, |
| 142 | size_t SharedMemoryBytes) override; |
| 143 | |
| 144 | private: |
| 145 | OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs, |
| 146 | std::vector<cl_context> &&Contexts, |
| 147 | std::vector<cl_command_queue> &&CommandQueues) |
| 148 | : FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)), |
| 149 | CommandQueues(std::move(CommandQueues)) {} |
| 150 | |
| 151 | std::vector<FullDeviceID> FullDeviceIDs; |
| 152 | std::vector<cl_context> Contexts; |
| 153 | std::vector<cl_command_queue> CommandQueues; |
| 154 | }; |
| 155 | |
| 156 | Expected<OpenCLPlatform> OpenCLPlatform::create() { |
| 157 | constexpr cl_uint MaxNumEntries = 100; |
| 158 | cl_platform_id Platforms[MaxNumEntries]; |
| 159 | cl_uint NumPlatforms; |
| 160 | if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms)) |
| 161 | return getOpenCLError(Result, "clGetPlatformIDs"); |
| 162 | |
| 163 | std::vector<FullDeviceID> FullDeviceIDs; |
| 164 | for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms; |
| 165 | ++PlatformIndex) { |
| 166 | cl_uint NumDevices; |
| 167 | cl_device_id Devices[MaxNumEntries]; |
| 168 | if (cl_int Result = |
| 169 | clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL, |
| 170 | MaxNumEntries, Devices, &NumDevices)) |
| 171 | return getOpenCLError(Result, "clGetDeviceIDs"); |
| 172 | for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex) |
| 173 | FullDeviceIDs.emplace_back(Platforms[PlatformIndex], |
| 174 | Devices[DeviceIndex]); |
| 175 | } |
| 176 | |
| 177 | if (FullDeviceIDs.empty()) |
| 178 | return Status("No OpenCL device available on this system."); |
| 179 | |
| 180 | std::vector<cl_context> Contexts(FullDeviceIDs.size()); |
| 181 | std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size()); |
| 182 | for (size_t I = 0; I < FullDeviceIDs.size(); ++I) { |
| 183 | cl_int CreateContextResult; |
| 184 | Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID, |
| 185 | nullptr, nullptr, &CreateContextResult); |
| 186 | if (CreateContextResult) |
| 187 | return getOpenCLError(CreateContextResult, "clCreateContext"); |
| 188 | |
| 189 | cl_int CreateCommandQueueResult; |
| 190 | CommandQueues[I] = clCreateCommandQueue( |
| 191 | Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE, |
| 192 | &CreateCommandQueueResult); |
| 193 | if (CreateCommandQueueResult) |
| 194 | return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue"); |
| 195 | } |
| 196 | |
| 197 | return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts), |
| 198 | std::move(CommandQueues)); |
| 199 | } |
| 200 | |
| 201 | Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); } |
| 202 | |
| 203 | Status OpenCLPlatform::setActiveDeviceForThread(int DeviceIndex) { |
| 204 | if (static_cast<size_t>(DeviceIndex) >= FullDeviceIDs.size()) |
| 205 | return Status("Could not set active device index to " + |
| 206 | std::to_string(DeviceIndex) + " because there are only " + |
| 207 | std::to_string(FullDeviceIDs.size()) + |
| 208 | " devices in the system"); |
| 209 | ActiveDeviceIndex = DeviceIndex; |
| 210 | return Status(); |
| 211 | } |
| 212 | |
| 213 | int OpenCLPlatform::getActiveDeviceForThread() { return ActiveDeviceIndex; } |
| 214 | |
| 215 | static void openCLDestroyStream(void *H) { |
| 216 | logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)), |
| 217 | "clReleaseCommandQueue"); |
| 218 | } |
| 219 | |
| 220 | Expected<Stream> OpenCLPlatform::createStream() { |
| 221 | cl_int Result; |
| 222 | cl_command_queue Queue = clCreateCommandQueue( |
| 223 | Contexts[ActiveDeviceIndex], FullDeviceIDs[ActiveDeviceIndex].DeviceID, |
| 224 | CL_QUEUE_PROFILING_ENABLE, &Result); |
| 225 | if (Result) |
| 226 | return getOpenCLError(Result, "clCreateCommandQueue"); |
| 227 | return constructStream(this, Queue, openCLDestroyStream); |
| 228 | } |
| 229 | |
| 230 | static void openCLEventDestroy(void *H) { |
| 231 | cl_event *CLEvent = static_cast<cl_event *>(H); |
| 232 | logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent"); |
| 233 | delete CLEvent; |
| 234 | } |
| 235 | |
| 236 | Status OpenCLPlatform::streamSync(void *Stream) { |
| 237 | return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)), |
| 238 | "clFinish"); |
| 239 | } |
| 240 | |
| 241 | Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) { |
| 242 | cl_event *CLEvent = static_cast<cl_event *>(Event); |
| 243 | return getOpenCLError( |
| 244 | clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1, |
| 245 | CLEvent, nullptr), |
| 246 | "clEnqueueMarkerWithWaitList"); |
| 247 | } |
| 248 | |
| 249 | Expected<Event> OpenCLPlatform::createEvent() { |
| 250 | cl_int Result; |
| 251 | cl_event Event = clCreateUserEvent(Contexts[ActiveDeviceIndex], &Result); |
| 252 | if (Result) |
| 253 | return getOpenCLError(Result, "clCreateUserEvent"); |
| 254 | if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE)) |
| 255 | return getOpenCLError(Result, "clSetUserEventStatus"); |
| 256 | return constructEvent(this, new cl_event(Event), openCLEventDestroy); |
| 257 | } |
| 258 | |
| 259 | static void openCLDestroyProgram(void *H) { |
| 260 | logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)), |
| 261 | "clReleaseProgram"); |
| 262 | } |
| 263 | |
| 264 | Expected<Program> |
| 265 | OpenCLPlatform::createProgramFromSource(Span<const char> Source) { |
| 266 | cl_int Error; |
| 267 | const char *CSource = Source.data(); |
| 268 | size_t SourceSize = Source.size(); |
| 269 | cl_program Program = clCreateProgramWithSource(Contexts[ActiveDeviceIndex], 1, |
| 270 | &CSource, &SourceSize, &Error); |
| 271 | if (Error) |
| 272 | return getOpenCLError(Error, "clCreateProgramWithSource"); |
| 273 | cl_device_id DeviceID = FullDeviceIDs[ActiveDeviceIndex].DeviceID; |
| 274 | if (cl_int Error = |
| 275 | clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr)) |
| 276 | return getOpenCLError(Error, "clBuildProgram"); |
| 277 | return constructProgram(this, Program, openCLDestroyProgram); |
| 278 | } |
| 279 | |
| 280 | Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount) { |
| 281 | cl_int Result; |
| 282 | cl_mem Memory = clCreateBuffer(Contexts[ActiveDeviceIndex], CL_MEM_READ_WRITE, |
| 283 | ByteCount, nullptr, &Result); |
| 284 | if (Result) |
| 285 | return getOpenCLError(Result, "clCreateBuffer"); |
| 286 | return reinterpret_cast<void *>(Memory); |
| 287 | } |
| 288 | |
| 289 | static void openCLDestroyDeviceMemory(void *H) { |
| 290 | logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)), |
| 291 | "clReleaseMemObject"); |
| 292 | } |
| 293 | |
| 294 | HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() { |
| 295 | return openCLDestroyDeviceMemory; |
| 296 | } |
| 297 | |
| 298 | void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle, |
| 299 | size_t ByteSize, |
| 300 | size_t ByteOffset) { |
| 301 | cl_int Error; |
| 302 | cl_buffer_region Region; |
| 303 | Region.origin = ByteOffset; |
| 304 | Region.size = ByteSize; |
| 305 | cl_mem SubBuffer = |
| 306 | clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0, |
| 307 | CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error); |
| 308 | logOpenCLWarning(Error, "clCreateSubBuffer"); |
| 309 | if (Error) |
| 310 | return nullptr; |
| 311 | return SubBuffer; |
| 312 | } |
| 313 | |
| 314 | void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) { |
| 315 | openCLDestroyDeviceMemory(Handle); |
| 316 | } |
| 317 | |
| 318 | Expected<void *> |
| 319 | OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/) { |
| 320 | // This doesn't seem to have any equivalent in OpenCL. |
| 321 | return Status("not implemented"); |
| 322 | } |
| 323 | |
| 324 | Expected<ptrdiff_t> |
| 325 | OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/) { |
| 326 | // This doesn't seem to have any equivalent in OpenCL. |
| 327 | return Status("not implemented"); |
| 328 | } |
| 329 | |
| 330 | Status OpenCLPlatform::rawCopyDToD(const void *DeviceSrc, |
| 331 | ptrdiff_t DeviceSrcByteOffset, |
| 332 | void *DeviceDst, |
| 333 | ptrdiff_t DeviceDstByteOffset, |
| 334 | ptrdiff_t ByteCount) { |
| 335 | cl_event DoneEvent; |
| 336 | if (cl_int Result = clEnqueueCopyBuffer( |
| 337 | CommandQueues[ActiveDeviceIndex], |
| 338 | static_cast<cl_mem>(const_cast<void *>(DeviceSrc)), |
| 339 | static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset, |
| 340 | DeviceDstByteOffset, ByteCount, 0, nullptr, &DoneEvent)) |
| 341 | return getOpenCLError(Result, "clEnqueueCopyBuffer"); |
| 342 | return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents"); |
| 343 | } |
| 344 | |
| 345 | Status OpenCLPlatform::rawCopyDToH(const void *DeviceSrc, |
| 346 | ptrdiff_t DeviceSrcByteOffset, void *HostDst, |
| 347 | ptrdiff_t ByteCount) { |
| 348 | cl_event DoneEvent; |
| 349 | if (cl_int Result = clEnqueueReadBuffer( |
| 350 | CommandQueues[ActiveDeviceIndex], |
| 351 | static_cast<cl_mem>(const_cast<void *>(DeviceSrc)), CL_TRUE, |
| 352 | DeviceSrcByteOffset, ByteCount, HostDst, 0, nullptr, &DoneEvent)) |
| 353 | return getOpenCLError(Result, "clEnqueueReadBuffer"); |
| 354 | return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents"); |
| 355 | } |
| 356 | |
| 357 | Status OpenCLPlatform::rawCopyHToD(const void *HostSrc, void *DeviceDst, |
| 358 | ptrdiff_t DeviceDstByteOffset, |
| 359 | ptrdiff_t ByteCount) { |
| 360 | cl_event DoneEvent; |
| 361 | if (cl_int Result = clEnqueueWriteBuffer( |
| 362 | CommandQueues[ActiveDeviceIndex], static_cast<cl_mem>(DeviceDst), |
| 363 | CL_TRUE, DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr, |
| 364 | &DoneEvent)) |
| 365 | return getOpenCLError(Result, "clEnqueueWriteBuffer"); |
| 366 | return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents"); |
| 367 | } |
| 368 | |
| 369 | Status OpenCLPlatform::rawMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, |
| 370 | ptrdiff_t ByteCount, char ByteValue) { |
| 371 | cl_event DoneEvent; |
| 372 | if (cl_int Result = clEnqueueFillBuffer( |
| 373 | CommandQueues[ActiveDeviceIndex], static_cast<cl_mem>(DeviceDst), |
| 374 | &ByteValue, 1, ByteOffset, ByteCount, 0, nullptr, &DoneEvent)) |
| 375 | return getOpenCLError(Result, "clEnqueueFillBuffer"); |
| 376 | return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents"); |
| 377 | } |
| 378 | |
| 379 | static void noOpHandleDestructor(void *) {} |
| 380 | |
| 381 | Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/, |
| 382 | ptrdiff_t /*ByteCount*/) { |
| 383 | // TODO(jhen): Do we want to do something to pin the memory here? |
| 384 | return Status(); |
| 385 | } |
| 386 | |
| 387 | HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() { |
| 388 | // TODO(jhen): Do we want to unpin the memory here? |
| 389 | return noOpHandleDestructor; |
| 390 | } |
| 391 | |
| 392 | Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) { |
| 393 | // TODO(jhen): Do we want to do something to pin the memory here? |
| 394 | return std::malloc(ByteCount); |
| 395 | } |
| 396 | |
| 397 | static void freeMemoryHandleDestructor(void *Memory) { |
| 398 | // TODO(jhen): Do we want to unpin the memory here? |
| 399 | std::free(Memory); |
| 400 | } |
| 401 | |
| 402 | HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() { |
| 403 | return freeMemoryHandleDestructor; |
| 404 | } |
| 405 | |
| 406 | Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc, |
| 407 | ptrdiff_t DeviceSrcByteOffset, |
| 408 | void *DeviceDst, |
| 409 | ptrdiff_t DeviceDstByteOffset, |
| 410 | ptrdiff_t ByteCount, void *Stream) { |
| 411 | return getOpenCLError( |
| 412 | clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream), |
| 413 | static_cast<cl_mem>(const_cast<void *>(DeviceSrc)), |
| 414 | static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset, |
| 415 | DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr), |
| 416 | "clEnqueueCopyBuffer"); |
| 417 | } |
| 418 | |
| 419 | Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc, |
| 420 | ptrdiff_t DeviceSrcByteOffset, |
| 421 | void *HostDst, ptrdiff_t ByteCount, |
| 422 | void *Stream) { |
| 423 | return getOpenCLError( |
| 424 | clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream), |
| 425 | static_cast<cl_mem>(const_cast<void *>(DeviceSrc)), |
| 426 | CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0, |
| 427 | nullptr, nullptr), |
| 428 | "clEnqueueReadBuffer"); |
| 429 | } |
| 430 | |
| 431 | Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst, |
| 432 | ptrdiff_t DeviceDstByteOffset, |
| 433 | ptrdiff_t ByteCount, void *Stream) { |
| 434 | return getOpenCLError( |
| 435 | clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream), |
| 436 | static_cast<cl_mem>(DeviceDst), CL_TRUE, |
| 437 | DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr, |
| 438 | nullptr), |
| 439 | "clEnqueueWriteBuffer"); |
| 440 | } |
| 441 | |
| 442 | Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, |
| 443 | ptrdiff_t ByteCount, char ByteValue, |
| 444 | void *Stream) { |
| 445 | return getOpenCLError( |
| 446 | clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream), |
| 447 | static_cast<cl_mem>(DeviceDst), &ByteValue, 1, |
| 448 | ByteOffset, ByteCount, 0, nullptr, nullptr), |
| 449 | "clEnqueueFillBuffer"); |
| 450 | } |
| 451 | |
| 452 | struct StreamCallbackUserData { |
| 453 | StreamCallbackUserData(Stream &TheStream, StreamCallback Function, |
| 454 | cl_event EndEvent) |
| 455 | : TheStream(TheStream), TheFunction(std::move(Function)), |
| 456 | EndEvent(EndEvent) {} |
| 457 | |
| 458 | Stream &TheStream; |
| 459 | StreamCallback TheFunction; |
| 460 | cl_event EndEvent; |
| 461 | }; |
| 462 | |
| 463 | // A function with the right signature to pass to clSetEventCallback. |
| 464 | void CL_CALLBACK openCLStreamCallbackShim(cl_event, |
| 465 | cl_int EventCommandExecStatus, |
| 466 | void *UserData) { |
| 467 | std::unique_ptr<StreamCallbackUserData> Data( |
| 468 | static_cast<StreamCallbackUserData *>(UserData)); |
| 469 | Data->TheFunction( |
| 470 | Data->TheStream, |
| 471 | getOpenCLError(EventCommandExecStatus, "stream callback error state")); |
| 472 | if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE)) |
| 473 | logOpenCLWarning(Result, "clSetUserEventStatus"); |
| 474 | if (cl_int Result = clReleaseEvent(Data->EndEvent)) |
| 475 | logOpenCLWarning(Result, "clReleaseEvent"); |
| 476 | } |
| 477 | |
| 478 | Status OpenCLPlatform::addStreamCallback(Stream &TheStream, |
| 479 | StreamCallback Callback) { |
| 480 | cl_int Result; |
| 481 | cl_event StartEvent = clCreateUserEvent(Contexts[ActiveDeviceIndex], &Result); |
| 482 | if (Result) |
| 483 | return getOpenCLError(Result, "clCreateUserEvent"); |
| 484 | cl_event EndEvent = clCreateUserEvent(Contexts[ActiveDeviceIndex], &Result); |
| 485 | if (Result) |
| 486 | return getOpenCLError(Result, "clCreateUserEvent"); |
| 487 | cl_event StartBarrierEvent; |
| 488 | if (cl_int Result = clEnqueueBarrierWithWaitList( |
| 489 | static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1, |
| 490 | &StartEvent, &StartBarrierEvent)) |
| 491 | return getOpenCLError(Result, "clEnqueueBarrierWithWaitList"); |
| 492 | |
| 493 | if (cl_int Result = clEnqueueBarrierWithWaitList( |
| 494 | static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1, |
| 495 | &EndEvent, nullptr)) |
| 496 | return getOpenCLError(Result, "clEnqueueBarrierWithWaitList"); |
| 497 | |
| 498 | std::unique_ptr<StreamCallbackUserData> UserData( |
| 499 | new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent)); |
| 500 | if (cl_int Result = |
| 501 | clSetEventCallback(StartBarrierEvent, CL_RUNNING, |
| 502 | openCLStreamCallbackShim, UserData.release())) |
| 503 | return getOpenCLError(Result, "clSetEventCallback"); |
| 504 | |
| 505 | if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE)) |
| 506 | return getOpenCLError(Result, "clSetUserEventStatus"); |
| 507 | |
| 508 | if (cl_int Result = clReleaseEvent(StartBarrierEvent)) |
| 509 | return getOpenCLError(Result, "clReleaseEvent"); |
| 510 | |
| 511 | return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent"); |
| 512 | } |
| 513 | |
| 514 | Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) { |
| 515 | cl_event *CLEvent = static_cast<cl_event *>(Event); |
| 516 | cl_event OldEvent = *CLEvent; |
| 517 | cl_event NewEvent; |
| 518 | if (cl_int Result = clEnqueueMarkerWithWaitList( |
| 519 | static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent)) |
| 520 | return getOpenCLError(Result, "clEnqueueMarkerWithWaitList"); |
| 521 | *CLEvent = NewEvent; |
| 522 | return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent"); |
| 523 | } |
| 524 | |
| 525 | bool OpenCLPlatform::eventIsDone(void *Event) { |
| 526 | cl_event *CLEvent = static_cast<cl_event *>(Event); |
| 527 | cl_int EventStatus; |
| 528 | logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, |
| 529 | sizeof(EventStatus), &EventStatus, nullptr), |
| 530 | "clGetEventInfo"); |
| 531 | return EventStatus == CL_COMPLETE || EventStatus < 0; |
| 532 | } |
| 533 | |
| 534 | Status OpenCLPlatform::eventSync(void *Event) { |
| 535 | cl_event *CLEvent = static_cast<cl_event *>(Event); |
| 536 | return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents"); |
| 537 | } |
| 538 | |
| 539 | Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent, |
| 540 | void *EndEvent) { |
| 541 | cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent); |
| 542 | cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent); |
| 543 | |
| 544 | cl_profiling_info ParamName = CL_PROFILING_COMMAND_END; |
| 545 | cl_ulong StartNanoseconds; |
| 546 | cl_ulong EndNanoseconds; |
| 547 | if (cl_int Result = |
| 548 | clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong), |
| 549 | &StartNanoseconds, nullptr)) |
| 550 | return getOpenCLError(Result, "clGetEventProfilingInfo"); |
| 551 | if (cl_int Result = clGetEventProfilingInfo( |
| 552 | *CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr)) |
| 553 | return getOpenCLError(Result, "clGetEventProfilingInfo"); |
| 554 | return (EndNanoseconds - StartNanoseconds) * 1e-12; |
| 555 | } |
| 556 | |
| 557 | Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program, |
| 558 | const std::string &Name) { |
| 559 | |
| 560 | cl_int Error; |
| 561 | cl_kernel Kernel = |
| 562 | clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error); |
| 563 | if (Error) |
| 564 | return getOpenCLError(Error, "clCreateKernel"); |
| 565 | return Kernel; |
| 566 | } |
| 567 | |
| 568 | static void openCLDestroyKernel(void *H) { |
| 569 | logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)), |
| 570 | "clReleaseKernel"); |
| 571 | } |
| 572 | |
| 573 | HandleDestructor OpenCLPlatform::getKernelHandleDestructor() { |
| 574 | return openCLDestroyKernel; |
| 575 | } |
| 576 | |
| 577 | Status OpenCLPlatform::rawEnqueueKernelLaunch( |
| 578 | void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions, |
| 579 | Span<void *> Arguments, Span<size_t> ArgumentSizes, |
| 580 | size_t SharedMemoryBytes) { |
| 581 | if (SharedMemoryBytes != 0) |
| 582 | return Status("OpenCL kernel launches only accept zero for the shared " |
| 583 | "memory byte size"); |
| 584 | cl_kernel TheKernel = static_cast<cl_kernel>(Kernel); |
| 585 | for (int I = 0; I < Arguments.size(); ++I) |
| 586 | if (cl_int Error = |
| 587 | clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I])) |
| 588 | return getOpenCLError(Error, "clSetKernelArg"); |
| 589 | size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY, |
| 590 | LaunchDimensions.BlockZ}; |
| 591 | size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX, |
| 592 | LaunchDimensions.BlockY * LaunchDimensions.GridY, |
| 593 | LaunchDimensions.BlockZ * LaunchDimensions.GridZ}; |
| 594 | return getOpenCLError( |
| 595 | clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel, |
| 596 | 3, nullptr, GlobalWorkSize, LocalWorkSize, 0, |
| 597 | nullptr, nullptr), |
| 598 | "clEnqueueNDRangeKernel"); |
| 599 | } |
| 600 | |
| 601 | } // namespace |
| 602 | |
| 603 | namespace opencl { |
| 604 | |
| 605 | /// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a |
| 606 | /// Platform. |
| 607 | Expected<Platform *> getPlatform() { |
| 608 | static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> { |
| 609 | Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create(); |
| 610 | if (CreationResult.isError()) |
| 611 | return CreationResult.getError(); |
| 612 | else |
| 613 | return new OpenCLPlatform(CreationResult.takeValue()); |
| 614 | }(); |
| 615 | return MaybePlatform; |
| 616 | } |
| 617 | |
| 618 | } // namespace opencl |
| 619 | |
| 620 | } // namespace acxxel |