Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 1 | //===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// This file defines the standard OpenCL implementation of the Acxxel API. |
| 11 | /// |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "acxxel.h" |
| 15 | |
| 16 | #include "CL/cl.h" |
| 17 | |
| 18 | #include <mutex> |
| 19 | #include <sstream> |
| 20 | #include <utility> |
| 21 | #include <vector> |
| 22 | |
| 23 | namespace acxxel { |
| 24 | |
| 25 | namespace { |
| 26 | |
| 27 | /// An ID containing the platform ID and the device ID within the platform. |
| 28 | struct FullDeviceID { |
| 29 | cl_platform_id PlatformID; |
| 30 | cl_device_id DeviceID; |
| 31 | |
| 32 | FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID) |
| 33 | : PlatformID(PlatformID), DeviceID(DeviceID) {} |
| 34 | }; |
| 35 | |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 36 | static std::string getOpenCLErrorMessage(cl_int Result) { |
| 37 | if (!Result) |
| 38 | return "success"; |
| 39 | std::ostringstream OutStream; |
| 40 | OutStream << "OpenCL error: code = " << Result; |
| 41 | return OutStream.str(); |
| 42 | } |
| 43 | |
| 44 | static Status getOpenCLError(cl_int Result, const std::string &Message) { |
| 45 | if (!Result) |
| 46 | return Status(); |
| 47 | std::ostringstream OutStream; |
| 48 | OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message; |
| 49 | return Status(OutStream.str()); |
| 50 | } |
| 51 | |
| 52 | static void logOpenCLWarning(cl_int Result, const std::string &Message) { |
| 53 | if (Result) { |
| 54 | std::ostringstream OutStream; |
| 55 | OutStream << Message << ": " << getOpenCLErrorMessage(Result); |
| 56 | logWarning(OutStream.str()); |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | class OpenCLPlatform : public Platform { |
| 61 | public: |
| 62 | ~OpenCLPlatform() override = default; |
| 63 | |
| 64 | static Expected<OpenCLPlatform> create(); |
| 65 | |
| 66 | Expected<int> getDeviceCount() override; |
| 67 | |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 68 | Expected<Stream> createStream(int DeviceIndex) override; |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 69 | |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 70 | Expected<Event> createEvent(int DeviceIndex) override; |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 71 | |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 72 | Expected<Program> createProgramFromSource(Span<const char> Source, |
| 73 | int DeviceIndex) override; |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 74 | |
| 75 | protected: |
| 76 | Status streamSync(void *Stream) override; |
| 77 | |
| 78 | Status streamWaitOnEvent(void *Stream, void *Event) override; |
| 79 | |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 80 | Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override; |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 81 | HandleDestructor getDeviceMemoryHandleDestructor() override; |
| 82 | void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize, |
| 83 | size_t ByteOffset) override; |
| 84 | void rawDestroyDeviceMemorySpanHandle(void *Handle) override; |
| 85 | |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 86 | Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol, |
| 87 | int DeviceIndex) override; |
| 88 | Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol, |
| 89 | int DeviceIndex) override; |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 90 | |
| 91 | Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override; |
| 92 | HandleDestructor getUnregisterHostMemoryHandleDestructor() override; |
| 93 | |
| 94 | Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override; |
| 95 | HandleDestructor getFreeHostMemoryHandleDestructor() override; |
| 96 | |
| 97 | Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, |
| 98 | void *DeviceDst, ptrdiff_t DeviceDstByteOffset, |
| 99 | ptrdiff_t ByteCount, void *Stream) override; |
| 100 | Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset, |
| 101 | void *HostDst, ptrdiff_t ByteCount, |
| 102 | void *Stream) override; |
| 103 | Status asyncCopyHToD(const void *HostSrc, void *DeviceDst, |
| 104 | ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount, |
| 105 | void *Stream) override; |
| 106 | |
| 107 | Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, |
| 108 | ptrdiff_t ByteCount, char ByteValue, |
| 109 | void *Stream) override; |
| 110 | |
| 111 | Status addStreamCallback(Stream &Stream, StreamCallback Callback) override; |
| 112 | |
| 113 | Status enqueueEvent(void *Event, void *Stream) override; |
| 114 | bool eventIsDone(void *Event) override; |
| 115 | Status eventSync(void *Event) override; |
| 116 | Expected<float> getSecondsBetweenEvents(void *StartEvent, |
| 117 | void *EndEvent) override; |
| 118 | |
| 119 | Expected<void *> rawCreateKernel(void *Program, |
| 120 | const std::string &Name) override; |
| 121 | HandleDestructor getKernelHandleDestructor() override; |
| 122 | |
| 123 | Status rawEnqueueKernelLaunch(void *Stream, void *Kernel, |
| 124 | KernelLaunchDimensions LaunchDimensions, |
| 125 | Span<void *> Arguments, |
| 126 | Span<size_t> ArgumentSizes, |
| 127 | size_t SharedMemoryBytes) override; |
| 128 | |
| 129 | private: |
| 130 | OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs, |
| 131 | std::vector<cl_context> &&Contexts, |
| 132 | std::vector<cl_command_queue> &&CommandQueues) |
| 133 | : FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)), |
| 134 | CommandQueues(std::move(CommandQueues)) {} |
| 135 | |
| 136 | std::vector<FullDeviceID> FullDeviceIDs; |
| 137 | std::vector<cl_context> Contexts; |
| 138 | std::vector<cl_command_queue> CommandQueues; |
| 139 | }; |
| 140 | |
| 141 | Expected<OpenCLPlatform> OpenCLPlatform::create() { |
| 142 | constexpr cl_uint MaxNumEntries = 100; |
| 143 | cl_platform_id Platforms[MaxNumEntries]; |
| 144 | cl_uint NumPlatforms; |
| 145 | if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms)) |
| 146 | return getOpenCLError(Result, "clGetPlatformIDs"); |
| 147 | |
| 148 | std::vector<FullDeviceID> FullDeviceIDs; |
| 149 | for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms; |
| 150 | ++PlatformIndex) { |
| 151 | cl_uint NumDevices; |
| 152 | cl_device_id Devices[MaxNumEntries]; |
| 153 | if (cl_int Result = |
| 154 | clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL, |
| 155 | MaxNumEntries, Devices, &NumDevices)) |
| 156 | return getOpenCLError(Result, "clGetDeviceIDs"); |
| 157 | for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex) |
| 158 | FullDeviceIDs.emplace_back(Platforms[PlatformIndex], |
| 159 | Devices[DeviceIndex]); |
| 160 | } |
| 161 | |
| 162 | if (FullDeviceIDs.empty()) |
| 163 | return Status("No OpenCL device available on this system."); |
| 164 | |
| 165 | std::vector<cl_context> Contexts(FullDeviceIDs.size()); |
| 166 | std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size()); |
| 167 | for (size_t I = 0; I < FullDeviceIDs.size(); ++I) { |
| 168 | cl_int CreateContextResult; |
| 169 | Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID, |
| 170 | nullptr, nullptr, &CreateContextResult); |
| 171 | if (CreateContextResult) |
| 172 | return getOpenCLError(CreateContextResult, "clCreateContext"); |
| 173 | |
| 174 | cl_int CreateCommandQueueResult; |
| 175 | CommandQueues[I] = clCreateCommandQueue( |
| 176 | Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE, |
| 177 | &CreateCommandQueueResult); |
| 178 | if (CreateCommandQueueResult) |
| 179 | return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue"); |
| 180 | } |
| 181 | |
| 182 | return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts), |
| 183 | std::move(CommandQueues)); |
| 184 | } |
| 185 | |
| 186 | Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); } |
| 187 | |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 188 | static void openCLDestroyStream(void *H) { |
| 189 | logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)), |
| 190 | "clReleaseCommandQueue"); |
| 191 | } |
| 192 | |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 193 | Expected<Stream> OpenCLPlatform::createStream(int DeviceIndex) { |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 194 | cl_int Result; |
| 195 | cl_command_queue Queue = clCreateCommandQueue( |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 196 | Contexts[DeviceIndex], FullDeviceIDs[DeviceIndex].DeviceID, |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 197 | CL_QUEUE_PROFILING_ENABLE, &Result); |
| 198 | if (Result) |
| 199 | return getOpenCLError(Result, "clCreateCommandQueue"); |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 200 | return constructStream(this, DeviceIndex, Queue, openCLDestroyStream); |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 201 | } |
| 202 | |
| 203 | static void openCLEventDestroy(void *H) { |
| 204 | cl_event *CLEvent = static_cast<cl_event *>(H); |
| 205 | logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent"); |
| 206 | delete CLEvent; |
| 207 | } |
| 208 | |
| 209 | Status OpenCLPlatform::streamSync(void *Stream) { |
| 210 | return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)), |
| 211 | "clFinish"); |
| 212 | } |
| 213 | |
| 214 | Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) { |
| 215 | cl_event *CLEvent = static_cast<cl_event *>(Event); |
| 216 | return getOpenCLError( |
| 217 | clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1, |
| 218 | CLEvent, nullptr), |
| 219 | "clEnqueueMarkerWithWaitList"); |
| 220 | } |
| 221 | |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 222 | Expected<Event> OpenCLPlatform::createEvent(int DeviceIndex) { |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 223 | cl_int Result; |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 224 | cl_event Event = clCreateUserEvent(Contexts[DeviceIndex], &Result); |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 225 | if (Result) |
| 226 | return getOpenCLError(Result, "clCreateUserEvent"); |
| 227 | if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE)) |
| 228 | return getOpenCLError(Result, "clSetUserEventStatus"); |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 229 | return constructEvent(this, DeviceIndex, new cl_event(Event), |
| 230 | openCLEventDestroy); |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 231 | } |
| 232 | |
| 233 | static void openCLDestroyProgram(void *H) { |
| 234 | logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)), |
| 235 | "clReleaseProgram"); |
| 236 | } |
| 237 | |
| 238 | Expected<Program> |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 239 | OpenCLPlatform::createProgramFromSource(Span<const char> Source, |
| 240 | int DeviceIndex) { |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 241 | cl_int Error; |
| 242 | const char *CSource = Source.data(); |
| 243 | size_t SourceSize = Source.size(); |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 244 | cl_program Program = clCreateProgramWithSource(Contexts[DeviceIndex], 1, |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 245 | &CSource, &SourceSize, &Error); |
| 246 | if (Error) |
| 247 | return getOpenCLError(Error, "clCreateProgramWithSource"); |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 248 | cl_device_id DeviceID = FullDeviceIDs[DeviceIndex].DeviceID; |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 249 | if (cl_int Error = |
| 250 | clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr)) |
| 251 | return getOpenCLError(Error, "clBuildProgram"); |
| 252 | return constructProgram(this, Program, openCLDestroyProgram); |
| 253 | } |
| 254 | |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 255 | Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount, |
| 256 | int DeviceIndex) { |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 257 | cl_int Result; |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 258 | cl_mem Memory = clCreateBuffer(Contexts[DeviceIndex], CL_MEM_READ_WRITE, |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 259 | ByteCount, nullptr, &Result); |
| 260 | if (Result) |
| 261 | return getOpenCLError(Result, "clCreateBuffer"); |
| 262 | return reinterpret_cast<void *>(Memory); |
| 263 | } |
| 264 | |
| 265 | static void openCLDestroyDeviceMemory(void *H) { |
| 266 | logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)), |
| 267 | "clReleaseMemObject"); |
| 268 | } |
| 269 | |
| 270 | HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() { |
| 271 | return openCLDestroyDeviceMemory; |
| 272 | } |
| 273 | |
| 274 | void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle, |
| 275 | size_t ByteSize, |
| 276 | size_t ByteOffset) { |
| 277 | cl_int Error; |
| 278 | cl_buffer_region Region; |
| 279 | Region.origin = ByteOffset; |
| 280 | Region.size = ByteSize; |
| 281 | cl_mem SubBuffer = |
| 282 | clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0, |
| 283 | CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error); |
| 284 | logOpenCLWarning(Error, "clCreateSubBuffer"); |
| 285 | if (Error) |
| 286 | return nullptr; |
| 287 | return SubBuffer; |
| 288 | } |
| 289 | |
| 290 | void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) { |
| 291 | openCLDestroyDeviceMemory(Handle); |
| 292 | } |
| 293 | |
| 294 | Expected<void *> |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 295 | OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/, |
| 296 | int /*DeviceIndex*/) { |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 297 | // This doesn't seem to have any equivalent in OpenCL. |
| 298 | return Status("not implemented"); |
| 299 | } |
| 300 | |
| 301 | Expected<ptrdiff_t> |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 302 | OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/, |
| 303 | int /*DeviceIndex*/) { |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 304 | // This doesn't seem to have any equivalent in OpenCL. |
| 305 | return Status("not implemented"); |
| 306 | } |
| 307 | |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 308 | static void noOpHandleDestructor(void *) {} |
| 309 | |
| 310 | Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/, |
| 311 | ptrdiff_t /*ByteCount*/) { |
| 312 | // TODO(jhen): Do we want to do something to pin the memory here? |
| 313 | return Status(); |
| 314 | } |
| 315 | |
| 316 | HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() { |
| 317 | // TODO(jhen): Do we want to unpin the memory here? |
| 318 | return noOpHandleDestructor; |
| 319 | } |
| 320 | |
| 321 | Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) { |
| 322 | // TODO(jhen): Do we want to do something to pin the memory here? |
| 323 | return std::malloc(ByteCount); |
| 324 | } |
| 325 | |
| 326 | static void freeMemoryHandleDestructor(void *Memory) { |
| 327 | // TODO(jhen): Do we want to unpin the memory here? |
| 328 | std::free(Memory); |
| 329 | } |
| 330 | |
| 331 | HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() { |
| 332 | return freeMemoryHandleDestructor; |
| 333 | } |
| 334 | |
| 335 | Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc, |
| 336 | ptrdiff_t DeviceSrcByteOffset, |
| 337 | void *DeviceDst, |
| 338 | ptrdiff_t DeviceDstByteOffset, |
| 339 | ptrdiff_t ByteCount, void *Stream) { |
| 340 | return getOpenCLError( |
| 341 | clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream), |
| 342 | static_cast<cl_mem>(const_cast<void *>(DeviceSrc)), |
| 343 | static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset, |
| 344 | DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr), |
| 345 | "clEnqueueCopyBuffer"); |
| 346 | } |
| 347 | |
| 348 | Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc, |
| 349 | ptrdiff_t DeviceSrcByteOffset, |
| 350 | void *HostDst, ptrdiff_t ByteCount, |
| 351 | void *Stream) { |
| 352 | return getOpenCLError( |
| 353 | clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream), |
| 354 | static_cast<cl_mem>(const_cast<void *>(DeviceSrc)), |
| 355 | CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0, |
| 356 | nullptr, nullptr), |
| 357 | "clEnqueueReadBuffer"); |
| 358 | } |
| 359 | |
| 360 | Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst, |
| 361 | ptrdiff_t DeviceDstByteOffset, |
| 362 | ptrdiff_t ByteCount, void *Stream) { |
| 363 | return getOpenCLError( |
| 364 | clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream), |
| 365 | static_cast<cl_mem>(DeviceDst), CL_TRUE, |
| 366 | DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr, |
| 367 | nullptr), |
| 368 | "clEnqueueWriteBuffer"); |
| 369 | } |
| 370 | |
| 371 | Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, |
| 372 | ptrdiff_t ByteCount, char ByteValue, |
| 373 | void *Stream) { |
| 374 | return getOpenCLError( |
| 375 | clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream), |
| 376 | static_cast<cl_mem>(DeviceDst), &ByteValue, 1, |
| 377 | ByteOffset, ByteCount, 0, nullptr, nullptr), |
| 378 | "clEnqueueFillBuffer"); |
| 379 | } |
| 380 | |
| 381 | struct StreamCallbackUserData { |
| 382 | StreamCallbackUserData(Stream &TheStream, StreamCallback Function, |
| 383 | cl_event EndEvent) |
| 384 | : TheStream(TheStream), TheFunction(std::move(Function)), |
| 385 | EndEvent(EndEvent) {} |
| 386 | |
| 387 | Stream &TheStream; |
| 388 | StreamCallback TheFunction; |
| 389 | cl_event EndEvent; |
| 390 | }; |
| 391 | |
| 392 | // A function with the right signature to pass to clSetEventCallback. |
| 393 | void CL_CALLBACK openCLStreamCallbackShim(cl_event, |
| 394 | cl_int EventCommandExecStatus, |
| 395 | void *UserData) { |
| 396 | std::unique_ptr<StreamCallbackUserData> Data( |
| 397 | static_cast<StreamCallbackUserData *>(UserData)); |
| 398 | Data->TheFunction( |
| 399 | Data->TheStream, |
| 400 | getOpenCLError(EventCommandExecStatus, "stream callback error state")); |
| 401 | if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE)) |
| 402 | logOpenCLWarning(Result, "clSetUserEventStatus"); |
| 403 | if (cl_int Result = clReleaseEvent(Data->EndEvent)) |
| 404 | logOpenCLWarning(Result, "clReleaseEvent"); |
| 405 | } |
| 406 | |
| 407 | Status OpenCLPlatform::addStreamCallback(Stream &TheStream, |
| 408 | StreamCallback Callback) { |
| 409 | cl_int Result; |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 410 | cl_event StartEvent = |
| 411 | clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result); |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 412 | if (Result) |
| 413 | return getOpenCLError(Result, "clCreateUserEvent"); |
Jason Henline | bdc410b | 2016-10-28 00:54:02 +0000 | [diff] [blame] | 414 | cl_event EndEvent = |
| 415 | clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result); |
Jason Henline | ac232dd | 2016-10-25 20:18:56 +0000 | [diff] [blame] | 416 | if (Result) |
| 417 | return getOpenCLError(Result, "clCreateUserEvent"); |
| 418 | cl_event StartBarrierEvent; |
| 419 | if (cl_int Result = clEnqueueBarrierWithWaitList( |
| 420 | static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1, |
| 421 | &StartEvent, &StartBarrierEvent)) |
| 422 | return getOpenCLError(Result, "clEnqueueBarrierWithWaitList"); |
| 423 | |
| 424 | if (cl_int Result = clEnqueueBarrierWithWaitList( |
| 425 | static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1, |
| 426 | &EndEvent, nullptr)) |
| 427 | return getOpenCLError(Result, "clEnqueueBarrierWithWaitList"); |
| 428 | |
| 429 | std::unique_ptr<StreamCallbackUserData> UserData( |
| 430 | new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent)); |
| 431 | if (cl_int Result = |
| 432 | clSetEventCallback(StartBarrierEvent, CL_RUNNING, |
| 433 | openCLStreamCallbackShim, UserData.release())) |
| 434 | return getOpenCLError(Result, "clSetEventCallback"); |
| 435 | |
| 436 | if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE)) |
| 437 | return getOpenCLError(Result, "clSetUserEventStatus"); |
| 438 | |
| 439 | if (cl_int Result = clReleaseEvent(StartBarrierEvent)) |
| 440 | return getOpenCLError(Result, "clReleaseEvent"); |
| 441 | |
| 442 | return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent"); |
| 443 | } |
| 444 | |
| 445 | Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) { |
| 446 | cl_event *CLEvent = static_cast<cl_event *>(Event); |
| 447 | cl_event OldEvent = *CLEvent; |
| 448 | cl_event NewEvent; |
| 449 | if (cl_int Result = clEnqueueMarkerWithWaitList( |
| 450 | static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent)) |
| 451 | return getOpenCLError(Result, "clEnqueueMarkerWithWaitList"); |
| 452 | *CLEvent = NewEvent; |
| 453 | return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent"); |
| 454 | } |
| 455 | |
| 456 | bool OpenCLPlatform::eventIsDone(void *Event) { |
| 457 | cl_event *CLEvent = static_cast<cl_event *>(Event); |
| 458 | cl_int EventStatus; |
| 459 | logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, |
| 460 | sizeof(EventStatus), &EventStatus, nullptr), |
| 461 | "clGetEventInfo"); |
| 462 | return EventStatus == CL_COMPLETE || EventStatus < 0; |
| 463 | } |
| 464 | |
| 465 | Status OpenCLPlatform::eventSync(void *Event) { |
| 466 | cl_event *CLEvent = static_cast<cl_event *>(Event); |
| 467 | return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents"); |
| 468 | } |
| 469 | |
| 470 | Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent, |
| 471 | void *EndEvent) { |
| 472 | cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent); |
| 473 | cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent); |
| 474 | |
| 475 | cl_profiling_info ParamName = CL_PROFILING_COMMAND_END; |
| 476 | cl_ulong StartNanoseconds; |
| 477 | cl_ulong EndNanoseconds; |
| 478 | if (cl_int Result = |
| 479 | clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong), |
| 480 | &StartNanoseconds, nullptr)) |
| 481 | return getOpenCLError(Result, "clGetEventProfilingInfo"); |
| 482 | if (cl_int Result = clGetEventProfilingInfo( |
| 483 | *CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr)) |
| 484 | return getOpenCLError(Result, "clGetEventProfilingInfo"); |
| 485 | return (EndNanoseconds - StartNanoseconds) * 1e-12; |
| 486 | } |
| 487 | |
| 488 | Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program, |
| 489 | const std::string &Name) { |
| 490 | |
| 491 | cl_int Error; |
| 492 | cl_kernel Kernel = |
| 493 | clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error); |
| 494 | if (Error) |
| 495 | return getOpenCLError(Error, "clCreateKernel"); |
| 496 | return Kernel; |
| 497 | } |
| 498 | |
| 499 | static void openCLDestroyKernel(void *H) { |
| 500 | logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)), |
| 501 | "clReleaseKernel"); |
| 502 | } |
| 503 | |
| 504 | HandleDestructor OpenCLPlatform::getKernelHandleDestructor() { |
| 505 | return openCLDestroyKernel; |
| 506 | } |
| 507 | |
| 508 | Status OpenCLPlatform::rawEnqueueKernelLaunch( |
| 509 | void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions, |
| 510 | Span<void *> Arguments, Span<size_t> ArgumentSizes, |
| 511 | size_t SharedMemoryBytes) { |
| 512 | if (SharedMemoryBytes != 0) |
| 513 | return Status("OpenCL kernel launches only accept zero for the shared " |
| 514 | "memory byte size"); |
| 515 | cl_kernel TheKernel = static_cast<cl_kernel>(Kernel); |
| 516 | for (int I = 0; I < Arguments.size(); ++I) |
| 517 | if (cl_int Error = |
| 518 | clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I])) |
| 519 | return getOpenCLError(Error, "clSetKernelArg"); |
| 520 | size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY, |
| 521 | LaunchDimensions.BlockZ}; |
| 522 | size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX, |
| 523 | LaunchDimensions.BlockY * LaunchDimensions.GridY, |
| 524 | LaunchDimensions.BlockZ * LaunchDimensions.GridZ}; |
| 525 | return getOpenCLError( |
| 526 | clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel, |
| 527 | 3, nullptr, GlobalWorkSize, LocalWorkSize, 0, |
| 528 | nullptr, nullptr), |
| 529 | "clEnqueueNDRangeKernel"); |
| 530 | } |
| 531 | |
| 532 | } // namespace |
| 533 | |
| 534 | namespace opencl { |
| 535 | |
| 536 | /// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a |
| 537 | /// Platform. |
| 538 | Expected<Platform *> getPlatform() { |
| 539 | static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> { |
| 540 | Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create(); |
| 541 | if (CreationResult.isError()) |
| 542 | return CreationResult.getError(); |
| 543 | else |
| 544 | return new OpenCLPlatform(CreationResult.takeValue()); |
| 545 | }(); |
| 546 | return MaybePlatform; |
| 547 | } |
| 548 | |
| 549 | } // namespace opencl |
| 550 | |
| 551 | } // namespace acxxel |