blob: 2ca74ed31d5f2ac9dff8b76b6f28271b8c22555e [file] [log] [blame]
Jason Henlineac232dd2016-10-25 20:18:56 +00001//===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// This file defines the standard OpenCL implementation of the Acxxel API.
11///
12//===----------------------------------------------------------------------===//
13
14#include "acxxel.h"
15
16#include "CL/cl.h"
17
18#include <mutex>
19#include <sstream>
20#include <utility>
21#include <vector>
22
23namespace acxxel {
24
25namespace {
26
27/// An ID containing the platform ID and the device ID within the platform.
28struct FullDeviceID {
29 cl_platform_id PlatformID;
30 cl_device_id DeviceID;
31
32 FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID)
33 : PlatformID(PlatformID), DeviceID(DeviceID) {}
34};
35
36thread_local int ActiveDeviceIndex = 0;
37
38static std::string getOpenCLErrorMessage(cl_int Result) {
39 if (!Result)
40 return "success";
41 std::ostringstream OutStream;
42 OutStream << "OpenCL error: code = " << Result;
43 return OutStream.str();
44}
45
46static Status getOpenCLError(cl_int Result, const std::string &Message) {
47 if (!Result)
48 return Status();
49 std::ostringstream OutStream;
50 OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message;
51 return Status(OutStream.str());
52}
53
54static void logOpenCLWarning(cl_int Result, const std::string &Message) {
55 if (Result) {
56 std::ostringstream OutStream;
57 OutStream << Message << ": " << getOpenCLErrorMessage(Result);
58 logWarning(OutStream.str());
59 }
60}
61
62class OpenCLPlatform : public Platform {
63public:
64 ~OpenCLPlatform() override = default;
65
66 static Expected<OpenCLPlatform> create();
67
68 Expected<int> getDeviceCount() override;
69
70 Status setActiveDeviceForThread(int DeviceIndex) override;
71
72 int getActiveDeviceForThread() override;
73
74 Expected<Stream> createStream() override;
75
76 Expected<Event> createEvent() override;
77
78 Expected<Program> createProgramFromSource(Span<const char> Source) override;
79
80protected:
81 Status streamSync(void *Stream) override;
82
83 Status streamWaitOnEvent(void *Stream, void *Event) override;
84
85 Expected<void *> rawMallocD(ptrdiff_t ByteCount) override;
86 HandleDestructor getDeviceMemoryHandleDestructor() override;
87 void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
88 size_t ByteOffset) override;
89 void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
90
91 Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol) override;
92 Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol) override;
93
94 Status rawCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
95 void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
96 ptrdiff_t ByteCount) override;
97 Status rawCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
98 void *HostDst, ptrdiff_t ByteCount) override;
99 Status rawCopyHToD(const void *HostSrc, void *DeviceDst,
100 ptrdiff_t DeviceDstByteOffset,
101 ptrdiff_t ByteCount) override;
102
103 Status rawMemsetD(void *DeviceDst, ptrdiff_t ByteOffset, ptrdiff_t ByteCount,
104 char ByteValue) override;
105
106 Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
107 HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
108
109 Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
110 HandleDestructor getFreeHostMemoryHandleDestructor() override;
111
112 Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
113 void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
114 ptrdiff_t ByteCount, void *Stream) override;
115 Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
116 void *HostDst, ptrdiff_t ByteCount,
117 void *Stream) override;
118 Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
119 ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
120 void *Stream) override;
121
122 Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
123 ptrdiff_t ByteCount, char ByteValue,
124 void *Stream) override;
125
126 Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
127
128 Status enqueueEvent(void *Event, void *Stream) override;
129 bool eventIsDone(void *Event) override;
130 Status eventSync(void *Event) override;
131 Expected<float> getSecondsBetweenEvents(void *StartEvent,
132 void *EndEvent) override;
133
134 Expected<void *> rawCreateKernel(void *Program,
135 const std::string &Name) override;
136 HandleDestructor getKernelHandleDestructor() override;
137
138 Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
139 KernelLaunchDimensions LaunchDimensions,
140 Span<void *> Arguments,
141 Span<size_t> ArgumentSizes,
142 size_t SharedMemoryBytes) override;
143
144private:
145 OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs,
146 std::vector<cl_context> &&Contexts,
147 std::vector<cl_command_queue> &&CommandQueues)
148 : FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)),
149 CommandQueues(std::move(CommandQueues)) {}
150
151 std::vector<FullDeviceID> FullDeviceIDs;
152 std::vector<cl_context> Contexts;
153 std::vector<cl_command_queue> CommandQueues;
154};
155
156Expected<OpenCLPlatform> OpenCLPlatform::create() {
157 constexpr cl_uint MaxNumEntries = 100;
158 cl_platform_id Platforms[MaxNumEntries];
159 cl_uint NumPlatforms;
160 if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms))
161 return getOpenCLError(Result, "clGetPlatformIDs");
162
163 std::vector<FullDeviceID> FullDeviceIDs;
164 for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms;
165 ++PlatformIndex) {
166 cl_uint NumDevices;
167 cl_device_id Devices[MaxNumEntries];
168 if (cl_int Result =
169 clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL,
170 MaxNumEntries, Devices, &NumDevices))
171 return getOpenCLError(Result, "clGetDeviceIDs");
172 for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex)
173 FullDeviceIDs.emplace_back(Platforms[PlatformIndex],
174 Devices[DeviceIndex]);
175 }
176
177 if (FullDeviceIDs.empty())
178 return Status("No OpenCL device available on this system.");
179
180 std::vector<cl_context> Contexts(FullDeviceIDs.size());
181 std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size());
182 for (size_t I = 0; I < FullDeviceIDs.size(); ++I) {
183 cl_int CreateContextResult;
184 Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID,
185 nullptr, nullptr, &CreateContextResult);
186 if (CreateContextResult)
187 return getOpenCLError(CreateContextResult, "clCreateContext");
188
189 cl_int CreateCommandQueueResult;
190 CommandQueues[I] = clCreateCommandQueue(
191 Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE,
192 &CreateCommandQueueResult);
193 if (CreateCommandQueueResult)
194 return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue");
195 }
196
197 return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts),
198 std::move(CommandQueues));
199}
200
201Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); }
202
203Status OpenCLPlatform::setActiveDeviceForThread(int DeviceIndex) {
204 if (static_cast<size_t>(DeviceIndex) >= FullDeviceIDs.size())
205 return Status("Could not set active device index to " +
206 std::to_string(DeviceIndex) + " because there are only " +
207 std::to_string(FullDeviceIDs.size()) +
208 " devices in the system");
209 ActiveDeviceIndex = DeviceIndex;
210 return Status();
211}
212
213int OpenCLPlatform::getActiveDeviceForThread() { return ActiveDeviceIndex; }
214
215static void openCLDestroyStream(void *H) {
216 logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)),
217 "clReleaseCommandQueue");
218}
219
220Expected<Stream> OpenCLPlatform::createStream() {
221 cl_int Result;
222 cl_command_queue Queue = clCreateCommandQueue(
223 Contexts[ActiveDeviceIndex], FullDeviceIDs[ActiveDeviceIndex].DeviceID,
224 CL_QUEUE_PROFILING_ENABLE, &Result);
225 if (Result)
226 return getOpenCLError(Result, "clCreateCommandQueue");
227 return constructStream(this, Queue, openCLDestroyStream);
228}
229
230static void openCLEventDestroy(void *H) {
231 cl_event *CLEvent = static_cast<cl_event *>(H);
232 logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent");
233 delete CLEvent;
234}
235
236Status OpenCLPlatform::streamSync(void *Stream) {
237 return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)),
238 "clFinish");
239}
240
241Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) {
242 cl_event *CLEvent = static_cast<cl_event *>(Event);
243 return getOpenCLError(
244 clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1,
245 CLEvent, nullptr),
246 "clEnqueueMarkerWithWaitList");
247}
248
249Expected<Event> OpenCLPlatform::createEvent() {
250 cl_int Result;
251 cl_event Event = clCreateUserEvent(Contexts[ActiveDeviceIndex], &Result);
252 if (Result)
253 return getOpenCLError(Result, "clCreateUserEvent");
254 if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE))
255 return getOpenCLError(Result, "clSetUserEventStatus");
256 return constructEvent(this, new cl_event(Event), openCLEventDestroy);
257}
258
259static void openCLDestroyProgram(void *H) {
260 logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)),
261 "clReleaseProgram");
262}
263
264Expected<Program>
265OpenCLPlatform::createProgramFromSource(Span<const char> Source) {
266 cl_int Error;
267 const char *CSource = Source.data();
268 size_t SourceSize = Source.size();
269 cl_program Program = clCreateProgramWithSource(Contexts[ActiveDeviceIndex], 1,
270 &CSource, &SourceSize, &Error);
271 if (Error)
272 return getOpenCLError(Error, "clCreateProgramWithSource");
273 cl_device_id DeviceID = FullDeviceIDs[ActiveDeviceIndex].DeviceID;
274 if (cl_int Error =
275 clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr))
276 return getOpenCLError(Error, "clBuildProgram");
277 return constructProgram(this, Program, openCLDestroyProgram);
278}
279
280Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount) {
281 cl_int Result;
282 cl_mem Memory = clCreateBuffer(Contexts[ActiveDeviceIndex], CL_MEM_READ_WRITE,
283 ByteCount, nullptr, &Result);
284 if (Result)
285 return getOpenCLError(Result, "clCreateBuffer");
286 return reinterpret_cast<void *>(Memory);
287}
288
289static void openCLDestroyDeviceMemory(void *H) {
290 logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)),
291 "clReleaseMemObject");
292}
293
294HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() {
295 return openCLDestroyDeviceMemory;
296}
297
298void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle,
299 size_t ByteSize,
300 size_t ByteOffset) {
301 cl_int Error;
302 cl_buffer_region Region;
303 Region.origin = ByteOffset;
304 Region.size = ByteSize;
305 cl_mem SubBuffer =
306 clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0,
307 CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error);
308 logOpenCLWarning(Error, "clCreateSubBuffer");
309 if (Error)
310 return nullptr;
311 return SubBuffer;
312}
313
314void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) {
315 openCLDestroyDeviceMemory(Handle);
316}
317
318Expected<void *>
319OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/) {
320 // This doesn't seem to have any equivalent in OpenCL.
321 return Status("not implemented");
322}
323
324Expected<ptrdiff_t>
325OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/) {
326 // This doesn't seem to have any equivalent in OpenCL.
327 return Status("not implemented");
328}
329
330Status OpenCLPlatform::rawCopyDToD(const void *DeviceSrc,
331 ptrdiff_t DeviceSrcByteOffset,
332 void *DeviceDst,
333 ptrdiff_t DeviceDstByteOffset,
334 ptrdiff_t ByteCount) {
335 cl_event DoneEvent;
336 if (cl_int Result = clEnqueueCopyBuffer(
337 CommandQueues[ActiveDeviceIndex],
338 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
339 static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
340 DeviceDstByteOffset, ByteCount, 0, nullptr, &DoneEvent))
341 return getOpenCLError(Result, "clEnqueueCopyBuffer");
342 return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents");
343}
344
345Status OpenCLPlatform::rawCopyDToH(const void *DeviceSrc,
346 ptrdiff_t DeviceSrcByteOffset, void *HostDst,
347 ptrdiff_t ByteCount) {
348 cl_event DoneEvent;
349 if (cl_int Result = clEnqueueReadBuffer(
350 CommandQueues[ActiveDeviceIndex],
351 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)), CL_TRUE,
352 DeviceSrcByteOffset, ByteCount, HostDst, 0, nullptr, &DoneEvent))
353 return getOpenCLError(Result, "clEnqueueReadBuffer");
354 return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents");
355}
356
357Status OpenCLPlatform::rawCopyHToD(const void *HostSrc, void *DeviceDst,
358 ptrdiff_t DeviceDstByteOffset,
359 ptrdiff_t ByteCount) {
360 cl_event DoneEvent;
361 if (cl_int Result = clEnqueueWriteBuffer(
362 CommandQueues[ActiveDeviceIndex], static_cast<cl_mem>(DeviceDst),
363 CL_TRUE, DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
364 &DoneEvent))
365 return getOpenCLError(Result, "clEnqueueWriteBuffer");
366 return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents");
367}
368
369Status OpenCLPlatform::rawMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
370 ptrdiff_t ByteCount, char ByteValue) {
371 cl_event DoneEvent;
372 if (cl_int Result = clEnqueueFillBuffer(
373 CommandQueues[ActiveDeviceIndex], static_cast<cl_mem>(DeviceDst),
374 &ByteValue, 1, ByteOffset, ByteCount, 0, nullptr, &DoneEvent))
375 return getOpenCLError(Result, "clEnqueueFillBuffer");
376 return getOpenCLError(clWaitForEvents(1, &DoneEvent), "clWaitForEvents");
377}
378
379static void noOpHandleDestructor(void *) {}
380
381Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
382 ptrdiff_t /*ByteCount*/) {
383 // TODO(jhen): Do we want to do something to pin the memory here?
384 return Status();
385}
386
387HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
388 // TODO(jhen): Do we want to unpin the memory here?
389 return noOpHandleDestructor;
390}
391
392Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
393 // TODO(jhen): Do we want to do something to pin the memory here?
394 return std::malloc(ByteCount);
395}
396
397static void freeMemoryHandleDestructor(void *Memory) {
398 // TODO(jhen): Do we want to unpin the memory here?
399 std::free(Memory);
400}
401
402HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
403 return freeMemoryHandleDestructor;
404}
405
406Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc,
407 ptrdiff_t DeviceSrcByteOffset,
408 void *DeviceDst,
409 ptrdiff_t DeviceDstByteOffset,
410 ptrdiff_t ByteCount, void *Stream) {
411 return getOpenCLError(
412 clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream),
413 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
414 static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
415 DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr),
416 "clEnqueueCopyBuffer");
417}
418
419Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc,
420 ptrdiff_t DeviceSrcByteOffset,
421 void *HostDst, ptrdiff_t ByteCount,
422 void *Stream) {
423 return getOpenCLError(
424 clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream),
425 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
426 CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0,
427 nullptr, nullptr),
428 "clEnqueueReadBuffer");
429}
430
431Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
432 ptrdiff_t DeviceDstByteOffset,
433 ptrdiff_t ByteCount, void *Stream) {
434 return getOpenCLError(
435 clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream),
436 static_cast<cl_mem>(DeviceDst), CL_TRUE,
437 DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
438 nullptr),
439 "clEnqueueWriteBuffer");
440}
441
442Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
443 ptrdiff_t ByteCount, char ByteValue,
444 void *Stream) {
445 return getOpenCLError(
446 clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream),
447 static_cast<cl_mem>(DeviceDst), &ByteValue, 1,
448 ByteOffset, ByteCount, 0, nullptr, nullptr),
449 "clEnqueueFillBuffer");
450}
451
452struct StreamCallbackUserData {
453 StreamCallbackUserData(Stream &TheStream, StreamCallback Function,
454 cl_event EndEvent)
455 : TheStream(TheStream), TheFunction(std::move(Function)),
456 EndEvent(EndEvent) {}
457
458 Stream &TheStream;
459 StreamCallback TheFunction;
460 cl_event EndEvent;
461};
462
463// A function with the right signature to pass to clSetEventCallback.
464void CL_CALLBACK openCLStreamCallbackShim(cl_event,
465 cl_int EventCommandExecStatus,
466 void *UserData) {
467 std::unique_ptr<StreamCallbackUserData> Data(
468 static_cast<StreamCallbackUserData *>(UserData));
469 Data->TheFunction(
470 Data->TheStream,
471 getOpenCLError(EventCommandExecStatus, "stream callback error state"));
472 if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE))
473 logOpenCLWarning(Result, "clSetUserEventStatus");
474 if (cl_int Result = clReleaseEvent(Data->EndEvent))
475 logOpenCLWarning(Result, "clReleaseEvent");
476}
477
478Status OpenCLPlatform::addStreamCallback(Stream &TheStream,
479 StreamCallback Callback) {
480 cl_int Result;
481 cl_event StartEvent = clCreateUserEvent(Contexts[ActiveDeviceIndex], &Result);
482 if (Result)
483 return getOpenCLError(Result, "clCreateUserEvent");
484 cl_event EndEvent = clCreateUserEvent(Contexts[ActiveDeviceIndex], &Result);
485 if (Result)
486 return getOpenCLError(Result, "clCreateUserEvent");
487 cl_event StartBarrierEvent;
488 if (cl_int Result = clEnqueueBarrierWithWaitList(
489 static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
490 &StartEvent, &StartBarrierEvent))
491 return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
492
493 if (cl_int Result = clEnqueueBarrierWithWaitList(
494 static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
495 &EndEvent, nullptr))
496 return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
497
498 std::unique_ptr<StreamCallbackUserData> UserData(
499 new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent));
500 if (cl_int Result =
501 clSetEventCallback(StartBarrierEvent, CL_RUNNING,
502 openCLStreamCallbackShim, UserData.release()))
503 return getOpenCLError(Result, "clSetEventCallback");
504
505 if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE))
506 return getOpenCLError(Result, "clSetUserEventStatus");
507
508 if (cl_int Result = clReleaseEvent(StartBarrierEvent))
509 return getOpenCLError(Result, "clReleaseEvent");
510
511 return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent");
512}
513
514Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) {
515 cl_event *CLEvent = static_cast<cl_event *>(Event);
516 cl_event OldEvent = *CLEvent;
517 cl_event NewEvent;
518 if (cl_int Result = clEnqueueMarkerWithWaitList(
519 static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent))
520 return getOpenCLError(Result, "clEnqueueMarkerWithWaitList");
521 *CLEvent = NewEvent;
522 return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent");
523}
524
525bool OpenCLPlatform::eventIsDone(void *Event) {
526 cl_event *CLEvent = static_cast<cl_event *>(Event);
527 cl_int EventStatus;
528 logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
529 sizeof(EventStatus), &EventStatus, nullptr),
530 "clGetEventInfo");
531 return EventStatus == CL_COMPLETE || EventStatus < 0;
532}
533
534Status OpenCLPlatform::eventSync(void *Event) {
535 cl_event *CLEvent = static_cast<cl_event *>(Event);
536 return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents");
537}
538
539Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent,
540 void *EndEvent) {
541 cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent);
542 cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent);
543
544 cl_profiling_info ParamName = CL_PROFILING_COMMAND_END;
545 cl_ulong StartNanoseconds;
546 cl_ulong EndNanoseconds;
547 if (cl_int Result =
548 clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong),
549 &StartNanoseconds, nullptr))
550 return getOpenCLError(Result, "clGetEventProfilingInfo");
551 if (cl_int Result = clGetEventProfilingInfo(
552 *CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr))
553 return getOpenCLError(Result, "clGetEventProfilingInfo");
554 return (EndNanoseconds - StartNanoseconds) * 1e-12;
555}
556
557Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program,
558 const std::string &Name) {
559
560 cl_int Error;
561 cl_kernel Kernel =
562 clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error);
563 if (Error)
564 return getOpenCLError(Error, "clCreateKernel");
565 return Kernel;
566}
567
568static void openCLDestroyKernel(void *H) {
569 logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)),
570 "clReleaseKernel");
571}
572
573HandleDestructor OpenCLPlatform::getKernelHandleDestructor() {
574 return openCLDestroyKernel;
575}
576
577Status OpenCLPlatform::rawEnqueueKernelLaunch(
578 void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
579 Span<void *> Arguments, Span<size_t> ArgumentSizes,
580 size_t SharedMemoryBytes) {
581 if (SharedMemoryBytes != 0)
582 return Status("OpenCL kernel launches only accept zero for the shared "
583 "memory byte size");
584 cl_kernel TheKernel = static_cast<cl_kernel>(Kernel);
585 for (int I = 0; I < Arguments.size(); ++I)
586 if (cl_int Error =
587 clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I]))
588 return getOpenCLError(Error, "clSetKernelArg");
589 size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY,
590 LaunchDimensions.BlockZ};
591 size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX,
592 LaunchDimensions.BlockY * LaunchDimensions.GridY,
593 LaunchDimensions.BlockZ * LaunchDimensions.GridZ};
594 return getOpenCLError(
595 clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel,
596 3, nullptr, GlobalWorkSize, LocalWorkSize, 0,
597 nullptr, nullptr),
598 "clEnqueueNDRangeKernel");
599}
600
601} // namespace
602
603namespace opencl {
604
605/// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
606/// Platform.
607Expected<Platform *> getPlatform() {
608 static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> {
609 Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create();
610 if (CreationResult.isError())
611 return CreationResult.getError();
612 else
613 return new OpenCLPlatform(CreationResult.takeValue());
614 }();
615 return MaybePlatform;
616}
617
618} // namespace opencl
619
620} // namespace acxxel