blob: 0c2d9b6f285e017b1aec25104e482ef9aaa1043b [file] [log] [blame]
Jason Henlineac232dd2016-10-25 20:18:56 +00001//===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// This file defines the standard OpenCL implementation of the Acxxel API.
11///
12//===----------------------------------------------------------------------===//
13
14#include "acxxel.h"
15
16#include "CL/cl.h"
17
18#include <mutex>
19#include <sstream>
20#include <utility>
21#include <vector>
22
23namespace acxxel {
24
25namespace {
26
27/// An ID containing the platform ID and the device ID within the platform.
28struct FullDeviceID {
29 cl_platform_id PlatformID;
30 cl_device_id DeviceID;
31
32 FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID)
33 : PlatformID(PlatformID), DeviceID(DeviceID) {}
34};
35
Jason Henlineac232dd2016-10-25 20:18:56 +000036static std::string getOpenCLErrorMessage(cl_int Result) {
37 if (!Result)
38 return "success";
39 std::ostringstream OutStream;
40 OutStream << "OpenCL error: code = " << Result;
41 return OutStream.str();
42}
43
44static Status getOpenCLError(cl_int Result, const std::string &Message) {
45 if (!Result)
46 return Status();
47 std::ostringstream OutStream;
48 OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message;
49 return Status(OutStream.str());
50}
51
52static void logOpenCLWarning(cl_int Result, const std::string &Message) {
53 if (Result) {
54 std::ostringstream OutStream;
55 OutStream << Message << ": " << getOpenCLErrorMessage(Result);
56 logWarning(OutStream.str());
57 }
58}
59
60class OpenCLPlatform : public Platform {
61public:
62 ~OpenCLPlatform() override = default;
63
64 static Expected<OpenCLPlatform> create();
65
66 Expected<int> getDeviceCount() override;
67
Jason Henlinebdc410b2016-10-28 00:54:02 +000068 Expected<Stream> createStream(int DeviceIndex) override;
Jason Henlineac232dd2016-10-25 20:18:56 +000069
Jason Henlinebdc410b2016-10-28 00:54:02 +000070 Expected<Event> createEvent(int DeviceIndex) override;
Jason Henlineac232dd2016-10-25 20:18:56 +000071
Jason Henlinebdc410b2016-10-28 00:54:02 +000072 Expected<Program> createProgramFromSource(Span<const char> Source,
73 int DeviceIndex) override;
Jason Henlineac232dd2016-10-25 20:18:56 +000074
75protected:
76 Status streamSync(void *Stream) override;
77
78 Status streamWaitOnEvent(void *Stream, void *Event) override;
79
Jason Henlinebdc410b2016-10-28 00:54:02 +000080 Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
Jason Henlineac232dd2016-10-25 20:18:56 +000081 HandleDestructor getDeviceMemoryHandleDestructor() override;
82 void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
83 size_t ByteOffset) override;
84 void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
85
Jason Henlinebdc410b2016-10-28 00:54:02 +000086 Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
87 int DeviceIndex) override;
88 Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
89 int DeviceIndex) override;
Jason Henlineac232dd2016-10-25 20:18:56 +000090
91 Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
92 HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
93
94 Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
95 HandleDestructor getFreeHostMemoryHandleDestructor() override;
96
97 Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
98 void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
99 ptrdiff_t ByteCount, void *Stream) override;
100 Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
101 void *HostDst, ptrdiff_t ByteCount,
102 void *Stream) override;
103 Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
104 ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
105 void *Stream) override;
106
107 Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
108 ptrdiff_t ByteCount, char ByteValue,
109 void *Stream) override;
110
111 Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
112
113 Status enqueueEvent(void *Event, void *Stream) override;
114 bool eventIsDone(void *Event) override;
115 Status eventSync(void *Event) override;
116 Expected<float> getSecondsBetweenEvents(void *StartEvent,
117 void *EndEvent) override;
118
119 Expected<void *> rawCreateKernel(void *Program,
120 const std::string &Name) override;
121 HandleDestructor getKernelHandleDestructor() override;
122
123 Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
124 KernelLaunchDimensions LaunchDimensions,
125 Span<void *> Arguments,
126 Span<size_t> ArgumentSizes,
127 size_t SharedMemoryBytes) override;
128
129private:
130 OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs,
131 std::vector<cl_context> &&Contexts,
132 std::vector<cl_command_queue> &&CommandQueues)
133 : FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)),
134 CommandQueues(std::move(CommandQueues)) {}
135
136 std::vector<FullDeviceID> FullDeviceIDs;
137 std::vector<cl_context> Contexts;
138 std::vector<cl_command_queue> CommandQueues;
139};
140
141Expected<OpenCLPlatform> OpenCLPlatform::create() {
142 constexpr cl_uint MaxNumEntries = 100;
143 cl_platform_id Platforms[MaxNumEntries];
144 cl_uint NumPlatforms;
145 if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms))
146 return getOpenCLError(Result, "clGetPlatformIDs");
147
148 std::vector<FullDeviceID> FullDeviceIDs;
149 for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms;
150 ++PlatformIndex) {
151 cl_uint NumDevices;
152 cl_device_id Devices[MaxNumEntries];
153 if (cl_int Result =
154 clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL,
155 MaxNumEntries, Devices, &NumDevices))
156 return getOpenCLError(Result, "clGetDeviceIDs");
157 for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex)
158 FullDeviceIDs.emplace_back(Platforms[PlatformIndex],
159 Devices[DeviceIndex]);
160 }
161
162 if (FullDeviceIDs.empty())
163 return Status("No OpenCL device available on this system.");
164
165 std::vector<cl_context> Contexts(FullDeviceIDs.size());
166 std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size());
167 for (size_t I = 0; I < FullDeviceIDs.size(); ++I) {
168 cl_int CreateContextResult;
169 Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID,
170 nullptr, nullptr, &CreateContextResult);
171 if (CreateContextResult)
172 return getOpenCLError(CreateContextResult, "clCreateContext");
173
174 cl_int CreateCommandQueueResult;
175 CommandQueues[I] = clCreateCommandQueue(
176 Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE,
177 &CreateCommandQueueResult);
178 if (CreateCommandQueueResult)
179 return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue");
180 }
181
182 return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts),
183 std::move(CommandQueues));
184}
185
186Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); }
187
Jason Henlineac232dd2016-10-25 20:18:56 +0000188static void openCLDestroyStream(void *H) {
189 logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)),
190 "clReleaseCommandQueue");
191}
192
Jason Henlinebdc410b2016-10-28 00:54:02 +0000193Expected<Stream> OpenCLPlatform::createStream(int DeviceIndex) {
Jason Henlineac232dd2016-10-25 20:18:56 +0000194 cl_int Result;
195 cl_command_queue Queue = clCreateCommandQueue(
Jason Henlinebdc410b2016-10-28 00:54:02 +0000196 Contexts[DeviceIndex], FullDeviceIDs[DeviceIndex].DeviceID,
Jason Henlineac232dd2016-10-25 20:18:56 +0000197 CL_QUEUE_PROFILING_ENABLE, &Result);
198 if (Result)
199 return getOpenCLError(Result, "clCreateCommandQueue");
Jason Henlinebdc410b2016-10-28 00:54:02 +0000200 return constructStream(this, DeviceIndex, Queue, openCLDestroyStream);
Jason Henlineac232dd2016-10-25 20:18:56 +0000201}
202
203static void openCLEventDestroy(void *H) {
204 cl_event *CLEvent = static_cast<cl_event *>(H);
205 logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent");
206 delete CLEvent;
207}
208
209Status OpenCLPlatform::streamSync(void *Stream) {
210 return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)),
211 "clFinish");
212}
213
214Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) {
215 cl_event *CLEvent = static_cast<cl_event *>(Event);
216 return getOpenCLError(
217 clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1,
218 CLEvent, nullptr),
219 "clEnqueueMarkerWithWaitList");
220}
221
Jason Henlinebdc410b2016-10-28 00:54:02 +0000222Expected<Event> OpenCLPlatform::createEvent(int DeviceIndex) {
Jason Henlineac232dd2016-10-25 20:18:56 +0000223 cl_int Result;
Jason Henlinebdc410b2016-10-28 00:54:02 +0000224 cl_event Event = clCreateUserEvent(Contexts[DeviceIndex], &Result);
Jason Henlineac232dd2016-10-25 20:18:56 +0000225 if (Result)
226 return getOpenCLError(Result, "clCreateUserEvent");
227 if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE))
228 return getOpenCLError(Result, "clSetUserEventStatus");
Jason Henlinebdc410b2016-10-28 00:54:02 +0000229 return constructEvent(this, DeviceIndex, new cl_event(Event),
230 openCLEventDestroy);
Jason Henlineac232dd2016-10-25 20:18:56 +0000231}
232
233static void openCLDestroyProgram(void *H) {
234 logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)),
235 "clReleaseProgram");
236}
237
238Expected<Program>
Jason Henlinebdc410b2016-10-28 00:54:02 +0000239OpenCLPlatform::createProgramFromSource(Span<const char> Source,
240 int DeviceIndex) {
Jason Henlineac232dd2016-10-25 20:18:56 +0000241 cl_int Error;
242 const char *CSource = Source.data();
243 size_t SourceSize = Source.size();
Jason Henlinebdc410b2016-10-28 00:54:02 +0000244 cl_program Program = clCreateProgramWithSource(Contexts[DeviceIndex], 1,
Jason Henlineac232dd2016-10-25 20:18:56 +0000245 &CSource, &SourceSize, &Error);
246 if (Error)
247 return getOpenCLError(Error, "clCreateProgramWithSource");
Jason Henlinebdc410b2016-10-28 00:54:02 +0000248 cl_device_id DeviceID = FullDeviceIDs[DeviceIndex].DeviceID;
Jason Henlineac232dd2016-10-25 20:18:56 +0000249 if (cl_int Error =
250 clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr))
251 return getOpenCLError(Error, "clBuildProgram");
252 return constructProgram(this, Program, openCLDestroyProgram);
253}
254
Jason Henlinebdc410b2016-10-28 00:54:02 +0000255Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount,
256 int DeviceIndex) {
Jason Henlineac232dd2016-10-25 20:18:56 +0000257 cl_int Result;
Jason Henlinebdc410b2016-10-28 00:54:02 +0000258 cl_mem Memory = clCreateBuffer(Contexts[DeviceIndex], CL_MEM_READ_WRITE,
Jason Henlineac232dd2016-10-25 20:18:56 +0000259 ByteCount, nullptr, &Result);
260 if (Result)
261 return getOpenCLError(Result, "clCreateBuffer");
262 return reinterpret_cast<void *>(Memory);
263}
264
265static void openCLDestroyDeviceMemory(void *H) {
266 logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)),
267 "clReleaseMemObject");
268}
269
270HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() {
271 return openCLDestroyDeviceMemory;
272}
273
274void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle,
275 size_t ByteSize,
276 size_t ByteOffset) {
277 cl_int Error;
278 cl_buffer_region Region;
279 Region.origin = ByteOffset;
280 Region.size = ByteSize;
281 cl_mem SubBuffer =
282 clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0,
283 CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error);
284 logOpenCLWarning(Error, "clCreateSubBuffer");
285 if (Error)
286 return nullptr;
287 return SubBuffer;
288}
289
290void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) {
291 openCLDestroyDeviceMemory(Handle);
292}
293
294Expected<void *>
Jason Henlinebdc410b2016-10-28 00:54:02 +0000295OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/,
296 int /*DeviceIndex*/) {
Jason Henlineac232dd2016-10-25 20:18:56 +0000297 // This doesn't seem to have any equivalent in OpenCL.
298 return Status("not implemented");
299}
300
301Expected<ptrdiff_t>
Jason Henlinebdc410b2016-10-28 00:54:02 +0000302OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/,
303 int /*DeviceIndex*/) {
Jason Henlineac232dd2016-10-25 20:18:56 +0000304 // This doesn't seem to have any equivalent in OpenCL.
305 return Status("not implemented");
306}
307
Jason Henlineac232dd2016-10-25 20:18:56 +0000308static void noOpHandleDestructor(void *) {}
309
310Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
311 ptrdiff_t /*ByteCount*/) {
312 // TODO(jhen): Do we want to do something to pin the memory here?
313 return Status();
314}
315
316HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
317 // TODO(jhen): Do we want to unpin the memory here?
318 return noOpHandleDestructor;
319}
320
321Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
322 // TODO(jhen): Do we want to do something to pin the memory here?
323 return std::malloc(ByteCount);
324}
325
326static void freeMemoryHandleDestructor(void *Memory) {
327 // TODO(jhen): Do we want to unpin the memory here?
328 std::free(Memory);
329}
330
331HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
332 return freeMemoryHandleDestructor;
333}
334
335Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc,
336 ptrdiff_t DeviceSrcByteOffset,
337 void *DeviceDst,
338 ptrdiff_t DeviceDstByteOffset,
339 ptrdiff_t ByteCount, void *Stream) {
340 return getOpenCLError(
341 clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream),
342 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
343 static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
344 DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr),
345 "clEnqueueCopyBuffer");
346}
347
348Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc,
349 ptrdiff_t DeviceSrcByteOffset,
350 void *HostDst, ptrdiff_t ByteCount,
351 void *Stream) {
352 return getOpenCLError(
353 clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream),
354 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
355 CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0,
356 nullptr, nullptr),
357 "clEnqueueReadBuffer");
358}
359
360Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
361 ptrdiff_t DeviceDstByteOffset,
362 ptrdiff_t ByteCount, void *Stream) {
363 return getOpenCLError(
364 clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream),
365 static_cast<cl_mem>(DeviceDst), CL_TRUE,
366 DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
367 nullptr),
368 "clEnqueueWriteBuffer");
369}
370
371Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
372 ptrdiff_t ByteCount, char ByteValue,
373 void *Stream) {
374 return getOpenCLError(
375 clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream),
376 static_cast<cl_mem>(DeviceDst), &ByteValue, 1,
377 ByteOffset, ByteCount, 0, nullptr, nullptr),
378 "clEnqueueFillBuffer");
379}
380
381struct StreamCallbackUserData {
382 StreamCallbackUserData(Stream &TheStream, StreamCallback Function,
383 cl_event EndEvent)
384 : TheStream(TheStream), TheFunction(std::move(Function)),
385 EndEvent(EndEvent) {}
386
387 Stream &TheStream;
388 StreamCallback TheFunction;
389 cl_event EndEvent;
390};
391
392// A function with the right signature to pass to clSetEventCallback.
393void CL_CALLBACK openCLStreamCallbackShim(cl_event,
394 cl_int EventCommandExecStatus,
395 void *UserData) {
396 std::unique_ptr<StreamCallbackUserData> Data(
397 static_cast<StreamCallbackUserData *>(UserData));
398 Data->TheFunction(
399 Data->TheStream,
400 getOpenCLError(EventCommandExecStatus, "stream callback error state"));
401 if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE))
402 logOpenCLWarning(Result, "clSetUserEventStatus");
403 if (cl_int Result = clReleaseEvent(Data->EndEvent))
404 logOpenCLWarning(Result, "clReleaseEvent");
405}
406
407Status OpenCLPlatform::addStreamCallback(Stream &TheStream,
408 StreamCallback Callback) {
409 cl_int Result;
Jason Henlinebdc410b2016-10-28 00:54:02 +0000410 cl_event StartEvent =
411 clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
Jason Henlineac232dd2016-10-25 20:18:56 +0000412 if (Result)
413 return getOpenCLError(Result, "clCreateUserEvent");
Jason Henlinebdc410b2016-10-28 00:54:02 +0000414 cl_event EndEvent =
415 clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
Jason Henlineac232dd2016-10-25 20:18:56 +0000416 if (Result)
417 return getOpenCLError(Result, "clCreateUserEvent");
418 cl_event StartBarrierEvent;
419 if (cl_int Result = clEnqueueBarrierWithWaitList(
420 static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
421 &StartEvent, &StartBarrierEvent))
422 return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
423
424 if (cl_int Result = clEnqueueBarrierWithWaitList(
425 static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
426 &EndEvent, nullptr))
427 return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
428
429 std::unique_ptr<StreamCallbackUserData> UserData(
430 new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent));
431 if (cl_int Result =
432 clSetEventCallback(StartBarrierEvent, CL_RUNNING,
433 openCLStreamCallbackShim, UserData.release()))
434 return getOpenCLError(Result, "clSetEventCallback");
435
436 if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE))
437 return getOpenCLError(Result, "clSetUserEventStatus");
438
439 if (cl_int Result = clReleaseEvent(StartBarrierEvent))
440 return getOpenCLError(Result, "clReleaseEvent");
441
442 return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent");
443}
444
445Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) {
446 cl_event *CLEvent = static_cast<cl_event *>(Event);
447 cl_event OldEvent = *CLEvent;
448 cl_event NewEvent;
449 if (cl_int Result = clEnqueueMarkerWithWaitList(
450 static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent))
451 return getOpenCLError(Result, "clEnqueueMarkerWithWaitList");
452 *CLEvent = NewEvent;
453 return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent");
454}
455
456bool OpenCLPlatform::eventIsDone(void *Event) {
457 cl_event *CLEvent = static_cast<cl_event *>(Event);
458 cl_int EventStatus;
459 logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
460 sizeof(EventStatus), &EventStatus, nullptr),
461 "clGetEventInfo");
462 return EventStatus == CL_COMPLETE || EventStatus < 0;
463}
464
465Status OpenCLPlatform::eventSync(void *Event) {
466 cl_event *CLEvent = static_cast<cl_event *>(Event);
467 return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents");
468}
469
470Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent,
471 void *EndEvent) {
472 cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent);
473 cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent);
474
475 cl_profiling_info ParamName = CL_PROFILING_COMMAND_END;
476 cl_ulong StartNanoseconds;
477 cl_ulong EndNanoseconds;
478 if (cl_int Result =
479 clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong),
480 &StartNanoseconds, nullptr))
481 return getOpenCLError(Result, "clGetEventProfilingInfo");
482 if (cl_int Result = clGetEventProfilingInfo(
483 *CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr))
484 return getOpenCLError(Result, "clGetEventProfilingInfo");
485 return (EndNanoseconds - StartNanoseconds) * 1e-12;
486}
487
488Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program,
489 const std::string &Name) {
490
491 cl_int Error;
492 cl_kernel Kernel =
493 clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error);
494 if (Error)
495 return getOpenCLError(Error, "clCreateKernel");
496 return Kernel;
497}
498
499static void openCLDestroyKernel(void *H) {
500 logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)),
501 "clReleaseKernel");
502}
503
504HandleDestructor OpenCLPlatform::getKernelHandleDestructor() {
505 return openCLDestroyKernel;
506}
507
508Status OpenCLPlatform::rawEnqueueKernelLaunch(
509 void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
510 Span<void *> Arguments, Span<size_t> ArgumentSizes,
511 size_t SharedMemoryBytes) {
512 if (SharedMemoryBytes != 0)
513 return Status("OpenCL kernel launches only accept zero for the shared "
514 "memory byte size");
515 cl_kernel TheKernel = static_cast<cl_kernel>(Kernel);
516 for (int I = 0; I < Arguments.size(); ++I)
517 if (cl_int Error =
518 clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I]))
519 return getOpenCLError(Error, "clSetKernelArg");
520 size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY,
521 LaunchDimensions.BlockZ};
522 size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX,
523 LaunchDimensions.BlockY * LaunchDimensions.GridY,
524 LaunchDimensions.BlockZ * LaunchDimensions.GridZ};
525 return getOpenCLError(
526 clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel,
527 3, nullptr, GlobalWorkSize, LocalWorkSize, 0,
528 nullptr, nullptr),
529 "clEnqueueNDRangeKernel");
530}
531
532} // namespace
533
534namespace opencl {
535
536/// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
537/// Platform.
538Expected<Platform *> getPlatform() {
539 static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> {
540 Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create();
541 if (CreationResult.isError())
542 return CreationResult.getError();
543 else
544 return new OpenCLPlatform(CreationResult.takeValue());
545 }();
546 return MaybePlatform;
547}
548
549} // namespace opencl
550
551} // namespace acxxel