blob: 0c0f394b90508637da0d26ab4ac010ea1588ece3 [file] [log] [blame]
cristy3f6d1482010-01-20 21:01:21 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7% A A C C E L E R R A A T E %
8% AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9% A A C C E L E R R A A T E %
10% A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11% %
12% %
13% MagickCore Acceleration Methods %
14% %
15% Software Design %
cristy0d127ab2010-05-14 23:29:46 +000016% John Cristy %
cristy3f6d1482010-01-20 21:01:21 +000017% January 2010 %
18% %
19% %
cristy7e41fe82010-12-04 23:12:08 +000020% Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization %
cristy3f6d1482010-01-20 21:01:21 +000021% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% http://www.imagemagick.org/script/license.php %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
cristy0d127ab2010-05-14 23:29:46 +000036% Morphology is the the application of various kernals, of any size and even
cristy3f6d1482010-01-20 21:01:21 +000037% shape, to a image in various ways (typically binary, but not always).
38%
39% Convolution (weighted sum or average) is just one specific type of
40% accelerate. Just one that is very common for image bluring and sharpening
41% effects. Not only 2D Gaussian blurring, but also 2-pass 1D Blurring.
42%
43% This module provides not only a general accelerate function, and the ability
44% to apply more advanced or iterative morphologies, but also functions for the
45% generation of many different types of kernel arrays from user supplied
46% arguments. Prehaps even the generation of a kernel from a small image.
47*/
48
49/*
50 Include declarations.
51*/
cristy4c08aed2011-07-01 19:47:50 +000052#include "MagickCore/studio.h"
53#include "MagickCore/accelerate.h"
54#include "MagickCore/artifact.h"
cristy35f33492011-07-07 16:54:49 +000055#include "MagickCore/cache.h"
cristyd1dd6e42011-09-04 01:46:08 +000056#include "MagickCore/cache-private.h"
cristy4c08aed2011-07-01 19:47:50 +000057#include "MagickCore/cache-view.h"
58#include "MagickCore/color-private.h"
59#include "MagickCore/enhance.h"
60#include "MagickCore/exception.h"
61#include "MagickCore/exception-private.h"
62#include "MagickCore/gem.h"
63#include "MagickCore/hashmap.h"
64#include "MagickCore/image.h"
65#include "MagickCore/image-private.h"
66#include "MagickCore/list.h"
67#include "MagickCore/memory_.h"
68#include "MagickCore/monitor-private.h"
69#include "MagickCore/accelerate.h"
70#include "MagickCore/option.h"
71#include "MagickCore/pixel-accessor.h"
72#include "MagickCore/prepress.h"
73#include "MagickCore/quantize.h"
74#include "MagickCore/registry.h"
75#include "MagickCore/semaphore.h"
76#include "MagickCore/splay-tree.h"
77#include "MagickCore/statistic.h"
78#include "MagickCore/string_.h"
79#include "MagickCore/string-private.h"
80#include "MagickCore/token.h"
cristy3f6d1482010-01-20 21:01:21 +000081
82/*
83%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
84% %
85% %
86% %
87% A c c e l e r a t e C o n v o l v e I m a g e %
88% %
89% %
90% %
91%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
92%
93% AccelerateConvolveImage() applies a custom convolution kernel to the image.
94% It is accelerated by taking advantage of speed-ups offered by executing in
95% concert across heterogeneous platforms consisting of CPUs, GPUs, and other
96% processors.
97%
98% The format of the AccelerateConvolveImage method is:
99%
100% Image *AccelerateConvolveImage(const Image *image,
cristy2be15382010-01-21 02:38:03 +0000101% const KernelInfo *kernel,Image *convolve_image,
cristy3f6d1482010-01-20 21:01:21 +0000102% ExceptionInfo *exception)
103%
104% A description of each parameter follows:
105%
106% o image: the image.
107%
108% o kernel: the convolution kernel.
109%
110% o convole_image: the convoleed image.
111%
112% o exception: return any errors or warnings in this structure.
113%
114*/
cristyd43a46b2010-01-21 02:13:41 +0000115
116#if defined(MAGICKCORE_OPENCL_SUPPORT)
117
118#if defined(MAGICKCORE_HDRI_SUPPORT)
119#define CLOptions "-DMAGICKCORE_HDRI_SUPPORT=1 -DCLQuantum=float " \
120 "-DCLPixelType=float4 -DQuantumRange=%g -DMagickEpsilon=%g"
121#define CLPixelPacket cl_float4
122#else
123#if (MAGICKCORE_QUANTUM_DEPTH == 8)
124#define CLOptions "-DCLQuantum=uchar -DCLPixelType=uchar4 " \
125 "-DQuantumRange=%g -DMagickEpsilon=%g"
126#define CLPixelPacket cl_uchar4
127#elif (MAGICKCORE_QUANTUM_DEPTH == 16)
128#define CLOptions "-DCLQuantum=ushort -DCLPixelType=ushort4 " \
129 "-DQuantumRange=%g -DMagickEpsilon=%g"
130#define CLPixelPacket cl_ushort4
131#elif (MAGICKCORE_QUANTUM_DEPTH == 32)
132#define CLOptions "-DCLQuantum=uint -DCLPixelType=uint4 " \
133 "-DQuantumRange=%g -DMagickEpsilon=%g"
134#define CLPixelPacket cl_uint4
cristy4434d7b2011-09-01 18:19:57 +0000135#elif (MAGICKCORE_QUANTUM_DEPTH == 64)
cristybb503372010-05-27 20:51:26 +0000136#define CLOptions "-DCLQuantum=ussize_t -DCLPixelType=ussize_t4 " \
cristyd43a46b2010-01-21 02:13:41 +0000137 "-DQuantumRange=%g -DMagickEpsilon=%g"
cristy5f959472010-05-27 22:19:46 +0000138#define CLPixelPacket cl_ulong4
cristyd43a46b2010-01-21 02:13:41 +0000139#endif
140#endif
141
142typedef struct _ConvolveInfo
143{
144 cl_context
145 context;
146
147 cl_device_id
148 *devices;
149
150 cl_command_queue
151 command_queue;
152
153 cl_kernel
154 kernel;
155
156 cl_program
157 program;
158
159 cl_mem
160 pixels,
161 convolve_pixels;
162
cristy5f959472010-05-27 22:19:46 +0000163 cl_ulong
cristyd43a46b2010-01-21 02:13:41 +0000164 width,
165 height;
166
cristy966032e2011-09-12 19:12:00 +0000167 cl_uint
cristyd43a46b2010-01-21 02:13:41 +0000168 matte;
169
170 cl_mem
171 filter;
172} ConvolveInfo;
173
174static char
175 *ConvolveKernel =
cristy5f959472010-05-27 22:19:46 +0000176 "static inline long ClampToCanvas(const long offset,const unsigned long range)\n"
cristyd43a46b2010-01-21 02:13:41 +0000177 "{\n"
178 " if (offset < 0L)\n"
179 " return(0L);\n"
180 " if (offset >= range)\n"
cristy5f959472010-05-27 22:19:46 +0000181 " return((long) (range-1L));\n"
cristyd43a46b2010-01-21 02:13:41 +0000182 " return(offset);\n"
183 "}\n"
184 "\n"
cristy966032e2011-09-12 19:12:00 +0000185 "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
cristyd43a46b2010-01-21 02:13:41 +0000186 "static inline CLQuantum ClampToQuantum(const double value)\n"
187 "{\n"
188 "#if defined(MAGICKCORE_HDRI_SUPPORT)\n"
189 " return((CLQuantum) value)\n"
190 "#else\n"
191 " if (value < 0.0)\n"
192 " return((CLQuantum) 0);\n"
193 " if (value >= (double) QuantumRange)\n"
194 " return((CLQuantum) QuantumRange);\n"
195 " return((CLQuantum) (value+0.5));\n"
196 "#endif\n"
197 "}\n"
198 "\n"
199 "__kernel void Convolve(const __global CLPixelType *input,\n"
cristy5f959472010-05-27 22:19:46 +0000200 " __constant double *filter,const unsigned long width,const unsigned long height,\n"
cristy966032e2011-09-12 19:12:00 +0000201 " const unsigned int matte,__global CLPixelType *output)\n"
cristyd43a46b2010-01-21 02:13:41 +0000202 "{\n"
cristy5f959472010-05-27 22:19:46 +0000203 " const unsigned long columns = get_global_size(0);\n"
204 " const unsigned long rows = get_global_size(1);\n"
cristyd43a46b2010-01-21 02:13:41 +0000205 "\n"
cristy5f959472010-05-27 22:19:46 +0000206 " const long x = get_global_id(0);\n"
207 " const long y = get_global_id(1);\n"
cristyd43a46b2010-01-21 02:13:41 +0000208 "\n"
209 " const double scale = (1.0/QuantumRange);\n"
cristy5f959472010-05-27 22:19:46 +0000210 " const long mid_width = (width-1)/2;\n"
211 " const long mid_height = (height-1)/2;\n"
cristyd43a46b2010-01-21 02:13:41 +0000212 " double4 sum = { 0.0, 0.0, 0.0, 0.0 };\n"
213 " double gamma = 0.0;\n"
cristy5f959472010-05-27 22:19:46 +0000214 " register unsigned long i = 0;\n"
cristyd43a46b2010-01-21 02:13:41 +0000215 "\n"
216 " int method = 0;\n"
217 " if (matte != false)\n"
218 " method=1;\n"
219 " if ((x >= width) && (x < (columns-width-1)) &&\n"
220 " (y >= height) && (y < (rows-height-1)))\n"
221 " {\n"
222 " method=2;\n"
223 " if (matte != false)\n"
224 " method=3;\n"
225 " }\n"
226 " switch (method)\n"
227 " {\n"
228 " case 0:\n"
229 " {\n"
cristy5f959472010-05-27 22:19:46 +0000230 " for (long v=(-mid_height); v <= mid_height; v++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000231 " {\n"
cristy5f959472010-05-27 22:19:46 +0000232 " for (long u=(-mid_width); u <= mid_width; u++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000233 " {\n"
cristy5f959472010-05-27 22:19:46 +0000234 " const long index=ClampToCanvas(y+v,rows)*columns+\n"
cristyd43a46b2010-01-21 02:13:41 +0000235 " ClampToCanvas(x+u,columns);\n"
236 " sum.x+=filter[i]*input[index].x;\n"
237 " sum.y+=filter[i]*input[index].y;\n"
238 " sum.z+=filter[i]*input[index].z;\n"
239 " gamma+=filter[i];\n"
240 " i++;\n"
241 " }\n"
242 " }\n"
243 " break;\n"
244 " }\n"
245 " case 1:\n"
246 " {\n"
cristy5f959472010-05-27 22:19:46 +0000247 " for (long v=(-mid_height); v <= mid_height; v++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000248 " {\n"
cristy5f959472010-05-27 22:19:46 +0000249 " for (long u=(-mid_width); u <= mid_width; u++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000250 " {\n"
cristy5f959472010-05-27 22:19:46 +0000251 " const unsigned long index=ClampToCanvas(y+v,rows)*columns+\n"
cristyd43a46b2010-01-21 02:13:41 +0000252 " ClampToCanvas(x+u,columns);\n"
cristy4c08aed2011-07-01 19:47:50 +0000253 " const double alpha=scale*input[index].w;\n"
cristyd43a46b2010-01-21 02:13:41 +0000254 " sum.x+=alpha*filter[i]*input[index].x;\n"
255 " sum.y+=alpha*filter[i]*input[index].y;\n"
256 " sum.z+=alpha*filter[i]*input[index].z;\n"
257 " sum.w+=filter[i]*input[index].w;\n"
258 " gamma+=alpha*filter[i];\n"
259 " i++;\n"
260 " }\n"
261 " }\n"
262 " break;\n"
263 " }\n"
264 " case 2:\n"
265 " {\n"
cristy5f959472010-05-27 22:19:46 +0000266 " for (long v=(-mid_height); v <= mid_height; v++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000267 " {\n"
cristy5f959472010-05-27 22:19:46 +0000268 " for (long u=(-mid_width); u <= mid_width; u++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000269 " {\n"
cristy5f959472010-05-27 22:19:46 +0000270 " const unsigned long index=(y+v)*columns+(x+u);\n"
cristyd43a46b2010-01-21 02:13:41 +0000271 " sum.x+=filter[i]*input[index].x;\n"
272 " sum.y+=filter[i]*input[index].y;\n"
273 " sum.z+=filter[i]*input[index].z;\n"
274 " gamma+=filter[i];\n"
275 " i++;\n"
276 " }\n"
277 " }\n"
278 " break;\n"
279 " }\n"
280 " case 3:\n"
281 " {\n"
cristy5f959472010-05-27 22:19:46 +0000282 " for (long v=(-mid_height); v <= mid_height; v++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000283 " {\n"
cristy5f959472010-05-27 22:19:46 +0000284 " for (long u=(-mid_width); u <= mid_width; u++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000285 " {\n"
cristy5f959472010-05-27 22:19:46 +0000286 " const unsigned long index=(y+v)*columns+(x+u);\n"
cristy4c08aed2011-07-01 19:47:50 +0000287 " const double alpha=scale*input[index].w;\n"
cristyd43a46b2010-01-21 02:13:41 +0000288 " sum.x+=alpha*filter[i]*input[index].x;\n"
289 " sum.y+=alpha*filter[i]*input[index].y;\n"
290 " sum.z+=alpha*filter[i]*input[index].z;\n"
291 " sum.w+=filter[i]*input[index].w;\n"
292 " gamma+=alpha*filter[i];\n"
293 " i++;\n"
294 " }\n"
295 " }\n"
296 " break;\n"
297 " }\n"
298 " }\n"
299 " gamma=1.0/(fabs(gamma) <= MagickEpsilon ? 1.0 : gamma);\n"
cristy5f959472010-05-27 22:19:46 +0000300 " const unsigned long index = y*columns+x;\n"
cristyd43a46b2010-01-21 02:13:41 +0000301 " output[index].x=ClampToQuantum(gamma*sum.x);\n"
302 " output[index].y=ClampToQuantum(gamma*sum.y);\n"
303 " output[index].z=ClampToQuantum(gamma*sum.z);\n"
304 " if (matte == false)\n"
305 " output[index].w=input[index].w;\n"
306 " else\n"
307 " output[index].w=ClampToQuantum(sum.w);\n"
308 "}\n";
309
310static void ConvolveNotify(const char *message,const void *data,size_t length,
311 void *user_context)
312{
313 ExceptionInfo
314 *exception;
315
316 (void) data;
317 (void) length;
318 exception=(ExceptionInfo *) user_context;
cristy32cca402010-01-23 04:02:23 +0000319 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
320 "DelegateFailed","`%s'",message);
cristyd43a46b2010-01-21 02:13:41 +0000321}
322
323static MagickBooleanType BindConvolveParameters(ConvolveInfo *convolve_info,
cristyc8523c12011-09-13 00:02:53 +0000324 const Image *image,const void *pixels,double *filter,const size_t width,
325 const size_t height,void *convolve_pixels)
cristyd43a46b2010-01-21 02:13:41 +0000326{
327 cl_int
328 status;
329
cristy5f959472010-05-27 22:19:46 +0000330 register cl_uint
cristyd43a46b2010-01-21 02:13:41 +0000331 i;
332
333 size_t
334 length;
335
336 /*
337 Allocate OpenCL buffers.
338 */
339 length=image->columns*image->rows;
cristy5f959472010-05-27 22:19:46 +0000340 convolve_info->pixels=clCreateBuffer(convolve_info->context,(cl_mem_flags)
341 (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(CLPixelPacket),
342 (void *) pixels,&status);
cristyd43a46b2010-01-21 02:13:41 +0000343 if ((convolve_info->pixels == (cl_mem) NULL) || (status != CL_SUCCESS))
344 return(MagickFalse);
345 length=width*height;
cristy5f959472010-05-27 22:19:46 +0000346 convolve_info->filter=clCreateBuffer(convolve_info->context,(cl_mem_flags)
347 (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(cl_double),filter,
348 &status);
cristyd43a46b2010-01-21 02:13:41 +0000349 if ((convolve_info->filter == (cl_mem) NULL) || (status != CL_SUCCESS))
350 return(MagickFalse);
351 length=image->columns*image->rows;
352 convolve_info->convolve_pixels=clCreateBuffer(convolve_info->context,
cristy5f959472010-05-27 22:19:46 +0000353 (cl_mem_flags) (CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR),length*
354 sizeof(CLPixelPacket),convolve_pixels,&status);
cristyd43a46b2010-01-21 02:13:41 +0000355 if ((convolve_info->convolve_pixels == (cl_mem) NULL) ||
356 (status != CL_SUCCESS))
357 return(MagickFalse);
358 /*
359 Bind OpenCL buffers.
360 */
361 i=0;
362 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *)
363 &convolve_info->pixels);
364 if (status != CL_SUCCESS)
365 return(MagickFalse);
366 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *)
367 &convolve_info->filter);
368 if (status != CL_SUCCESS)
369 return(MagickFalse);
cristy5f959472010-05-27 22:19:46 +0000370 convolve_info->width=(cl_ulong) width;
371 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *)
cristyd43a46b2010-01-21 02:13:41 +0000372 &convolve_info->width);
373 if (status != CL_SUCCESS)
374 return(MagickFalse);
cristy5f959472010-05-27 22:19:46 +0000375 convolve_info->height=(cl_ulong) height;
376 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *)
cristyd43a46b2010-01-21 02:13:41 +0000377 &convolve_info->height);
378 if (status != CL_SUCCESS)
379 return(MagickFalse);
cristy966032e2011-09-12 19:12:00 +0000380 convolve_info->matte=(cl_uint) image->matte;
381 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_uint),(void *)
cristyd43a46b2010-01-21 02:13:41 +0000382 &convolve_info->matte);
383 if (status != CL_SUCCESS)
384 return(MagickFalse);
385 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *)
386 &convolve_info->convolve_pixels);
387 if (status != CL_SUCCESS)
388 return(MagickFalse);
389 status=clFinish(convolve_info->command_queue);
390 if (status != CL_SUCCESS)
391 return(MagickFalse);
392 return(MagickTrue);
393}
394
395static void DestroyConvolveBuffers(ConvolveInfo *convolve_info)
396{
397 cl_int
398 status;
399
cristyf864d422011-09-12 17:57:53 +0000400 (void) status;
cristyd43a46b2010-01-21 02:13:41 +0000401 if (convolve_info->convolve_pixels != (cl_mem) NULL)
402 status=clReleaseMemObject(convolve_info->convolve_pixels);
403 if (convolve_info->pixels != (cl_mem) NULL)
404 status=clReleaseMemObject(convolve_info->pixels);
405 if (convolve_info->filter != (cl_mem) NULL)
406 status=clReleaseMemObject(convolve_info->filter);
407}
408
409static ConvolveInfo *DestroyConvolveInfo(ConvolveInfo *convolve_info)
410{
411 cl_int
412 status;
413
cristyf864d422011-09-12 17:57:53 +0000414 (void) status;
cristyd43a46b2010-01-21 02:13:41 +0000415 if (convolve_info->kernel != (cl_kernel) NULL)
416 status=clReleaseKernel(convolve_info->kernel);
417 if (convolve_info->program != (cl_program) NULL)
418 status=clReleaseProgram(convolve_info->program);
419 if (convolve_info->command_queue != (cl_command_queue) NULL)
420 status=clReleaseCommandQueue(convolve_info->command_queue);
421 if (convolve_info->context != (cl_context) NULL)
422 status=clReleaseContext(convolve_info->context);
423 convolve_info=(ConvolveInfo *) RelinquishMagickMemory(convolve_info);
424 return(convolve_info);
425}
426
427static MagickBooleanType EnqueueConvolveKernel(ConvolveInfo *convolve_info,
cristyc8523c12011-09-13 00:02:53 +0000428 const Image *image,const void *pixels,double *filter,const size_t width,
429 const size_t height,void *convolve_pixels)
cristyd43a46b2010-01-21 02:13:41 +0000430{
431 cl_int
432 status;
433
434 size_t
435 global_work_size[2],
436 length;
437
438 length=image->columns*image->rows;
439 status=clEnqueueWriteBuffer(convolve_info->command_queue,
440 convolve_info->pixels,CL_TRUE,0,length*sizeof(CLPixelPacket),pixels,0,NULL,
441 NULL);
442 length=width*height;
443 status=clEnqueueWriteBuffer(convolve_info->command_queue,
444 convolve_info->filter,CL_TRUE,0,length*sizeof(cl_double),filter,0,NULL,
445 NULL);
446 if (status != CL_SUCCESS)
447 return(MagickFalse);
448 global_work_size[0]=image->columns;
449 global_work_size[1]=image->rows;
450 status=clEnqueueNDRangeKernel(convolve_info->command_queue,
451 convolve_info->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
452 if (status != CL_SUCCESS)
453 return(MagickFalse);
454 length=image->columns*image->rows;
455 status=clEnqueueReadBuffer(convolve_info->command_queue,
456 convolve_info->convolve_pixels,CL_TRUE,0,length*sizeof(CLPixelPacket),
457 convolve_pixels,0,NULL,NULL);
458 if (status != CL_SUCCESS)
459 return(MagickFalse);
460 status=clFinish(convolve_info->command_queue);
461 if (status != CL_SUCCESS)
462 return(MagickFalse);
463 return(MagickTrue);
464}
465
466static ConvolveInfo *GetConvolveInfo(const Image *image,const char *name,
467 const char *source,ExceptionInfo *exception)
468{
469 char
470 options[MaxTextExtent];
471
cristy966032e2011-09-12 19:12:00 +0000472 cl_context_properties
473 context_properties[3];
474
cristyd43a46b2010-01-21 02:13:41 +0000475 cl_int
476 status;
477
cristy966032e2011-09-12 19:12:00 +0000478 cl_platform_id
479 platforms[1];
480
481 cl_uint
482 number_platforms;
483
cristyd43a46b2010-01-21 02:13:41 +0000484 ConvolveInfo
485 *convolve_info;
486
487 size_t
488 length,
489 lengths[] = { strlen(source) };
490
491 /*
492 Create OpenCL info.
493 */
cristy73bd4a52010-10-05 11:24:23 +0000494 convolve_info=(ConvolveInfo *) AcquireMagickMemory(sizeof(*convolve_info));
cristyd43a46b2010-01-21 02:13:41 +0000495 if (convolve_info == (ConvolveInfo *) NULL)
496 {
497 (void) ThrowMagickException(exception,GetMagickModule(),
498 ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
499 return((ConvolveInfo *) NULL);
500 }
501 (void) ResetMagickMemory(convolve_info,0,sizeof(*convolve_info));
502 /*
503 Create OpenCL context.
504 */
cristy966032e2011-09-12 19:12:00 +0000505 status=clGetPlatformIDs(0,NULL,&number_platforms);
506 if (status == CL_SUCCESS)
507 status=clGetPlatformIDs(1,platforms,NULL);
508 if (status != CL_SUCCESS)
509 {
510 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
511 "failed to create OpenCL context","`%s' (%d)",image->filename,status);
512 convolve_info=DestroyConvolveInfo(convolve_info);
513 return((ConvolveInfo *) NULL);
514 }
515 context_properties[0]=CL_CONTEXT_PLATFORM;
516 context_properties[1]=(cl_context_properties) platforms[0];
517 context_properties[2]=0;
518 convolve_info->context=clCreateContextFromType(context_properties,
519 (cl_device_type) CL_DEVICE_TYPE_GPU,ConvolveNotify,exception,&status);
cristyd43a46b2010-01-21 02:13:41 +0000520 if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS))
cristy966032e2011-09-12 19:12:00 +0000521 convolve_info->context=clCreateContextFromType(context_properties,
522 (cl_device_type) CL_DEVICE_TYPE_CPU,ConvolveNotify,exception,&status);
cristyd43a46b2010-01-21 02:13:41 +0000523 if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS))
cristy966032e2011-09-12 19:12:00 +0000524 convolve_info->context=clCreateContextFromType(context_properties,
525 (cl_device_type) CL_DEVICE_TYPE_DEFAULT,ConvolveNotify,exception,&status);
cristyd43a46b2010-01-21 02:13:41 +0000526 if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS))
527 {
cristy32cca402010-01-23 04:02:23 +0000528 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
cristyd43a46b2010-01-21 02:13:41 +0000529 "failed to create OpenCL context","`%s' (%d)",image->filename,status);
cristy5f959472010-05-27 22:19:46 +0000530 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000531 return((ConvolveInfo *) NULL);
532 }
533 /*
534 Detect OpenCL devices.
535 */
536 status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,0,NULL,
537 &length);
538 if ((status != CL_SUCCESS) || (length == 0))
539 {
cristy5f959472010-05-27 22:19:46 +0000540 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000541 return((ConvolveInfo *) NULL);
542 }
543 convolve_info->devices=(cl_device_id *) AcquireMagickMemory(length);
544 if (convolve_info->devices == (cl_device_id *) NULL)
545 {
546 (void) ThrowMagickException(exception,GetMagickModule(),
547 ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
cristy5f959472010-05-27 22:19:46 +0000548 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000549 return((ConvolveInfo *) NULL);
550 }
551 status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,length,
552 convolve_info->devices,NULL);
553 if (status != CL_SUCCESS)
554 {
cristy5f959472010-05-27 22:19:46 +0000555 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000556 return((ConvolveInfo *) NULL);
557 }
558 /*
559 Create OpenCL command queue.
560 */
561 convolve_info->command_queue=clCreateCommandQueue(convolve_info->context,
562 convolve_info->devices[0],0,&status);
563 if ((convolve_info->command_queue == (cl_command_queue) NULL) ||
564 (status != CL_SUCCESS))
565 {
cristy5f959472010-05-27 22:19:46 +0000566 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000567 return((ConvolveInfo *) NULL);
568 }
569 /*
570 Build OpenCL program.
571 */
572 convolve_info->program=clCreateProgramWithSource(convolve_info->context,1,
573 &source,lengths,&status);
574 if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS))
575 {
cristy5f959472010-05-27 22:19:46 +0000576 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000577 return((ConvolveInfo *) NULL);
578 }
cristyb51dff52011-05-19 16:55:47 +0000579 (void) FormatLocaleString(options,MaxTextExtent,CLOptions,(double)
cristyd43a46b2010-01-21 02:13:41 +0000580 QuantumRange,MagickEpsilon);
cristy00243d12010-01-21 02:45:27 +0000581 status=clBuildProgram(convolve_info->program,1,convolve_info->devices,options,
582 NULL,NULL);
cristyd43a46b2010-01-21 02:13:41 +0000583 if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS))
584 {
585 char
586 *log;
587
588 status=clGetProgramBuildInfo(convolve_info->program,
589 convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,0,NULL,&length);
590 log=(char *) AcquireMagickMemory(length);
591 if (log == (char *) NULL)
592 {
cristy5f959472010-05-27 22:19:46 +0000593 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000594 return((ConvolveInfo *) NULL);
595 }
596 status=clGetProgramBuildInfo(convolve_info->program,
597 convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,length,log,&length);
cristy32cca402010-01-23 04:02:23 +0000598 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
cristyd43a46b2010-01-21 02:13:41 +0000599 "failed to build OpenCL program","`%s' (%s)",image->filename,log);
600 log=DestroyString(log);
cristy5f959472010-05-27 22:19:46 +0000601 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000602 return((ConvolveInfo *) NULL);
603 }
604 /*
605 Get a kernel object.
606 */
607 convolve_info->kernel=clCreateKernel(convolve_info->program,name,&status);
608 if ((convolve_info->kernel == (cl_kernel) NULL) || (status != CL_SUCCESS))
609 {
cristy5f959472010-05-27 22:19:46 +0000610 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000611 return((ConvolveInfo *) NULL);
612 }
613 return(convolve_info);
614}
615
616#endif
617
cristy3f6d1482010-01-20 21:01:21 +0000618MagickExport MagickBooleanType AccelerateConvolveImage(const Image *image,
cristy2be15382010-01-21 02:38:03 +0000619 const KernelInfo *kernel,Image *convolve_image,ExceptionInfo *exception)
cristy3f6d1482010-01-20 21:01:21 +0000620{
621 assert(image != (Image *) NULL);
622 assert(image->signature == MagickSignature);
623 if (image->debug != MagickFalse)
624 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
cristy2be15382010-01-21 02:38:03 +0000625 assert(kernel != (KernelInfo *) NULL);
cristyd43a46b2010-01-21 02:13:41 +0000626 assert(kernel->signature == MagickSignature);
cristy3f6d1482010-01-20 21:01:21 +0000627 assert(convolve_image != (Image *) NULL);
628 assert(convolve_image->signature == MagickSignature);
629 assert(exception != (ExceptionInfo *) NULL);
630 assert(exception->signature == MagickSignature);
cristy394651a2010-01-23 21:05:55 +0000631 if ((image->storage_class != DirectClass) ||
632 (image->colorspace == CMYKColorspace))
cristyd43a46b2010-01-21 02:13:41 +0000633 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
634 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
635 return(MagickFalse);
636#if !defined(MAGICKCORE_OPENCL_SUPPORT)
cristy3f6d1482010-01-20 21:01:21 +0000637 return(MagickFalse);
cristyd43a46b2010-01-21 02:13:41 +0000638#else
639 {
640 const void
641 *pixels;
642
643 ConvolveInfo
644 *convolve_info;
645
646 MagickBooleanType
647 status;
648
649 MagickSizeType
650 length;
651
652 void
653 *convolve_pixels;
654
cristyd43a46b2010-01-21 02:13:41 +0000655 convolve_info=GetConvolveInfo(image,"Convolve",ConvolveKernel,exception);
656 if (convolve_info == (ConvolveInfo *) NULL)
657 return(MagickFalse);
658 pixels=AcquirePixelCachePixels(image,&length,exception);
659 if (pixels == (const void *) NULL)
660 {
661 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
662 "UnableToReadPixelCache","`%s'",image->filename);
663 convolve_info=DestroyConvolveInfo(convolve_info);
664 return(MagickFalse);
665 }
666 convolve_pixels=GetPixelCachePixels(convolve_image,&length,exception);
667 if (convolve_pixels == (void *) NULL)
668 {
669 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
670 "UnableToReadPixelCache","`%s'",image->filename);
671 convolve_info=DestroyConvolveInfo(convolve_info);
672 return(MagickFalse);
673 }
674 status=BindConvolveParameters(convolve_info,image,pixels,kernel->values,
675 kernel->width,kernel->height,convolve_pixels);
676 if (status == MagickFalse)
677 {
678 DestroyConvolveBuffers(convolve_info);
679 convolve_info=DestroyConvolveInfo(convolve_info);
680 return(MagickFalse);
681 }
682 status=EnqueueConvolveKernel(convolve_info,image,pixels,kernel->values,
683 kernel->width,kernel->height,convolve_pixels);
684 if (status == MagickFalse)
685 {
686 DestroyConvolveBuffers(convolve_info);
687 convolve_info=DestroyConvolveInfo(convolve_info);
688 return(MagickFalse);
689 }
690 DestroyConvolveBuffers(convolve_info);
691 convolve_info=DestroyConvolveInfo(convolve_info);
692 return(MagickTrue);
693 }
694#endif
cristy3f6d1482010-01-20 21:01:21 +0000695}