blob: 29b6ac4f926323548fa58e45731ef78a9d5954a5 [file] [log] [blame]
cristy3f6d1482010-01-20 21:01:21 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7% A A C C E L E R R A A T E %
8% AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9% A A C C E L E R R A A T E %
10% A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11% %
12% %
13% MagickCore Acceleration Methods %
14% %
15% Software Design %
cristy0d127ab2010-05-14 23:29:46 +000016% John Cristy %
cristy3f6d1482010-01-20 21:01:21 +000017% January 2010 %
18% %
19% %
cristy1454be72011-12-19 01:52:48 +000020% Copyright 1999-2012 ImageMagick Studio LLC, a non-profit organization %
cristy3f6d1482010-01-20 21:01:21 +000021% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% http://www.imagemagick.org/script/license.php %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
cristy0d127ab2010-05-14 23:29:46 +000036% Morphology is the the application of various kernals, of any size and even
cristy3f6d1482010-01-20 21:01:21 +000037% shape, to a image in various ways (typically binary, but not always).
38%
39% Convolution (weighted sum or average) is just one specific type of
40% accelerate. Just one that is very common for image bluring and sharpening
41% effects. Not only 2D Gaussian blurring, but also 2-pass 1D Blurring.
42%
43% This module provides not only a general accelerate function, and the ability
44% to apply more advanced or iterative morphologies, but also functions for the
45% generation of many different types of kernel arrays from user supplied
46% arguments. Prehaps even the generation of a kernel from a small image.
47*/
48
49/*
50 Include declarations.
51*/
cristy4c08aed2011-07-01 19:47:50 +000052#include "MagickCore/studio.h"
53#include "MagickCore/accelerate.h"
54#include "MagickCore/artifact.h"
cristy35f33492011-07-07 16:54:49 +000055#include "MagickCore/cache.h"
cristyd1dd6e42011-09-04 01:46:08 +000056#include "MagickCore/cache-private.h"
cristy4c08aed2011-07-01 19:47:50 +000057#include "MagickCore/cache-view.h"
58#include "MagickCore/color-private.h"
59#include "MagickCore/enhance.h"
60#include "MagickCore/exception.h"
61#include "MagickCore/exception-private.h"
62#include "MagickCore/gem.h"
63#include "MagickCore/hashmap.h"
64#include "MagickCore/image.h"
65#include "MagickCore/image-private.h"
66#include "MagickCore/list.h"
67#include "MagickCore/memory_.h"
68#include "MagickCore/monitor-private.h"
69#include "MagickCore/accelerate.h"
70#include "MagickCore/option.h"
71#include "MagickCore/pixel-accessor.h"
72#include "MagickCore/prepress.h"
73#include "MagickCore/quantize.h"
74#include "MagickCore/registry.h"
75#include "MagickCore/semaphore.h"
76#include "MagickCore/splay-tree.h"
77#include "MagickCore/statistic.h"
78#include "MagickCore/string_.h"
79#include "MagickCore/string-private.h"
80#include "MagickCore/token.h"
cristy3f6d1482010-01-20 21:01:21 +000081
82/*
83%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
84% %
85% %
86% %
87% A c c e l e r a t e C o n v o l v e I m a g e %
88% %
89% %
90% %
91%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
92%
93% AccelerateConvolveImage() applies a custom convolution kernel to the image.
94% It is accelerated by taking advantage of speed-ups offered by executing in
95% concert across heterogeneous platforms consisting of CPUs, GPUs, and other
96% processors.
97%
98% The format of the AccelerateConvolveImage method is:
99%
100% Image *AccelerateConvolveImage(const Image *image,
cristy2be15382010-01-21 02:38:03 +0000101% const KernelInfo *kernel,Image *convolve_image,
cristy3f6d1482010-01-20 21:01:21 +0000102% ExceptionInfo *exception)
103%
104% A description of each parameter follows:
105%
106% o image: the image.
107%
108% o kernel: the convolution kernel.
109%
110% o convole_image: the convoleed image.
111%
112% o exception: return any errors or warnings in this structure.
113%
114*/
cristyd43a46b2010-01-21 02:13:41 +0000115
116#if defined(MAGICKCORE_OPENCL_SUPPORT)
117
118#if defined(MAGICKCORE_HDRI_SUPPORT)
119#define CLOptions "-DMAGICKCORE_HDRI_SUPPORT=1 -DCLQuantum=float " \
120 "-DCLPixelType=float4 -DQuantumRange=%g -DMagickEpsilon=%g"
cristy101ab702011-10-13 13:06:32 +0000121#define CLPixelInfo cl_float4
cristyd43a46b2010-01-21 02:13:41 +0000122#else
123#if (MAGICKCORE_QUANTUM_DEPTH == 8)
124#define CLOptions "-DCLQuantum=uchar -DCLPixelType=uchar4 " \
125 "-DQuantumRange=%g -DMagickEpsilon=%g"
cristy101ab702011-10-13 13:06:32 +0000126#define CLPixelInfo cl_uchar4
cristyd43a46b2010-01-21 02:13:41 +0000127#elif (MAGICKCORE_QUANTUM_DEPTH == 16)
128#define CLOptions "-DCLQuantum=ushort -DCLPixelType=ushort4 " \
129 "-DQuantumRange=%g -DMagickEpsilon=%g"
cristy101ab702011-10-13 13:06:32 +0000130#define CLPixelInfo cl_ushort4
cristyd43a46b2010-01-21 02:13:41 +0000131#elif (MAGICKCORE_QUANTUM_DEPTH == 32)
132#define CLOptions "-DCLQuantum=uint -DCLPixelType=uint4 " \
133 "-DQuantumRange=%g -DMagickEpsilon=%g"
cristy101ab702011-10-13 13:06:32 +0000134#define CLPixelInfo cl_uint4
cristy4434d7b2011-09-01 18:19:57 +0000135#elif (MAGICKCORE_QUANTUM_DEPTH == 64)
cristybb503372010-05-27 20:51:26 +0000136#define CLOptions "-DCLQuantum=ussize_t -DCLPixelType=ussize_t4 " \
cristyd43a46b2010-01-21 02:13:41 +0000137 "-DQuantumRange=%g -DMagickEpsilon=%g"
cristy101ab702011-10-13 13:06:32 +0000138#define CLPixelInfo cl_ulong4
cristyd43a46b2010-01-21 02:13:41 +0000139#endif
140#endif
141
142typedef struct _ConvolveInfo
143{
144 cl_context
145 context;
146
147 cl_device_id
148 *devices;
149
150 cl_command_queue
151 command_queue;
152
153 cl_kernel
154 kernel;
155
156 cl_program
157 program;
158
159 cl_mem
160 pixels,
161 convolve_pixels;
162
cristy5f959472010-05-27 22:19:46 +0000163 cl_ulong
cristyd43a46b2010-01-21 02:13:41 +0000164 width,
165 height;
166
cristy966032e2011-09-12 19:12:00 +0000167 cl_uint
cristyd43a46b2010-01-21 02:13:41 +0000168 matte;
169
170 cl_mem
171 filter;
172} ConvolveInfo;
173
cristy1daf0e82011-09-26 18:10:05 +0000174static const char
cristyd43a46b2010-01-21 02:13:41 +0000175 *ConvolveKernel =
cristy5f959472010-05-27 22:19:46 +0000176 "static inline long ClampToCanvas(const long offset,const unsigned long range)\n"
cristyd43a46b2010-01-21 02:13:41 +0000177 "{\n"
178 " if (offset < 0L)\n"
179 " return(0L);\n"
180 " if (offset >= range)\n"
cristy5f959472010-05-27 22:19:46 +0000181 " return((long) (range-1L));\n"
cristyd43a46b2010-01-21 02:13:41 +0000182 " return(offset);\n"
183 "}\n"
184 "\n"
cristy1a2e2762011-09-13 00:31:55 +0000185 "static inline CLQuantum ClampToQuantum(const float value)\n"
cristyd43a46b2010-01-21 02:13:41 +0000186 "{\n"
187 "#if defined(MAGICKCORE_HDRI_SUPPORT)\n"
cristyc9ab8902012-01-08 13:44:42 +0000188 " return((CLQuantum) value);\n"
cristyd43a46b2010-01-21 02:13:41 +0000189 "#else\n"
190 " if (value < 0.0)\n"
191 " return((CLQuantum) 0);\n"
cristy1a2e2762011-09-13 00:31:55 +0000192 " if (value >= (float) QuantumRange)\n"
cristyd43a46b2010-01-21 02:13:41 +0000193 " return((CLQuantum) QuantumRange);\n"
194 " return((CLQuantum) (value+0.5));\n"
195 "#endif\n"
196 "}\n"
197 "\n"
cristy2d5be002012-08-05 12:11:45 +0000198 "static inline float MagickEpsilonReciprocal(const float x)\n"
199 "{\n"
200 " float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0;\n"
201 " return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/\n"
202 " MagickEpsilon));\n"
203 "}\n"
204 "\n"
cristyd43a46b2010-01-21 02:13:41 +0000205 "__kernel void Convolve(const __global CLPixelType *input,\n"
cristy1a2e2762011-09-13 00:31:55 +0000206 " __constant float *filter,const unsigned long width,const unsigned long height,\n"
cristy966032e2011-09-12 19:12:00 +0000207 " const unsigned int matte,__global CLPixelType *output)\n"
cristyd43a46b2010-01-21 02:13:41 +0000208 "{\n"
cristy5f959472010-05-27 22:19:46 +0000209 " const unsigned long columns = get_global_size(0);\n"
210 " const unsigned long rows = get_global_size(1);\n"
cristyd43a46b2010-01-21 02:13:41 +0000211 "\n"
cristy5f959472010-05-27 22:19:46 +0000212 " const long x = get_global_id(0);\n"
213 " const long y = get_global_id(1);\n"
cristyd43a46b2010-01-21 02:13:41 +0000214 "\n"
cristy1a2e2762011-09-13 00:31:55 +0000215 " const float scale = (1.0/QuantumRange);\n"
cristy5f959472010-05-27 22:19:46 +0000216 " const long mid_width = (width-1)/2;\n"
217 " const long mid_height = (height-1)/2;\n"
cristy1a2e2762011-09-13 00:31:55 +0000218 " float4 sum = { 0.0, 0.0, 0.0, 0.0 };\n"
219 " float gamma = 0.0;\n"
cristy5f959472010-05-27 22:19:46 +0000220 " register unsigned long i = 0;\n"
cristyd43a46b2010-01-21 02:13:41 +0000221 "\n"
222 " int method = 0;\n"
223 " if (matte != false)\n"
224 " method=1;\n"
225 " if ((x >= width) && (x < (columns-width-1)) &&\n"
226 " (y >= height) && (y < (rows-height-1)))\n"
227 " {\n"
228 " method=2;\n"
229 " if (matte != false)\n"
230 " method=3;\n"
231 " }\n"
232 " switch (method)\n"
233 " {\n"
234 " case 0:\n"
235 " {\n"
cristy5f959472010-05-27 22:19:46 +0000236 " for (long v=(-mid_height); v <= mid_height; v++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000237 " {\n"
cristy5f959472010-05-27 22:19:46 +0000238 " for (long u=(-mid_width); u <= mid_width; u++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000239 " {\n"
cristy5f959472010-05-27 22:19:46 +0000240 " const long index=ClampToCanvas(y+v,rows)*columns+\n"
cristyd43a46b2010-01-21 02:13:41 +0000241 " ClampToCanvas(x+u,columns);\n"
242 " sum.x+=filter[i]*input[index].x;\n"
243 " sum.y+=filter[i]*input[index].y;\n"
244 " sum.z+=filter[i]*input[index].z;\n"
245 " gamma+=filter[i];\n"
246 " i++;\n"
247 " }\n"
248 " }\n"
249 " break;\n"
250 " }\n"
251 " case 1:\n"
252 " {\n"
cristy5f959472010-05-27 22:19:46 +0000253 " for (long v=(-mid_height); v <= mid_height; v++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000254 " {\n"
cristy5f959472010-05-27 22:19:46 +0000255 " for (long u=(-mid_width); u <= mid_width; u++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000256 " {\n"
cristy5f959472010-05-27 22:19:46 +0000257 " const unsigned long index=ClampToCanvas(y+v,rows)*columns+\n"
cristyd43a46b2010-01-21 02:13:41 +0000258 " ClampToCanvas(x+u,columns);\n"
cristy1a2e2762011-09-13 00:31:55 +0000259 " const float alpha=scale*input[index].w;\n"
cristyd43a46b2010-01-21 02:13:41 +0000260 " sum.x+=alpha*filter[i]*input[index].x;\n"
261 " sum.y+=alpha*filter[i]*input[index].y;\n"
262 " sum.z+=alpha*filter[i]*input[index].z;\n"
263 " sum.w+=filter[i]*input[index].w;\n"
264 " gamma+=alpha*filter[i];\n"
265 " i++;\n"
266 " }\n"
267 " }\n"
268 " break;\n"
269 " }\n"
270 " case 2:\n"
271 " {\n"
cristy5f959472010-05-27 22:19:46 +0000272 " for (long v=(-mid_height); v <= mid_height; v++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000273 " {\n"
cristy5f959472010-05-27 22:19:46 +0000274 " for (long u=(-mid_width); u <= mid_width; u++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000275 " {\n"
cristy5f959472010-05-27 22:19:46 +0000276 " const unsigned long index=(y+v)*columns+(x+u);\n"
cristyd43a46b2010-01-21 02:13:41 +0000277 " sum.x+=filter[i]*input[index].x;\n"
278 " sum.y+=filter[i]*input[index].y;\n"
279 " sum.z+=filter[i]*input[index].z;\n"
280 " gamma+=filter[i];\n"
281 " i++;\n"
282 " }\n"
283 " }\n"
284 " break;\n"
285 " }\n"
286 " case 3:\n"
287 " {\n"
cristy5f959472010-05-27 22:19:46 +0000288 " for (long v=(-mid_height); v <= mid_height; v++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000289 " {\n"
cristy5f959472010-05-27 22:19:46 +0000290 " for (long u=(-mid_width); u <= mid_width; u++)\n"
cristyd43a46b2010-01-21 02:13:41 +0000291 " {\n"
cristy5f959472010-05-27 22:19:46 +0000292 " const unsigned long index=(y+v)*columns+(x+u);\n"
cristy1a2e2762011-09-13 00:31:55 +0000293 " const float alpha=scale*input[index].w;\n"
cristyd43a46b2010-01-21 02:13:41 +0000294 " sum.x+=alpha*filter[i]*input[index].x;\n"
295 " sum.y+=alpha*filter[i]*input[index].y;\n"
296 " sum.z+=alpha*filter[i]*input[index].z;\n"
297 " sum.w+=filter[i]*input[index].w;\n"
298 " gamma+=alpha*filter[i];\n"
299 " i++;\n"
300 " }\n"
301 " }\n"
302 " break;\n"
303 " }\n"
304 " }\n"
cristy2d5be002012-08-05 12:11:45 +0000305 " gamma=MagickEpsilonReciprocal(gamma);\n"
cristy5f959472010-05-27 22:19:46 +0000306 " const unsigned long index = y*columns+x;\n"
cristyd43a46b2010-01-21 02:13:41 +0000307 " output[index].x=ClampToQuantum(gamma*sum.x);\n"
308 " output[index].y=ClampToQuantum(gamma*sum.y);\n"
309 " output[index].z=ClampToQuantum(gamma*sum.z);\n"
310 " if (matte == false)\n"
311 " output[index].w=input[index].w;\n"
312 " else\n"
313 " output[index].w=ClampToQuantum(sum.w);\n"
314 "}\n";
315
316static void ConvolveNotify(const char *message,const void *data,size_t length,
317 void *user_context)
318{
319 ExceptionInfo
320 *exception;
321
322 (void) data;
323 (void) length;
324 exception=(ExceptionInfo *) user_context;
cristy32cca402010-01-23 04:02:23 +0000325 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
anthonye5b39652012-04-21 05:37:29 +0000326 "DelegateFailed","'%s'",message);
cristyd43a46b2010-01-21 02:13:41 +0000327}
328
329static MagickBooleanType BindConvolveParameters(ConvolveInfo *convolve_info,
cristy1a2e2762011-09-13 00:31:55 +0000330 const Image *image,const void *pixels,float *filter,const size_t width,
cristyc8523c12011-09-13 00:02:53 +0000331 const size_t height,void *convolve_pixels)
cristyd43a46b2010-01-21 02:13:41 +0000332{
333 cl_int
334 status;
335
cristy5f959472010-05-27 22:19:46 +0000336 register cl_uint
cristyd43a46b2010-01-21 02:13:41 +0000337 i;
338
339 size_t
340 length;
341
342 /*
343 Allocate OpenCL buffers.
344 */
345 length=image->columns*image->rows;
cristy5f959472010-05-27 22:19:46 +0000346 convolve_info->pixels=clCreateBuffer(convolve_info->context,(cl_mem_flags)
cristy101ab702011-10-13 13:06:32 +0000347 (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(CLPixelInfo),
cristy5f959472010-05-27 22:19:46 +0000348 (void *) pixels,&status);
cristyd43a46b2010-01-21 02:13:41 +0000349 if ((convolve_info->pixels == (cl_mem) NULL) || (status != CL_SUCCESS))
350 return(MagickFalse);
351 length=width*height;
cristy5f959472010-05-27 22:19:46 +0000352 convolve_info->filter=clCreateBuffer(convolve_info->context,(cl_mem_flags)
cristy1a2e2762011-09-13 00:31:55 +0000353 (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(cl_float),filter,
cristy5f959472010-05-27 22:19:46 +0000354 &status);
cristyd43a46b2010-01-21 02:13:41 +0000355 if ((convolve_info->filter == (cl_mem) NULL) || (status != CL_SUCCESS))
356 return(MagickFalse);
357 length=image->columns*image->rows;
358 convolve_info->convolve_pixels=clCreateBuffer(convolve_info->context,
cristy5f959472010-05-27 22:19:46 +0000359 (cl_mem_flags) (CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR),length*
cristy101ab702011-10-13 13:06:32 +0000360 sizeof(CLPixelInfo),convolve_pixels,&status);
cristyd43a46b2010-01-21 02:13:41 +0000361 if ((convolve_info->convolve_pixels == (cl_mem) NULL) ||
362 (status != CL_SUCCESS))
363 return(MagickFalse);
364 /*
365 Bind OpenCL buffers.
366 */
367 i=0;
368 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *)
369 &convolve_info->pixels);
370 if (status != CL_SUCCESS)
371 return(MagickFalse);
372 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *)
373 &convolve_info->filter);
374 if (status != CL_SUCCESS)
375 return(MagickFalse);
cristy5f959472010-05-27 22:19:46 +0000376 convolve_info->width=(cl_ulong) width;
377 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *)
cristyd43a46b2010-01-21 02:13:41 +0000378 &convolve_info->width);
379 if (status != CL_SUCCESS)
380 return(MagickFalse);
cristy5f959472010-05-27 22:19:46 +0000381 convolve_info->height=(cl_ulong) height;
382 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *)
cristyd43a46b2010-01-21 02:13:41 +0000383 &convolve_info->height);
384 if (status != CL_SUCCESS)
385 return(MagickFalse);
cristy644d5d02012-08-29 11:20:44 +0000386 convolve_info->matte=(cl_uint) image->alpha_trait == BlendPixelTrait ?
387 MagickTrue : MagickFalse;
cristy966032e2011-09-12 19:12:00 +0000388 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_uint),(void *)
cristy644d5d02012-08-29 11:20:44 +0000389 &convolve_info->matte);
cristyd43a46b2010-01-21 02:13:41 +0000390 if (status != CL_SUCCESS)
391 return(MagickFalse);
392 status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *)
393 &convolve_info->convolve_pixels);
394 if (status != CL_SUCCESS)
395 return(MagickFalse);
396 status=clFinish(convolve_info->command_queue);
397 if (status != CL_SUCCESS)
398 return(MagickFalse);
399 return(MagickTrue);
400}
401
402static void DestroyConvolveBuffers(ConvolveInfo *convolve_info)
403{
404 cl_int
405 status;
406
cristy9f027d12011-09-21 01:17:17 +0000407 status=0;
cristyd43a46b2010-01-21 02:13:41 +0000408 if (convolve_info->convolve_pixels != (cl_mem) NULL)
409 status=clReleaseMemObject(convolve_info->convolve_pixels);
410 if (convolve_info->pixels != (cl_mem) NULL)
411 status=clReleaseMemObject(convolve_info->pixels);
412 if (convolve_info->filter != (cl_mem) NULL)
413 status=clReleaseMemObject(convolve_info->filter);
cristyaa83c2c2011-09-21 13:36:25 +0000414 (void) status;
cristyd43a46b2010-01-21 02:13:41 +0000415}
416
417static ConvolveInfo *DestroyConvolveInfo(ConvolveInfo *convolve_info)
418{
419 cl_int
420 status;
421
cristy9f027d12011-09-21 01:17:17 +0000422 status=0;
cristyd43a46b2010-01-21 02:13:41 +0000423 if (convolve_info->kernel != (cl_kernel) NULL)
424 status=clReleaseKernel(convolve_info->kernel);
425 if (convolve_info->program != (cl_program) NULL)
426 status=clReleaseProgram(convolve_info->program);
427 if (convolve_info->command_queue != (cl_command_queue) NULL)
428 status=clReleaseCommandQueue(convolve_info->command_queue);
429 if (convolve_info->context != (cl_context) NULL)
430 status=clReleaseContext(convolve_info->context);
cristyaa83c2c2011-09-21 13:36:25 +0000431 (void) status;
cristyd43a46b2010-01-21 02:13:41 +0000432 convolve_info=(ConvolveInfo *) RelinquishMagickMemory(convolve_info);
433 return(convolve_info);
434}
435
436static MagickBooleanType EnqueueConvolveKernel(ConvolveInfo *convolve_info,
cristy1a2e2762011-09-13 00:31:55 +0000437 const Image *image,const void *pixels,float *filter,const size_t width,
cristyc8523c12011-09-13 00:02:53 +0000438 const size_t height,void *convolve_pixels)
cristyd43a46b2010-01-21 02:13:41 +0000439{
440 cl_int
441 status;
442
443 size_t
444 global_work_size[2],
445 length;
446
447 length=image->columns*image->rows;
448 status=clEnqueueWriteBuffer(convolve_info->command_queue,
cristy101ab702011-10-13 13:06:32 +0000449 convolve_info->pixels,CL_TRUE,0,length*sizeof(CLPixelInfo),pixels,0,NULL,
cristyd43a46b2010-01-21 02:13:41 +0000450 NULL);
451 length=width*height;
452 status=clEnqueueWriteBuffer(convolve_info->command_queue,
cristy1a2e2762011-09-13 00:31:55 +0000453 convolve_info->filter,CL_TRUE,0,length*sizeof(cl_float),filter,0,NULL,
cristyd43a46b2010-01-21 02:13:41 +0000454 NULL);
455 if (status != CL_SUCCESS)
456 return(MagickFalse);
457 global_work_size[0]=image->columns;
458 global_work_size[1]=image->rows;
459 status=clEnqueueNDRangeKernel(convolve_info->command_queue,
460 convolve_info->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
461 if (status != CL_SUCCESS)
462 return(MagickFalse);
463 length=image->columns*image->rows;
464 status=clEnqueueReadBuffer(convolve_info->command_queue,
cristy101ab702011-10-13 13:06:32 +0000465 convolve_info->convolve_pixels,CL_TRUE,0,length*sizeof(CLPixelInfo),
cristyd43a46b2010-01-21 02:13:41 +0000466 convolve_pixels,0,NULL,NULL);
467 if (status != CL_SUCCESS)
468 return(MagickFalse);
469 status=clFinish(convolve_info->command_queue);
470 if (status != CL_SUCCESS)
471 return(MagickFalse);
472 return(MagickTrue);
473}
474
475static ConvolveInfo *GetConvolveInfo(const Image *image,const char *name,
476 const char *source,ExceptionInfo *exception)
477{
478 char
479 options[MaxTextExtent];
480
cristy966032e2011-09-12 19:12:00 +0000481 cl_context_properties
482 context_properties[3];
483
cristyd43a46b2010-01-21 02:13:41 +0000484 cl_int
485 status;
486
cristy966032e2011-09-12 19:12:00 +0000487 cl_platform_id
488 platforms[1];
489
490 cl_uint
491 number_platforms;
492
cristyd43a46b2010-01-21 02:13:41 +0000493 ConvolveInfo
494 *convolve_info;
495
496 size_t
497 length,
498 lengths[] = { strlen(source) };
499
500 /*
501 Create OpenCL info.
502 */
cristy73bd4a52010-10-05 11:24:23 +0000503 convolve_info=(ConvolveInfo *) AcquireMagickMemory(sizeof(*convolve_info));
cristyd43a46b2010-01-21 02:13:41 +0000504 if (convolve_info == (ConvolveInfo *) NULL)
505 {
506 (void) ThrowMagickException(exception,GetMagickModule(),
anthonye5b39652012-04-21 05:37:29 +0000507 ResourceLimitError,"MemoryAllocationFailed","'%s'",image->filename);
cristyd43a46b2010-01-21 02:13:41 +0000508 return((ConvolveInfo *) NULL);
509 }
510 (void) ResetMagickMemory(convolve_info,0,sizeof(*convolve_info));
511 /*
512 Create OpenCL context.
513 */
cristy61b76e62011-09-13 12:04:12 +0000514 status=clGetPlatformIDs(0,(cl_platform_id *) NULL,&number_platforms);
515 if ((status == CL_SUCCESS) && (number_platforms > 0))
cristy966032e2011-09-12 19:12:00 +0000516 status=clGetPlatformIDs(1,platforms,NULL);
517 if (status != CL_SUCCESS)
518 {
519 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
anthonye5b39652012-04-21 05:37:29 +0000520 "failed to create OpenCL context","'%s' (%d)",image->filename,status);
cristy966032e2011-09-12 19:12:00 +0000521 convolve_info=DestroyConvolveInfo(convolve_info);
522 return((ConvolveInfo *) NULL);
523 }
524 context_properties[0]=CL_CONTEXT_PLATFORM;
525 context_properties[1]=(cl_context_properties) platforms[0];
526 context_properties[2]=0;
527 convolve_info->context=clCreateContextFromType(context_properties,
528 (cl_device_type) CL_DEVICE_TYPE_GPU,ConvolveNotify,exception,&status);
cristyd43a46b2010-01-21 02:13:41 +0000529 if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS))
cristy966032e2011-09-12 19:12:00 +0000530 convolve_info->context=clCreateContextFromType(context_properties,
531 (cl_device_type) CL_DEVICE_TYPE_CPU,ConvolveNotify,exception,&status);
cristyd43a46b2010-01-21 02:13:41 +0000532 if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS))
cristy966032e2011-09-12 19:12:00 +0000533 convolve_info->context=clCreateContextFromType(context_properties,
534 (cl_device_type) CL_DEVICE_TYPE_DEFAULT,ConvolveNotify,exception,&status);
cristyd43a46b2010-01-21 02:13:41 +0000535 if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS))
536 {
cristy32cca402010-01-23 04:02:23 +0000537 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
anthonye5b39652012-04-21 05:37:29 +0000538 "failed to create OpenCL context","'%s' (%d)",image->filename,status);
cristy5f959472010-05-27 22:19:46 +0000539 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000540 return((ConvolveInfo *) NULL);
541 }
542 /*
543 Detect OpenCL devices.
544 */
545 status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,0,NULL,
546 &length);
547 if ((status != CL_SUCCESS) || (length == 0))
548 {
cristy5f959472010-05-27 22:19:46 +0000549 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000550 return((ConvolveInfo *) NULL);
551 }
552 convolve_info->devices=(cl_device_id *) AcquireMagickMemory(length);
553 if (convolve_info->devices == (cl_device_id *) NULL)
554 {
555 (void) ThrowMagickException(exception,GetMagickModule(),
anthonye5b39652012-04-21 05:37:29 +0000556 ResourceLimitError,"MemoryAllocationFailed","'%s'",image->filename);
cristy5f959472010-05-27 22:19:46 +0000557 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000558 return((ConvolveInfo *) NULL);
559 }
560 status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,length,
561 convolve_info->devices,NULL);
562 if (status != CL_SUCCESS)
563 {
cristy5f959472010-05-27 22:19:46 +0000564 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000565 return((ConvolveInfo *) NULL);
566 }
cristydc9c80d2011-10-26 23:47:02 +0000567 if (image->debug != MagickFalse)
568 {
569 char
570 attribute[MaxTextExtent];
571
572 size_t
573 length;
574
575 clGetDeviceInfo(convolve_info->devices[0],CL_DEVICE_NAME,
576 sizeof(attribute),attribute,&length);
577 (void) LogMagickEvent(AccelerateEvent,GetMagickModule(),"Name: %s",
578 attribute);
579 clGetDeviceInfo(convolve_info->devices[0],CL_DEVICE_VENDOR,
580 sizeof(attribute),attribute,&length);
581 (void) LogMagickEvent(AccelerateEvent,GetMagickModule(),"Vendor: %s",
582 attribute);
583 clGetDeviceInfo(convolve_info->devices[0],CL_DEVICE_VERSION,
584 sizeof(attribute),attribute,&length);
585 (void) LogMagickEvent(AccelerateEvent,GetMagickModule(),
586 "Driver Version: %s",attribute);
587 clGetDeviceInfo(convolve_info->devices[0],CL_DEVICE_PROFILE,
588 sizeof(attribute),attribute,&length);
589 (void) LogMagickEvent(AccelerateEvent,GetMagickModule(),"Profile: %s",
590 attribute);
591 clGetDeviceInfo(convolve_info->devices[0],CL_DRIVER_VERSION,
592 sizeof(attribute),attribute,&length);
593 (void) LogMagickEvent(AccelerateEvent,GetMagickModule(),"Driver: %s",
594 attribute);
595 clGetDeviceInfo(convolve_info->devices[0],CL_DEVICE_EXTENSIONS,
596 sizeof(attribute),attribute,&length);
597 (void) LogMagickEvent(AccelerateEvent,GetMagickModule(),"Extensions: %s",
598 attribute);
599 }
cristyd43a46b2010-01-21 02:13:41 +0000600 /*
601 Create OpenCL command queue.
602 */
603 convolve_info->command_queue=clCreateCommandQueue(convolve_info->context,
604 convolve_info->devices[0],0,&status);
605 if ((convolve_info->command_queue == (cl_command_queue) NULL) ||
606 (status != CL_SUCCESS))
607 {
cristy5f959472010-05-27 22:19:46 +0000608 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000609 return((ConvolveInfo *) NULL);
610 }
611 /*
612 Build OpenCL program.
613 */
614 convolve_info->program=clCreateProgramWithSource(convolve_info->context,1,
615 &source,lengths,&status);
616 if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS))
617 {
cristy5f959472010-05-27 22:19:46 +0000618 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000619 return((ConvolveInfo *) NULL);
620 }
cristy1a2e2762011-09-13 00:31:55 +0000621 (void) FormatLocaleString(options,MaxTextExtent,CLOptions,(float)
cristyd43a46b2010-01-21 02:13:41 +0000622 QuantumRange,MagickEpsilon);
cristy00243d12010-01-21 02:45:27 +0000623 status=clBuildProgram(convolve_info->program,1,convolve_info->devices,options,
624 NULL,NULL);
cristyd43a46b2010-01-21 02:13:41 +0000625 if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS))
626 {
627 char
628 *log;
629
630 status=clGetProgramBuildInfo(convolve_info->program,
631 convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,0,NULL,&length);
632 log=(char *) AcquireMagickMemory(length);
633 if (log == (char *) NULL)
634 {
cristy5f959472010-05-27 22:19:46 +0000635 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000636 return((ConvolveInfo *) NULL);
637 }
638 status=clGetProgramBuildInfo(convolve_info->program,
639 convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,length,log,&length);
cristy32cca402010-01-23 04:02:23 +0000640 (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning,
anthonye5b39652012-04-21 05:37:29 +0000641 "failed to build OpenCL program","'%s' (%s)",image->filename,log);
cristyd43a46b2010-01-21 02:13:41 +0000642 log=DestroyString(log);
cristy5f959472010-05-27 22:19:46 +0000643 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000644 return((ConvolveInfo *) NULL);
645 }
646 /*
647 Get a kernel object.
648 */
649 convolve_info->kernel=clCreateKernel(convolve_info->program,name,&status);
650 if ((convolve_info->kernel == (cl_kernel) NULL) || (status != CL_SUCCESS))
651 {
cristy5f959472010-05-27 22:19:46 +0000652 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000653 return((ConvolveInfo *) NULL);
654 }
655 return(convolve_info);
656}
657
658#endif
659
cristy3f6d1482010-01-20 21:01:21 +0000660MagickExport MagickBooleanType AccelerateConvolveImage(const Image *image,
cristy2be15382010-01-21 02:38:03 +0000661 const KernelInfo *kernel,Image *convolve_image,ExceptionInfo *exception)
cristy3f6d1482010-01-20 21:01:21 +0000662{
663 assert(image != (Image *) NULL);
664 assert(image->signature == MagickSignature);
665 if (image->debug != MagickFalse)
666 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
cristy2be15382010-01-21 02:38:03 +0000667 assert(kernel != (KernelInfo *) NULL);
cristyd43a46b2010-01-21 02:13:41 +0000668 assert(kernel->signature == MagickSignature);
cristy3f6d1482010-01-20 21:01:21 +0000669 assert(convolve_image != (Image *) NULL);
670 assert(convolve_image->signature == MagickSignature);
671 assert(exception != (ExceptionInfo *) NULL);
672 assert(exception->signature == MagickSignature);
cristy394651a2010-01-23 21:05:55 +0000673 if ((image->storage_class != DirectClass) ||
674 (image->colorspace == CMYKColorspace))
cristybfa02fa2011-09-13 00:40:44 +0000675 return(MagickFalse);
cristyd43a46b2010-01-21 02:13:41 +0000676 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
677 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
678 return(MagickFalse);
cristyb572edf2011-09-13 12:25:22 +0000679 if (GetPixelChannels(image) != 4)
680 return(MagickFalse);
cristyd43a46b2010-01-21 02:13:41 +0000681#if !defined(MAGICKCORE_OPENCL_SUPPORT)
cristy3f6d1482010-01-20 21:01:21 +0000682 return(MagickFalse);
cristyd43a46b2010-01-21 02:13:41 +0000683#else
684 {
685 const void
686 *pixels;
687
cristy1a2e2762011-09-13 00:31:55 +0000688 float
689 *filter;
690
cristyd43a46b2010-01-21 02:13:41 +0000691 ConvolveInfo
692 *convolve_info;
693
694 MagickBooleanType
695 status;
696
697 MagickSizeType
698 length;
699
cristy1a2e2762011-09-13 00:31:55 +0000700 register ssize_t
701 i;
702
cristyd43a46b2010-01-21 02:13:41 +0000703 void
704 *convolve_pixels;
705
cristyd43a46b2010-01-21 02:13:41 +0000706 convolve_info=GetConvolveInfo(image,"Convolve",ConvolveKernel,exception);
707 if (convolve_info == (ConvolveInfo *) NULL)
708 return(MagickFalse);
709 pixels=AcquirePixelCachePixels(image,&length,exception);
710 if (pixels == (const void *) NULL)
711 {
cristy1a2e2762011-09-13 00:31:55 +0000712 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000713 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
anthonye5b39652012-04-21 05:37:29 +0000714 "UnableToReadPixelCache","'%s'",image->filename);
cristyd43a46b2010-01-21 02:13:41 +0000715 return(MagickFalse);
716 }
717 convolve_pixels=GetPixelCachePixels(convolve_image,&length,exception);
718 if (convolve_pixels == (void *) NULL)
719 {
cristy1a2e2762011-09-13 00:31:55 +0000720 convolve_info=DestroyConvolveInfo(convolve_info);
cristyd43a46b2010-01-21 02:13:41 +0000721 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
anthonye5b39652012-04-21 05:37:29 +0000722 "UnableToReadPixelCache","'%s'",image->filename);
cristyd43a46b2010-01-21 02:13:41 +0000723 return(MagickFalse);
724 }
cristy1a2e2762011-09-13 00:31:55 +0000725 filter=(float *) AcquireQuantumMemory(kernel->width,kernel->height*
726 sizeof(*filter));
727 if (filter == (float *) NULL)
728 {
729 DestroyConvolveBuffers(convolve_info);
730 convolve_info=DestroyConvolveInfo(convolve_info);
731 (void) ThrowMagickException(exception,GetMagickModule(),
anthonye5b39652012-04-21 05:37:29 +0000732 ResourceLimitError,"MemoryAllocationFailed","'%s'",image->filename);
cristy1a2e2762011-09-13 00:31:55 +0000733 return(MagickFalse);
734 }
735 for (i=0; i < (ssize_t) (kernel->width*kernel->height); i++)
736 filter[i]=(float) kernel->values[i];
737 status=BindConvolveParameters(convolve_info,image,pixels,filter,
cristyd43a46b2010-01-21 02:13:41 +0000738 kernel->width,kernel->height,convolve_pixels);
739 if (status == MagickFalse)
740 {
cristy1a2e2762011-09-13 00:31:55 +0000741 filter=(float *) RelinquishMagickMemory(filter);
cristyd43a46b2010-01-21 02:13:41 +0000742 DestroyConvolveBuffers(convolve_info);
743 convolve_info=DestroyConvolveInfo(convolve_info);
744 return(MagickFalse);
745 }
cristy1a2e2762011-09-13 00:31:55 +0000746 status=EnqueueConvolveKernel(convolve_info,image,pixels,filter,
cristyd43a46b2010-01-21 02:13:41 +0000747 kernel->width,kernel->height,convolve_pixels);
cristy1a2e2762011-09-13 00:31:55 +0000748 filter=(float *) RelinquishMagickMemory(filter);
cristyd43a46b2010-01-21 02:13:41 +0000749 if (status == MagickFalse)
750 {
751 DestroyConvolveBuffers(convolve_info);
752 convolve_info=DestroyConvolveInfo(convolve_info);
753 return(MagickFalse);
754 }
755 DestroyConvolveBuffers(convolve_info);
756 convolve_info=DestroyConvolveInfo(convolve_info);
757 return(MagickTrue);
758 }
759#endif
cristy3f6d1482010-01-20 21:01:21 +0000760}