blob: 4ce01e553e4aacfed0a86e5ca66c556dda190fd6 [file] [log] [blame]
cristy3f6d1482010-01-20 21:01:21 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7% A A C C E L E R R A A T E %
8% AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9% A A C C E L E R R A A T E %
10% A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11% %
12% %
13% MagickCore Acceleration Methods %
14% %
15% Software Design %
cristyde984cd2013-12-01 14:49:27 +000016% Cristy %
cristyf034abb2013-11-24 14:16:14 +000017% SiuChi Chan %
18% Guansong Zhang %
cristy3f6d1482010-01-20 21:01:21 +000019% January 2010 %
20% %
21% %
cristyfe676ee2013-11-18 13:03:38 +000022% Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization %
cristy3f6d1482010-01-20 21:01:21 +000023% dedicated to making software imaging solutions freely available. %
24% %
25% You may not use this file except in compliance with the License. You may %
26% obtain a copy of the License at %
27% %
28% http://www.imagemagick.org/script/license.php %
29% %
30% Unless required by applicable law or agreed to in writing, software %
31% distributed under the License is distributed on an "AS IS" BASIS, %
32% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
33% See the License for the specific language governing permissions and %
34% limitations under the License. %
35% %
36%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cristy3f6d1482010-01-20 21:01:21 +000037*/
cristyf034abb2013-11-24 14:16:14 +000038
cristy3f6d1482010-01-20 21:01:21 +000039/*
cristyf034abb2013-11-24 14:16:14 +000040Include declarations.
cristy3f6d1482010-01-20 21:01:21 +000041*/
cristy4c08aed2011-07-01 19:47:50 +000042#include "MagickCore/studio.h"
43#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000044#include "MagickCore/accelerate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000045#include "MagickCore/artifact.h"
cristy35f33492011-07-07 16:54:49 +000046#include "MagickCore/cache.h"
cristyd1dd6e42011-09-04 01:46:08 +000047#include "MagickCore/cache-private.h"
cristy4c08aed2011-07-01 19:47:50 +000048#include "MagickCore/cache-view.h"
49#include "MagickCore/color-private.h"
cristy7f866842013-07-11 01:15:58 +000050#include "MagickCore/delegate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000051#include "MagickCore/enhance.h"
52#include "MagickCore/exception.h"
53#include "MagickCore/exception-private.h"
54#include "MagickCore/gem.h"
55#include "MagickCore/hashmap.h"
56#include "MagickCore/image.h"
57#include "MagickCore/image-private.h"
58#include "MagickCore/list.h"
59#include "MagickCore/memory_.h"
60#include "MagickCore/monitor-private.h"
61#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000062#include "MagickCore/opencl.h"
63#include "MagickCore/opencl-private.h"
cristy4c08aed2011-07-01 19:47:50 +000064#include "MagickCore/option.h"
cristyf034abb2013-11-24 14:16:14 +000065#include "MagickCore/pixel-private.h"
cristy4c08aed2011-07-01 19:47:50 +000066#include "MagickCore/prepress.h"
67#include "MagickCore/quantize.h"
cristye85d0f72013-11-27 02:25:43 +000068#include "MagickCore/random_.h"
69#include "MagickCore/random-private.h"
cristy4c08aed2011-07-01 19:47:50 +000070#include "MagickCore/registry.h"
cristyf034abb2013-11-24 14:16:14 +000071#include "MagickCore/resize.h"
72#include "MagickCore/resize-private.h"
cristy4c08aed2011-07-01 19:47:50 +000073#include "MagickCore/semaphore.h"
74#include "MagickCore/splay-tree.h"
75#include "MagickCore/statistic.h"
76#include "MagickCore/string_.h"
77#include "MagickCore/string-private.h"
78#include "MagickCore/token.h"
cristyf034abb2013-11-24 14:16:14 +000079
80#ifdef MAGICKCORE_CLPERFMARKER
81#include "CLPerfMarker.h"
82#endif
83
cristye85d0f72013-11-27 02:25:43 +000084#define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
85#define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
86
cristyf034abb2013-11-24 14:16:14 +000087#if defined(MAGICKCORE_OPENCL_SUPPORT)
88
89#define ALIGNED(pointer,type) ((((long)(pointer)) & (sizeof(type)-1)) == 0)
90/*#define ALIGNED(pointer,type) (0) */
91
92static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception)
93{
94 MagickBooleanType flag;
95
96 MagickCLEnv clEnv;
97 clEnv = GetDefaultOpenCLEnv();
cristya22457d2013-12-07 14:03:06 +000098
cristyf034abb2013-11-24 14:16:14 +000099 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED
100 , sizeof(MagickBooleanType), &flag, exception);
101 if (flag == MagickTrue)
102 return MagickFalse;
103
104 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_INITIALIZED
105 , sizeof(MagickBooleanType), &flag, exception);
106 if (flag == MagickFalse)
107 {
108 if(InitOpenCLEnv(clEnv, exception) == MagickFalse)
109 return MagickFalse;
cristya22457d2013-12-07 14:03:06 +0000110
111 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED
112 , sizeof(MagickBooleanType), &flag, exception);
113 if (flag == MagickTrue)
114 return MagickFalse;
cristyf034abb2013-11-24 14:16:14 +0000115 }
116
117 return MagickTrue;
118}
119
120
dirk5dcb7622013-12-01 10:43:43 +0000121static MagickBooleanType checkAccelerateCondition(const Image* image, const ChannelType channel)
cristyf034abb2013-11-24 14:16:14 +0000122{
123 /* check if the image's colorspace is supported */
124 if (image->colorspace != RGBColorspace
125 && image->colorspace != sRGBColorspace)
126 return MagickFalse;
127
128 /* check if the channel is supported */
129 if (((channel&RedChannel) == 0)
130 || ((channel&GreenChannel) == 0)
131 || ((channel&BlueChannel) == 0))
132 {
133 return MagickFalse;
134 }
135
136
137 /* check if if the virtual pixel method is compatible with the OpenCL implementation */
138 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod)&&
139 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
140 return MagickFalse;
141
142 return MagickTrue;
143}
144
145
146static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
147{
148 MagickBooleanType outputReady;
149 MagickCLEnv clEnv;
150
151 cl_int clStatus;
152 size_t global_work_size[2];
153 size_t localGroupSize[2];
154 size_t localMemoryRequirement;
155 Image* filteredImage;
156 MagickSizeType length;
157 const void *inputPixels;
158 void *filteredPixels;
159 cl_mem_flags mem_flags;
160 float* kernelBufferPtr;
161 unsigned kernelSize;
162 unsigned int i;
163 void *hostPtr;
164 unsigned int matte, filterWidth, filterHeight, imageWidth, imageHeight;
165
166 cl_context context;
167 cl_kernel clkernel;
168 cl_mem inputImageBuffer, filteredImageBuffer, convolutionKernel;
169 cl_ulong deviceLocalMemorySize;
170 cl_device_id device;
171
172 cl_command_queue queue;
173
174 /* intialize all CL objects to NULL */
175 context = NULL;
176 inputImageBuffer = NULL;
177 filteredImageBuffer = NULL;
178 convolutionKernel = NULL;
179 clkernel = NULL;
180 queue = NULL;
181 device = NULL;
182
183 filteredImage = NULL;
184 outputReady = MagickFalse;
185
186 clEnv = GetDefaultOpenCLEnv();
187 context = GetOpenCLContext(clEnv);
188
189 inputPixels = NULL;
190 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
191 if (inputPixels == (const void *) NULL)
192 {
cristya22457d2013-12-07 14:03:06 +0000193 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +0000194 goto cleanup;
195 }
196
197 /* Create and initialize OpenCL buffers. */
198
199 /* If the host pointer is aligned to the size of CLPixelPacket,
200 then use the host buffer directly from the GPU; otherwise,
201 create a buffer on the GPU and copy the data over */
202 if (ALIGNED(inputPixels,CLPixelPacket))
203 {
204 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
205 }
206 else
207 {
208 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
209 }
210 /* create a CL buffer from image pixel buffer */
211 length = inputImage->columns * inputImage->rows;
212 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
213 if (clStatus != CL_SUCCESS)
214 {
cristya22457d2013-12-07 14:03:06 +0000215 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000216 goto cleanup;
217 }
218
219 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
220 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +0000221 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +0000222 {
cristya22457d2013-12-07 14:03:06 +0000223 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000224 goto cleanup;
225 }
226 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
227 if (filteredPixels == (void *) NULL)
228 {
cristya22457d2013-12-07 14:03:06 +0000229 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +0000230 goto cleanup;
231 }
232
233 if (ALIGNED(filteredPixels,CLPixelPacket))
234 {
235 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
236 hostPtr = filteredPixels;
237 }
238 else
239 {
240 mem_flags = CL_MEM_WRITE_ONLY;
241 hostPtr = NULL;
242 }
243 /* create a CL buffer from image pixel buffer */
244 length = inputImage->columns * inputImage->rows;
245 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
246 if (clStatus != CL_SUCCESS)
247 {
cristya22457d2013-12-07 14:03:06 +0000248 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000249 goto cleanup;
250 }
251
252 kernelSize = kernel->width * kernel->height;
253 convolutionKernel = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
254 if (clStatus != CL_SUCCESS)
255 {
cristya22457d2013-12-07 14:03:06 +0000256 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000257 goto cleanup;
258 }
259
260 queue = AcquireOpenCLCommandQueue(clEnv);
261
262 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
263 , 0, NULL, NULL, &clStatus);
264 if (clStatus != CL_SUCCESS)
265 {
cristya22457d2013-12-07 14:03:06 +0000266 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000267 goto cleanup;
268 }
269 for (i = 0; i < kernelSize; i++)
270 {
271 kernelBufferPtr[i] = (float) kernel->values[i];
272 }
273 clStatus = clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
274 if (clStatus != CL_SUCCESS)
275 {
cristya22457d2013-12-07 14:03:06 +0000276 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000277 goto cleanup;
278 }
279 clFlush(queue);
280
281 /* Compute the local memory requirement for a 16x16 workgroup.
282 If it's larger than 16k, reduce the workgroup size to 8x8 */
283 localGroupSize[0] = 16;
284 localGroupSize[1] = 16;
285 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
286 + kernel->width*kernel->height*sizeof(float);
287 if (localMemoryRequirement > 16384)
288 {
289
290
291 localGroupSize[0] = 8;
292 localGroupSize[1] = 8;
293
294 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
295 + kernel->width*kernel->height*sizeof(float);
296 }
297
298 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_DEVICE, sizeof(cl_device_id), &device, exception);
299 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &deviceLocalMemorySize, NULL);
300 if (localMemoryRequirement <= deviceLocalMemorySize)
301 {
302 /* get the OpenCL kernel */
cristya22457d2013-12-07 14:03:06 +0000303 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ConvolveOptimized");
cristyf034abb2013-11-24 14:16:14 +0000304 if (clkernel == NULL)
305 {
cristya22457d2013-12-07 14:03:06 +0000306 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000307 goto cleanup;
308 }
309
310 /* set the kernel arguments */
311 i = 0;
312 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
313 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
314 imageWidth = inputImage->columns;
315 imageHeight = inputImage->rows;
316 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
317 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
318 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
319 filterWidth = kernel->width;
320 filterHeight = kernel->height;
321 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
322 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
dirke19d0cc2013-12-01 10:07:42 +0000323 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +0000324 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
325 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
326 clStatus|=clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
327 clStatus|=clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
328 if (clStatus != CL_SUCCESS)
329 {
cristya22457d2013-12-07 14:03:06 +0000330 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000331 goto cleanup;
332 }
333
334 /* pad the global size to a multiple of the local work size dimension */
335 global_work_size[0] = ((inputImage->columns + localGroupSize[0] - 1)/localGroupSize[0] ) * localGroupSize[0] ;
336 global_work_size[1] = ((inputImage->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
337
338 /* launch the kernel */
339 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
340 if (clStatus != CL_SUCCESS)
341 {
cristya22457d2013-12-07 14:03:06 +0000342 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000343 goto cleanup;
344 }
345 }
346 else
347 {
348 /* get the OpenCL kernel */
349 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
350 if (clkernel == NULL)
351 {
cristya22457d2013-12-07 14:03:06 +0000352 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000353 goto cleanup;
354 }
355
356 /* set the kernel arguments */
357 i = 0;
358 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
359 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
360 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
361 filterWidth = kernel->width;
362 filterHeight = kernel->height;
363 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
364 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
dirke19d0cc2013-12-01 10:07:42 +0000365 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +0000366 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
367 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
368 if (clStatus != CL_SUCCESS)
369 {
cristya22457d2013-12-07 14:03:06 +0000370 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000371 goto cleanup;
372 }
373
374 global_work_size[0] = inputImage->columns;
375 global_work_size[1] = inputImage->rows;
376
377 /* launch the kernel */
378 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
379 if (clStatus != CL_SUCCESS)
380 {
cristya22457d2013-12-07 14:03:06 +0000381 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000382 goto cleanup;
383 }
384 }
385 clFlush(queue);
386
387 if (ALIGNED(filteredPixels,CLPixelPacket))
388 {
389 length = inputImage->columns * inputImage->rows;
390 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
391 }
392 else
393 {
394 length = inputImage->columns * inputImage->rows;
395 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
396 }
397 if (clStatus != CL_SUCCESS)
398 {
cristya22457d2013-12-07 14:03:06 +0000399 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000400 goto cleanup;
401 }
402
403 /* everything is fine! :) */
404 outputReady = MagickTrue;
405
cristyf034abb2013-11-24 14:16:14 +0000406cleanup:
cristya22457d2013-12-07 14:03:06 +0000407 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +0000408
409 if (inputImageBuffer != NULL)
410 clReleaseMemObject(inputImageBuffer);
411
412 if (filteredImageBuffer != NULL)
413 clReleaseMemObject(filteredImageBuffer);
414
415 if (convolutionKernel != NULL)
416 clReleaseMemObject(convolutionKernel);
417
418 if (clkernel != NULL)
419 RelinquishOpenCLKernel(clEnv, clkernel);
420
421 if (queue != NULL)
422 RelinquishOpenCLCommandQueue(clEnv, queue);
423
424 if (outputReady == MagickFalse)
425 {
426 if (filteredImage != NULL)
427 {
428 DestroyImage(filteredImage);
429 filteredImage = NULL;
430 }
431 }
432
433 return filteredImage;
434}
435
cristy3f6d1482010-01-20 21:01:21 +0000436/*
437%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
438% %
439% %
440% %
cristyf034abb2013-11-24 14:16:14 +0000441% C o n v o l v e I m a g e w i t h O p e n C L %
cristy3f6d1482010-01-20 21:01:21 +0000442% %
443% %
444% %
445%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
446%
cristyf034abb2013-11-24 14:16:14 +0000447% ConvolveImage() applies a custom convolution kernel to the image.
cristy3f6d1482010-01-20 21:01:21 +0000448%
cristyf034abb2013-11-24 14:16:14 +0000449% The format of the ConvolveImage method is:
cristy3f6d1482010-01-20 21:01:21 +0000450%
cristyf034abb2013-11-24 14:16:14 +0000451% Image *ConvolveImage(const Image *image,const size_t order,
452% const double *kernel,ExceptionInfo *exception)
453% Image *ConvolveImageChannel(const Image *image,const ChannelType channel,
454% const size_t order,const double *kernel,ExceptionInfo *exception)
455%
456% A description of each parameter follows:
457%
458% o image: the image.
459%
460% o channel: the channel type.
461%
462% o kernel: kernel info.
463%
464% o exception: return any errors or warnings in this structure.
465%
466*/
467
468MagickExport Image* AccelerateConvolveImageChannel(const Image *image, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
469{
470 MagickBooleanType status;
471 Image* filteredImage = NULL;
472
473 assert(image != NULL);
474 assert(kernel != (KernelInfo *) NULL);
475 assert(exception != (ExceptionInfo *) NULL);
476
477 status = checkOpenCLEnvironment(exception);
478 if (status == MagickFalse)
479 return NULL;
480
dirk5dcb7622013-12-01 10:43:43 +0000481 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +0000482 if (status == MagickFalse)
483 return NULL;
484
485 filteredImage = ComputeConvolveImage(image, channel, kernel, exception);
cristyf034abb2013-11-24 14:16:14 +0000486 return filteredImage;
487}
488
489static MagickBooleanType ComputeFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
490 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
491{
492 MagickBooleanType status;
493
494 MagickCLEnv clEnv;
495
496 MagickSizeType length;
497 void* pixels;
498 float* parametersBufferPtr;
499
500 cl_int clStatus;
501 cl_context context;
502 cl_kernel clkernel;
503 cl_command_queue queue;
504 cl_mem_flags mem_flags;
505 cl_mem imageBuffer;
506 cl_mem parametersBuffer;
507 size_t globalWorkSize[2];
508
509 unsigned int i;
510
511 status = MagickFalse;
512
513 context = NULL;
514 clkernel = NULL;
515 queue = NULL;
516 imageBuffer = NULL;
517 parametersBuffer = NULL;
518
519 clEnv = GetDefaultOpenCLEnv();
520 context = GetOpenCLContext(clEnv);
521
522 pixels = GetPixelCachePixels(image, &length, exception);
523 if (pixels == (void *) NULL)
524 {
cristya22457d2013-12-07 14:03:06 +0000525 (void) OpenCLThrowMagickException(exception, GetMagickModule(), CacheWarning,
cristyf034abb2013-11-24 14:16:14 +0000526 "GetPixelCachePixels failed.",
527 "'%s'", image->filename);
528 goto cleanup;
529 }
530
531
532 if (ALIGNED(pixels,CLPixelPacket))
533 {
534 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
535 }
536 else
537 {
538 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
539 }
540 /* create a CL buffer from image pixel buffer */
541 length = image->columns * image->rows;
542 imageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus);
543 if (clStatus != CL_SUCCESS)
544 {
cristya22457d2013-12-07 14:03:06 +0000545 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000546 goto cleanup;
547 }
548
549 parametersBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus);
550 if (clStatus != CL_SUCCESS)
551 {
cristya22457d2013-12-07 14:03:06 +0000552 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000553 goto cleanup;
554 }
555
556 queue = AcquireOpenCLCommandQueue(clEnv);
557
558 parametersBufferPtr = (float*)clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float)
559 , 0, NULL, NULL, &clStatus);
560 if (clStatus != CL_SUCCESS)
561 {
cristya22457d2013-12-07 14:03:06 +0000562 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000563 goto cleanup;
564 }
565 for (i = 0; i < number_parameters; i++)
566 {
567 parametersBufferPtr[i] = (float)parameters[i];
568 }
569 clStatus = clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL);
570 if (clStatus != CL_SUCCESS)
571 {
cristya22457d2013-12-07 14:03:06 +0000572 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000573 goto cleanup;
574 }
575 clFlush(queue);
576
577 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "FunctionImage");
578 if (clkernel == NULL)
579 {
cristya22457d2013-12-07 14:03:06 +0000580 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000581 goto cleanup;
582 }
583
584 /* set the kernel arguments */
585 i = 0;
586 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
587 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
588 clStatus|=clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
589 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
590 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
591 if (clStatus != CL_SUCCESS)
592 {
cristya22457d2013-12-07 14:03:06 +0000593 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000594 goto cleanup;
595 }
596
597 globalWorkSize[0] = image->columns;
598 globalWorkSize[1] = image->rows;
599 /* launch the kernel */
600 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
601 if (clStatus != CL_SUCCESS)
602 {
cristya22457d2013-12-07 14:03:06 +0000603 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000604 goto cleanup;
605 }
606 clFlush(queue);
607
608
609 if (ALIGNED(pixels,CLPixelPacket))
610 {
611 length = image->columns * image->rows;
612 clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
613 }
614 else
615 {
616 length = image->columns * image->rows;
617 clStatus = clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL);
618 }
619 if (clStatus != CL_SUCCESS)
620 {
cristya22457d2013-12-07 14:03:06 +0000621 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000622 goto cleanup;
623 }
624 status = MagickTrue;
625
626cleanup:
cristya22457d2013-12-07 14:03:06 +0000627 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +0000628
629 if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel);
630 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
631 if (imageBuffer != NULL) clReleaseMemObject(imageBuffer);
632 if (parametersBuffer != NULL) clReleaseMemObject(parametersBuffer);
633
634 return status;
635}
636
637
638
639MagickExport MagickBooleanType
640 AccelerateFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
641 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
642{
643 MagickBooleanType status;
644
645 status = MagickFalse;
646
647 assert(image != NULL);
648 assert(exception != (ExceptionInfo *) NULL);
649
650 status = checkOpenCLEnvironment(exception);
651 if (status == MagickTrue)
652 {
dirk5dcb7622013-12-01 10:43:43 +0000653 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +0000654 if (status == MagickTrue)
655 {
656 status = ComputeFunctionImage(image, channel, function, number_parameters, parameters, exception);
cristyf034abb2013-11-24 14:16:14 +0000657 }
658 }
659 return status;
660}
661
662
663static MagickBooleanType splitImage(const Image* inputImage)
664{
665 MagickBooleanType split;
666
667 MagickCLEnv clEnv;
668 unsigned long allocSize;
669 unsigned long tempSize;
670
671 clEnv = GetDefaultOpenCLEnv();
672
673 allocSize = GetOpenCLDeviceMaxMemAllocSize(clEnv);
674 tempSize = inputImage->columns * inputImage->rows * 4 * 4;
675
676 /*
677 printf("alloc size: %lu\n", allocSize);
678 printf("temp size: %lu\n", tempSize);
679 */
680
681 split = ((tempSize > allocSize) ? MagickTrue:MagickFalse);
682
683 return split;
684}
685
686static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
687{
688 MagickBooleanType outputReady;
689 Image* filteredImage;
690 MagickCLEnv clEnv;
691
692 cl_int clStatus;
693
694 const void *inputPixels;
695 void *filteredPixels;
696 cl_mem_flags mem_flags;
697
698 cl_context context;
699 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
700 cl_kernel blurRowKernel, blurColumnKernel;
701 cl_command_queue queue;
702
703 void* hostPtr;
704 float* kernelBufferPtr;
705 MagickSizeType length;
706
707 char geometry[MaxTextExtent];
708 KernelInfo* kernel = NULL;
709 unsigned int kernelWidth;
710 unsigned int imageColumns, imageRows;
711
712 unsigned int i;
713
714 context = NULL;
715 filteredImage = NULL;
716 inputImageBuffer = NULL;
717 tempImageBuffer = NULL;
718 filteredImageBuffer = NULL;
719 imageKernelBuffer = NULL;
720 blurRowKernel = NULL;
721 blurColumnKernel = NULL;
722 queue = NULL;
723
724 outputReady = MagickFalse;
725
726 clEnv = GetDefaultOpenCLEnv();
727 context = GetOpenCLContext(clEnv);
728 queue = AcquireOpenCLCommandQueue(clEnv);
729
730 /* Create and initialize OpenCL buffers. */
731 {
732 inputPixels = NULL;
733 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
734 if (inputPixels == (const void *) NULL)
735 {
cristya22457d2013-12-07 14:03:06 +0000736 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +0000737 goto cleanup;
738 }
739 /* If the host pointer is aligned to the size of CLPixelPacket,
740 then use the host buffer directly from the GPU; otherwise,
741 create a buffer on the GPU and copy the data over */
742 if (ALIGNED(inputPixels,CLPixelPacket))
743 {
744 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
745 }
746 else
747 {
748 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
749 }
750 /* create a CL buffer from image pixel buffer */
751 length = inputImage->columns * inputImage->rows;
752 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
753 if (clStatus != CL_SUCCESS)
754 {
cristya22457d2013-12-07 14:03:06 +0000755 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000756 goto cleanup;
757 }
758 }
759
760 /* create output */
761 {
762 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
763 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +0000764 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +0000765 {
cristya22457d2013-12-07 14:03:06 +0000766 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000767 goto cleanup;
768 }
769 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
770 if (filteredPixels == (void *) NULL)
771 {
cristya22457d2013-12-07 14:03:06 +0000772 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +0000773 goto cleanup;
774 }
775
776 if (ALIGNED(filteredPixels,CLPixelPacket))
777 {
778 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
779 hostPtr = filteredPixels;
780 }
781 else
782 {
783 mem_flags = CL_MEM_WRITE_ONLY;
784 hostPtr = NULL;
785 }
786 /* create a CL buffer from image pixel buffer */
787 length = inputImage->columns * inputImage->rows;
788 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
789 if (clStatus != CL_SUCCESS)
790 {
cristya22457d2013-12-07 14:03:06 +0000791 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000792 goto cleanup;
793 }
794 }
795
796 /* create processing kernel */
797 {
798 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
799 kernel=AcquireKernelInfo(geometry);
800 if (kernel == (KernelInfo *) NULL)
801 {
cristya22457d2013-12-07 14:03:06 +0000802 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
cristyf034abb2013-11-24 14:16:14 +0000803 goto cleanup;
804 }
805
806 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
807 if (clStatus != CL_SUCCESS)
808 {
cristya22457d2013-12-07 14:03:06 +0000809 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000810 goto cleanup;
811 }
812 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
813 if (clStatus != CL_SUCCESS)
814 {
cristya22457d2013-12-07 14:03:06 +0000815 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000816 goto cleanup;
817 }
818
819 for (i = 0; i < kernel->width; i++)
820 {
821 kernelBufferPtr[i] = (float) kernel->values[i];
822 }
823
824 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
825 if (clStatus != CL_SUCCESS)
826 {
cristya22457d2013-12-07 14:03:06 +0000827 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000828 goto cleanup;
829 }
830 }
831
832 {
833
834 /* create temp buffer */
835 {
836 length = inputImage->columns * inputImage->rows;
837 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
838 if (clStatus != CL_SUCCESS)
839 {
cristya22457d2013-12-07 14:03:06 +0000840 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000841 goto cleanup;
842 }
843 }
844
845 /* get the OpenCL kernels */
846 {
847 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
848 if (blurRowKernel == NULL)
849 {
cristya22457d2013-12-07 14:03:06 +0000850 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000851 goto cleanup;
852 };
853
854 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumn");
855 if (blurColumnKernel == NULL)
856 {
cristya22457d2013-12-07 14:03:06 +0000857 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000858 goto cleanup;
859 };
860 }
861
862 {
863 /* need logic to decide this value */
864 int chunkSize = 256;
865
866 {
867 imageColumns = inputImage->columns;
868 imageRows = inputImage->rows;
869
870 /* set the kernel arguments */
871 i = 0;
872 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
873 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
874 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
875 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
876 kernelWidth = kernel->width;
877 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
878 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
879 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
880 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
881 if (clStatus != CL_SUCCESS)
882 {
cristya22457d2013-12-07 14:03:06 +0000883 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000884 goto cleanup;
885 }
886 }
887
888 /* launch the kernel */
889 {
890 size_t gsize[2];
891 size_t wsize[2];
892
893 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
894 gsize[1] = inputImage->rows;
895 wsize[0] = chunkSize;
896 wsize[1] = 1;
897
898 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
899 if (clStatus != CL_SUCCESS)
900 {
cristya22457d2013-12-07 14:03:06 +0000901 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000902 goto cleanup;
903 }
904 clFlush(queue);
905 }
906 }
907
908 {
909 /* need logic to decide this value */
910 int chunkSize = 256;
911
912 {
913 imageColumns = inputImage->columns;
914 imageRows = inputImage->rows;
915
916 /* set the kernel arguments */
917 i = 0;
918 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
919 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
920 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
921 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
922 kernelWidth = kernel->width;
923 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
924 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
925 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
926 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
927 if (clStatus != CL_SUCCESS)
928 {
cristya22457d2013-12-07 14:03:06 +0000929 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000930 goto cleanup;
931 }
932 }
933
934 /* launch the kernel */
935 {
936 size_t gsize[2];
937 size_t wsize[2];
938
939 gsize[0] = inputImage->columns;
940 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
941 wsize[0] = 1;
942 wsize[1] = chunkSize;
943
944 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
945 if (clStatus != CL_SUCCESS)
946 {
cristya22457d2013-12-07 14:03:06 +0000947 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000948 goto cleanup;
949 }
950 clFlush(queue);
951 }
952 }
953
954 }
955
956 /* get result */
957 if (ALIGNED(filteredPixels,CLPixelPacket))
958 {
959 length = inputImage->columns * inputImage->rows;
960 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
961 }
962 else
963 {
964 length = inputImage->columns * inputImage->rows;
965 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
966 }
967 if (clStatus != CL_SUCCESS)
968 {
cristya22457d2013-12-07 14:03:06 +0000969 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000970 goto cleanup;
971 }
972
973 outputReady = MagickTrue;
974
975cleanup:
cristya22457d2013-12-07 14:03:06 +0000976 OpenCLLogException(__FUNCTION__,__LINE__,exception);
977
cristyf034abb2013-11-24 14:16:14 +0000978 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
979 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
980 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
981 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
982 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
983 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
984 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
985 if (kernel!=NULL) DestroyKernelInfo(kernel);
986 if (outputReady == MagickFalse)
987 {
988 if (filteredImage != NULL)
989 {
990 DestroyImage(filteredImage);
991 filteredImage = NULL;
992 }
993 }
994 return filteredImage;
995}
996
997static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
998{
999 MagickBooleanType outputReady;
1000 Image* filteredImage;
1001 MagickCLEnv clEnv;
1002
1003 cl_int clStatus;
1004
1005 const void *inputPixels;
1006 void *filteredPixels;
1007 cl_mem_flags mem_flags;
1008
1009 cl_context context;
1010 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
1011 cl_kernel blurRowKernel, blurColumnKernel;
1012 cl_command_queue queue;
1013
1014 void* hostPtr;
1015 float* kernelBufferPtr;
1016 MagickSizeType length;
1017
1018 char geometry[MaxTextExtent];
1019 KernelInfo* kernel = NULL;
1020 unsigned int kernelWidth;
1021 unsigned int imageColumns, imageRows;
1022
1023 unsigned int i;
1024
1025 context = NULL;
1026 filteredImage = NULL;
1027 inputImageBuffer = NULL;
1028 tempImageBuffer = NULL;
1029 filteredImageBuffer = NULL;
1030 imageKernelBuffer = NULL;
1031 blurRowKernel = NULL;
1032 blurColumnKernel = NULL;
1033 queue = NULL;
1034
1035 outputReady = MagickFalse;
1036
1037 clEnv = GetDefaultOpenCLEnv();
1038 context = GetOpenCLContext(clEnv);
1039 queue = AcquireOpenCLCommandQueue(clEnv);
1040
1041 /* Create and initialize OpenCL buffers. */
1042 {
1043 inputPixels = NULL;
1044 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1045 if (inputPixels == (const void *) NULL)
1046 {
cristya22457d2013-12-07 14:03:06 +00001047 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001048 goto cleanup;
1049 }
1050 /* If the host pointer is aligned to the size of CLPixelPacket,
1051 then use the host buffer directly from the GPU; otherwise,
1052 create a buffer on the GPU and copy the data over */
1053 if (ALIGNED(inputPixels,CLPixelPacket))
1054 {
1055 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1056 }
1057 else
1058 {
1059 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1060 }
1061 /* create a CL buffer from image pixel buffer */
1062 length = inputImage->columns * inputImage->rows;
1063 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1064 if (clStatus != CL_SUCCESS)
1065 {
cristya22457d2013-12-07 14:03:06 +00001066 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001067 goto cleanup;
1068 }
1069 }
1070
1071 /* create output */
1072 {
1073 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1074 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00001075 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00001076 {
cristya22457d2013-12-07 14:03:06 +00001077 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001078 goto cleanup;
1079 }
1080 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1081 if (filteredPixels == (void *) NULL)
1082 {
cristya22457d2013-12-07 14:03:06 +00001083 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001084 goto cleanup;
1085 }
1086
1087 if (ALIGNED(filteredPixels,CLPixelPacket))
1088 {
1089 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1090 hostPtr = filteredPixels;
1091 }
1092 else
1093 {
1094 mem_flags = CL_MEM_WRITE_ONLY;
1095 hostPtr = NULL;
1096 }
1097 /* create a CL buffer from image pixel buffer */
1098 length = inputImage->columns * inputImage->rows;
1099 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1100 if (clStatus != CL_SUCCESS)
1101 {
cristya22457d2013-12-07 14:03:06 +00001102 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001103 goto cleanup;
1104 }
1105 }
1106
1107 /* create processing kernel */
1108 {
1109 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1110 kernel=AcquireKernelInfo(geometry);
1111 if (kernel == (KernelInfo *) NULL)
1112 {
cristya22457d2013-12-07 14:03:06 +00001113 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
cristyf034abb2013-11-24 14:16:14 +00001114 goto cleanup;
1115 }
1116
1117 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
1118 if (clStatus != CL_SUCCESS)
1119 {
cristya22457d2013-12-07 14:03:06 +00001120 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001121 goto cleanup;
1122 }
1123 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1124 if (clStatus != CL_SUCCESS)
1125 {
cristya22457d2013-12-07 14:03:06 +00001126 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001127 goto cleanup;
1128 }
1129
1130 for (i = 0; i < kernel->width; i++)
1131 {
1132 kernelBufferPtr[i] = (float) kernel->values[i];
1133 }
1134
1135 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1136 if (clStatus != CL_SUCCESS)
1137 {
cristya22457d2013-12-07 14:03:06 +00001138 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001139 goto cleanup;
1140 }
1141 }
1142
1143 {
1144 unsigned int offsetRows;
1145 unsigned int sec;
1146
1147 /* create temp buffer */
1148 {
1149 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
1150 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1151 if (clStatus != CL_SUCCESS)
1152 {
cristya22457d2013-12-07 14:03:06 +00001153 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001154 goto cleanup;
1155 }
1156 }
1157
1158 /* get the OpenCL kernels */
1159 {
1160 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
1161 if (blurRowKernel == NULL)
1162 {
cristya22457d2013-12-07 14:03:06 +00001163 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001164 goto cleanup;
1165 };
1166
1167 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumnSection");
1168 if (blurColumnKernel == NULL)
1169 {
cristya22457d2013-12-07 14:03:06 +00001170 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001171 goto cleanup;
1172 };
1173 }
1174
1175 for (sec = 0; sec < 2; sec++)
1176 {
1177 {
1178 /* need logic to decide this value */
1179 int chunkSize = 256;
1180
1181 {
1182 imageColumns = inputImage->columns;
1183 if (sec == 0)
1184 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
1185 else
1186 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
1187
1188 offsetRows = sec * inputImage->rows / 2;
1189
1190 kernelWidth = kernel->width;
1191
1192 /* set the kernel arguments */
1193 i = 0;
1194 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1195 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1196 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1197 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1198 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1199 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1200 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1201 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1202 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1203 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
1204 if (clStatus != CL_SUCCESS)
1205 {
cristya22457d2013-12-07 14:03:06 +00001206 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001207 goto cleanup;
1208 }
1209 }
1210
1211 /* launch the kernel */
1212 {
1213 size_t gsize[2];
1214 size_t wsize[2];
1215
1216 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
1217 gsize[1] = imageRows;
1218 wsize[0] = chunkSize;
1219 wsize[1] = 1;
1220
1221 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1222 if (clStatus != CL_SUCCESS)
1223 {
cristya22457d2013-12-07 14:03:06 +00001224 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001225 goto cleanup;
1226 }
1227 clFlush(queue);
1228 }
1229 }
1230
1231 {
1232 /* need logic to decide this value */
1233 int chunkSize = 256;
1234
1235 {
1236 imageColumns = inputImage->columns;
1237 if (sec == 0)
1238 imageRows = inputImage->rows / 2;
1239 else
1240 imageRows = (inputImage->rows - inputImage->rows / 2);
1241
1242 offsetRows = sec * inputImage->rows / 2;
1243
1244 kernelWidth = kernel->width;
1245
1246 /* set the kernel arguments */
1247 i = 0;
1248 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1249 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1250 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
1251 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1252 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1253 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1254 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1255 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
1256 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1257 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
1258 if (clStatus != CL_SUCCESS)
1259 {
cristya22457d2013-12-07 14:03:06 +00001260 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001261 goto cleanup;
1262 }
1263 }
1264
1265 /* launch the kernel */
1266 {
1267 size_t gsize[2];
1268 size_t wsize[2];
1269
1270 gsize[0] = imageColumns;
1271 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
1272 wsize[0] = 1;
1273 wsize[1] = chunkSize;
1274
1275 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1276 if (clStatus != CL_SUCCESS)
1277 {
cristya22457d2013-12-07 14:03:06 +00001278 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001279 goto cleanup;
1280 }
1281 clFlush(queue);
1282 }
1283 }
1284 }
1285
1286 }
1287
1288 /* get result */
1289 if (ALIGNED(filteredPixels,CLPixelPacket))
1290 {
1291 length = inputImage->columns * inputImage->rows;
1292 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1293 }
1294 else
1295 {
1296 length = inputImage->columns * inputImage->rows;
1297 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1298 }
1299 if (clStatus != CL_SUCCESS)
1300 {
cristya22457d2013-12-07 14:03:06 +00001301 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001302 goto cleanup;
1303 }
1304
1305 outputReady = MagickTrue;
1306
1307cleanup:
cristya22457d2013-12-07 14:03:06 +00001308 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1309
cristyf034abb2013-11-24 14:16:14 +00001310 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1311 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1312 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1313 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1314 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1315 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
1316 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1317 if (kernel!=NULL) DestroyKernelInfo(kernel);
1318 if (outputReady == MagickFalse)
1319 {
1320 if (filteredImage != NULL)
1321 {
1322 DestroyImage(filteredImage);
1323 filteredImage = NULL;
1324 }
1325 }
1326 return filteredImage;
1327}
1328
1329/*
1330%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1331% %
1332% %
1333% %
1334% B l u r I m a g e w i t h O p e n C L %
1335% %
1336% %
1337% %
1338%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1339%
1340% BlurImage() blurs an image. We convolve the image with a Gaussian operator
1341% of the given radius and standard deviation (sigma). For reasonable results,
1342% the radius should be larger than sigma. Use a radius of 0 and BlurImage()
1343% selects a suitable radius for you.
1344%
1345% The format of the BlurImage method is:
1346%
1347% Image *BlurImage(const Image *image,const double radius,
1348% const double sigma,ExceptionInfo *exception)
1349% Image *BlurImageChannel(const Image *image,const ChannelType channel,
1350% const double radius,const double sigma,ExceptionInfo *exception)
1351%
1352% A description of each parameter follows:
1353%
1354% o image: the image.
1355%
1356% o channel: the channel type.
1357%
1358% o radius: the radius of the Gaussian, in pixels, not counting the center
1359% pixel.
1360%
1361% o sigma: the standard deviation of the Gaussian, in pixels.
1362%
1363% o exception: return any errors or warnings in this structure.
1364%
1365*/
1366
1367MagickExport
1368Image* AccelerateBlurImage(const Image *image, const ChannelType channel, const double radius, const double sigma,ExceptionInfo *exception)
1369{
1370 MagickBooleanType status;
1371 Image* filteredImage = NULL;
1372
1373 assert(image != NULL);
1374 assert(exception != (ExceptionInfo *) NULL);
1375
1376 status = checkOpenCLEnvironment(exception);
1377 if (status == MagickFalse)
1378 return NULL;
1379
dirk5dcb7622013-12-01 10:43:43 +00001380 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00001381 if (status == MagickFalse)
1382 return NULL;
1383
1384 if (splitImage(image) && (image->rows / 2 > radius))
1385 filteredImage = ComputeBlurImageSection(image, channel, radius, sigma, exception);
1386 else
1387 filteredImage = ComputeBlurImage(image, channel, radius, sigma, exception);
1388
cristyf034abb2013-11-24 14:16:14 +00001389 return filteredImage;
1390}
1391
1392
1393static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType channel, const double angle, ExceptionInfo *exception)
1394{
1395
1396 MagickBooleanType outputReady;
1397 Image* filteredImage;
1398 MagickCLEnv clEnv;
1399
1400 cl_int clStatus;
1401 size_t global_work_size[2];
1402
1403 cl_context context;
1404 cl_mem_flags mem_flags;
1405 cl_mem inputImageBuffer, filteredImageBuffer, sinThetaBuffer, cosThetaBuffer;
1406 cl_kernel radialBlurKernel;
1407 cl_command_queue queue;
1408
1409 const void *inputPixels;
1410 void *filteredPixels;
1411 void* hostPtr;
1412 float* sinThetaPtr;
1413 float* cosThetaPtr;
1414 MagickSizeType length;
1415 unsigned int matte;
dirke19d0cc2013-12-01 10:07:42 +00001416 PixelInfo bias;
cristyf034abb2013-11-24 14:16:14 +00001417 cl_float4 biasPixel;
1418 cl_float2 blurCenter;
1419 float blurRadius;
1420 unsigned int cossin_theta_size;
1421 float offset, theta;
1422
1423 unsigned int i;
1424
1425 outputReady = MagickFalse;
1426 context = NULL;
1427 filteredImage = NULL;
1428 inputImageBuffer = NULL;
1429 filteredImageBuffer = NULL;
1430 sinThetaBuffer = NULL;
1431 cosThetaBuffer = NULL;
1432 queue = NULL;
1433 radialBlurKernel = NULL;
1434
1435
1436 clEnv = GetDefaultOpenCLEnv();
1437 context = GetOpenCLContext(clEnv);
1438
1439
1440 /* Create and initialize OpenCL buffers. */
1441
1442 inputPixels = NULL;
1443 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1444 if (inputPixels == (const void *) NULL)
1445 {
cristya22457d2013-12-07 14:03:06 +00001446 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001447 goto cleanup;
1448 }
1449
1450 /* If the host pointer is aligned to the size of CLPixelPacket,
1451 then use the host buffer directly from the GPU; otherwise,
1452 create a buffer on the GPU and copy the data over */
1453 if (ALIGNED(inputPixels,CLPixelPacket))
1454 {
1455 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1456 }
1457 else
1458 {
1459 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1460 }
1461 /* create a CL buffer from image pixel buffer */
1462 length = inputImage->columns * inputImage->rows;
1463 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1464 if (clStatus != CL_SUCCESS)
1465 {
cristya22457d2013-12-07 14:03:06 +00001466 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001467 goto cleanup;
1468 }
1469
1470
1471 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1472 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00001473 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00001474 {
cristya22457d2013-12-07 14:03:06 +00001475 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001476 goto cleanup;
1477 }
1478 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1479 if (filteredPixels == (void *) NULL)
1480 {
cristya22457d2013-12-07 14:03:06 +00001481 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001482 goto cleanup;
1483 }
1484
1485 if (ALIGNED(filteredPixels,CLPixelPacket))
1486 {
1487 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1488 hostPtr = filteredPixels;
1489 }
1490 else
1491 {
1492 mem_flags = CL_MEM_WRITE_ONLY;
1493 hostPtr = NULL;
1494 }
1495 /* create a CL buffer from image pixel buffer */
1496 length = inputImage->columns * inputImage->rows;
1497 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1498 if (clStatus != CL_SUCCESS)
1499 {
cristya22457d2013-12-07 14:03:06 +00001500 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001501 goto cleanup;
1502 }
1503
1504 blurCenter.s[0] = (float) (inputImage->columns-1)/2.0;
1505 blurCenter.s[1] = (float) (inputImage->rows-1)/2.0;
1506 blurRadius=hypot(blurCenter.s[0],blurCenter.s[1]);
1507 cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
1508
1509 /* create a buffer for sin_theta and cos_theta */
1510 sinThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1511 if (clStatus != CL_SUCCESS)
1512 {
cristya22457d2013-12-07 14:03:06 +00001513 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001514 goto cleanup;
1515 }
1516 cosThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1517 if (clStatus != CL_SUCCESS)
1518 {
cristya22457d2013-12-07 14:03:06 +00001519 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001520 goto cleanup;
1521 }
1522
1523
1524 queue = AcquireOpenCLCommandQueue(clEnv);
1525 sinThetaPtr = (float*) clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1526 if (clStatus != CL_SUCCESS)
1527 {
cristya22457d2013-12-07 14:03:06 +00001528 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001529 goto cleanup;
1530 }
1531
1532 cosThetaPtr = (float*) clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1533 if (clStatus != CL_SUCCESS)
1534 {
cristya22457d2013-12-07 14:03:06 +00001535 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001536 goto cleanup;
1537 }
1538
1539 theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
1540 offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
1541 for (i=0; i < (ssize_t) cossin_theta_size; i++)
1542 {
1543 cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
1544 sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
1545 }
1546
1547 clStatus = clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
1548 clStatus |= clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
1549 if (clStatus != CL_SUCCESS)
1550 {
cristya22457d2013-12-07 14:03:06 +00001551 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001552 goto cleanup;
1553 }
1554
1555 /* get the OpenCL kernel */
1556 radialBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RadialBlur");
1557 if (radialBlurKernel == NULL)
1558 {
cristya22457d2013-12-07 14:03:06 +00001559 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001560 goto cleanup;
1561 }
1562
1563
1564 /* set the kernel arguments */
1565 i = 0;
1566 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1567 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1568
dirke19d0cc2013-12-01 10:07:42 +00001569 GetPixelInfo(inputImage,&bias);
cristyf034abb2013-11-24 14:16:14 +00001570 biasPixel.s[0] = bias.red;
1571 biasPixel.s[1] = bias.green;
1572 biasPixel.s[2] = bias.blue;
dirke19d0cc2013-12-01 10:07:42 +00001573 biasPixel.s[3] = bias.alpha;
cristyf034abb2013-11-24 14:16:14 +00001574 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel);
1575 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel);
1576
dirke19d0cc2013-12-01 10:07:42 +00001577 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +00001578 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte);
1579
1580 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter);
1581
1582 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
1583 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
1584 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
1585 if (clStatus != CL_SUCCESS)
1586 {
cristya22457d2013-12-07 14:03:06 +00001587 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001588 goto cleanup;
1589 }
1590
1591
1592 global_work_size[0] = inputImage->columns;
1593 global_work_size[1] = inputImage->rows;
1594 /* launch the kernel */
1595 clStatus = clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
1596 if (clStatus != CL_SUCCESS)
1597 {
cristya22457d2013-12-07 14:03:06 +00001598 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001599 goto cleanup;
1600 }
1601 clFlush(queue);
1602
1603 if (ALIGNED(filteredPixels,CLPixelPacket))
1604 {
1605 length = inputImage->columns * inputImage->rows;
1606 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1607 }
1608 else
1609 {
1610 length = inputImage->columns * inputImage->rows;
1611 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1612 }
1613 if (clStatus != CL_SUCCESS)
1614 {
cristya22457d2013-12-07 14:03:06 +00001615 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001616 goto cleanup;
1617 }
1618 outputReady = MagickTrue;
1619
1620cleanup:
cristya22457d2013-12-07 14:03:06 +00001621 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1622
cristyf034abb2013-11-24 14:16:14 +00001623 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1624 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1625 if (sinThetaBuffer!=NULL) clReleaseMemObject(sinThetaBuffer);
1626 if (cosThetaBuffer!=NULL) clReleaseMemObject(cosThetaBuffer);
1627 if (radialBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, radialBlurKernel);
1628 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1629 if (outputReady == MagickFalse)
1630 {
1631 if (filteredImage != NULL)
1632 {
1633 DestroyImage(filteredImage);
1634 filteredImage = NULL;
1635 }
1636 }
1637 return filteredImage;
1638}
1639
1640/*
1641%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1642% %
1643% %
1644% %
1645% R a d i a l B l u r I m a g e w i t h O p e n C L %
1646% %
1647% %
1648% %
1649%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1650%
1651% RadialBlurImage() applies a radial blur to the image.
1652%
1653% Andrew Protano contributed this effect.
1654%
1655% The format of the RadialBlurImage method is:
1656%
1657% Image *RadialBlurImage(const Image *image,const double angle,
1658% ExceptionInfo *exception)
1659% Image *RadialBlurImageChannel(const Image *image,const ChannelType channel,
1660% const double angle,ExceptionInfo *exception)
1661%
1662% A description of each parameter follows:
1663%
1664% o image: the image.
1665%
1666% o channel: the channel type.
1667%
1668% o angle: the angle of the radial blur.
1669%
1670% o exception: return any errors or warnings in this structure.
1671%
1672*/
1673
1674MagickExport
1675Image* AccelerateRadialBlurImage(const Image *image, const ChannelType channel, const double angle, ExceptionInfo *exception)
1676{
1677 MagickBooleanType status;
1678 Image* filteredImage;
1679
1680
1681 assert(image != NULL);
1682 assert(exception != NULL);
1683
1684 status = checkOpenCLEnvironment(exception);
1685 if (status == MagickFalse)
1686 return NULL;
1687
dirk5dcb7622013-12-01 10:43:43 +00001688 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00001689 if (status == MagickFalse)
1690 return NULL;
1691
1692 filteredImage = ComputeRadialBlurImage(image, channel, angle, exception);
cristyf034abb2013-11-24 14:16:14 +00001693 return filteredImage;
1694}
1695
1696
1697
1698static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
1699 const double gain,const double threshold,ExceptionInfo *exception)
1700{
1701 MagickBooleanType outputReady = MagickFalse;
1702 Image* filteredImage = NULL;
1703 MagickCLEnv clEnv = NULL;
1704
1705 cl_int clStatus;
1706
1707 const void *inputPixels;
1708 void *filteredPixels;
1709 cl_mem_flags mem_flags;
1710
1711 KernelInfo *kernel = NULL;
1712 char geometry[MaxTextExtent];
1713
1714 cl_context context = NULL;
1715 cl_mem inputImageBuffer = NULL;
1716 cl_mem filteredImageBuffer = NULL;
1717 cl_mem tempImageBuffer = NULL;
1718 cl_mem imageKernelBuffer = NULL;
1719 cl_kernel blurRowKernel = NULL;
1720 cl_kernel unsharpMaskBlurColumnKernel = NULL;
1721 cl_command_queue queue = NULL;
1722
1723 void* hostPtr;
1724 float* kernelBufferPtr;
1725 MagickSizeType length;
1726 unsigned int kernelWidth;
1727 float fGain;
1728 float fThreshold;
1729 unsigned int imageColumns, imageRows;
1730 int chunkSize;
1731 unsigned int i;
1732
1733 clEnv = GetDefaultOpenCLEnv();
1734 context = GetOpenCLContext(clEnv);
1735 queue = AcquireOpenCLCommandQueue(clEnv);
1736
1737 /* Create and initialize OpenCL buffers. */
1738 {
1739 inputPixels = NULL;
1740 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1741 if (inputPixels == (const void *) NULL)
1742 {
cristya22457d2013-12-07 14:03:06 +00001743 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001744 goto cleanup;
1745 }
1746
1747 /* If the host pointer is aligned to the size of CLPixelPacket,
1748 then use the host buffer directly from the GPU; otherwise,
1749 create a buffer on the GPU and copy the data over */
1750 if (ALIGNED(inputPixels,CLPixelPacket))
1751 {
1752 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1753 }
1754 else
1755 {
1756 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1757 }
1758 /* create a CL buffer from image pixel buffer */
1759 length = inputImage->columns * inputImage->rows;
1760 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1761 if (clStatus != CL_SUCCESS)
1762 {
cristya22457d2013-12-07 14:03:06 +00001763 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001764 goto cleanup;
1765 }
1766 }
1767
1768 /* create output */
1769 {
1770 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1771 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00001772 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00001773 {
cristya22457d2013-12-07 14:03:06 +00001774 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001775 goto cleanup;
1776 }
1777 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1778 if (filteredPixels == (void *) NULL)
1779 {
cristya22457d2013-12-07 14:03:06 +00001780 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001781 goto cleanup;
1782 }
1783
1784 if (ALIGNED(filteredPixels,CLPixelPacket))
1785 {
1786 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1787 hostPtr = filteredPixels;
1788 }
1789 else
1790 {
1791 mem_flags = CL_MEM_WRITE_ONLY;
1792 hostPtr = NULL;
1793 }
1794
1795 /* create a CL buffer from image pixel buffer */
1796 length = inputImage->columns * inputImage->rows;
1797 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1798 if (clStatus != CL_SUCCESS)
1799 {
cristya22457d2013-12-07 14:03:06 +00001800 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001801 goto cleanup;
1802 }
1803 }
1804
1805 /* create the blur kernel */
1806 {
1807 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1808 kernel=AcquireKernelInfo(geometry);
1809 if (kernel == (KernelInfo *) NULL)
1810 {
cristya22457d2013-12-07 14:03:06 +00001811 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001812 goto cleanup;
1813 }
1814
1815 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
1816 if (clStatus != CL_SUCCESS)
1817 {
cristya22457d2013-12-07 14:03:06 +00001818 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001819 goto cleanup;
1820 }
1821
1822
1823 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1824 if (clStatus != CL_SUCCESS)
1825 {
cristya22457d2013-12-07 14:03:06 +00001826 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001827 goto cleanup;
1828 }
1829 for (i = 0; i < kernel->width; i++)
1830 {
1831 kernelBufferPtr[i] = (float) kernel->values[i];
1832 }
1833 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1834 if (clStatus != CL_SUCCESS)
1835 {
cristya22457d2013-12-07 14:03:06 +00001836 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001837 goto cleanup;
1838 }
1839 }
1840
1841 {
1842 /* create temp buffer */
1843 {
1844 length = inputImage->columns * inputImage->rows;
1845 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1846 if (clStatus != CL_SUCCESS)
1847 {
cristya22457d2013-12-07 14:03:06 +00001848 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001849 goto cleanup;
1850 }
1851 }
1852
1853 /* get the opencl kernel */
1854 {
1855 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
1856 if (blurRowKernel == NULL)
1857 {
cristya22457d2013-12-07 14:03:06 +00001858 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001859 goto cleanup;
1860 };
1861
1862 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
1863 if (unsharpMaskBlurColumnKernel == NULL)
1864 {
cristya22457d2013-12-07 14:03:06 +00001865 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001866 goto cleanup;
1867 };
1868 }
1869
1870 {
1871 chunkSize = 256;
1872
1873 imageColumns = inputImage->columns;
1874 imageRows = inputImage->rows;
1875
1876 kernelWidth = kernel->width;
1877
1878 /* set the kernel arguments */
1879 i = 0;
1880 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1881 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1882 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1883 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1884 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1885 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1886 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1887 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1888 if (clStatus != CL_SUCCESS)
1889 {
cristya22457d2013-12-07 14:03:06 +00001890 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001891 goto cleanup;
1892 }
1893 }
1894
1895 /* launch the kernel */
1896 {
1897 size_t gsize[2];
1898 size_t wsize[2];
1899
1900 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
1901 gsize[1] = inputImage->rows;
1902 wsize[0] = chunkSize;
1903 wsize[1] = 1;
1904
1905 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1906 if (clStatus != CL_SUCCESS)
1907 {
cristya22457d2013-12-07 14:03:06 +00001908 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001909 goto cleanup;
1910 }
1911 clFlush(queue);
1912 }
1913
1914
1915 {
1916 chunkSize = 256;
1917 imageColumns = inputImage->columns;
1918 imageRows = inputImage->rows;
1919 kernelWidth = kernel->width;
1920 fGain = (float)gain;
1921 fThreshold = (float)threshold;
1922
1923 i = 0;
1924 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1925 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1926 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1927 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1928 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1929 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
1930 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
1931 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
1932 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1933 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1934 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
1935 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
1936
1937 if (clStatus != CL_SUCCESS)
1938 {
cristya22457d2013-12-07 14:03:06 +00001939 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001940 goto cleanup;
1941 }
1942 }
1943
1944 /* launch the kernel */
1945 {
1946 size_t gsize[2];
1947 size_t wsize[2];
1948
1949 gsize[0] = inputImage->columns;
1950 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
1951 wsize[0] = 1;
1952 wsize[1] = chunkSize;
1953
1954 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1955 if (clStatus != CL_SUCCESS)
1956 {
cristya22457d2013-12-07 14:03:06 +00001957 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001958 goto cleanup;
1959 }
1960 clFlush(queue);
1961 }
1962
1963 }
1964
1965 /* get result */
1966 if (ALIGNED(filteredPixels,CLPixelPacket))
1967 {
1968 length = inputImage->columns * inputImage->rows;
1969 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1970 }
1971 else
1972 {
1973 length = inputImage->columns * inputImage->rows;
1974 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1975 }
1976 if (clStatus != CL_SUCCESS)
1977 {
cristya22457d2013-12-07 14:03:06 +00001978 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001979 goto cleanup;
1980 }
1981
1982 outputReady = MagickTrue;
1983
1984cleanup:
cristya22457d2013-12-07 14:03:06 +00001985 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1986
cristyf034abb2013-11-24 14:16:14 +00001987 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
1988 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1989 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1990 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1991 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1992 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1993 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
1994 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1995 if (outputReady == MagickFalse)
1996 {
1997 if (filteredImage != NULL)
1998 {
1999 DestroyImage(filteredImage);
2000 filteredImage = NULL;
2001 }
2002 }
2003 return filteredImage;
2004}
2005
2006
2007static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
2008 const double gain,const double threshold,ExceptionInfo *exception)
2009{
2010 MagickBooleanType outputReady = MagickFalse;
2011 Image* filteredImage = NULL;
2012 MagickCLEnv clEnv = NULL;
2013
2014 cl_int clStatus;
2015
2016 const void *inputPixels;
2017 void *filteredPixels;
2018 cl_mem_flags mem_flags;
2019
2020 KernelInfo *kernel = NULL;
2021 char geometry[MaxTextExtent];
2022
2023 cl_context context = NULL;
2024 cl_mem inputImageBuffer = NULL;
2025 cl_mem filteredImageBuffer = NULL;
2026 cl_mem tempImageBuffer = NULL;
2027 cl_mem imageKernelBuffer = NULL;
2028 cl_kernel blurRowKernel = NULL;
2029 cl_kernel unsharpMaskBlurColumnKernel = NULL;
2030 cl_command_queue queue = NULL;
2031
2032 void* hostPtr;
2033 float* kernelBufferPtr;
2034 MagickSizeType length;
2035 unsigned int kernelWidth;
2036 float fGain;
2037 float fThreshold;
2038 unsigned int imageColumns, imageRows;
2039 int chunkSize;
2040 unsigned int i;
2041
2042 clEnv = GetDefaultOpenCLEnv();
2043 context = GetOpenCLContext(clEnv);
2044 queue = AcquireOpenCLCommandQueue(clEnv);
2045
2046 /* Create and initialize OpenCL buffers. */
2047 {
2048 inputPixels = NULL;
2049 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2050 if (inputPixels == (const void *) NULL)
2051 {
cristya22457d2013-12-07 14:03:06 +00002052 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00002053 goto cleanup;
2054 }
2055
2056 /* If the host pointer is aligned to the size of CLPixelPacket,
2057 then use the host buffer directly from the GPU; otherwise,
2058 create a buffer on the GPU and copy the data over */
2059 if (ALIGNED(inputPixels,CLPixelPacket))
2060 {
2061 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2062 }
2063 else
2064 {
2065 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2066 }
2067 /* create a CL buffer from image pixel buffer */
2068 length = inputImage->columns * inputImage->rows;
2069 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2070 if (clStatus != CL_SUCCESS)
2071 {
cristya22457d2013-12-07 14:03:06 +00002072 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002073 goto cleanup;
2074 }
2075 }
2076
2077 /* create output */
2078 {
2079 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
2080 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00002081 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00002082 {
cristya22457d2013-12-07 14:03:06 +00002083 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002084 goto cleanup;
2085 }
2086 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2087 if (filteredPixels == (void *) NULL)
2088 {
cristya22457d2013-12-07 14:03:06 +00002089 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00002090 goto cleanup;
2091 }
2092
2093 if (ALIGNED(filteredPixels,CLPixelPacket))
2094 {
2095 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2096 hostPtr = filteredPixels;
2097 }
2098 else
2099 {
2100 mem_flags = CL_MEM_WRITE_ONLY;
2101 hostPtr = NULL;
2102 }
2103
2104 /* create a CL buffer from image pixel buffer */
2105 length = inputImage->columns * inputImage->rows;
2106 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2107 if (clStatus != CL_SUCCESS)
2108 {
cristya22457d2013-12-07 14:03:06 +00002109 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002110 goto cleanup;
2111 }
2112 }
2113
2114 /* create the blur kernel */
2115 {
2116 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
2117 kernel=AcquireKernelInfo(geometry);
2118 if (kernel == (KernelInfo *) NULL)
2119 {
cristya22457d2013-12-07 14:03:06 +00002120 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002121 goto cleanup;
2122 }
2123
2124 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
2125 if (clStatus != CL_SUCCESS)
2126 {
cristya22457d2013-12-07 14:03:06 +00002127 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002128 goto cleanup;
2129 }
2130
2131
2132 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
2133 if (clStatus != CL_SUCCESS)
2134 {
cristya22457d2013-12-07 14:03:06 +00002135 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002136 goto cleanup;
2137 }
2138 for (i = 0; i < kernel->width; i++)
2139 {
2140 kernelBufferPtr[i] = (float) kernel->values[i];
2141 }
2142 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
2143 if (clStatus != CL_SUCCESS)
2144 {
cristya22457d2013-12-07 14:03:06 +00002145 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002146 goto cleanup;
2147 }
2148 }
2149
2150 {
2151 unsigned int offsetRows;
2152 unsigned int sec;
2153
2154 /* create temp buffer */
2155 {
2156 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
2157 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
2158 if (clStatus != CL_SUCCESS)
2159 {
cristya22457d2013-12-07 14:03:06 +00002160 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002161 goto cleanup;
2162 }
2163 }
2164
2165 /* get the opencl kernel */
2166 {
2167 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
2168 if (blurRowKernel == NULL)
2169 {
cristya22457d2013-12-07 14:03:06 +00002170 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002171 goto cleanup;
2172 };
2173
2174 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumnSection");
2175 if (unsharpMaskBlurColumnKernel == NULL)
2176 {
cristya22457d2013-12-07 14:03:06 +00002177 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002178 goto cleanup;
2179 };
2180 }
2181
2182 for (sec = 0; sec < 2; sec++)
2183 {
2184 {
2185 chunkSize = 256;
2186
2187 imageColumns = inputImage->columns;
2188 if (sec == 0)
2189 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
2190 else
2191 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
2192
2193 offsetRows = sec * inputImage->rows / 2;
2194
2195 kernelWidth = kernel->width;
2196
2197 /* set the kernel arguments */
2198 i = 0;
2199 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2200 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2201 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
2202 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2203 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2204 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2205 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2206 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
2207 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2208 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
2209 if (clStatus != CL_SUCCESS)
2210 {
cristya22457d2013-12-07 14:03:06 +00002211 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002212 goto cleanup;
2213 }
2214 }
2215 /* launch the kernel */
2216 {
2217 size_t gsize[2];
2218 size_t wsize[2];
2219
2220 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
2221 gsize[1] = imageRows;
2222 wsize[0] = chunkSize;
2223 wsize[1] = 1;
2224
2225 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2226 if (clStatus != CL_SUCCESS)
2227 {
cristya22457d2013-12-07 14:03:06 +00002228 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002229 goto cleanup;
2230 }
2231 clFlush(queue);
2232 }
2233
2234
2235 {
2236 chunkSize = 256;
2237
2238 imageColumns = inputImage->columns;
2239 if (sec == 0)
cristya22457d2013-12-07 14:03:06 +00002240 imageRows = inputImage->rows / 2;
cristyf034abb2013-11-24 14:16:14 +00002241 else
2242 imageRows = (inputImage->rows - inputImage->rows / 2);
2243
2244 offsetRows = sec * inputImage->rows / 2;
2245
2246 kernelWidth = kernel->width;
2247
2248 fGain = (float)gain;
2249 fThreshold = (float)threshold;
2250
2251 i = 0;
2252 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2253 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2254 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2255 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2256 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2257 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
2258 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
2259 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
2260 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2261 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2262 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
2263 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
2264 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2265 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
2266
2267 if (clStatus != CL_SUCCESS)
2268 {
cristya22457d2013-12-07 14:03:06 +00002269 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002270 goto cleanup;
2271 }
2272 }
2273
2274 /* launch the kernel */
2275 {
2276 size_t gsize[2];
2277 size_t wsize[2];
2278
2279 gsize[0] = imageColumns;
2280 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
2281 wsize[0] = 1;
2282 wsize[1] = chunkSize;
2283
2284 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2285 if (clStatus != CL_SUCCESS)
2286 {
cristya22457d2013-12-07 14:03:06 +00002287 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002288 goto cleanup;
2289 }
2290 clFlush(queue);
2291 }
2292 }
2293 }
2294
2295 /* get result */
2296 if (ALIGNED(filteredPixels,CLPixelPacket))
2297 {
2298 length = inputImage->columns * inputImage->rows;
2299 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2300 }
2301 else
2302 {
2303 length = inputImage->columns * inputImage->rows;
2304 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2305 }
2306 if (clStatus != CL_SUCCESS)
2307 {
cristya22457d2013-12-07 14:03:06 +00002308 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002309 goto cleanup;
2310 }
2311
2312 outputReady = MagickTrue;
2313
2314cleanup:
cristya22457d2013-12-07 14:03:06 +00002315 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2316
cristyf034abb2013-11-24 14:16:14 +00002317 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
2318 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2319 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2320 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2321 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
2322 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
2323 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
2324 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2325 if (outputReady == MagickFalse)
2326 {
2327 if (filteredImage != NULL)
2328 {
2329 DestroyImage(filteredImage);
2330 filteredImage = NULL;
2331 }
2332 }
2333 return filteredImage;
2334}
2335
2336
2337/*
2338%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2339% %
2340% %
2341% %
2342% U n s h a r p M a s k I m a g e w i t h O p e n C L %
2343% %
2344% %
2345% %
2346%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2347%
2348% UnsharpMaskImage() sharpens one or more image channels. We convolve the
2349% image with a Gaussian operator of the given radius and standard deviation
2350% (sigma). For reasonable results, radius should be larger than sigma. Use a
2351% radius of 0 and UnsharpMaskImage() selects a suitable radius for you.
2352%
2353% The format of the UnsharpMaskImage method is:
2354%
2355% Image *UnsharpMaskImage(const Image *image,const double radius,
2356% const double sigma,const double amount,const double threshold,
2357% ExceptionInfo *exception)
2358% Image *UnsharpMaskImageChannel(const Image *image,
2359% const ChannelType channel,const double radius,const double sigma,
2360% const double gain,const double threshold,ExceptionInfo *exception)
2361%
2362% A description of each parameter follows:
2363%
2364% o image: the image.
2365%
2366% o channel: the channel type.
2367%
2368% o radius: the radius of the Gaussian, in pixels, not counting the center
2369% pixel.
2370%
2371% o sigma: the standard deviation of the Gaussian, in pixels.
2372%
2373% o gain: the percentage of the difference between the original and the
2374% blur image that is added back into the original.
2375%
2376% o threshold: the threshold in pixels needed to apply the diffence gain.
2377%
2378% o exception: return any errors or warnings in this structure.
2379%
2380*/
2381
2382
2383MagickExport
2384Image* AccelerateUnsharpMaskImage(const Image *image, const ChannelType channel,const double radius,const double sigma,
2385 const double gain,const double threshold,ExceptionInfo *exception)
2386{
2387 MagickBooleanType status;
2388 Image* filteredImage;
2389
2390
2391 assert(image != NULL);
2392 assert(exception != NULL);
2393
2394 status = checkOpenCLEnvironment(exception);
2395 if (status == MagickFalse)
2396 return NULL;
2397
dirk5dcb7622013-12-01 10:43:43 +00002398 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00002399 if (status == MagickFalse)
2400 return NULL;
2401
2402 if (splitImage(image) && (image->rows / 2 > radius))
2403 filteredImage = ComputeUnsharpMaskImageSection(image,channel,radius,sigma,gain,threshold,exception);
2404 else
2405 filteredImage = ComputeUnsharpMaskImage(image,channel,radius,sigma,gain,threshold,exception);
cristyf034abb2013-11-24 14:16:14 +00002406 return filteredImage;
2407
2408}
2409
cristyf034abb2013-11-24 14:16:14 +00002410static MagickBooleanType resizeHorizontalFilter(cl_mem inputImage
2411 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2412 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2413 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float xFactor
2414 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2415{
2416 MagickBooleanType status = MagickFalse;
2417
2418 float scale, support;
2419 unsigned int i;
2420 cl_kernel horizontalKernel = NULL;
2421 cl_int clStatus;
2422 size_t global_work_size[2];
2423 size_t local_work_size[2];
2424 int resizeFilterType, resizeWindowType;
2425 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2426 size_t totalLocalMemorySize;
2427 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2428 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2429 size_t deviceLocalMemorySize;
2430 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2431
2432 const unsigned int workgroupSize = 256;
2433 unsigned int pixelPerWorkgroup;
2434 unsigned int chunkSize;
2435
2436 /*
2437 Apply filter to resize vertically from image to resize image.
2438 */
cristye85d0f72013-11-27 02:25:43 +00002439 scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
cristyf034abb2013-11-24 14:16:14 +00002440 support=scale*GetResizeFilterSupport(resizeFilter);
2441 if (support < 0.5)
2442 {
2443 /*
2444 Support too small even for nearest neighbour: Reduce to point
2445 sampling.
2446 */
2447 support=(MagickRealType) 0.5;
2448 scale=1.0;
2449 }
2450 scale=PerceptibleReciprocal(scale);
2451
2452 if (resizedColumns < workgroupSize)
2453 {
2454 chunkSize = 32;
2455 pixelPerWorkgroup = 32;
2456 }
2457 else
2458 {
2459 chunkSize = workgroupSize;
2460 pixelPerWorkgroup = workgroupSize;
2461 }
2462
2463 /* get the local memory size supported by the device */
2464 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2465
2466 while(1)
2467 {
2468 /* calculate the local memory size needed per workgroup */
2469 cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
2470 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
2471 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2472 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2473 totalLocalMemorySize = imageCacheLocalMemorySize;
2474
2475 /* local size for the pixel accumulator */
2476 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2477 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2478
2479 /* local memory size for the weight accumulator */
2480 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2481 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2482
2483 /* local memory size for the gamma accumulator */
2484 if (matte == 0)
2485 gammaAccumulatorLocalMemorySize = sizeof(float);
2486 else
2487 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2488 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2489
2490 if (totalLocalMemorySize <= deviceLocalMemorySize)
2491 break;
2492 else
2493 {
2494 pixelPerWorkgroup = pixelPerWorkgroup/2;
2495 chunkSize = chunkSize/2;
2496 if (pixelPerWorkgroup == 0
2497 || chunkSize == 0)
2498 {
2499 /* quit, fallback to CPU */
2500 goto cleanup;
2501 }
2502 }
2503 }
2504
2505 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2506 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2507
2508
2509 if (resizeFilterType == SincFastWeightingFunction
2510 && resizeWindowType == SincFastWeightingFunction)
2511 {
2512 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilterSinc");
2513 }
2514 else
2515 {
2516 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilter");
2517 }
2518 if (horizontalKernel == NULL)
2519 {
cristya22457d2013-12-07 14:03:06 +00002520 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002521 goto cleanup;
2522 }
2523
2524 i = 0;
2525 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2526 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2527 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2528 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2529 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
2530 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2531
2532 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2533 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2534
2535 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2536 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2537 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2538
2539 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2540 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2541
2542 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2543 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2544
2545 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2546 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2547
2548 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2549 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2550
2551
2552 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2553 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2554 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2555 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2556
2557
2558 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2559 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2560 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2561
2562 if (clStatus != CL_SUCCESS)
2563 {
cristya22457d2013-12-07 14:03:06 +00002564 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002565 goto cleanup;
2566 }
2567
2568 global_work_size[0] = (resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2569 global_work_size[1] = resizedRows;
2570
2571 local_work_size[0] = workgroupSize;
2572 local_work_size[1] = 1;
2573 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2574 if (clStatus != CL_SUCCESS)
2575 {
cristya22457d2013-12-07 14:03:06 +00002576 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002577 goto cleanup;
2578 }
2579 clFlush(queue);
2580 status = MagickTrue;
2581
2582
2583cleanup:
cristya22457d2013-12-07 14:03:06 +00002584 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2585
cristyf034abb2013-11-24 14:16:14 +00002586 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2587
2588 return status;
2589}
2590
2591
2592static MagickBooleanType resizeVerticalFilter(cl_mem inputImage
2593 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2594 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2595 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float yFactor
2596 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2597{
2598 MagickBooleanType status = MagickFalse;
2599
2600 float scale, support;
2601 unsigned int i;
2602 cl_kernel horizontalKernel = NULL;
2603 cl_int clStatus;
2604 size_t global_work_size[2];
2605 size_t local_work_size[2];
2606 int resizeFilterType, resizeWindowType;
2607 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2608 size_t totalLocalMemorySize;
2609 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2610 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2611 size_t deviceLocalMemorySize;
2612 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2613
2614 const unsigned int workgroupSize = 256;
2615 unsigned int pixelPerWorkgroup;
2616 unsigned int chunkSize;
2617
2618 /*
2619 Apply filter to resize vertically from image to resize image.
2620 */
cristye85d0f72013-11-27 02:25:43 +00002621 scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
cristyf034abb2013-11-24 14:16:14 +00002622 support=scale*GetResizeFilterSupport(resizeFilter);
2623 if (support < 0.5)
2624 {
2625 /*
2626 Support too small even for nearest neighbour: Reduce to point
2627 sampling.
2628 */
2629 support=(MagickRealType) 0.5;
2630 scale=1.0;
2631 }
2632 scale=PerceptibleReciprocal(scale);
2633
2634 if (resizedRows < workgroupSize)
2635 {
2636 chunkSize = 32;
2637 pixelPerWorkgroup = 32;
2638 }
2639 else
2640 {
2641 chunkSize = workgroupSize;
2642 pixelPerWorkgroup = workgroupSize;
2643 }
2644
2645 /* get the local memory size supported by the device */
2646 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2647
2648 while(1)
2649 {
2650 /* calculate the local memory size needed per workgroup */
2651 cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
2652 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
2653 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2654 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2655 totalLocalMemorySize = imageCacheLocalMemorySize;
2656
2657 /* local size for the pixel accumulator */
2658 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2659 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2660
2661 /* local memory size for the weight accumulator */
2662 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2663 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2664
2665 /* local memory size for the gamma accumulator */
2666 if (matte == 0)
2667 gammaAccumulatorLocalMemorySize = sizeof(float);
2668 else
2669 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2670 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2671
2672 if (totalLocalMemorySize <= deviceLocalMemorySize)
2673 break;
2674 else
2675 {
2676 pixelPerWorkgroup = pixelPerWorkgroup/2;
2677 chunkSize = chunkSize/2;
2678 if (pixelPerWorkgroup == 0
2679 || chunkSize == 0)
2680 {
2681 /* quit, fallback to CPU */
2682 goto cleanup;
2683 }
2684 }
2685 }
2686
2687 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2688 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2689
2690 if (resizeFilterType == SincFastWeightingFunction
2691 && resizeWindowType == SincFastWeightingFunction)
2692 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilterSinc");
2693 else
2694 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilter");
2695
2696 if (horizontalKernel == NULL)
2697 {
cristya22457d2013-12-07 14:03:06 +00002698 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002699 goto cleanup;
2700 }
2701
2702 i = 0;
2703 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2704 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2705 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2706 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2707 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor);
2708 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2709
2710 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2711 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2712
2713 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2714 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2715 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2716
2717 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2718 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2719
2720 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2721 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2722
2723 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2724 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2725
2726 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2727 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2728
2729
2730 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2731 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2732 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2733 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2734
2735
2736 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2737 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2738 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2739
2740 if (clStatus != CL_SUCCESS)
2741 {
cristya22457d2013-12-07 14:03:06 +00002742 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002743 goto cleanup;
2744 }
2745
2746 global_work_size[0] = resizedColumns;
2747 global_work_size[1] = (resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2748
2749 local_work_size[0] = 1;
2750 local_work_size[1] = workgroupSize;
2751 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2752 if (clStatus != CL_SUCCESS)
2753 {
cristya22457d2013-12-07 14:03:06 +00002754 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002755 goto cleanup;
2756 }
2757 clFlush(queue);
2758 status = MagickTrue;
2759
2760
2761cleanup:
cristya22457d2013-12-07 14:03:06 +00002762 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2763
cristyf034abb2013-11-24 14:16:14 +00002764 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2765
2766 return status;
2767}
2768
2769
2770
2771static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedColumns, const size_t resizedRows
2772 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
2773{
2774
2775 MagickBooleanType outputReady = MagickFalse;
2776 Image* filteredImage = NULL;
2777 MagickCLEnv clEnv = NULL;
2778
2779 cl_int clStatus;
2780 MagickBooleanType status;
2781 const void *inputPixels;
2782 void* filteredPixels;
2783 void* hostPtr;
2784 const MagickRealType* resizeFilterCoefficient;
2785 float* mappedCoefficientBuffer;
2786 float xFactor, yFactor;
2787 MagickSizeType length;
2788
2789 cl_mem_flags mem_flags;
2790 cl_context context = NULL;
2791 cl_mem inputImageBuffer = NULL;
2792 cl_mem tempImageBuffer = NULL;
2793 cl_mem filteredImageBuffer = NULL;
2794 cl_mem cubicCoefficientsBuffer = NULL;
2795 cl_command_queue queue = NULL;
2796
2797 unsigned int i;
2798
2799 clEnv = GetDefaultOpenCLEnv();
2800 context = GetOpenCLContext(clEnv);
2801
2802 /* Create and initialize OpenCL buffers. */
2803 inputPixels = NULL;
2804 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2805 if (inputPixels == (const void *) NULL)
2806 {
cristya22457d2013-12-07 14:03:06 +00002807 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00002808 goto cleanup;
2809 }
2810
2811 /* If the host pointer is aligned to the size of CLPixelPacket,
2812 then use the host buffer directly from the GPU; otherwise,
2813 create a buffer on the GPU and copy the data over */
2814 if (ALIGNED(inputPixels,CLPixelPacket))
2815 {
2816 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2817 }
2818 else
2819 {
2820 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2821 }
2822 /* create a CL buffer from image pixel buffer */
2823 length = inputImage->columns * inputImage->rows;
2824 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2825 if (clStatus != CL_SUCCESS)
2826 {
cristya22457d2013-12-07 14:03:06 +00002827 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002828 goto cleanup;
2829 }
2830
2831 cubicCoefficientsBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
2832 if (clStatus != CL_SUCCESS)
2833 {
cristya22457d2013-12-07 14:03:06 +00002834 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002835 goto cleanup;
2836 }
2837 queue = AcquireOpenCLCommandQueue(clEnv);
2838 mappedCoefficientBuffer = (float*)clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float)
2839 , 0, NULL, NULL, &clStatus);
2840 if (clStatus != CL_SUCCESS)
2841 {
cristya22457d2013-12-07 14:03:06 +00002842 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002843 goto cleanup;
2844 }
2845 resizeFilterCoefficient = GetResizeFilterCoefficient(resizeFilter);
2846 for (i = 0; i < 7; i++)
2847 {
2848 mappedCoefficientBuffer[i] = (float) resizeFilterCoefficient[i];
2849 }
2850 clStatus = clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL);
2851 if (clStatus != CL_SUCCESS)
2852 {
cristya22457d2013-12-07 14:03:06 +00002853 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002854 goto cleanup;
2855 }
2856
2857 filteredImage = CloneImage(inputImage,resizedColumns,resizedRows,MagickTrue,exception);
2858 if (filteredImage == NULL)
2859 goto cleanup;
2860
dirke19d0cc2013-12-01 10:07:42 +00002861 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00002862 {
cristya22457d2013-12-07 14:03:06 +00002863 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002864 goto cleanup;
2865 }
2866 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2867 if (filteredPixels == (void *) NULL)
2868 {
cristya22457d2013-12-07 14:03:06 +00002869 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00002870 goto cleanup;
2871 }
2872
2873 if (ALIGNED(filteredPixels,CLPixelPacket))
2874 {
2875 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2876 hostPtr = filteredPixels;
2877 }
2878 else
2879 {
2880 mem_flags = CL_MEM_WRITE_ONLY;
2881 hostPtr = NULL;
2882 }
2883
2884 /* create a CL buffer from image pixel buffer */
2885 length = filteredImage->columns * filteredImage->rows;
2886 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2887 if (clStatus != CL_SUCCESS)
2888 {
cristya22457d2013-12-07 14:03:06 +00002889 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002890 goto cleanup;
2891 }
2892
2893 xFactor=(float) resizedColumns/(float) inputImage->columns;
2894 yFactor=(float) resizedRows/(float) inputImage->rows;
2895 if (xFactor > yFactor)
2896 {
2897
2898 length = resizedColumns*inputImage->rows;
2899 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2900 if (clStatus != CL_SUCCESS)
2901 {
cristya22457d2013-12-07 14:03:06 +00002902 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002903 goto cleanup;
2904 }
2905
dirke19d0cc2013-12-01 10:07:42 +00002906 status = resizeHorizontalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002907 , tempImageBuffer, resizedColumns, inputImage->rows
2908 , resizeFilter, cubicCoefficientsBuffer
2909 , xFactor, clEnv, queue, exception);
2910 if (status != MagickTrue)
2911 goto cleanup;
2912
dirke19d0cc2013-12-01 10:07:42 +00002913 status = resizeVerticalFilter(tempImageBuffer, resizedColumns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002914 , filteredImageBuffer, resizedColumns, resizedRows
2915 , resizeFilter, cubicCoefficientsBuffer
2916 , yFactor, clEnv, queue, exception);
2917 if (status != MagickTrue)
2918 goto cleanup;
2919 }
2920 else
2921 {
2922 length = inputImage->columns*resizedRows;
2923 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2924 if (clStatus != CL_SUCCESS)
2925 {
cristya22457d2013-12-07 14:03:06 +00002926 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002927 goto cleanup;
2928 }
2929
dirke19d0cc2013-12-01 10:07:42 +00002930 status = resizeVerticalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002931 , tempImageBuffer, inputImage->columns, resizedRows
2932 , resizeFilter, cubicCoefficientsBuffer
2933 , yFactor, clEnv, queue, exception);
2934 if (status != MagickTrue)
2935 goto cleanup;
2936
dirke19d0cc2013-12-01 10:07:42 +00002937 status = resizeHorizontalFilter(tempImageBuffer, inputImage->columns, resizedRows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002938 , filteredImageBuffer, resizedColumns, resizedRows
2939 , resizeFilter, cubicCoefficientsBuffer
2940 , xFactor, clEnv, queue, exception);
2941 if (status != MagickTrue)
2942 goto cleanup;
2943 }
2944 length = resizedColumns*resizedRows;
2945 if (ALIGNED(filteredPixels,CLPixelPacket))
2946 {
2947 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2948 }
2949 else
2950 {
2951 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2952 }
2953 if (clStatus != CL_SUCCESS)
2954 {
cristya22457d2013-12-07 14:03:06 +00002955 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002956 goto cleanup;
2957 }
2958 outputReady = MagickTrue;
2959
2960cleanup:
cristya22457d2013-12-07 14:03:06 +00002961 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2962
cristyf034abb2013-11-24 14:16:14 +00002963 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2964 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2965 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2966 if (cubicCoefficientsBuffer!=NULL) clReleaseMemObject(cubicCoefficientsBuffer);
2967 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2968 if (outputReady == MagickFalse)
2969 {
2970 if (filteredImage != NULL)
2971 {
2972 DestroyImage(filteredImage);
2973 filteredImage = NULL;
2974 }
2975 }
2976
2977 return filteredImage;
2978}
2979
2980const ResizeWeightingFunctionType supportedResizeWeighting[] =
2981{
2982 BoxWeightingFunction
2983 ,TriangleWeightingFunction
2984 ,HanningWeightingFunction
2985 ,HammingWeightingFunction
2986 ,BlackmanWeightingFunction
2987 ,CubicBCWeightingFunction
2988 ,SincWeightingFunction
2989 ,SincFastWeightingFunction
2990 ,LastWeightingFunction
2991};
2992
2993static MagickBooleanType gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)
2994{
2995 MagickBooleanType supported = MagickFalse;
2996 unsigned int i;
2997 for (i = 0; ;i++)
2998 {
2999 if (supportedResizeWeighting[i] == LastWeightingFunction)
3000 break;
3001 if (supportedResizeWeighting[i] == f)
3002 {
3003 supported = MagickTrue;
3004 break;
3005 }
3006 }
3007 return supported;
3008}
3009
3010
3011/*
3012%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3013% %
3014% %
3015% %
3016% A c c e l e r a t e R e s i z e I m a g e %
3017% %
3018% %
3019% %
3020%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3021%
3022% AccelerateResizeImage() is an OpenCL implementation of ResizeImage()
3023%
3024% AccelerateResizeImage() scales an image to the desired dimensions, using the given
3025% filter (see AcquireFilterInfo()).
3026%
3027% If an undefined filter is given the filter defaults to Mitchell for a
3028% colormapped image, a image with a matte channel, or if the image is
3029% enlarged. Otherwise the filter defaults to a Lanczos.
3030%
3031% AccelerateResizeImage() was inspired by Paul Heckbert's "zoom" program.
3032%
3033% The format of the AccelerateResizeImage method is:
3034%
3035% Image *ResizeImage(Image *image,const size_t columns,
3036% const size_t rows, const ResizeFilter* filter,
cristy3f6d1482010-01-20 21:01:21 +00003037% ExceptionInfo *exception)
3038%
3039% A description of each parameter follows:
3040%
3041% o image: the image.
3042%
cristyf034abb2013-11-24 14:16:14 +00003043% o columns: the number of columns in the scaled image.
cristy3f6d1482010-01-20 21:01:21 +00003044%
cristyf034abb2013-11-24 14:16:14 +00003045% o rows: the number of rows in the scaled image.
3046%
3047% o filter: Image filter to use.
cristy3f6d1482010-01-20 21:01:21 +00003048%
3049% o exception: return any errors or warnings in this structure.
3050%
3051*/
cristyd43a46b2010-01-21 02:13:41 +00003052
cristyf034abb2013-11-24 14:16:14 +00003053MagickExport
3054Image* AccelerateResizeImage(const Image* image, const size_t resizedColumns, const size_t resizedRows
3055 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
cristyd43a46b2010-01-21 02:13:41 +00003056{
cristyf034abb2013-11-24 14:16:14 +00003057 MagickBooleanType status;
3058 Image* filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003059
cristyf034abb2013-11-24 14:16:14 +00003060 assert(image != NULL);
3061 assert(resizeFilter != NULL);
cristyd43a46b2010-01-21 02:13:41 +00003062
cristyf034abb2013-11-24 14:16:14 +00003063 status = checkOpenCLEnvironment(exception);
3064 if (status == MagickFalse)
3065 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003066
dirk5dcb7622013-12-01 10:43:43 +00003067 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00003068 if (status == MagickFalse)
3069 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003070
cristyf034abb2013-11-24 14:16:14 +00003071 if (gpuSupportedResizeWeighting(GetResizeFilterWeightingType(resizeFilter)) == MagickFalse
3072 || gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(resizeFilter)) == MagickFalse)
3073 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003074
cristyf034abb2013-11-24 14:16:14 +00003075 filteredImage = ComputeResizeImage(image,resizedColumns,resizedRows,resizeFilter,exception);
cristyf034abb2013-11-24 14:16:14 +00003076 return filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003077
cristyd43a46b2010-01-21 02:13:41 +00003078}
3079
cristyd43a46b2010-01-21 02:13:41 +00003080
cristyf034abb2013-11-24 14:16:14 +00003081static MagickBooleanType ComputeContrastImage(Image *inputImage, const MagickBooleanType sharpen, ExceptionInfo *exception)
3082{
3083 MagickBooleanType outputReady = MagickFalse;
3084 MagickCLEnv clEnv = NULL;
3085
3086 cl_int clStatus;
3087 size_t global_work_size[2];
3088
3089 void *inputPixels = NULL;
3090 MagickSizeType length;
3091 unsigned int uSharpen;
3092 unsigned int i;
3093
3094 cl_mem_flags mem_flags;
3095 cl_context context = NULL;
3096 cl_mem inputImageBuffer = NULL;
3097 cl_kernel filterKernel = NULL;
3098 cl_command_queue queue = NULL;
3099
3100 clEnv = GetDefaultOpenCLEnv();
3101 context = GetOpenCLContext(clEnv);
3102
3103 /* Create and initialize OpenCL buffers. */
3104 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3105 if (inputPixels == (void *) NULL)
3106 {
cristya22457d2013-12-07 14:03:06 +00003107 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00003108 goto cleanup;
3109 }
3110
3111 /* If the host pointer is aligned to the size of CLPixelPacket,
3112 then use the host buffer directly from the GPU; otherwise,
3113 create a buffer on the GPU and copy the data over */
3114 if (ALIGNED(inputPixels,CLPixelPacket))
3115 {
3116 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3117 }
3118 else
3119 {
3120 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3121 }
3122 /* create a CL buffer from image pixel buffer */
3123 length = inputImage->columns * inputImage->rows;
3124 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3125 if (clStatus != CL_SUCCESS)
3126 {
cristya22457d2013-12-07 14:03:06 +00003127 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003128 goto cleanup;
3129 }
3130
3131 filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
3132 if (filterKernel == NULL)
3133 {
cristya22457d2013-12-07 14:03:06 +00003134 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003135 goto cleanup;
3136 }
3137
3138 i = 0;
3139 clStatus=clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3140
3141 uSharpen = (sharpen == MagickFalse)?0:1;
3142 clStatus|=clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
3143 if (clStatus != CL_SUCCESS)
3144 {
cristya22457d2013-12-07 14:03:06 +00003145 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003146 goto cleanup;
3147 }
3148
3149 global_work_size[0] = inputImage->columns;
3150 global_work_size[1] = inputImage->rows;
3151 /* launch the kernel */
3152 queue = AcquireOpenCLCommandQueue(clEnv);
3153 clStatus = clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3154 if (clStatus != CL_SUCCESS)
3155 {
cristya22457d2013-12-07 14:03:06 +00003156 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003157 goto cleanup;
3158 }
3159 clFlush(queue);
3160
3161 if (ALIGNED(inputPixels,CLPixelPacket))
3162 {
3163 length = inputImage->columns * inputImage->rows;
3164 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3165 }
3166 else
3167 {
3168 length = inputImage->columns * inputImage->rows;
3169 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3170 }
3171 if (clStatus != CL_SUCCESS)
3172 {
cristya22457d2013-12-07 14:03:06 +00003173 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003174 goto cleanup;
3175 }
3176 outputReady = MagickTrue;
3177
3178cleanup:
cristya22457d2013-12-07 14:03:06 +00003179 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +00003180
3181 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
3182 if (filterKernel!=NULL) RelinquishOpenCLKernel(clEnv, filterKernel);
3183 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
3184 return outputReady;
3185}
3186
3187/*
3188%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3189% %
3190% %
3191% %
3192% C o n t r a s t I m a g e w i t h O p e n C L %
3193% %
3194% %
3195% %
3196%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3197%
3198% ContrastImage() enhances the intensity differences between the lighter and
3199% darker elements of the image. Set sharpen to a MagickTrue to increase the
3200% image contrast otherwise the contrast is reduced.
3201%
3202% The format of the ContrastImage method is:
3203%
3204% MagickBooleanType ContrastImage(Image *image,
3205% const MagickBooleanType sharpen)
3206%
3207% A description of each parameter follows:
3208%
3209% o image: the image.
3210%
3211% o sharpen: Increase or decrease image contrast.
3212%
3213*/
3214
3215MagickExport
3216MagickBooleanType AccelerateContrastImage(Image* image, const MagickBooleanType sharpen, ExceptionInfo* exception)
3217{
3218 MagickBooleanType status;
3219
3220 assert(image != NULL);
3221 assert(exception != NULL);
3222
3223 status = checkOpenCLEnvironment(exception);
3224 if (status == MagickFalse)
3225 return MagickFalse;
3226
dirk5dcb7622013-12-01 10:43:43 +00003227 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00003228 if (status == MagickFalse)
3229 return MagickFalse;
3230
3231 status = ComputeContrastImage(image,sharpen,exception);
cristyf034abb2013-11-24 14:16:14 +00003232 return status;
3233}
3234
3235
3236
3237MagickBooleanType ComputeModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3238{
3239 register ssize_t
cristyd43a46b2010-01-21 02:13:41 +00003240 i;
3241
cristyf034abb2013-11-24 14:16:14 +00003242 cl_float
3243 bright,
3244 hue,
3245 saturation;
3246
3247 cl_int color;
3248
3249 MagickBooleanType outputReady;
3250
3251 MagickCLEnv clEnv;
3252
3253 void *inputPixels;
3254
3255 MagickSizeType length;
3256
3257 cl_context context;
3258 cl_command_queue queue;
3259 cl_kernel modulateKernel;
3260
3261 cl_mem inputImageBuffer;
3262 cl_mem_flags mem_flags;
3263
3264 cl_int clStatus;
3265
3266 Image * inputImage = image;
3267
3268 inputImageBuffer = NULL;
3269 modulateKernel = NULL;
3270
3271 assert(inputImage != (Image *) NULL);
3272 assert(inputImage->signature == MagickSignature);
3273 if (inputImage->debug != MagickFalse)
3274 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
cristyd43a46b2010-01-21 02:13:41 +00003275
3276 /*
cristyf034abb2013-11-24 14:16:14 +00003277 * initialize opencl env
3278 */
3279 clEnv = GetDefaultOpenCLEnv();
3280 context = GetOpenCLContext(clEnv);
3281 queue = AcquireOpenCLCommandQueue(clEnv);
cristyd43a46b2010-01-21 02:13:41 +00003282
cristyf034abb2013-11-24 14:16:14 +00003283 outputReady = MagickFalse;
cristyd43a46b2010-01-21 02:13:41 +00003284
cristyf034abb2013-11-24 14:16:14 +00003285 /* Create and initialize OpenCL buffers.
3286 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3287 assume this will get a writable image
3288 */
3289 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3290 if (inputPixels == (void *) NULL)
cristyd43a46b2010-01-21 02:13:41 +00003291 {
cristya22457d2013-12-07 14:03:06 +00003292 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00003293 goto cleanup;
cristyd43a46b2010-01-21 02:13:41 +00003294 }
cristyf034abb2013-11-24 14:16:14 +00003295
3296 /* If the host pointer is aligned to the size of CLPixelPacket,
3297 then use the host buffer directly from the GPU; otherwise,
3298 create a buffer on the GPU and copy the data over
3299 */
3300 if (ALIGNED(inputPixels,CLPixelPacket))
3301 {
3302 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3303 }
3304 else
3305 {
3306 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3307 }
3308 /* create a CL buffer from image pixel buffer */
3309 length = inputImage->columns * inputImage->rows;
3310 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3311 if (clStatus != CL_SUCCESS)
3312 {
cristya22457d2013-12-07 14:03:06 +00003313 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003314 goto cleanup;
3315 }
3316
3317 modulateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Modulate");
3318 if (modulateKernel == NULL)
3319 {
cristya22457d2013-12-07 14:03:06 +00003320 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003321 goto cleanup;
3322 }
3323
3324 bright=percent_brightness;
3325 hue=percent_hue;
3326 saturation=percent_saturation;
3327 color=colorspace;
3328
3329 i = 0;
3330 clStatus=clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3331 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3332 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3333 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3334 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3335 if (clStatus != CL_SUCCESS)
3336 {
cristya22457d2013-12-07 14:03:06 +00003337 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003338 printf("no kernel\n");
3339 goto cleanup;
3340 }
3341
3342 {
3343 size_t global_work_size[2];
3344 global_work_size[0] = inputImage->columns;
3345 global_work_size[1] = inputImage->rows;
3346 /* launch the kernel */
3347 clStatus = clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3348 if (clStatus != CL_SUCCESS)
3349 {
cristya22457d2013-12-07 14:03:06 +00003350 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003351 goto cleanup;
3352 }
3353 clFlush(queue);
3354 }
3355
3356 if (ALIGNED(inputPixels,CLPixelPacket))
3357 {
3358 length = inputImage->columns * inputImage->rows;
3359 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3360 }
3361 else
3362 {
3363 length = inputImage->columns * inputImage->rows;
3364 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3365 }
3366 if (clStatus != CL_SUCCESS)
3367 {
cristya22457d2013-12-07 14:03:06 +00003368 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003369 goto cleanup;
3370 }
3371
3372 outputReady = MagickTrue;
3373
3374cleanup:
cristya22457d2013-12-07 14:03:06 +00003375 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +00003376
3377 if (inputPixels) {
3378 //ReleasePixelCachePixels();
3379 inputPixels = NULL;
3380 }
3381
3382 if (inputImageBuffer!=NULL)
3383 clReleaseMemObject(inputImageBuffer);
3384 if (modulateKernel!=NULL)
3385 RelinquishOpenCLKernel(clEnv, modulateKernel);
3386 if (queue != NULL)
3387 RelinquishOpenCLCommandQueue(clEnv, queue);
3388
3389 return outputReady;
3390
cristy3f6d1482010-01-20 21:01:21 +00003391}
cristyf034abb2013-11-24 14:16:14 +00003392
3393/*
3394%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3395% %
3396% %
3397% %
3398% M o d u l a t e I m a g e w i t h O p e n C L %
3399% %
3400% %
3401% %
3402%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3403%
3404% ModulateImage() lets you control the brightness, saturation, and hue
3405% of an image. Modulate represents the brightness, saturation, and hue
3406% as one parameter (e.g. 90,150,100). If the image colorspace is HSL, the
3407% modulation is lightness, saturation, and hue. For HWB, use blackness,
3408% whiteness, and hue. And for HCL, use chrome, luma, and hue.
3409%
3410% The format of the ModulateImage method is:
3411%
3412% MagickBooleanType ModulateImage(Image *image,const char *modulate)
3413%
3414% A description of each parameter follows:
3415%
3416% o image: the image.
3417%
3418% o percent_*: Define the percent change in brightness, saturation, and
3419% hue.
3420%
3421*/
3422
3423MagickExport
3424MagickBooleanType AccelerateModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3425{
3426 MagickBooleanType status;
3427
3428 assert(image != NULL);
3429 assert(exception != NULL);
3430
3431 status = checkOpenCLEnvironment(exception);
3432 if (status == MagickFalse)
3433 return MagickFalse;
3434
dirk5dcb7622013-12-01 10:43:43 +00003435 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00003436 if (status == MagickFalse)
3437 return MagickFalse;
3438
3439 if ((colorspace != HSLColorspace && colorspace != UndefinedColorspace))
3440 return MagickFalse;
3441
3442
3443 status = ComputeModulateImage(image,percent_brightness, percent_hue, percent_saturation, colorspace, exception);
cristyf034abb2013-11-24 14:16:14 +00003444 return status;
3445}
3446
3447
3448MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const ChannelType channel, ExceptionInfo * _exception)
3449{
3450#define EqualizeImageTag "Equalize/Image"
3451
3452 ExceptionInfo
3453 *exception=_exception;
3454
3455 FloatPixelPacket
3456 white,
3457 black,
3458 intensity,
3459 *map;
3460
3461 cl_uint4
3462 *histogram;
3463
3464 PixelPacket
3465 *equalize_map;
3466
3467 register ssize_t
3468 i;
3469
3470 Image * image = inputImage;
3471
3472 MagickBooleanType outputReady;
3473 MagickCLEnv clEnv;
3474
3475 cl_int clStatus;
3476 size_t global_work_size[2];
3477
3478 void *inputPixels;
3479 cl_mem_flags mem_flags;
3480
3481 cl_context context;
3482 cl_mem inputImageBuffer;
3483 cl_mem histogramBuffer;
3484 cl_mem equalizeMapBuffer;
3485 cl_kernel histogramKernel;
3486 cl_kernel equalizeKernel;
3487 cl_command_queue queue;
3488 cl_int colorspace;
3489
3490 void* hostPtr;
3491
3492 MagickSizeType length;
3493
3494 inputPixels = NULL;
3495 inputImageBuffer = NULL;
3496 histogramBuffer = NULL;
3497 histogramKernel = NULL;
3498 equalizeKernel = NULL;
3499 context = NULL;
3500 queue = NULL;
3501 outputReady = MagickFalse;
3502
3503 assert(inputImage != (Image *) NULL);
3504 assert(inputImage->signature == MagickSignature);
3505 if (inputImage->debug != MagickFalse)
3506 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
3507
3508 /*
3509 Allocate and initialize histogram arrays.
3510 */
3511 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
3512 if (histogram == (cl_uint4 *) NULL)
3513 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3514
3515 /* reset histogram */
3516 (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
3517
3518 /*
3519 * initialize opencl env
3520 */
3521 clEnv = GetDefaultOpenCLEnv();
3522 context = GetOpenCLContext(clEnv);
3523 queue = AcquireOpenCLCommandQueue(clEnv);
3524
3525 /* Create and initialize OpenCL buffers. */
3526 /* inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); */
3527 /* assume this will get a writable image */
3528 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3529
3530 if (inputPixels == (void *) NULL)
3531 {
cristya22457d2013-12-07 14:03:06 +00003532 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00003533 goto cleanup;
3534 }
3535 /* If the host pointer is aligned to the size of CLPixelPacket,
3536 then use the host buffer directly from the GPU; otherwise,
3537 create a buffer on the GPU and copy the data over */
3538 if (ALIGNED(inputPixels,CLPixelPacket))
3539 {
3540 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3541 }
3542 else
3543 {
3544 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3545 }
3546 /* create a CL buffer from image pixel buffer */
3547 length = inputImage->columns * inputImage->rows;
3548 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3549 if (clStatus != CL_SUCCESS)
3550 {
cristya22457d2013-12-07 14:03:06 +00003551 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003552 goto cleanup;
3553 }
3554
3555 /* If the host pointer is aligned to the size of cl_uint,
3556 then use the host buffer directly from the GPU; otherwise,
3557 create a buffer on the GPU and copy the data over */
3558 if (ALIGNED(histogram,cl_uint4))
3559 {
3560 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3561 hostPtr = histogram;
3562 }
3563 else
3564 {
3565 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3566 hostPtr = histogram;
3567 }
3568 /* create a CL buffer for histogram */
3569 length = (MaxMap+1);
3570 histogramBuffer = clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
3571 if (clStatus != CL_SUCCESS)
3572 {
cristya22457d2013-12-07 14:03:06 +00003573 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003574 goto cleanup;
3575 }
3576
3577 switch (inputImage->colorspace)
3578 {
3579 case RGBColorspace:
3580 colorspace = 1;
3581 break;
3582 case sRGBColorspace:
3583 colorspace = 0;
3584 break;
3585 default:
3586 {
3587 /* something is wrong, as we checked in checkAccelerateCondition */
3588 }
3589 }
3590
3591 /* get the OpenCL kernel */
3592 histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
3593 if (histogramKernel == NULL)
3594 {
cristya22457d2013-12-07 14:03:06 +00003595 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003596 goto cleanup;
3597 }
3598
3599 /* set the kernel arguments */
3600 i = 0;
3601 clStatus=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3602 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
3603 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace);
3604 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
3605 if (clStatus != CL_SUCCESS)
3606 {
cristya22457d2013-12-07 14:03:06 +00003607 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003608 goto cleanup;
3609 }
3610
3611 /* launch the kernel */
3612 global_work_size[0] = inputImage->columns;
3613 global_work_size[1] = inputImage->rows;
3614
3615 clStatus = clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3616
3617 if (clStatus != CL_SUCCESS)
3618 {
cristya22457d2013-12-07 14:03:06 +00003619 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003620 goto cleanup;
3621 }
3622 clFlush(queue);
3623
3624 /* read from the kenel output */
3625 if (ALIGNED(histogram,cl_uint4))
3626 {
3627 length = (MaxMap+1);
3628 clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
3629 }
3630 else
3631 {
3632 length = (MaxMap+1);
3633 clStatus = clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
3634 }
3635 if (clStatus != CL_SUCCESS)
3636 {
cristya22457d2013-12-07 14:03:06 +00003637 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003638 goto cleanup;
3639 }
3640
3641 /* unmap, don't block gpu to use this buffer again. */
3642 if (ALIGNED(histogram,cl_uint4))
3643 {
3644 clStatus = clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
3645 if (clStatus != CL_SUCCESS)
3646 {
cristya22457d2013-12-07 14:03:06 +00003647 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003648 goto cleanup;
3649 }
3650 }
3651
3652 if (getenv("TEST")) {
3653 unsigned int i;
3654 for (i=0; i<(MaxMap+1UL); i++)
3655 {
3656 printf("histogram %d: red %d\n", i, histogram[i].s[2]);
3657 printf("histogram %d: green %d\n", i, histogram[i].s[1]);
3658 printf("histogram %d: blue %d\n", i, histogram[i].s[0]);
cristya22457d2013-12-07 14:03:06 +00003659 printf("histogram %d: alpha %d\n", i, histogram[i].s[3]);
cristyf034abb2013-11-24 14:16:14 +00003660 }
3661 }
3662
3663 /* cpu stuff */
3664 equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
3665 if (equalize_map == (PixelPacket *) NULL)
3666 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3667
3668 map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
3669 if (map == (FloatPixelPacket *) NULL)
3670 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3671
3672 /*
3673 Integrate the histogram to get the equalization map.
3674 */
3675 (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
3676 for (i=0; i <= (ssize_t) MaxMap; i++)
3677 {
3678 if ((channel & SyncChannels) != 0)
3679 {
3680 intensity.red+=histogram[i].s[2];
3681 map[i]=intensity;
3682 continue;
3683 }
3684 if ((channel & RedChannel) != 0)
3685 intensity.red+=histogram[i].s[2];
3686 if ((channel & GreenChannel) != 0)
3687 intensity.green+=histogram[i].s[1];
3688 if ((channel & BlueChannel) != 0)
3689 intensity.blue+=histogram[i].s[0];
3690 if ((channel & OpacityChannel) != 0)
dirke19d0cc2013-12-01 10:07:42 +00003691 intensity.alpha+=histogram[i].s[3];
cristyf034abb2013-11-24 14:16:14 +00003692 if (((channel & IndexChannel) != 0) &&
3693 (image->colorspace == CMYKColorspace))
3694 {
3695 printf("something here\n");
3696 /*intensity.index+=histogram[i].index; */
3697 }
3698 map[i]=intensity;
3699 }
3700 black=map[0];
3701 white=map[(int) MaxMap];
3702 (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
3703 for (i=0; i <= (ssize_t) MaxMap; i++)
3704 {
3705 if ((channel & SyncChannels) != 0)
3706 {
3707 if (white.red != black.red)
3708 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3709 (map[i].red-black.red))/(white.red-black.red)));
3710 continue;
3711 }
3712 if (((channel & RedChannel) != 0) && (white.red != black.red))
3713 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3714 (map[i].red-black.red))/(white.red-black.red)));
3715 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3716 equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3717 (map[i].green-black.green))/(white.green-black.green)));
3718 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3719 equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3720 (map[i].blue-black.blue))/(white.blue-black.blue)));
dirke19d0cc2013-12-01 10:07:42 +00003721 if (((channel & OpacityChannel) != 0) && (white.alpha != black.alpha))
3722 equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3723 (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
cristyf034abb2013-11-24 14:16:14 +00003724 /*
3725 if ((((channel & IndexChannel) != 0) &&
3726 (image->colorspace == CMYKColorspace)) &&
3727 (white.index != black.index))
3728 equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3729 (map[i].index-black.index))/(white.index-black.index)));
3730 */
3731 }
3732
3733 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
3734 map=(FloatPixelPacket *) RelinquishMagickMemory(map);
3735
3736 if (image->storage_class == PseudoClass)
3737 {
3738 /*
3739 Equalize colormap.
3740 */
3741 for (i=0; i < (ssize_t) image->colors; i++)
3742 {
3743 if ((channel & SyncChannels) != 0)
3744 {
3745 if (white.red != black.red)
3746 {
3747 image->colormap[i].red=equalize_map[
3748 ScaleQuantumToMap(image->colormap[i].red)].red;
3749 image->colormap[i].green=equalize_map[
3750 ScaleQuantumToMap(image->colormap[i].green)].red;
3751 image->colormap[i].blue=equalize_map[
3752 ScaleQuantumToMap(image->colormap[i].blue)].red;
dirke19d0cc2013-12-01 10:07:42 +00003753 image->colormap[i].alpha=equalize_map[
3754 ScaleQuantumToMap(image->colormap[i].alpha)].red;
cristyf034abb2013-11-24 14:16:14 +00003755 }
3756 continue;
3757 }
3758 if (((channel & RedChannel) != 0) && (white.red != black.red))
3759 image->colormap[i].red=equalize_map[
3760 ScaleQuantumToMap(image->colormap[i].red)].red;
3761 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3762 image->colormap[i].green=equalize_map[
3763 ScaleQuantumToMap(image->colormap[i].green)].green;
3764 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3765 image->colormap[i].blue=equalize_map[
3766 ScaleQuantumToMap(image->colormap[i].blue)].blue;
3767 if (((channel & OpacityChannel) != 0) &&
dirke19d0cc2013-12-01 10:07:42 +00003768 (white.alpha != black.alpha))
3769 image->colormap[i].alpha=equalize_map[
3770 ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
cristyf034abb2013-11-24 14:16:14 +00003771 }
3772 }
3773
3774 /*
3775 Equalize image.
3776 */
3777
3778 /* GPU can work on this again, image and equalize map as input
3779 image: uchar4 (CLPixelPacket)
3780 equalize_map: uchar4 (PixelPacket)
3781 black, white: float4 (FloatPixelPacket) */
3782
3783 if (inputImageBuffer!=NULL)
3784 clReleaseMemObject(inputImageBuffer);
3785
3786 /* If the host pointer is aligned to the size of CLPixelPacket,
3787 then use the host buffer directly from the GPU; otherwise,
3788 create a buffer on the GPU and copy the data over */
3789 if (ALIGNED(inputPixels,CLPixelPacket))
3790 {
3791 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3792 }
3793 else
3794 {
3795 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3796 }
3797 /* create a CL buffer from image pixel buffer */
3798 length = inputImage->columns * inputImage->rows;
3799 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3800 if (clStatus != CL_SUCCESS)
3801 {
cristya22457d2013-12-07 14:03:06 +00003802 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003803 goto cleanup;
3804 }
3805
3806 /* Create and initialize OpenCL buffers. */
3807 if (ALIGNED(equalize_map, PixelPacket))
3808 {
3809 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3810 hostPtr = equalize_map;
3811 }
3812 else
3813 {
3814 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3815 hostPtr = equalize_map;
3816 }
3817 /* create a CL buffer for eqaulize_map */
3818 length = (MaxMap+1);
3819 equalizeMapBuffer = clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
3820 if (clStatus != CL_SUCCESS)
3821 {
cristya22457d2013-12-07 14:03:06 +00003822 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003823 goto cleanup;
3824 }
3825
3826 /* get the OpenCL kernel */
3827 equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize");
3828 if (equalizeKernel == NULL)
3829 {
cristya22457d2013-12-07 14:03:06 +00003830 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003831 goto cleanup;
3832 }
3833
3834 /* set the kernel arguments */
3835 i = 0;
3836 clStatus=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3837 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
3838 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
3839 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
3840 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
3841 if (clStatus != CL_SUCCESS)
3842 {
cristya22457d2013-12-07 14:03:06 +00003843 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003844 goto cleanup;
3845 }
3846
3847 /* launch the kernel */
3848 global_work_size[0] = inputImage->columns;
3849 global_work_size[1] = inputImage->rows;
3850
3851 clStatus = clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3852
3853 if (clStatus != CL_SUCCESS)
3854 {
cristya22457d2013-12-07 14:03:06 +00003855 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003856 goto cleanup;
3857 }
3858 clFlush(queue);
3859
3860 /* read the data back */
3861 if (ALIGNED(inputPixels,CLPixelPacket))
3862 {
3863 length = inputImage->columns * inputImage->rows;
3864 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3865 }
3866 else
3867 {
3868 length = inputImage->columns * inputImage->rows;
3869 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3870 }
3871 if (clStatus != CL_SUCCESS)
3872 {
cristya22457d2013-12-07 14:03:06 +00003873 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003874 goto cleanup;
3875 }
3876
3877 outputReady = MagickTrue;
3878
3879 equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
3880
3881cleanup:
cristya22457d2013-12-07 14:03:06 +00003882 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +00003883
3884 if (inputPixels) {
3885 /*ReleasePixelCachePixels();*/
3886 inputPixels = NULL;
3887 }
3888
3889 if (inputImageBuffer!=NULL)
3890 clReleaseMemObject(inputImageBuffer);
3891 if (histogramBuffer!=NULL)
3892 clReleaseMemObject(histogramBuffer);
3893 if (histogramKernel!=NULL)
3894 RelinquishOpenCLKernel(clEnv, histogramKernel);
3895 if (queue != NULL)
3896 RelinquishOpenCLCommandQueue(clEnv, queue);
3897
3898 return outputReady;
3899}
3900
3901/*
3902%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3903% %
3904% %
3905% %
3906% E q u a l i z e I m a g e w i t h O p e n C L %
3907% %
3908% %
3909% %
3910%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3911%
3912% EqualizeImage() applies a histogram equalization to the image.
3913%
3914% The format of the EqualizeImage method is:
3915%
3916% MagickBooleanType EqualizeImage(Image *image)
3917% MagickBooleanType EqualizeImageChannel(Image *image,
3918% const ChannelType channel)
3919%
3920% A description of each parameter follows:
3921%
3922% o image: the image.
3923%
3924% o channel: the channel.
3925%
3926*/
3927
3928
3929MagickExport
3930MagickBooleanType AccelerateEqualizeImage(Image* image, const ChannelType channel, ExceptionInfo* exception)
3931{
3932 MagickBooleanType status;
3933
3934 assert(image != NULL);
3935 assert(exception != NULL);
3936
3937 status = checkOpenCLEnvironment(exception);
3938 if (status == MagickFalse)
3939 return MagickFalse;
3940
dirk5dcb7622013-12-01 10:43:43 +00003941 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00003942 if (status == MagickFalse)
3943 return MagickFalse;
3944
3945 /* ensure this is the only pass get in for now. */
3946 if ((channel & SyncChannels) == 0)
3947 return MagickFalse;
3948
3949 if (image->colorspace != sRGBColorspace)
3950 return MagickFalse;
3951
3952 status = ComputeEqualizeImage(image,channel,exception);
cristyf034abb2013-11-24 14:16:14 +00003953 return status;
3954}
3955
3956
3957static Image* ComputeDespeckleImage(const Image* inputImage, ExceptionInfo* exception)
3958{
3959
3960 MagickBooleanType outputReady = MagickFalse;
3961 MagickCLEnv clEnv = NULL;
3962
3963 cl_int clStatus;
3964 size_t global_work_size[2];
3965
3966 const void *inputPixels = NULL;
3967 Image* filteredImage = NULL;
3968 void *filteredPixels = NULL;
3969 void *hostPtr;
3970 MagickSizeType length;
3971
3972 cl_mem_flags mem_flags;
3973 cl_context context = NULL;
3974 cl_mem inputImageBuffer = NULL;
3975 cl_mem tempImageBuffer[2];
3976 cl_mem filteredImageBuffer = NULL;
3977 cl_command_queue queue = NULL;
3978 cl_kernel hullPass1 = NULL;
3979 cl_kernel hullPass2 = NULL;
3980
3981 unsigned int imageWidth, imageHeight;
3982 int matte;
3983 int k;
3984
3985 static const int
3986 X[4] = {0, 1, 1,-1},
3987 Y[4] = {1, 0, 1, 1};
3988
3989 tempImageBuffer[0] = tempImageBuffer[1] = NULL;
3990 clEnv = GetDefaultOpenCLEnv();
3991 context = GetOpenCLContext(clEnv);
3992 queue = AcquireOpenCLCommandQueue(clEnv);
3993
3994 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3995 if (inputPixels == (void *) NULL)
3996 {
cristya22457d2013-12-07 14:03:06 +00003997 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00003998 goto cleanup;
3999 }
4000
4001 if (ALIGNED(inputPixels,CLPixelPacket))
4002 {
4003 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4004 }
4005 else
4006 {
4007 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4008 }
4009 /* create a CL buffer from image pixel buffer */
4010 length = inputImage->columns * inputImage->rows;
4011 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4012 if (clStatus != CL_SUCCESS)
4013 {
cristya22457d2013-12-07 14:03:06 +00004014 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00004015 goto cleanup;
4016 }
4017
4018 mem_flags = CL_MEM_READ_WRITE;
4019 length = inputImage->columns * inputImage->rows;
4020 for (k = 0; k < 2; k++)
4021 {
4022 tempImageBuffer[k] = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
4023 if (clStatus != CL_SUCCESS)
4024 {
cristya22457d2013-12-07 14:03:06 +00004025 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00004026 goto cleanup;
4027 }
4028 }
4029
4030 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4031 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00004032 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00004033 {
cristya22457d2013-12-07 14:03:06 +00004034 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004035 goto cleanup;
4036 }
4037 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4038 if (filteredPixels == (void *) NULL)
4039 {
cristya22457d2013-12-07 14:03:06 +00004040 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00004041 goto cleanup;
4042 }
4043
4044 if (ALIGNED(filteredPixels,CLPixelPacket))
4045 {
4046 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4047 hostPtr = filteredPixels;
4048 }
4049 else
4050 {
4051 mem_flags = CL_MEM_WRITE_ONLY;
4052 hostPtr = NULL;
4053 }
4054 /* create a CL buffer from image pixel buffer */
4055 length = inputImage->columns * inputImage->rows;
4056 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4057 if (clStatus != CL_SUCCESS)
4058 {
cristya22457d2013-12-07 14:03:06 +00004059 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00004060 goto cleanup;
4061 }
4062
4063 hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
4064 hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
4065
4066 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&inputImageBuffer);
4067 clStatus |=clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4068 imageWidth = inputImage->columns;
4069 clStatus |=clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
4070 imageHeight = inputImage->rows;
4071 clStatus |=clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
cristya22457d2013-12-07 14:03:06 +00004072 matte = (inputImage->matte==MagickFalse)?0:1;
cristyf034abb2013-11-24 14:16:14 +00004073 clStatus |=clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
4074 if (clStatus != CL_SUCCESS)
4075 {
cristya22457d2013-12-07 14:03:06 +00004076 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004077 goto cleanup;
4078 }
4079
4080 clStatus = clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4081 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
4082 imageWidth = inputImage->columns;
4083 clStatus |=clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
4084 imageHeight = inputImage->rows;
4085 clStatus |=clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
dirke19d0cc2013-12-01 10:07:42 +00004086 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +00004087 clStatus |=clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
4088 if (clStatus != CL_SUCCESS)
4089 {
cristya22457d2013-12-07 14:03:06 +00004090 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004091 goto cleanup;
4092 }
4093
4094
4095 global_work_size[0] = inputImage->columns;
4096 global_work_size[1] = inputImage->rows;
4097
4098
4099 for (k = 0; k < 4; k++)
4100 {
4101 cl_int2 offset;
4102 int polarity;
4103
4104
4105 offset.s[0] = X[k];
4106 offset.s[1] = Y[k];
4107 polarity = 1;
4108 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4109 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4110 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4111 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4112 if (clStatus != CL_SUCCESS)
4113 {
cristya22457d2013-12-07 14:03:06 +00004114 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004115 goto cleanup;
4116 }
4117 /* launch the kernel */
4118 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4119 if (clStatus != CL_SUCCESS)
4120 {
cristya22457d2013-12-07 14:03:06 +00004121 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004122 goto cleanup;
4123 }
4124 /* launch the kernel */
4125 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4126 if (clStatus != CL_SUCCESS)
4127 {
cristya22457d2013-12-07 14:03:06 +00004128 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004129 goto cleanup;
4130 }
4131
4132
4133 if (k == 0)
4134 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
4135 offset.s[0] = -X[k];
4136 offset.s[1] = -Y[k];
4137 polarity = 1;
4138 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4139 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4140 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4141 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4142 if (clStatus != CL_SUCCESS)
4143 {
cristya22457d2013-12-07 14:03:06 +00004144 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004145 goto cleanup;
4146 }
4147 /* launch the kernel */
4148 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4149 if (clStatus != CL_SUCCESS)
4150 {
cristya22457d2013-12-07 14:03:06 +00004151 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004152 goto cleanup;
4153 }
4154 /* launch the kernel */
4155 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4156 if (clStatus != CL_SUCCESS)
4157 {
cristya22457d2013-12-07 14:03:06 +00004158 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004159 goto cleanup;
4160 }
4161
4162 offset.s[0] = -X[k];
4163 offset.s[1] = -Y[k];
4164 polarity = -1;
4165 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4166 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4167 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4168 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4169 if (clStatus != CL_SUCCESS)
4170 {
cristya22457d2013-12-07 14:03:06 +00004171 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004172 goto cleanup;
4173 }
4174 /* launch the kernel */
4175 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4176 if (clStatus != CL_SUCCESS)
4177 {
cristya22457d2013-12-07 14:03:06 +00004178 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004179 goto cleanup;
4180 }
4181 /* launch the kernel */
4182 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4183 if (clStatus != CL_SUCCESS)
4184 {
cristya22457d2013-12-07 14:03:06 +00004185 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004186 goto cleanup;
4187 }
4188
4189 offset.s[0] = X[k];
4190 offset.s[1] = Y[k];
4191 polarity = -1;
4192 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4193 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4194 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4195 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4196
4197 if (k == 3)
4198 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
4199
4200 if (clStatus != CL_SUCCESS)
4201 {
cristya22457d2013-12-07 14:03:06 +00004202 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004203 goto cleanup;
4204 }
4205 /* launch the kernel */
4206 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4207 if (clStatus != CL_SUCCESS)
4208 {
cristya22457d2013-12-07 14:03:06 +00004209 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004210 goto cleanup;
4211 }
4212 /* launch the kernel */
4213 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4214 if (clStatus != CL_SUCCESS)
4215 {
cristya22457d2013-12-07 14:03:06 +00004216 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004217 goto cleanup;
4218 }
4219 }
4220
4221 if (ALIGNED(filteredPixels,CLPixelPacket))
4222 {
4223 length = inputImage->columns * inputImage->rows;
4224 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4225 }
4226 else
4227 {
4228 length = inputImage->columns * inputImage->rows;
4229 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4230 }
4231 if (clStatus != CL_SUCCESS)
4232 {
cristya22457d2013-12-07 14:03:06 +00004233 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004234 goto cleanup;
4235 }
4236
4237 outputReady = MagickTrue;
4238
4239cleanup:
cristya22457d2013-12-07 14:03:06 +00004240 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4241
cristyf034abb2013-11-24 14:16:14 +00004242 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4243 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4244 for (k = 0; k < 2; k++)
4245 {
4246 if (tempImageBuffer[k]!=NULL) clReleaseMemObject(tempImageBuffer[k]);
4247 }
4248 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4249 if (hullPass1!=NULL) RelinquishOpenCLKernel(clEnv, hullPass1);
4250 if (hullPass2!=NULL) RelinquishOpenCLKernel(clEnv, hullPass2);
4251 if (outputReady == MagickFalse)
4252 {
4253 if (filteredImage != NULL)
4254 {
4255 DestroyImage(filteredImage);
4256 filteredImage = NULL;
4257 }
4258 }
4259 return filteredImage;
4260}
4261
4262/*
4263%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4264% %
4265% %
4266% %
4267% D e s p e c k l e I m a g e w i t h O p e n C L %
4268% %
4269% %
4270% %
4271%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4272%
4273% DespeckleImage() reduces the speckle noise in an image while perserving the
4274% edges of the original image. A speckle removing filter uses a complementary
4275% hulling technique (raising pixels that are darker than their surrounding
4276% neighbors, then complementarily lowering pixels that are brighter than their
4277% surrounding neighbors) to reduce the speckle index of that image (reference
4278% Crimmins speckle removal).
4279%
4280% The format of the DespeckleImage method is:
4281%
4282% Image *DespeckleImage(const Image *image,ExceptionInfo *exception)
4283%
4284% A description of each parameter follows:
4285%
4286% o image: the image.
4287%
4288% o exception: return any errors or warnings in this structure.
4289%
4290*/
4291
4292MagickExport
4293Image* AccelerateDespeckleImage(const Image* image, ExceptionInfo* exception)
4294{
4295 MagickBooleanType status;
4296 Image* newImage = NULL;
4297
4298 assert(image != NULL);
4299 assert(exception != NULL);
4300
4301 status = checkOpenCLEnvironment(exception);
4302 if (status == MagickFalse)
4303 return NULL;
4304
dirk5dcb7622013-12-01 10:43:43 +00004305 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00004306 if (status == MagickFalse)
4307 return NULL;
4308
4309 newImage = ComputeDespeckleImage(image,exception);
cristyf034abb2013-11-24 14:16:14 +00004310 return newImage;
4311}
4312
cristye85d0f72013-11-27 02:25:43 +00004313static Image* ComputeAddNoiseImage(const Image* inputImage,
4314 const ChannelType channel, const NoiseType noise_type,
4315 ExceptionInfo *exception)
4316{
4317 MagickBooleanType outputReady = MagickFalse;
4318 MagickCLEnv clEnv = NULL;
4319
4320 cl_int clStatus;
4321 size_t global_work_size[2];
4322
4323 const void *inputPixels = NULL;
4324 Image* filteredImage = NULL;
4325 void *filteredPixels = NULL;
4326 void *hostPtr;
4327 unsigned int inputColumns, inputRows;
4328 float attenuate;
4329 float *randomNumberBufferPtr = NULL;
4330 MagickSizeType length;
4331 unsigned int numRandomNumberPerPixel;
4332 unsigned int numRowsPerKernelLaunch;
4333 unsigned int numRandomNumberPerBuffer;
4334 unsigned int r;
4335 unsigned int k;
4336 int i;
4337
4338 RandomInfo **restrict random_info;
4339 const char *option;
4340#if defined(MAGICKCORE_OPENMP_SUPPORT)
4341 unsigned long key;
4342#endif
4343
4344 cl_mem_flags mem_flags;
4345 cl_context context = NULL;
4346 cl_mem inputImageBuffer = NULL;
4347 cl_mem randomNumberBuffer = NULL;
4348 cl_mem filteredImageBuffer = NULL;
4349 cl_command_queue queue = NULL;
4350 cl_kernel addNoiseKernel = NULL;
4351
4352
4353 clEnv = GetDefaultOpenCLEnv();
4354 context = GetOpenCLContext(clEnv);
4355 queue = AcquireOpenCLCommandQueue(clEnv);
4356
4357 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
4358 if (inputPixels == (void *) NULL)
4359 {
cristya22457d2013-12-07 14:03:06 +00004360 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristye85d0f72013-11-27 02:25:43 +00004361 goto cleanup;
4362 }
4363
4364 if (ALIGNED(inputPixels,CLPixelPacket))
4365 {
4366 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4367 }
4368 else
4369 {
4370 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4371 }
4372 /* create a CL buffer from image pixel buffer */
4373 length = inputImage->columns * inputImage->rows;
4374 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4375 if (clStatus != CL_SUCCESS)
4376 {
cristya22457d2013-12-07 14:03:06 +00004377 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004378 goto cleanup;
4379 }
4380
4381
4382 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4383 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00004384 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristye85d0f72013-11-27 02:25:43 +00004385 {
cristya22457d2013-12-07 14:03:06 +00004386 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristye85d0f72013-11-27 02:25:43 +00004387 goto cleanup;
4388 }
4389 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4390 if (filteredPixels == (void *) NULL)
4391 {
cristya22457d2013-12-07 14:03:06 +00004392 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristye85d0f72013-11-27 02:25:43 +00004393 goto cleanup;
4394 }
4395
4396 if (ALIGNED(filteredPixels,CLPixelPacket))
4397 {
4398 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4399 hostPtr = filteredPixels;
4400 }
4401 else
4402 {
4403 mem_flags = CL_MEM_WRITE_ONLY;
4404 hostPtr = NULL;
4405 }
4406 /* create a CL buffer from image pixel buffer */
4407 length = inputImage->columns * inputImage->rows;
4408 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4409 if (clStatus != CL_SUCCESS)
4410 {
cristya22457d2013-12-07 14:03:06 +00004411 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004412 goto cleanup;
4413 }
4414
4415 /* find out how many random numbers needed by pixel */
4416 numRandomNumberPerPixel = 0;
4417 {
4418 unsigned int numRandPerChannel = 0;
4419 switch (noise_type)
4420 {
4421 case UniformNoise:
4422 case ImpulseNoise:
4423 case LaplacianNoise:
4424 case RandomNoise:
4425 default:
4426 numRandPerChannel = 1;
4427 break;
4428 case GaussianNoise:
4429 case MultiplicativeGaussianNoise:
4430 case PoissonNoise:
4431 numRandPerChannel = 2;
4432 break;
4433 };
4434
4435 if ((channel & RedChannel) != 0)
4436 numRandomNumberPerPixel+=numRandPerChannel;
4437 if ((channel & GreenChannel) != 0)
4438 numRandomNumberPerPixel+=numRandPerChannel;
4439 if ((channel & BlueChannel) != 0)
4440 numRandomNumberPerPixel+=numRandPerChannel;
4441 if ((channel & OpacityChannel) != 0)
4442 numRandomNumberPerPixel+=numRandPerChannel;
4443 }
4444
4445 numRowsPerKernelLaunch = 512;
4446 /* create a buffer for random numbers */
4447 numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
4448 randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, numRandomNumberPerBuffer*sizeof(float)
4449 , NULL, &clStatus);
4450
4451
4452 /* set up the random number generators */
4453 attenuate=1.0;
4454 option=GetImageArtifact(inputImage,"attenuate");
4455 if (option != (char *) NULL)
4456 attenuate=StringToDouble(option,(char **) NULL);
4457 random_info=AcquireRandomInfoThreadSet();
4458#if defined(MAGICKCORE_OPENMP_SUPPORT)
4459 key=GetRandomSecretKey(random_info[0]);
4460#endif
4461
4462 addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
4463
4464 k = 0;
4465 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
4466 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4467 inputColumns = inputImage->columns;
4468 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
4469 inputRows = inputImage->rows;
4470 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
4471 clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
4472 clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
4473 attenuate=1.0f;
4474 option=GetImageArtifact(inputImage,"attenuate");
4475 if (option != (char *) NULL)
4476 attenuate=(float)StringToDouble(option,(char **) NULL);
4477 clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
4478 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4479 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
4480
4481 global_work_size[0] = inputColumns;
4482 for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch)
4483 {
4484 /* Generate random numbers in the buffer */
4485 randomNumberBufferPtr = (float*)clEnqueueMapBuffer(queue, randomNumberBuffer, CL_TRUE, CL_MAP_WRITE, 0
4486 , numRandomNumberPerBuffer*sizeof(float), 0, NULL, NULL, &clStatus);
4487 if (clStatus != CL_SUCCESS)
4488 {
cristya22457d2013-12-07 14:03:06 +00004489 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004490 goto cleanup;
4491 }
4492
4493#if defined(MAGICKCORE_OPENMP_SUPPORT)
4494 #pragma omp parallel for schedule(static,4) \
4495 num_threads((key == ~0UL) == 0 ? 1 : (size_t) GetMagickResourceLimit(ThreadResource))
4496#endif
4497 for (i = 0; i < numRandomNumberPerBuffer; i++)
4498 {
4499 const int id = GetOpenMPThreadId();
4500 randomNumberBufferPtr[i] = (float)GetPseudoRandomValue(random_info[id]);
4501 }
4502
4503 clStatus = clEnqueueUnmapMemObject(queue, randomNumberBuffer, randomNumberBufferPtr, 0, NULL, NULL);
4504 if (clStatus != CL_SUCCESS)
4505 {
cristya22457d2013-12-07 14:03:06 +00004506 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004507 goto cleanup;
4508 }
4509
4510 /* set the row offset */
4511 clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
4512 global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
4513 clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
4514 }
4515
4516 if (ALIGNED(filteredPixels,CLPixelPacket))
4517 {
4518 length = inputImage->columns * inputImage->rows;
4519 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4520 }
4521 else
4522 {
4523 length = inputImage->columns * inputImage->rows;
4524 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4525 }
4526 if (clStatus != CL_SUCCESS)
4527 {
cristya22457d2013-12-07 14:03:06 +00004528 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristye85d0f72013-11-27 02:25:43 +00004529 goto cleanup;
4530 }
4531
cristye85d0f72013-11-27 02:25:43 +00004532 outputReady = MagickTrue;
cristya22457d2013-12-07 14:03:06 +00004533
cristye85d0f72013-11-27 02:25:43 +00004534cleanup:
cristya22457d2013-12-07 14:03:06 +00004535 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4536
cristye85d0f72013-11-27 02:25:43 +00004537 if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4538 if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel);
4539 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4540 if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer);
4541 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4542 if (outputReady == MagickFalse
4543 && filteredImage != NULL)
4544 {
4545 DestroyImage(filteredImage);
4546 filteredImage = NULL;
4547 }
4548 return filteredImage;
4549}
4550
4551
4552static Image* ComputeAddNoiseImageOptRandomNum(const Image* inputImage,
4553 const ChannelType channel, const NoiseType noise_type,
4554 ExceptionInfo *exception)
4555{
4556 MagickBooleanType outputReady = MagickFalse;
4557 MagickCLEnv clEnv = NULL;
4558
4559 cl_int clStatus;
4560 size_t global_work_size[2];
4561 size_t random_work_size;
4562
4563 const void *inputPixels = NULL;
4564 Image* filteredImage = NULL;
4565 void *filteredPixels = NULL;
4566 void *hostPtr;
4567 unsigned int inputColumns, inputRows;
4568 float attenuate;
4569 MagickSizeType length;
4570 unsigned int numRandomNumberPerPixel;
4571 unsigned int numRowsPerKernelLaunch;
4572 unsigned int numRandomNumberPerBuffer;
4573 unsigned int numRandomNumberGenerators;
4574 unsigned int initRandom;
4575 float fNormalize;
4576 unsigned int r;
4577 unsigned int k;
4578 int i;
4579 const char *option;
4580
4581 cl_mem_flags mem_flags;
4582 cl_context context = NULL;
4583 cl_mem inputImageBuffer = NULL;
4584 cl_mem randomNumberBuffer = NULL;
4585 cl_mem filteredImageBuffer = NULL;
4586 cl_mem randomNumberSeedsBuffer = NULL;
4587 cl_command_queue queue = NULL;
4588 cl_kernel addNoiseKernel = NULL;
4589 cl_kernel randomNumberGeneratorKernel = NULL;
4590
4591
4592 clEnv = GetDefaultOpenCLEnv();
4593 context = GetOpenCLContext(clEnv);
4594 queue = AcquireOpenCLCommandQueue(clEnv);
4595
4596 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
4597 if (inputPixels == (void *) NULL)
4598 {
cristya22457d2013-12-07 14:03:06 +00004599 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristye85d0f72013-11-27 02:25:43 +00004600 goto cleanup;
4601 }
4602
4603 if (ALIGNED(inputPixels,CLPixelPacket))
4604 {
4605 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4606 }
4607 else
4608 {
4609 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4610 }
4611 /* create a CL buffer from image pixel buffer */
4612 length = inputImage->columns * inputImage->rows;
4613 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4614 if (clStatus != CL_SUCCESS)
4615 {
cristya22457d2013-12-07 14:03:06 +00004616 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004617 goto cleanup;
4618 }
4619
4620
4621 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4622 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00004623 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristye85d0f72013-11-27 02:25:43 +00004624 {
cristya22457d2013-12-07 14:03:06 +00004625 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristye85d0f72013-11-27 02:25:43 +00004626 goto cleanup;
4627 }
4628 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4629 if (filteredPixels == (void *) NULL)
4630 {
cristya22457d2013-12-07 14:03:06 +00004631 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristye85d0f72013-11-27 02:25:43 +00004632 goto cleanup;
4633 }
4634
4635 if (ALIGNED(filteredPixels,CLPixelPacket))
4636 {
4637 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4638 hostPtr = filteredPixels;
4639 }
4640 else
4641 {
4642 mem_flags = CL_MEM_WRITE_ONLY;
4643 hostPtr = NULL;
4644 }
4645 /* create a CL buffer from image pixel buffer */
4646 length = inputImage->columns * inputImage->rows;
4647 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4648 if (clStatus != CL_SUCCESS)
4649 {
cristya22457d2013-12-07 14:03:06 +00004650 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004651 goto cleanup;
4652 }
4653
4654 /* find out how many random numbers needed by pixel */
4655 numRandomNumberPerPixel = 0;
4656 {
4657 unsigned int numRandPerChannel = 0;
4658 switch (noise_type)
4659 {
4660 case UniformNoise:
4661 case ImpulseNoise:
4662 case LaplacianNoise:
4663 case RandomNoise:
4664 default:
4665 numRandPerChannel = 1;
4666 break;
4667 case GaussianNoise:
4668 case MultiplicativeGaussianNoise:
4669 case PoissonNoise:
4670 numRandPerChannel = 2;
4671 break;
4672 };
4673
4674 if ((channel & RedChannel) != 0)
4675 numRandomNumberPerPixel+=numRandPerChannel;
4676 if ((channel & GreenChannel) != 0)
4677 numRandomNumberPerPixel+=numRandPerChannel;
4678 if ((channel & BlueChannel) != 0)
4679 numRandomNumberPerPixel+=numRandPerChannel;
4680 if ((channel & OpacityChannel) != 0)
4681 numRandomNumberPerPixel+=numRandPerChannel;
4682 }
4683
4684 numRowsPerKernelLaunch = 512;
4685
4686 /* create a buffer for random numbers */
4687 numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
4688 randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, numRandomNumberPerBuffer*sizeof(float)
4689 , NULL, &clStatus);
4690
4691 {
4692 /* setup the random number generators */
4693 unsigned long* seeds;
4694 numRandomNumberGenerators = 512;
4695 randomNumberSeedsBuffer = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR|CL_MEM_READ_WRITE
4696 , numRandomNumberGenerators * 4 * sizeof(unsigned long), NULL, &clStatus);
4697 if (clStatus != CL_SUCCESS)
4698 {
cristya22457d2013-12-07 14:03:06 +00004699 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004700 goto cleanup;
4701 }
4702 seeds = (unsigned long*) clEnqueueMapBuffer(queue, randomNumberSeedsBuffer, CL_TRUE, CL_MAP_WRITE, 0
4703 , numRandomNumberGenerators*4*sizeof(unsigned long), 0, NULL, NULL, &clStatus);
4704 if (clStatus != CL_SUCCESS)
4705 {
cristya22457d2013-12-07 14:03:06 +00004706 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004707 goto cleanup;
4708 }
4709
4710 for (i = 0; i < numRandomNumberGenerators; i++) {
4711 RandomInfo* randomInfo = AcquireRandomInfo();
4712 const unsigned long* s = GetRandomInfoSeed(randomInfo);
4713
4714 if (i == 0)
4715 fNormalize = GetRandomInfoNormalize(randomInfo);
4716
4717 seeds[i*4] = s[0];
4718 randomInfo = DestroyRandomInfo(randomInfo);
4719 }
4720
4721 clStatus = clEnqueueUnmapMemObject(queue, randomNumberSeedsBuffer, seeds, 0, NULL, NULL);
4722 if (clStatus != CL_SUCCESS)
4723 {
cristya22457d2013-12-07 14:03:06 +00004724 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004725 goto cleanup;
4726 }
4727
4728 randomNumberGeneratorKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE
4729 ,"randomNumberGeneratorKernel");
4730
4731 k = 0;
4732 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberSeedsBuffer);
4733 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(float),(void *)&fNormalize);
4734 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4735 initRandom = 1;
4736 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&initRandom);
4737 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerBuffer);
4738
4739 random_work_size = numRandomNumberGenerators;
4740 }
4741
4742 addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
4743 k = 0;
4744 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
4745 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4746 inputColumns = inputImage->columns;
4747 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
4748 inputRows = inputImage->rows;
4749 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
4750 clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
4751 clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
4752 attenuate=1.0f;
4753 option=GetImageArtifact(inputImage,"attenuate");
4754 if (option != (char *) NULL)
4755 attenuate=(float)StringToDouble(option,(char **) NULL);
4756 clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
4757 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4758 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
4759
4760 global_work_size[0] = inputColumns;
4761 for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch)
4762 {
4763 size_t generator_local_size = 64;
4764 /* Generate random numbers in the buffer */
4765 clEnqueueNDRangeKernel(queue,randomNumberGeneratorKernel,1,NULL
4766 ,&random_work_size,&generator_local_size,0,NULL,NULL);
4767 if (initRandom != 0)
4768 {
4769 /* make sure we only do init once */
4770 initRandom = 0;
4771 clSetKernelArg(randomNumberGeneratorKernel,3,sizeof(unsigned int),(void *)&initRandom);
4772 }
4773
4774 /* set the row offset */
4775 clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
4776 global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
4777 clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
4778 }
4779
4780 if (ALIGNED(filteredPixels,CLPixelPacket))
4781 {
4782 length = inputImage->columns * inputImage->rows;
4783 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4784 }
4785 else
4786 {
4787 length = inputImage->columns * inputImage->rows;
4788 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4789 }
4790 if (clStatus != CL_SUCCESS)
4791 {
cristya22457d2013-12-07 14:03:06 +00004792 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristye85d0f72013-11-27 02:25:43 +00004793 goto cleanup;
4794 }
4795
cristye85d0f72013-11-27 02:25:43 +00004796 outputReady = MagickTrue;
cristya22457d2013-12-07 14:03:06 +00004797
cristye85d0f72013-11-27 02:25:43 +00004798cleanup:
cristya22457d2013-12-07 14:03:06 +00004799 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4800
cristye85d0f72013-11-27 02:25:43 +00004801 if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4802 if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel);
4803 if (randomNumberGeneratorKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomNumberGeneratorKernel);
4804 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4805 if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer);
4806 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4807 if (randomNumberSeedsBuffer!=NULL) clReleaseMemObject(randomNumberSeedsBuffer);
4808 if (outputReady == MagickFalse
4809 && filteredImage != NULL)
4810 {
4811 DestroyImage(filteredImage);
4812 filteredImage = NULL;
4813 }
4814 return filteredImage;
4815}
4816
4817
4818
4819MagickExport
4820Image* AccelerateAddNoiseImage(const Image *image, const ChannelType channel,
4821 const NoiseType noise_type,ExceptionInfo *exception)
4822{
4823 MagickBooleanType status;
4824 Image* filteredImage = NULL;
4825
4826 assert(image != NULL);
4827 assert(exception != NULL);
4828
4829 status = checkOpenCLEnvironment(exception);
4830 if (status == MagickFalse)
4831 return NULL;
4832
dirk5dcb7622013-12-01 10:43:43 +00004833 status = checkAccelerateCondition(image, channel);
cristye85d0f72013-11-27 02:25:43 +00004834 if (status == MagickFalse)
4835 return NULL;
4836
4837 if (sizeof(unsigned long) == 4)
4838 filteredImage = ComputeAddNoiseImageOptRandomNum(image,channel,noise_type,exception);
4839 else
4840 filteredImage = ComputeAddNoiseImage(image,channel,noise_type,exception);
4841
cristye85d0f72013-11-27 02:25:43 +00004842 return filteredImage;
4843}
4844
4845
cristyf034abb2013-11-24 14:16:14 +00004846#else /* MAGICKCORE_OPENCL_SUPPORT */
4847
4848MagickExport Image *AccelerateConvolveImageChannel(
4849 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4850 const KernelInfo *magick_unused(kernel),
4851 ExceptionInfo *magick_unused(exception))
4852{
4853 magick_unreferenced(image);
4854 magick_unreferenced(channel);
4855 magick_unreferenced(kernel);
4856 magick_unreferenced(exception);
4857
4858 return NULL;
4859}
4860
4861MagickExport MagickBooleanType AccelerateFunctionImage(
4862 Image *magick_unused(image),const ChannelType magick_unused(channel),
4863 const MagickFunction magick_unused(function),
4864 const size_t magick_unused(number_parameters),
4865 const double *magick_unused(parameters),
4866 ExceptionInfo *magick_unused(exception))
4867{
4868 magick_unreferenced(image);
4869 magick_unreferenced(channel);
4870 magick_unreferenced(function);
4871 magick_unreferenced(number_parameters);
4872 magick_unreferenced(parameters);
4873 magick_unreferenced(exception);
4874
4875 return MagickFalse;
4876}
4877
4878MagickExport Image *AccelerateBlurImage(const Image *magick_unused(image),
4879 const ChannelType magick_unused(channel),const double magick_unused(radius),
4880 const double magick_unused(sigma),ExceptionInfo *magick_unused(exception))
4881{
4882 magick_unreferenced(image);
4883 magick_unreferenced(channel);
4884 magick_unreferenced(radius);
4885 magick_unreferenced(sigma);
4886 magick_unreferenced(exception);
4887
4888 return NULL;
4889}
4890
4891MagickExport Image *AccelerateRadialBlurImage(
4892 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4893 const double magick_unused(angle),ExceptionInfo *magick_unused(exception))
4894{
4895 magick_unreferenced(image);
4896 magick_unreferenced(channel);
4897 magick_unreferenced(angle);
4898 magick_unreferenced(exception);
4899
4900 return NULL;
4901}
4902
4903
4904MagickExport Image *AccelerateUnsharpMaskImage(
4905 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4906 const double magick_unused(radius),const double magick_unused(sigma),
4907 const double magick_unused(gain),const double magick_unused(threshold),
4908 ExceptionInfo *magick_unused(exception))
4909{
4910 magick_unreferenced(image);
4911 magick_unreferenced(channel);
4912 magick_unreferenced(radius);
4913 magick_unreferenced(sigma);
4914 magick_unreferenced(gain);
4915 magick_unreferenced(threshold);
4916 magick_unreferenced(exception);
4917
4918 return NULL;
4919}
4920
4921
4922MagickExport MagickBooleanType AccelerateContrastImage(
4923 Image* magick_unused(image),const MagickBooleanType magick_unused(sharpen),
4924 ExceptionInfo* magick_unused(exception))
4925{
4926 magick_unreferenced(image);
4927 magick_unreferenced(sharpen);
4928 magick_unreferenced(exception);
4929
4930 return MagickFalse;
4931}
4932
4933MagickExport MagickBooleanType AccelerateEqualizeImage(
4934 Image* magick_unused(image), const ChannelType magick_unused(channel),
4935 ExceptionInfo* magick_unused(exception))
4936{
4937 magick_unreferenced(image);
4938 magick_unreferenced(channel);
4939 magick_unreferenced(exception);
4940
4941 return MagickFalse;
4942}
4943
4944MagickExport Image *AccelerateDespeckleImage(const Image* magick_unused(image),
4945 ExceptionInfo* magick_unused(exception))
4946{
4947 magick_unreferenced(image);
4948 magick_unreferenced(exception);
4949
4950 return NULL;
4951}
4952
4953MagickExport Image *AccelerateResizeImage(const Image* magick_unused(image),
4954 const size_t magick_unused(resizedColumns),
4955 const size_t magick_unused(resizedRows),
4956 const ResizeFilter* magick_unused(resizeFilter),
4957 ExceptionInfo *magick_unused(exception))
4958{
4959 magick_unreferenced(image);
4960 magick_unreferenced(resizedColumns);
4961 magick_unreferenced(resizedRows);
4962 magick_unreferenced(resizeFilter);
4963 magick_unreferenced(exception);
4964
4965 return NULL;
4966}
4967
4968
4969MagickExport
4970MagickBooleanType AccelerateModulateImage(
4971 Image* image, double percent_brightness, double percent_hue,
4972 double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
4973{
4974 magick_unreferenced(image);
4975 magick_unreferenced(percent_brightness);
4976 magick_unreferenced(percent_hue);
4977 magick_unreferenced(percent_saturation);
4978 magick_unreferenced(colorspace);
4979 magick_unreferenced(exception);
4980 return(MagickFalse);
4981}
4982
cristye85d0f72013-11-27 02:25:43 +00004983MagickExport Image *AccelerateAddNoiseImage(const Image *image,
4984 const ChannelType channel, const NoiseType noise_type,ExceptionInfo *exception)
4985{
4986 magick_unreferenced(image);
4987 magick_unreferenced(channel);
4988 magick_unreferenced(noise_type);
4989 magick_unreferenced(exception);
4990 return NULL;
4991}
cristyf034abb2013-11-24 14:16:14 +00004992
4993#endif /* MAGICKCORE_OPENCL_SUPPORT */
4994
4995MagickExport MagickBooleanType AccelerateConvolveImage(
4996 const Image *magick_unused(image),const KernelInfo *magick_unused(kernel),
4997 Image *magick_unused(convolve_image),ExceptionInfo *magick_unused(exception))
4998{
4999 magick_unreferenced(image);
5000 magick_unreferenced(kernel);
5001 magick_unreferenced(convolve_image);
5002 magick_unreferenced(exception);
5003
5004 /* legacy, do not use */
5005 return(MagickFalse);
5006}
5007