blob: 34806e29ddfdc756fa4b9748c2a36d2a543c41a7 [file] [log] [blame]
cristy3f6d1482010-01-20 21:01:21 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7% A A C C E L E R R A A T E %
8% AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9% A A C C E L E R R A A T E %
10% A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11% %
12% %
13% MagickCore Acceleration Methods %
14% %
15% Software Design %
cristyde984cd2013-12-01 14:49:27 +000016% Cristy %
cristyf034abb2013-11-24 14:16:14 +000017% SiuChi Chan %
18% Guansong Zhang %
cristy3f6d1482010-01-20 21:01:21 +000019% January 2010 %
20% %
21% %
cristyfe676ee2013-11-18 13:03:38 +000022% Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization %
cristy3f6d1482010-01-20 21:01:21 +000023% dedicated to making software imaging solutions freely available. %
24% %
25% You may not use this file except in compliance with the License. You may %
26% obtain a copy of the License at %
27% %
28% http://www.imagemagick.org/script/license.php %
29% %
30% Unless required by applicable law or agreed to in writing, software %
31% distributed under the License is distributed on an "AS IS" BASIS, %
32% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
33% See the License for the specific language governing permissions and %
34% limitations under the License. %
35% %
36%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cristy3f6d1482010-01-20 21:01:21 +000037*/
cristyf034abb2013-11-24 14:16:14 +000038
cristy3f6d1482010-01-20 21:01:21 +000039/*
cristyf034abb2013-11-24 14:16:14 +000040Include declarations.
cristy3f6d1482010-01-20 21:01:21 +000041*/
cristy4c08aed2011-07-01 19:47:50 +000042#include "MagickCore/studio.h"
43#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000044#include "MagickCore/accelerate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000045#include "MagickCore/artifact.h"
cristy35f33492011-07-07 16:54:49 +000046#include "MagickCore/cache.h"
cristyd1dd6e42011-09-04 01:46:08 +000047#include "MagickCore/cache-private.h"
cristy4c08aed2011-07-01 19:47:50 +000048#include "MagickCore/cache-view.h"
49#include "MagickCore/color-private.h"
cristy7f866842013-07-11 01:15:58 +000050#include "MagickCore/delegate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000051#include "MagickCore/enhance.h"
52#include "MagickCore/exception.h"
53#include "MagickCore/exception-private.h"
54#include "MagickCore/gem.h"
55#include "MagickCore/hashmap.h"
56#include "MagickCore/image.h"
57#include "MagickCore/image-private.h"
58#include "MagickCore/list.h"
59#include "MagickCore/memory_.h"
60#include "MagickCore/monitor-private.h"
61#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000062#include "MagickCore/opencl.h"
63#include "MagickCore/opencl-private.h"
cristy4c08aed2011-07-01 19:47:50 +000064#include "MagickCore/option.h"
cristyf034abb2013-11-24 14:16:14 +000065#include "MagickCore/pixel-private.h"
cristy4c08aed2011-07-01 19:47:50 +000066#include "MagickCore/prepress.h"
67#include "MagickCore/quantize.h"
cristye85d0f72013-11-27 02:25:43 +000068#include "MagickCore/random_.h"
69#include "MagickCore/random-private.h"
cristy4c08aed2011-07-01 19:47:50 +000070#include "MagickCore/registry.h"
cristyf034abb2013-11-24 14:16:14 +000071#include "MagickCore/resize.h"
72#include "MagickCore/resize-private.h"
cristy4c08aed2011-07-01 19:47:50 +000073#include "MagickCore/semaphore.h"
74#include "MagickCore/splay-tree.h"
75#include "MagickCore/statistic.h"
76#include "MagickCore/string_.h"
77#include "MagickCore/string-private.h"
78#include "MagickCore/token.h"
cristyf034abb2013-11-24 14:16:14 +000079
80#ifdef MAGICKCORE_CLPERFMARKER
81#include "CLPerfMarker.h"
82#endif
83
cristye85d0f72013-11-27 02:25:43 +000084#define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
85#define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
86
cristyf034abb2013-11-24 14:16:14 +000087#if defined(MAGICKCORE_OPENCL_SUPPORT)
88
89#define ALIGNED(pointer,type) ((((long)(pointer)) & (sizeof(type)-1)) == 0)
90/*#define ALIGNED(pointer,type) (0) */
91
92static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception)
93{
94 MagickBooleanType flag;
95
96 MagickCLEnv clEnv;
97 clEnv = GetDefaultOpenCLEnv();
cristya22457d2013-12-07 14:03:06 +000098
cristyf034abb2013-11-24 14:16:14 +000099 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED
100 , sizeof(MagickBooleanType), &flag, exception);
cristycd8b3312013-12-22 01:51:11 +0000101 if (flag != MagickFalse)
cristyf034abb2013-11-24 14:16:14 +0000102 return MagickFalse;
103
104 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_INITIALIZED
105 , sizeof(MagickBooleanType), &flag, exception);
106 if (flag == MagickFalse)
107 {
108 if(InitOpenCLEnv(clEnv, exception) == MagickFalse)
109 return MagickFalse;
cristya22457d2013-12-07 14:03:06 +0000110
111 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED
112 , sizeof(MagickBooleanType), &flag, exception);
cristycd8b3312013-12-22 01:51:11 +0000113 if (flag != MagickFalse)
cristya22457d2013-12-07 14:03:06 +0000114 return MagickFalse;
cristyf034abb2013-11-24 14:16:14 +0000115 }
116
117 return MagickTrue;
118}
119
120
dirk5dcb7622013-12-01 10:43:43 +0000121static MagickBooleanType checkAccelerateCondition(const Image* image, const ChannelType channel)
cristyf034abb2013-11-24 14:16:14 +0000122{
123 /* check if the image's colorspace is supported */
124 if (image->colorspace != RGBColorspace
125 && image->colorspace != sRGBColorspace)
126 return MagickFalse;
127
128 /* check if the channel is supported */
129 if (((channel&RedChannel) == 0)
130 || ((channel&GreenChannel) == 0)
131 || ((channel&BlueChannel) == 0))
132 {
133 return MagickFalse;
134 }
135
136
137 /* check if if the virtual pixel method is compatible with the OpenCL implementation */
138 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod)&&
139 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
140 return MagickFalse;
141
142 return MagickTrue;
143}
144
145
146static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
147{
148 MagickBooleanType outputReady;
149 MagickCLEnv clEnv;
150
151 cl_int clStatus;
152 size_t global_work_size[2];
153 size_t localGroupSize[2];
154 size_t localMemoryRequirement;
155 Image* filteredImage;
156 MagickSizeType length;
157 const void *inputPixels;
158 void *filteredPixels;
159 cl_mem_flags mem_flags;
160 float* kernelBufferPtr;
161 unsigned kernelSize;
162 unsigned int i;
163 void *hostPtr;
164 unsigned int matte, filterWidth, filterHeight, imageWidth, imageHeight;
165
166 cl_context context;
167 cl_kernel clkernel;
168 cl_mem inputImageBuffer, filteredImageBuffer, convolutionKernel;
169 cl_ulong deviceLocalMemorySize;
170 cl_device_id device;
171
172 cl_command_queue queue;
173
174 /* intialize all CL objects to NULL */
175 context = NULL;
176 inputImageBuffer = NULL;
177 filteredImageBuffer = NULL;
178 convolutionKernel = NULL;
179 clkernel = NULL;
180 queue = NULL;
181 device = NULL;
182
183 filteredImage = NULL;
184 outputReady = MagickFalse;
185
186 clEnv = GetDefaultOpenCLEnv();
187 context = GetOpenCLContext(clEnv);
188
189 inputPixels = NULL;
190 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
191 if (inputPixels == (const void *) NULL)
192 {
cristya22457d2013-12-07 14:03:06 +0000193 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +0000194 goto cleanup;
195 }
196
197 /* Create and initialize OpenCL buffers. */
198
199 /* If the host pointer is aligned to the size of CLPixelPacket,
200 then use the host buffer directly from the GPU; otherwise,
201 create a buffer on the GPU and copy the data over */
202 if (ALIGNED(inputPixels,CLPixelPacket))
203 {
204 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
205 }
206 else
207 {
208 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
209 }
210 /* create a CL buffer from image pixel buffer */
211 length = inputImage->columns * inputImage->rows;
212 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
213 if (clStatus != CL_SUCCESS)
214 {
cristya22457d2013-12-07 14:03:06 +0000215 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000216 goto cleanup;
217 }
218
219 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
220 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +0000221 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +0000222 {
cristya22457d2013-12-07 14:03:06 +0000223 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000224 goto cleanup;
225 }
226 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
227 if (filteredPixels == (void *) NULL)
228 {
cristya22457d2013-12-07 14:03:06 +0000229 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +0000230 goto cleanup;
231 }
232
233 if (ALIGNED(filteredPixels,CLPixelPacket))
234 {
235 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
236 hostPtr = filteredPixels;
237 }
238 else
239 {
240 mem_flags = CL_MEM_WRITE_ONLY;
241 hostPtr = NULL;
242 }
243 /* create a CL buffer from image pixel buffer */
244 length = inputImage->columns * inputImage->rows;
245 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
246 if (clStatus != CL_SUCCESS)
247 {
cristya22457d2013-12-07 14:03:06 +0000248 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000249 goto cleanup;
250 }
251
252 kernelSize = kernel->width * kernel->height;
253 convolutionKernel = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
254 if (clStatus != CL_SUCCESS)
255 {
cristya22457d2013-12-07 14:03:06 +0000256 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000257 goto cleanup;
258 }
259
260 queue = AcquireOpenCLCommandQueue(clEnv);
261
262 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
263 , 0, NULL, NULL, &clStatus);
264 if (clStatus != CL_SUCCESS)
265 {
cristya22457d2013-12-07 14:03:06 +0000266 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000267 goto cleanup;
268 }
269 for (i = 0; i < kernelSize; i++)
270 {
271 kernelBufferPtr[i] = (float) kernel->values[i];
272 }
273 clStatus = clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
274 if (clStatus != CL_SUCCESS)
275 {
cristya22457d2013-12-07 14:03:06 +0000276 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000277 goto cleanup;
278 }
279 clFlush(queue);
280
281 /* Compute the local memory requirement for a 16x16 workgroup.
282 If it's larger than 16k, reduce the workgroup size to 8x8 */
283 localGroupSize[0] = 16;
284 localGroupSize[1] = 16;
285 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
286 + kernel->width*kernel->height*sizeof(float);
287 if (localMemoryRequirement > 16384)
288 {
289
290
291 localGroupSize[0] = 8;
292 localGroupSize[1] = 8;
293
294 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
295 + kernel->width*kernel->height*sizeof(float);
296 }
297
298 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_DEVICE, sizeof(cl_device_id), &device, exception);
299 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &deviceLocalMemorySize, NULL);
300 if (localMemoryRequirement <= deviceLocalMemorySize)
301 {
302 /* get the OpenCL kernel */
cristya22457d2013-12-07 14:03:06 +0000303 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ConvolveOptimized");
cristyf034abb2013-11-24 14:16:14 +0000304 if (clkernel == NULL)
305 {
cristya22457d2013-12-07 14:03:06 +0000306 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000307 goto cleanup;
308 }
309
310 /* set the kernel arguments */
311 i = 0;
312 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
313 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
314 imageWidth = inputImage->columns;
315 imageHeight = inputImage->rows;
316 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
317 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
318 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
319 filterWidth = kernel->width;
320 filterHeight = kernel->height;
321 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
322 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
dirke19d0cc2013-12-01 10:07:42 +0000323 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +0000324 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
325 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
326 clStatus|=clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
327 clStatus|=clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
328 if (clStatus != CL_SUCCESS)
329 {
cristya22457d2013-12-07 14:03:06 +0000330 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000331 goto cleanup;
332 }
333
334 /* pad the global size to a multiple of the local work size dimension */
335 global_work_size[0] = ((inputImage->columns + localGroupSize[0] - 1)/localGroupSize[0] ) * localGroupSize[0] ;
336 global_work_size[1] = ((inputImage->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
337
338 /* launch the kernel */
339 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
340 if (clStatus != CL_SUCCESS)
341 {
cristya22457d2013-12-07 14:03:06 +0000342 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000343 goto cleanup;
344 }
345 }
346 else
347 {
348 /* get the OpenCL kernel */
349 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
350 if (clkernel == NULL)
351 {
cristya22457d2013-12-07 14:03:06 +0000352 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000353 goto cleanup;
354 }
355
356 /* set the kernel arguments */
357 i = 0;
358 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
359 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
360 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
361 filterWidth = kernel->width;
362 filterHeight = kernel->height;
363 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
364 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
dirke19d0cc2013-12-01 10:07:42 +0000365 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +0000366 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
367 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
368 if (clStatus != CL_SUCCESS)
369 {
cristya22457d2013-12-07 14:03:06 +0000370 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000371 goto cleanup;
372 }
373
374 global_work_size[0] = inputImage->columns;
375 global_work_size[1] = inputImage->rows;
376
377 /* launch the kernel */
378 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
379 if (clStatus != CL_SUCCESS)
380 {
cristya22457d2013-12-07 14:03:06 +0000381 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000382 goto cleanup;
383 }
384 }
385 clFlush(queue);
386
387 if (ALIGNED(filteredPixels,CLPixelPacket))
388 {
389 length = inputImage->columns * inputImage->rows;
390 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
391 }
392 else
393 {
394 length = inputImage->columns * inputImage->rows;
395 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
396 }
397 if (clStatus != CL_SUCCESS)
398 {
cristya22457d2013-12-07 14:03:06 +0000399 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000400 goto cleanup;
401 }
402
403 /* everything is fine! :) */
404 outputReady = MagickTrue;
405
cristyf034abb2013-11-24 14:16:14 +0000406cleanup:
cristya22457d2013-12-07 14:03:06 +0000407 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +0000408
409 if (inputImageBuffer != NULL)
410 clReleaseMemObject(inputImageBuffer);
411
412 if (filteredImageBuffer != NULL)
413 clReleaseMemObject(filteredImageBuffer);
414
415 if (convolutionKernel != NULL)
416 clReleaseMemObject(convolutionKernel);
417
418 if (clkernel != NULL)
419 RelinquishOpenCLKernel(clEnv, clkernel);
420
421 if (queue != NULL)
422 RelinquishOpenCLCommandQueue(clEnv, queue);
423
424 if (outputReady == MagickFalse)
425 {
426 if (filteredImage != NULL)
427 {
428 DestroyImage(filteredImage);
429 filteredImage = NULL;
430 }
431 }
432
433 return filteredImage;
434}
435
cristy3f6d1482010-01-20 21:01:21 +0000436/*
437%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
438% %
439% %
440% %
cristyf034abb2013-11-24 14:16:14 +0000441% C o n v o l v e I m a g e w i t h O p e n C L %
cristy3f6d1482010-01-20 21:01:21 +0000442% %
443% %
444% %
445%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
446%
cristyf034abb2013-11-24 14:16:14 +0000447% ConvolveImage() applies a custom convolution kernel to the image.
cristy3f6d1482010-01-20 21:01:21 +0000448%
cristyf034abb2013-11-24 14:16:14 +0000449% The format of the ConvolveImage method is:
cristy3f6d1482010-01-20 21:01:21 +0000450%
cristyf034abb2013-11-24 14:16:14 +0000451% Image *ConvolveImage(const Image *image,const size_t order,
452% const double *kernel,ExceptionInfo *exception)
453% Image *ConvolveImageChannel(const Image *image,const ChannelType channel,
454% const size_t order,const double *kernel,ExceptionInfo *exception)
455%
456% A description of each parameter follows:
457%
458% o image: the image.
459%
460% o channel: the channel type.
461%
462% o kernel: kernel info.
463%
464% o exception: return any errors or warnings in this structure.
465%
466*/
467
468MagickExport Image* AccelerateConvolveImageChannel(const Image *image, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
469{
470 MagickBooleanType status;
471 Image* filteredImage = NULL;
472
473 assert(image != NULL);
474 assert(kernel != (KernelInfo *) NULL);
475 assert(exception != (ExceptionInfo *) NULL);
476
477 status = checkOpenCLEnvironment(exception);
478 if (status == MagickFalse)
479 return NULL;
480
dirk5dcb7622013-12-01 10:43:43 +0000481 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +0000482 if (status == MagickFalse)
483 return NULL;
484
485 filteredImage = ComputeConvolveImage(image, channel, kernel, exception);
cristyf034abb2013-11-24 14:16:14 +0000486 return filteredImage;
487}
488
489static MagickBooleanType ComputeFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
490 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
491{
492 MagickBooleanType status;
493
494 MagickCLEnv clEnv;
495
496 MagickSizeType length;
497 void* pixels;
498 float* parametersBufferPtr;
499
500 cl_int clStatus;
501 cl_context context;
502 cl_kernel clkernel;
503 cl_command_queue queue;
504 cl_mem_flags mem_flags;
505 cl_mem imageBuffer;
506 cl_mem parametersBuffer;
507 size_t globalWorkSize[2];
508
509 unsigned int i;
510
511 status = MagickFalse;
512
513 context = NULL;
514 clkernel = NULL;
515 queue = NULL;
516 imageBuffer = NULL;
517 parametersBuffer = NULL;
518
519 clEnv = GetDefaultOpenCLEnv();
520 context = GetOpenCLContext(clEnv);
521
522 pixels = GetPixelCachePixels(image, &length, exception);
523 if (pixels == (void *) NULL)
524 {
cristya22457d2013-12-07 14:03:06 +0000525 (void) OpenCLThrowMagickException(exception, GetMagickModule(), CacheWarning,
cristyf034abb2013-11-24 14:16:14 +0000526 "GetPixelCachePixels failed.",
527 "'%s'", image->filename);
528 goto cleanup;
529 }
530
531
532 if (ALIGNED(pixels,CLPixelPacket))
533 {
534 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
535 }
536 else
537 {
538 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
539 }
540 /* create a CL buffer from image pixel buffer */
541 length = image->columns * image->rows;
542 imageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus);
543 if (clStatus != CL_SUCCESS)
544 {
cristya22457d2013-12-07 14:03:06 +0000545 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000546 goto cleanup;
547 }
548
549 parametersBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus);
550 if (clStatus != CL_SUCCESS)
551 {
cristya22457d2013-12-07 14:03:06 +0000552 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000553 goto cleanup;
554 }
555
556 queue = AcquireOpenCLCommandQueue(clEnv);
557
558 parametersBufferPtr = (float*)clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float)
559 , 0, NULL, NULL, &clStatus);
560 if (clStatus != CL_SUCCESS)
561 {
cristya22457d2013-12-07 14:03:06 +0000562 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000563 goto cleanup;
564 }
565 for (i = 0; i < number_parameters; i++)
566 {
567 parametersBufferPtr[i] = (float)parameters[i];
568 }
569 clStatus = clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL);
570 if (clStatus != CL_SUCCESS)
571 {
cristya22457d2013-12-07 14:03:06 +0000572 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000573 goto cleanup;
574 }
575 clFlush(queue);
576
577 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "FunctionImage");
578 if (clkernel == NULL)
579 {
cristya22457d2013-12-07 14:03:06 +0000580 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000581 goto cleanup;
582 }
583
584 /* set the kernel arguments */
585 i = 0;
586 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
587 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
588 clStatus|=clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
589 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
590 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
591 if (clStatus != CL_SUCCESS)
592 {
cristya22457d2013-12-07 14:03:06 +0000593 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000594 goto cleanup;
595 }
596
597 globalWorkSize[0] = image->columns;
598 globalWorkSize[1] = image->rows;
599 /* launch the kernel */
600 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
601 if (clStatus != CL_SUCCESS)
602 {
cristya22457d2013-12-07 14:03:06 +0000603 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000604 goto cleanup;
605 }
606 clFlush(queue);
607
608
609 if (ALIGNED(pixels,CLPixelPacket))
610 {
611 length = image->columns * image->rows;
612 clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
613 }
614 else
615 {
616 length = image->columns * image->rows;
617 clStatus = clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL);
618 }
619 if (clStatus != CL_SUCCESS)
620 {
cristya22457d2013-12-07 14:03:06 +0000621 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000622 goto cleanup;
623 }
624 status = MagickTrue;
625
626cleanup:
cristya22457d2013-12-07 14:03:06 +0000627 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +0000628
629 if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel);
630 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
631 if (imageBuffer != NULL) clReleaseMemObject(imageBuffer);
632 if (parametersBuffer != NULL) clReleaseMemObject(parametersBuffer);
633
634 return status;
635}
636
637
638
639MagickExport MagickBooleanType
640 AccelerateFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
641 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
642{
643 MagickBooleanType status;
644
645 status = MagickFalse;
646
647 assert(image != NULL);
648 assert(exception != (ExceptionInfo *) NULL);
649
650 status = checkOpenCLEnvironment(exception);
cristycd8b3312013-12-22 01:51:11 +0000651 if (status != MagickFalse)
cristyf034abb2013-11-24 14:16:14 +0000652 {
dirk5dcb7622013-12-01 10:43:43 +0000653 status = checkAccelerateCondition(image, channel);
cristycd8b3312013-12-22 01:51:11 +0000654 if (status != MagickFalse)
cristyf034abb2013-11-24 14:16:14 +0000655 {
656 status = ComputeFunctionImage(image, channel, function, number_parameters, parameters, exception);
cristyf034abb2013-11-24 14:16:14 +0000657 }
658 }
659 return status;
660}
661
662
663static MagickBooleanType splitImage(const Image* inputImage)
664{
665 MagickBooleanType split;
666
667 MagickCLEnv clEnv;
668 unsigned long allocSize;
669 unsigned long tempSize;
670
671 clEnv = GetDefaultOpenCLEnv();
672
673 allocSize = GetOpenCLDeviceMaxMemAllocSize(clEnv);
674 tempSize = inputImage->columns * inputImage->rows * 4 * 4;
675
676 /*
677 printf("alloc size: %lu\n", allocSize);
678 printf("temp size: %lu\n", tempSize);
679 */
680
681 split = ((tempSize > allocSize) ? MagickTrue:MagickFalse);
682
683 return split;
684}
685
686static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
687{
688 MagickBooleanType outputReady;
689 Image* filteredImage;
690 MagickCLEnv clEnv;
691
692 cl_int clStatus;
693
694 const void *inputPixels;
695 void *filteredPixels;
696 cl_mem_flags mem_flags;
697
698 cl_context context;
699 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
700 cl_kernel blurRowKernel, blurColumnKernel;
701 cl_command_queue queue;
702
703 void* hostPtr;
704 float* kernelBufferPtr;
705 MagickSizeType length;
706
707 char geometry[MaxTextExtent];
708 KernelInfo* kernel = NULL;
709 unsigned int kernelWidth;
710 unsigned int imageColumns, imageRows;
711
712 unsigned int i;
713
714 context = NULL;
715 filteredImage = NULL;
716 inputImageBuffer = NULL;
717 tempImageBuffer = NULL;
718 filteredImageBuffer = NULL;
719 imageKernelBuffer = NULL;
720 blurRowKernel = NULL;
721 blurColumnKernel = NULL;
722 queue = NULL;
723
724 outputReady = MagickFalse;
725
726 clEnv = GetDefaultOpenCLEnv();
727 context = GetOpenCLContext(clEnv);
728 queue = AcquireOpenCLCommandQueue(clEnv);
729
730 /* Create and initialize OpenCL buffers. */
731 {
732 inputPixels = NULL;
733 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
734 if (inputPixels == (const void *) NULL)
735 {
cristya22457d2013-12-07 14:03:06 +0000736 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +0000737 goto cleanup;
738 }
739 /* If the host pointer is aligned to the size of CLPixelPacket,
740 then use the host buffer directly from the GPU; otherwise,
741 create a buffer on the GPU and copy the data over */
742 if (ALIGNED(inputPixels,CLPixelPacket))
743 {
744 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
745 }
746 else
747 {
748 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
749 }
750 /* create a CL buffer from image pixel buffer */
751 length = inputImage->columns * inputImage->rows;
752 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
753 if (clStatus != CL_SUCCESS)
754 {
cristya22457d2013-12-07 14:03:06 +0000755 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000756 goto cleanup;
757 }
758 }
759
760 /* create output */
761 {
762 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
763 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +0000764 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +0000765 {
cristya22457d2013-12-07 14:03:06 +0000766 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000767 goto cleanup;
768 }
769 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
770 if (filteredPixels == (void *) NULL)
771 {
cristya22457d2013-12-07 14:03:06 +0000772 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +0000773 goto cleanup;
774 }
775
776 if (ALIGNED(filteredPixels,CLPixelPacket))
777 {
778 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
779 hostPtr = filteredPixels;
780 }
781 else
782 {
783 mem_flags = CL_MEM_WRITE_ONLY;
784 hostPtr = NULL;
785 }
786 /* create a CL buffer from image pixel buffer */
787 length = inputImage->columns * inputImage->rows;
788 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
789 if (clStatus != CL_SUCCESS)
790 {
cristya22457d2013-12-07 14:03:06 +0000791 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000792 goto cleanup;
793 }
794 }
795
796 /* create processing kernel */
797 {
798 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
799 kernel=AcquireKernelInfo(geometry);
800 if (kernel == (KernelInfo *) NULL)
801 {
cristya22457d2013-12-07 14:03:06 +0000802 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
cristyf034abb2013-11-24 14:16:14 +0000803 goto cleanup;
804 }
805
806 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
807 if (clStatus != CL_SUCCESS)
808 {
cristya22457d2013-12-07 14:03:06 +0000809 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000810 goto cleanup;
811 }
812 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
813 if (clStatus != CL_SUCCESS)
814 {
cristya22457d2013-12-07 14:03:06 +0000815 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000816 goto cleanup;
817 }
818
819 for (i = 0; i < kernel->width; i++)
820 {
821 kernelBufferPtr[i] = (float) kernel->values[i];
822 }
823
824 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
825 if (clStatus != CL_SUCCESS)
826 {
cristya22457d2013-12-07 14:03:06 +0000827 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000828 goto cleanup;
829 }
830 }
831
832 {
833
834 /* create temp buffer */
835 {
836 length = inputImage->columns * inputImage->rows;
837 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
838 if (clStatus != CL_SUCCESS)
839 {
cristya22457d2013-12-07 14:03:06 +0000840 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +0000841 goto cleanup;
842 }
843 }
844
845 /* get the OpenCL kernels */
846 {
847 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
848 if (blurRowKernel == NULL)
849 {
cristya22457d2013-12-07 14:03:06 +0000850 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000851 goto cleanup;
852 };
853
854 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumn");
855 if (blurColumnKernel == NULL)
856 {
cristya22457d2013-12-07 14:03:06 +0000857 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000858 goto cleanup;
859 };
860 }
861
862 {
863 /* need logic to decide this value */
864 int chunkSize = 256;
865
866 {
867 imageColumns = inputImage->columns;
868 imageRows = inputImage->rows;
869
870 /* set the kernel arguments */
871 i = 0;
872 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
873 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
874 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
875 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
876 kernelWidth = kernel->width;
877 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
878 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
879 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
880 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
881 if (clStatus != CL_SUCCESS)
882 {
cristya22457d2013-12-07 14:03:06 +0000883 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000884 goto cleanup;
885 }
886 }
887
888 /* launch the kernel */
889 {
890 size_t gsize[2];
891 size_t wsize[2];
892
893 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
894 gsize[1] = inputImage->rows;
895 wsize[0] = chunkSize;
896 wsize[1] = 1;
897
898 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
899 if (clStatus != CL_SUCCESS)
900 {
cristya22457d2013-12-07 14:03:06 +0000901 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000902 goto cleanup;
903 }
904 clFlush(queue);
905 }
906 }
907
908 {
909 /* need logic to decide this value */
910 int chunkSize = 256;
911
912 {
913 imageColumns = inputImage->columns;
914 imageRows = inputImage->rows;
915
916 /* set the kernel arguments */
917 i = 0;
918 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
919 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
920 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
921 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
922 kernelWidth = kernel->width;
923 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
924 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
925 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
926 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
927 if (clStatus != CL_SUCCESS)
928 {
cristya22457d2013-12-07 14:03:06 +0000929 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000930 goto cleanup;
931 }
932 }
933
934 /* launch the kernel */
935 {
936 size_t gsize[2];
937 size_t wsize[2];
938
939 gsize[0] = inputImage->columns;
940 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
941 wsize[0] = 1;
942 wsize[1] = chunkSize;
943
944 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
945 if (clStatus != CL_SUCCESS)
946 {
cristya22457d2013-12-07 14:03:06 +0000947 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000948 goto cleanup;
949 }
950 clFlush(queue);
951 }
952 }
953
954 }
955
956 /* get result */
957 if (ALIGNED(filteredPixels,CLPixelPacket))
958 {
959 length = inputImage->columns * inputImage->rows;
960 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
961 }
962 else
963 {
964 length = inputImage->columns * inputImage->rows;
965 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
966 }
967 if (clStatus != CL_SUCCESS)
968 {
cristya22457d2013-12-07 14:03:06 +0000969 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +0000970 goto cleanup;
971 }
972
973 outputReady = MagickTrue;
974
975cleanup:
cristya22457d2013-12-07 14:03:06 +0000976 OpenCLLogException(__FUNCTION__,__LINE__,exception);
977
cristyf034abb2013-11-24 14:16:14 +0000978 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
979 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
980 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
981 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
982 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
983 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
984 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
985 if (kernel!=NULL) DestroyKernelInfo(kernel);
986 if (outputReady == MagickFalse)
987 {
988 if (filteredImage != NULL)
989 {
990 DestroyImage(filteredImage);
991 filteredImage = NULL;
992 }
993 }
994 return filteredImage;
995}
996
997static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
998{
999 MagickBooleanType outputReady;
1000 Image* filteredImage;
1001 MagickCLEnv clEnv;
1002
1003 cl_int clStatus;
1004
1005 const void *inputPixels;
1006 void *filteredPixels;
1007 cl_mem_flags mem_flags;
1008
1009 cl_context context;
1010 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
1011 cl_kernel blurRowKernel, blurColumnKernel;
1012 cl_command_queue queue;
1013
1014 void* hostPtr;
1015 float* kernelBufferPtr;
1016 MagickSizeType length;
1017
1018 char geometry[MaxTextExtent];
1019 KernelInfo* kernel = NULL;
1020 unsigned int kernelWidth;
1021 unsigned int imageColumns, imageRows;
1022
1023 unsigned int i;
1024
1025 context = NULL;
1026 filteredImage = NULL;
1027 inputImageBuffer = NULL;
1028 tempImageBuffer = NULL;
1029 filteredImageBuffer = NULL;
1030 imageKernelBuffer = NULL;
1031 blurRowKernel = NULL;
1032 blurColumnKernel = NULL;
1033 queue = NULL;
1034
1035 outputReady = MagickFalse;
1036
1037 clEnv = GetDefaultOpenCLEnv();
1038 context = GetOpenCLContext(clEnv);
1039 queue = AcquireOpenCLCommandQueue(clEnv);
1040
1041 /* Create and initialize OpenCL buffers. */
1042 {
1043 inputPixels = NULL;
1044 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1045 if (inputPixels == (const void *) NULL)
1046 {
cristya22457d2013-12-07 14:03:06 +00001047 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001048 goto cleanup;
1049 }
1050 /* If the host pointer is aligned to the size of CLPixelPacket,
1051 then use the host buffer directly from the GPU; otherwise,
1052 create a buffer on the GPU and copy the data over */
1053 if (ALIGNED(inputPixels,CLPixelPacket))
1054 {
1055 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1056 }
1057 else
1058 {
1059 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1060 }
1061 /* create a CL buffer from image pixel buffer */
1062 length = inputImage->columns * inputImage->rows;
1063 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1064 if (clStatus != CL_SUCCESS)
1065 {
cristya22457d2013-12-07 14:03:06 +00001066 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001067 goto cleanup;
1068 }
1069 }
1070
1071 /* create output */
1072 {
1073 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1074 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00001075 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00001076 {
cristya22457d2013-12-07 14:03:06 +00001077 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001078 goto cleanup;
1079 }
1080 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1081 if (filteredPixels == (void *) NULL)
1082 {
cristya22457d2013-12-07 14:03:06 +00001083 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001084 goto cleanup;
1085 }
1086
1087 if (ALIGNED(filteredPixels,CLPixelPacket))
1088 {
1089 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1090 hostPtr = filteredPixels;
1091 }
1092 else
1093 {
1094 mem_flags = CL_MEM_WRITE_ONLY;
1095 hostPtr = NULL;
1096 }
1097 /* create a CL buffer from image pixel buffer */
1098 length = inputImage->columns * inputImage->rows;
1099 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1100 if (clStatus != CL_SUCCESS)
1101 {
cristya22457d2013-12-07 14:03:06 +00001102 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001103 goto cleanup;
1104 }
1105 }
1106
1107 /* create processing kernel */
1108 {
1109 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1110 kernel=AcquireKernelInfo(geometry);
1111 if (kernel == (KernelInfo *) NULL)
1112 {
cristya22457d2013-12-07 14:03:06 +00001113 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
cristyf034abb2013-11-24 14:16:14 +00001114 goto cleanup;
1115 }
1116
1117 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
1118 if (clStatus != CL_SUCCESS)
1119 {
cristya22457d2013-12-07 14:03:06 +00001120 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001121 goto cleanup;
1122 }
1123 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1124 if (clStatus != CL_SUCCESS)
1125 {
cristya22457d2013-12-07 14:03:06 +00001126 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001127 goto cleanup;
1128 }
1129
1130 for (i = 0; i < kernel->width; i++)
1131 {
1132 kernelBufferPtr[i] = (float) kernel->values[i];
1133 }
1134
1135 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1136 if (clStatus != CL_SUCCESS)
1137 {
cristya22457d2013-12-07 14:03:06 +00001138 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001139 goto cleanup;
1140 }
1141 }
1142
1143 {
1144 unsigned int offsetRows;
1145 unsigned int sec;
1146
1147 /* create temp buffer */
1148 {
1149 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
1150 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1151 if (clStatus != CL_SUCCESS)
1152 {
cristya22457d2013-12-07 14:03:06 +00001153 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001154 goto cleanup;
1155 }
1156 }
1157
1158 /* get the OpenCL kernels */
1159 {
1160 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
1161 if (blurRowKernel == NULL)
1162 {
cristya22457d2013-12-07 14:03:06 +00001163 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001164 goto cleanup;
1165 };
1166
1167 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumnSection");
1168 if (blurColumnKernel == NULL)
1169 {
cristya22457d2013-12-07 14:03:06 +00001170 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001171 goto cleanup;
1172 };
1173 }
1174
1175 for (sec = 0; sec < 2; sec++)
1176 {
1177 {
1178 /* need logic to decide this value */
1179 int chunkSize = 256;
1180
1181 {
1182 imageColumns = inputImage->columns;
1183 if (sec == 0)
1184 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
1185 else
1186 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
1187
1188 offsetRows = sec * inputImage->rows / 2;
1189
1190 kernelWidth = kernel->width;
1191
1192 /* set the kernel arguments */
1193 i = 0;
1194 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1195 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1196 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1197 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1198 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1199 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1200 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1201 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1202 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1203 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
1204 if (clStatus != CL_SUCCESS)
1205 {
cristya22457d2013-12-07 14:03:06 +00001206 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001207 goto cleanup;
1208 }
1209 }
1210
1211 /* launch the kernel */
1212 {
1213 size_t gsize[2];
1214 size_t wsize[2];
1215
1216 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
1217 gsize[1] = imageRows;
1218 wsize[0] = chunkSize;
1219 wsize[1] = 1;
1220
1221 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1222 if (clStatus != CL_SUCCESS)
1223 {
cristya22457d2013-12-07 14:03:06 +00001224 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001225 goto cleanup;
1226 }
1227 clFlush(queue);
1228 }
1229 }
1230
1231 {
1232 /* need logic to decide this value */
1233 int chunkSize = 256;
1234
1235 {
1236 imageColumns = inputImage->columns;
1237 if (sec == 0)
1238 imageRows = inputImage->rows / 2;
1239 else
1240 imageRows = (inputImage->rows - inputImage->rows / 2);
1241
1242 offsetRows = sec * inputImage->rows / 2;
1243
1244 kernelWidth = kernel->width;
1245
1246 /* set the kernel arguments */
1247 i = 0;
1248 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1249 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1250 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
1251 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1252 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1253 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1254 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1255 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
1256 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1257 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
1258 if (clStatus != CL_SUCCESS)
1259 {
cristya22457d2013-12-07 14:03:06 +00001260 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001261 goto cleanup;
1262 }
1263 }
1264
1265 /* launch the kernel */
1266 {
1267 size_t gsize[2];
1268 size_t wsize[2];
1269
1270 gsize[0] = imageColumns;
1271 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
1272 wsize[0] = 1;
1273 wsize[1] = chunkSize;
1274
1275 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1276 if (clStatus != CL_SUCCESS)
1277 {
cristya22457d2013-12-07 14:03:06 +00001278 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001279 goto cleanup;
1280 }
1281 clFlush(queue);
1282 }
1283 }
1284 }
1285
1286 }
1287
1288 /* get result */
1289 if (ALIGNED(filteredPixels,CLPixelPacket))
1290 {
1291 length = inputImage->columns * inputImage->rows;
1292 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1293 }
1294 else
1295 {
1296 length = inputImage->columns * inputImage->rows;
1297 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1298 }
1299 if (clStatus != CL_SUCCESS)
1300 {
cristya22457d2013-12-07 14:03:06 +00001301 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001302 goto cleanup;
1303 }
1304
1305 outputReady = MagickTrue;
1306
1307cleanup:
cristya22457d2013-12-07 14:03:06 +00001308 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1309
cristyf034abb2013-11-24 14:16:14 +00001310 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1311 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1312 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1313 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1314 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1315 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
1316 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1317 if (kernel!=NULL) DestroyKernelInfo(kernel);
1318 if (outputReady == MagickFalse)
1319 {
1320 if (filteredImage != NULL)
1321 {
1322 DestroyImage(filteredImage);
1323 filteredImage = NULL;
1324 }
1325 }
1326 return filteredImage;
1327}
1328
1329/*
1330%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1331% %
1332% %
1333% %
1334% B l u r I m a g e w i t h O p e n C L %
1335% %
1336% %
1337% %
1338%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1339%
1340% BlurImage() blurs an image. We convolve the image with a Gaussian operator
1341% of the given radius and standard deviation (sigma). For reasonable results,
1342% the radius should be larger than sigma. Use a radius of 0 and BlurImage()
1343% selects a suitable radius for you.
1344%
1345% The format of the BlurImage method is:
1346%
1347% Image *BlurImage(const Image *image,const double radius,
1348% const double sigma,ExceptionInfo *exception)
1349% Image *BlurImageChannel(const Image *image,const ChannelType channel,
1350% const double radius,const double sigma,ExceptionInfo *exception)
1351%
1352% A description of each parameter follows:
1353%
1354% o image: the image.
1355%
1356% o channel: the channel type.
1357%
1358% o radius: the radius of the Gaussian, in pixels, not counting the center
1359% pixel.
1360%
1361% o sigma: the standard deviation of the Gaussian, in pixels.
1362%
1363% o exception: return any errors or warnings in this structure.
1364%
1365*/
1366
1367MagickExport
1368Image* AccelerateBlurImage(const Image *image, const ChannelType channel, const double radius, const double sigma,ExceptionInfo *exception)
1369{
1370 MagickBooleanType status;
1371 Image* filteredImage = NULL;
1372
1373 assert(image != NULL);
1374 assert(exception != (ExceptionInfo *) NULL);
1375
1376 status = checkOpenCLEnvironment(exception);
1377 if (status == MagickFalse)
1378 return NULL;
1379
dirk5dcb7622013-12-01 10:43:43 +00001380 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00001381 if (status == MagickFalse)
1382 return NULL;
1383
1384 if (splitImage(image) && (image->rows / 2 > radius))
1385 filteredImage = ComputeBlurImageSection(image, channel, radius, sigma, exception);
1386 else
1387 filteredImage = ComputeBlurImage(image, channel, radius, sigma, exception);
1388
cristyf034abb2013-11-24 14:16:14 +00001389 return filteredImage;
1390}
1391
1392
1393static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType channel, const double angle, ExceptionInfo *exception)
1394{
1395
1396 MagickBooleanType outputReady;
1397 Image* filteredImage;
1398 MagickCLEnv clEnv;
1399
1400 cl_int clStatus;
1401 size_t global_work_size[2];
1402
1403 cl_context context;
1404 cl_mem_flags mem_flags;
1405 cl_mem inputImageBuffer, filteredImageBuffer, sinThetaBuffer, cosThetaBuffer;
1406 cl_kernel radialBlurKernel;
1407 cl_command_queue queue;
1408
1409 const void *inputPixels;
1410 void *filteredPixels;
1411 void* hostPtr;
1412 float* sinThetaPtr;
1413 float* cosThetaPtr;
1414 MagickSizeType length;
1415 unsigned int matte;
dirke19d0cc2013-12-01 10:07:42 +00001416 PixelInfo bias;
cristyf034abb2013-11-24 14:16:14 +00001417 cl_float4 biasPixel;
1418 cl_float2 blurCenter;
1419 float blurRadius;
1420 unsigned int cossin_theta_size;
1421 float offset, theta;
1422
1423 unsigned int i;
1424
1425 outputReady = MagickFalse;
1426 context = NULL;
1427 filteredImage = NULL;
1428 inputImageBuffer = NULL;
1429 filteredImageBuffer = NULL;
1430 sinThetaBuffer = NULL;
1431 cosThetaBuffer = NULL;
1432 queue = NULL;
1433 radialBlurKernel = NULL;
1434
1435
1436 clEnv = GetDefaultOpenCLEnv();
1437 context = GetOpenCLContext(clEnv);
1438
1439
1440 /* Create and initialize OpenCL buffers. */
1441
1442 inputPixels = NULL;
1443 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1444 if (inputPixels == (const void *) NULL)
1445 {
cristya22457d2013-12-07 14:03:06 +00001446 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001447 goto cleanup;
1448 }
1449
1450 /* If the host pointer is aligned to the size of CLPixelPacket,
1451 then use the host buffer directly from the GPU; otherwise,
1452 create a buffer on the GPU and copy the data over */
1453 if (ALIGNED(inputPixels,CLPixelPacket))
1454 {
1455 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1456 }
1457 else
1458 {
1459 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1460 }
1461 /* create a CL buffer from image pixel buffer */
1462 length = inputImage->columns * inputImage->rows;
1463 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1464 if (clStatus != CL_SUCCESS)
1465 {
cristya22457d2013-12-07 14:03:06 +00001466 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001467 goto cleanup;
1468 }
1469
1470
1471 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1472 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00001473 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00001474 {
cristya22457d2013-12-07 14:03:06 +00001475 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001476 goto cleanup;
1477 }
1478 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1479 if (filteredPixels == (void *) NULL)
1480 {
cristya22457d2013-12-07 14:03:06 +00001481 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001482 goto cleanup;
1483 }
1484
1485 if (ALIGNED(filteredPixels,CLPixelPacket))
1486 {
1487 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1488 hostPtr = filteredPixels;
1489 }
1490 else
1491 {
1492 mem_flags = CL_MEM_WRITE_ONLY;
1493 hostPtr = NULL;
1494 }
1495 /* create a CL buffer from image pixel buffer */
1496 length = inputImage->columns * inputImage->rows;
1497 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1498 if (clStatus != CL_SUCCESS)
1499 {
cristya22457d2013-12-07 14:03:06 +00001500 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001501 goto cleanup;
1502 }
1503
1504 blurCenter.s[0] = (float) (inputImage->columns-1)/2.0;
1505 blurCenter.s[1] = (float) (inputImage->rows-1)/2.0;
1506 blurRadius=hypot(blurCenter.s[0],blurCenter.s[1]);
1507 cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
1508
1509 /* create a buffer for sin_theta and cos_theta */
1510 sinThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1511 if (clStatus != CL_SUCCESS)
1512 {
cristya22457d2013-12-07 14:03:06 +00001513 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001514 goto cleanup;
1515 }
1516 cosThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1517 if (clStatus != CL_SUCCESS)
1518 {
cristya22457d2013-12-07 14:03:06 +00001519 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001520 goto cleanup;
1521 }
1522
1523
1524 queue = AcquireOpenCLCommandQueue(clEnv);
1525 sinThetaPtr = (float*) clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1526 if (clStatus != CL_SUCCESS)
1527 {
cristya22457d2013-12-07 14:03:06 +00001528 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001529 goto cleanup;
1530 }
1531
1532 cosThetaPtr = (float*) clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1533 if (clStatus != CL_SUCCESS)
1534 {
cristya22457d2013-12-07 14:03:06 +00001535 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001536 goto cleanup;
1537 }
1538
1539 theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
1540 offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
1541 for (i=0; i < (ssize_t) cossin_theta_size; i++)
1542 {
1543 cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
1544 sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
1545 }
1546
1547 clStatus = clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
1548 clStatus |= clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
1549 if (clStatus != CL_SUCCESS)
1550 {
cristya22457d2013-12-07 14:03:06 +00001551 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001552 goto cleanup;
1553 }
1554
1555 /* get the OpenCL kernel */
1556 radialBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RadialBlur");
1557 if (radialBlurKernel == NULL)
1558 {
cristya22457d2013-12-07 14:03:06 +00001559 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001560 goto cleanup;
1561 }
1562
1563
1564 /* set the kernel arguments */
1565 i = 0;
1566 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1567 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1568
dirke19d0cc2013-12-01 10:07:42 +00001569 GetPixelInfo(inputImage,&bias);
cristyf034abb2013-11-24 14:16:14 +00001570 biasPixel.s[0] = bias.red;
1571 biasPixel.s[1] = bias.green;
1572 biasPixel.s[2] = bias.blue;
dirke19d0cc2013-12-01 10:07:42 +00001573 biasPixel.s[3] = bias.alpha;
cristyf034abb2013-11-24 14:16:14 +00001574 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel);
1575 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel);
1576
dirke19d0cc2013-12-01 10:07:42 +00001577 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +00001578 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte);
1579
1580 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter);
1581
1582 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
1583 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
1584 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
1585 if (clStatus != CL_SUCCESS)
1586 {
cristya22457d2013-12-07 14:03:06 +00001587 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001588 goto cleanup;
1589 }
1590
1591
1592 global_work_size[0] = inputImage->columns;
1593 global_work_size[1] = inputImage->rows;
1594 /* launch the kernel */
1595 clStatus = clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
1596 if (clStatus != CL_SUCCESS)
1597 {
cristya22457d2013-12-07 14:03:06 +00001598 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001599 goto cleanup;
1600 }
1601 clFlush(queue);
1602
1603 if (ALIGNED(filteredPixels,CLPixelPacket))
1604 {
1605 length = inputImage->columns * inputImage->rows;
1606 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1607 }
1608 else
1609 {
1610 length = inputImage->columns * inputImage->rows;
1611 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1612 }
1613 if (clStatus != CL_SUCCESS)
1614 {
cristya22457d2013-12-07 14:03:06 +00001615 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001616 goto cleanup;
1617 }
1618 outputReady = MagickTrue;
1619
1620cleanup:
cristya22457d2013-12-07 14:03:06 +00001621 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1622
cristyf034abb2013-11-24 14:16:14 +00001623 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1624 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1625 if (sinThetaBuffer!=NULL) clReleaseMemObject(sinThetaBuffer);
1626 if (cosThetaBuffer!=NULL) clReleaseMemObject(cosThetaBuffer);
1627 if (radialBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, radialBlurKernel);
1628 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1629 if (outputReady == MagickFalse)
1630 {
1631 if (filteredImage != NULL)
1632 {
1633 DestroyImage(filteredImage);
1634 filteredImage = NULL;
1635 }
1636 }
1637 return filteredImage;
1638}
1639
1640/*
1641%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1642% %
1643% %
1644% %
1645% R a d i a l B l u r I m a g e w i t h O p e n C L %
1646% %
1647% %
1648% %
1649%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1650%
1651% RadialBlurImage() applies a radial blur to the image.
1652%
1653% Andrew Protano contributed this effect.
1654%
1655% The format of the RadialBlurImage method is:
1656%
1657% Image *RadialBlurImage(const Image *image,const double angle,
1658% ExceptionInfo *exception)
1659% Image *RadialBlurImageChannel(const Image *image,const ChannelType channel,
1660% const double angle,ExceptionInfo *exception)
1661%
1662% A description of each parameter follows:
1663%
1664% o image: the image.
1665%
1666% o channel: the channel type.
1667%
1668% o angle: the angle of the radial blur.
1669%
1670% o exception: return any errors or warnings in this structure.
1671%
1672*/
1673
1674MagickExport
1675Image* AccelerateRadialBlurImage(const Image *image, const ChannelType channel, const double angle, ExceptionInfo *exception)
1676{
1677 MagickBooleanType status;
1678 Image* filteredImage;
1679
1680
1681 assert(image != NULL);
1682 assert(exception != NULL);
1683
1684 status = checkOpenCLEnvironment(exception);
1685 if (status == MagickFalse)
1686 return NULL;
1687
dirk5dcb7622013-12-01 10:43:43 +00001688 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00001689 if (status == MagickFalse)
1690 return NULL;
1691
1692 filteredImage = ComputeRadialBlurImage(image, channel, angle, exception);
cristyf034abb2013-11-24 14:16:14 +00001693 return filteredImage;
1694}
1695
1696
1697
1698static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
1699 const double gain,const double threshold,ExceptionInfo *exception)
1700{
1701 MagickBooleanType outputReady = MagickFalse;
1702 Image* filteredImage = NULL;
1703 MagickCLEnv clEnv = NULL;
1704
1705 cl_int clStatus;
1706
1707 const void *inputPixels;
1708 void *filteredPixels;
1709 cl_mem_flags mem_flags;
1710
1711 KernelInfo *kernel = NULL;
1712 char geometry[MaxTextExtent];
1713
1714 cl_context context = NULL;
1715 cl_mem inputImageBuffer = NULL;
1716 cl_mem filteredImageBuffer = NULL;
1717 cl_mem tempImageBuffer = NULL;
1718 cl_mem imageKernelBuffer = NULL;
1719 cl_kernel blurRowKernel = NULL;
1720 cl_kernel unsharpMaskBlurColumnKernel = NULL;
1721 cl_command_queue queue = NULL;
1722
1723 void* hostPtr;
1724 float* kernelBufferPtr;
1725 MagickSizeType length;
1726 unsigned int kernelWidth;
1727 float fGain;
1728 float fThreshold;
1729 unsigned int imageColumns, imageRows;
1730 int chunkSize;
1731 unsigned int i;
1732
1733 clEnv = GetDefaultOpenCLEnv();
1734 context = GetOpenCLContext(clEnv);
1735 queue = AcquireOpenCLCommandQueue(clEnv);
1736
1737 /* Create and initialize OpenCL buffers. */
1738 {
1739 inputPixels = NULL;
1740 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1741 if (inputPixels == (const void *) NULL)
1742 {
cristya22457d2013-12-07 14:03:06 +00001743 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001744 goto cleanup;
1745 }
1746
1747 /* If the host pointer is aligned to the size of CLPixelPacket,
1748 then use the host buffer directly from the GPU; otherwise,
1749 create a buffer on the GPU and copy the data over */
1750 if (ALIGNED(inputPixels,CLPixelPacket))
1751 {
1752 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1753 }
1754 else
1755 {
1756 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1757 }
1758 /* create a CL buffer from image pixel buffer */
1759 length = inputImage->columns * inputImage->rows;
1760 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1761 if (clStatus != CL_SUCCESS)
1762 {
cristya22457d2013-12-07 14:03:06 +00001763 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001764 goto cleanup;
1765 }
1766 }
1767
1768 /* create output */
1769 {
1770 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1771 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00001772 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00001773 {
cristya22457d2013-12-07 14:03:06 +00001774 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001775 goto cleanup;
1776 }
1777 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1778 if (filteredPixels == (void *) NULL)
1779 {
cristya22457d2013-12-07 14:03:06 +00001780 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00001781 goto cleanup;
1782 }
1783
1784 if (ALIGNED(filteredPixels,CLPixelPacket))
1785 {
1786 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1787 hostPtr = filteredPixels;
1788 }
1789 else
1790 {
1791 mem_flags = CL_MEM_WRITE_ONLY;
1792 hostPtr = NULL;
1793 }
1794
1795 /* create a CL buffer from image pixel buffer */
1796 length = inputImage->columns * inputImage->rows;
1797 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1798 if (clStatus != CL_SUCCESS)
1799 {
cristya22457d2013-12-07 14:03:06 +00001800 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001801 goto cleanup;
1802 }
1803 }
1804
1805 /* create the blur kernel */
1806 {
1807 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1808 kernel=AcquireKernelInfo(geometry);
1809 if (kernel == (KernelInfo *) NULL)
1810 {
cristya22457d2013-12-07 14:03:06 +00001811 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001812 goto cleanup;
1813 }
1814
1815 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
1816 if (clStatus != CL_SUCCESS)
1817 {
cristya22457d2013-12-07 14:03:06 +00001818 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001819 goto cleanup;
1820 }
1821
1822
1823 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1824 if (clStatus != CL_SUCCESS)
1825 {
cristya22457d2013-12-07 14:03:06 +00001826 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001827 goto cleanup;
1828 }
1829 for (i = 0; i < kernel->width; i++)
1830 {
1831 kernelBufferPtr[i] = (float) kernel->values[i];
1832 }
1833 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1834 if (clStatus != CL_SUCCESS)
1835 {
cristya22457d2013-12-07 14:03:06 +00001836 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001837 goto cleanup;
1838 }
1839 }
1840
1841 {
1842 /* create temp buffer */
1843 {
1844 length = inputImage->columns * inputImage->rows;
1845 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1846 if (clStatus != CL_SUCCESS)
1847 {
cristya22457d2013-12-07 14:03:06 +00001848 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00001849 goto cleanup;
1850 }
1851 }
1852
1853 /* get the opencl kernel */
1854 {
1855 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
1856 if (blurRowKernel == NULL)
1857 {
cristya22457d2013-12-07 14:03:06 +00001858 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001859 goto cleanup;
1860 };
1861
1862 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
1863 if (unsharpMaskBlurColumnKernel == NULL)
1864 {
cristya22457d2013-12-07 14:03:06 +00001865 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001866 goto cleanup;
1867 };
1868 }
1869
1870 {
1871 chunkSize = 256;
1872
1873 imageColumns = inputImage->columns;
1874 imageRows = inputImage->rows;
1875
1876 kernelWidth = kernel->width;
1877
1878 /* set the kernel arguments */
1879 i = 0;
1880 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1881 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1882 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1883 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1884 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1885 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1886 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1887 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1888 if (clStatus != CL_SUCCESS)
1889 {
cristya22457d2013-12-07 14:03:06 +00001890 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001891 goto cleanup;
1892 }
1893 }
1894
1895 /* launch the kernel */
1896 {
1897 size_t gsize[2];
1898 size_t wsize[2];
1899
1900 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
1901 gsize[1] = inputImage->rows;
1902 wsize[0] = chunkSize;
1903 wsize[1] = 1;
1904
1905 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1906 if (clStatus != CL_SUCCESS)
1907 {
cristya22457d2013-12-07 14:03:06 +00001908 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001909 goto cleanup;
1910 }
1911 clFlush(queue);
1912 }
1913
1914
1915 {
1916 chunkSize = 256;
1917 imageColumns = inputImage->columns;
1918 imageRows = inputImage->rows;
1919 kernelWidth = kernel->width;
1920 fGain = (float)gain;
1921 fThreshold = (float)threshold;
1922
1923 i = 0;
1924 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1925 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1926 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1927 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1928 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1929 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
1930 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
1931 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
1932 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1933 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1934 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
1935 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
1936
1937 if (clStatus != CL_SUCCESS)
1938 {
cristya22457d2013-12-07 14:03:06 +00001939 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001940 goto cleanup;
1941 }
1942 }
1943
1944 /* launch the kernel */
1945 {
1946 size_t gsize[2];
1947 size_t wsize[2];
1948
1949 gsize[0] = inputImage->columns;
1950 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
1951 wsize[0] = 1;
1952 wsize[1] = chunkSize;
1953
1954 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1955 if (clStatus != CL_SUCCESS)
1956 {
cristya22457d2013-12-07 14:03:06 +00001957 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001958 goto cleanup;
1959 }
1960 clFlush(queue);
1961 }
1962
1963 }
1964
1965 /* get result */
1966 if (ALIGNED(filteredPixels,CLPixelPacket))
1967 {
1968 length = inputImage->columns * inputImage->rows;
1969 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1970 }
1971 else
1972 {
1973 length = inputImage->columns * inputImage->rows;
1974 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1975 }
1976 if (clStatus != CL_SUCCESS)
1977 {
cristya22457d2013-12-07 14:03:06 +00001978 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00001979 goto cleanup;
1980 }
1981
1982 outputReady = MagickTrue;
1983
1984cleanup:
cristya22457d2013-12-07 14:03:06 +00001985 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1986
cristyf034abb2013-11-24 14:16:14 +00001987 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
1988 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1989 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1990 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1991 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1992 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1993 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
1994 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1995 if (outputReady == MagickFalse)
1996 {
1997 if (filteredImage != NULL)
1998 {
1999 DestroyImage(filteredImage);
2000 filteredImage = NULL;
2001 }
2002 }
2003 return filteredImage;
2004}
2005
2006
2007static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
2008 const double gain,const double threshold,ExceptionInfo *exception)
2009{
2010 MagickBooleanType outputReady = MagickFalse;
2011 Image* filteredImage = NULL;
2012 MagickCLEnv clEnv = NULL;
2013
2014 cl_int clStatus;
2015
2016 const void *inputPixels;
2017 void *filteredPixels;
2018 cl_mem_flags mem_flags;
2019
2020 KernelInfo *kernel = NULL;
2021 char geometry[MaxTextExtent];
2022
2023 cl_context context = NULL;
2024 cl_mem inputImageBuffer = NULL;
2025 cl_mem filteredImageBuffer = NULL;
2026 cl_mem tempImageBuffer = NULL;
2027 cl_mem imageKernelBuffer = NULL;
2028 cl_kernel blurRowKernel = NULL;
2029 cl_kernel unsharpMaskBlurColumnKernel = NULL;
2030 cl_command_queue queue = NULL;
2031
2032 void* hostPtr;
2033 float* kernelBufferPtr;
2034 MagickSizeType length;
2035 unsigned int kernelWidth;
2036 float fGain;
2037 float fThreshold;
2038 unsigned int imageColumns, imageRows;
2039 int chunkSize;
2040 unsigned int i;
2041
2042 clEnv = GetDefaultOpenCLEnv();
2043 context = GetOpenCLContext(clEnv);
2044 queue = AcquireOpenCLCommandQueue(clEnv);
2045
2046 /* Create and initialize OpenCL buffers. */
2047 {
2048 inputPixels = NULL;
2049 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2050 if (inputPixels == (const void *) NULL)
2051 {
cristya22457d2013-12-07 14:03:06 +00002052 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00002053 goto cleanup;
2054 }
2055
2056 /* If the host pointer is aligned to the size of CLPixelPacket,
2057 then use the host buffer directly from the GPU; otherwise,
2058 create a buffer on the GPU and copy the data over */
2059 if (ALIGNED(inputPixels,CLPixelPacket))
2060 {
2061 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2062 }
2063 else
2064 {
2065 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2066 }
2067 /* create a CL buffer from image pixel buffer */
2068 length = inputImage->columns * inputImage->rows;
2069 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2070 if (clStatus != CL_SUCCESS)
2071 {
cristya22457d2013-12-07 14:03:06 +00002072 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002073 goto cleanup;
2074 }
2075 }
2076
2077 /* create output */
2078 {
2079 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
2080 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00002081 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00002082 {
cristya22457d2013-12-07 14:03:06 +00002083 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002084 goto cleanup;
2085 }
2086 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2087 if (filteredPixels == (void *) NULL)
2088 {
cristya22457d2013-12-07 14:03:06 +00002089 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00002090 goto cleanup;
2091 }
2092
2093 if (ALIGNED(filteredPixels,CLPixelPacket))
2094 {
2095 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2096 hostPtr = filteredPixels;
2097 }
2098 else
2099 {
2100 mem_flags = CL_MEM_WRITE_ONLY;
2101 hostPtr = NULL;
2102 }
2103
2104 /* create a CL buffer from image pixel buffer */
2105 length = inputImage->columns * inputImage->rows;
2106 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2107 if (clStatus != CL_SUCCESS)
2108 {
cristya22457d2013-12-07 14:03:06 +00002109 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002110 goto cleanup;
2111 }
2112 }
2113
2114 /* create the blur kernel */
2115 {
2116 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
2117 kernel=AcquireKernelInfo(geometry);
2118 if (kernel == (KernelInfo *) NULL)
2119 {
cristya22457d2013-12-07 14:03:06 +00002120 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002121 goto cleanup;
2122 }
2123
2124 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
2125 if (clStatus != CL_SUCCESS)
2126 {
cristya22457d2013-12-07 14:03:06 +00002127 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002128 goto cleanup;
2129 }
2130
2131
2132 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
2133 if (clStatus != CL_SUCCESS)
2134 {
cristya22457d2013-12-07 14:03:06 +00002135 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002136 goto cleanup;
2137 }
2138 for (i = 0; i < kernel->width; i++)
2139 {
2140 kernelBufferPtr[i] = (float) kernel->values[i];
2141 }
2142 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
2143 if (clStatus != CL_SUCCESS)
2144 {
cristya22457d2013-12-07 14:03:06 +00002145 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002146 goto cleanup;
2147 }
2148 }
2149
2150 {
2151 unsigned int offsetRows;
2152 unsigned int sec;
2153
2154 /* create temp buffer */
2155 {
2156 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
2157 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
2158 if (clStatus != CL_SUCCESS)
2159 {
cristya22457d2013-12-07 14:03:06 +00002160 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002161 goto cleanup;
2162 }
2163 }
2164
2165 /* get the opencl kernel */
2166 {
2167 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
2168 if (blurRowKernel == NULL)
2169 {
cristya22457d2013-12-07 14:03:06 +00002170 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002171 goto cleanup;
2172 };
2173
2174 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumnSection");
2175 if (unsharpMaskBlurColumnKernel == NULL)
2176 {
cristya22457d2013-12-07 14:03:06 +00002177 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002178 goto cleanup;
2179 };
2180 }
2181
2182 for (sec = 0; sec < 2; sec++)
2183 {
2184 {
2185 chunkSize = 256;
2186
2187 imageColumns = inputImage->columns;
2188 if (sec == 0)
2189 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
2190 else
2191 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
2192
2193 offsetRows = sec * inputImage->rows / 2;
2194
2195 kernelWidth = kernel->width;
2196
2197 /* set the kernel arguments */
2198 i = 0;
2199 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2200 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2201 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
2202 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2203 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2204 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2205 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2206 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
2207 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2208 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
2209 if (clStatus != CL_SUCCESS)
2210 {
cristya22457d2013-12-07 14:03:06 +00002211 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002212 goto cleanup;
2213 }
2214 }
2215 /* launch the kernel */
2216 {
2217 size_t gsize[2];
2218 size_t wsize[2];
2219
2220 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
2221 gsize[1] = imageRows;
2222 wsize[0] = chunkSize;
2223 wsize[1] = 1;
2224
2225 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2226 if (clStatus != CL_SUCCESS)
2227 {
cristya22457d2013-12-07 14:03:06 +00002228 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002229 goto cleanup;
2230 }
2231 clFlush(queue);
2232 }
2233
2234
2235 {
2236 chunkSize = 256;
2237
2238 imageColumns = inputImage->columns;
2239 if (sec == 0)
cristya22457d2013-12-07 14:03:06 +00002240 imageRows = inputImage->rows / 2;
cristyf034abb2013-11-24 14:16:14 +00002241 else
2242 imageRows = (inputImage->rows - inputImage->rows / 2);
2243
2244 offsetRows = sec * inputImage->rows / 2;
2245
2246 kernelWidth = kernel->width;
2247
2248 fGain = (float)gain;
2249 fThreshold = (float)threshold;
2250
2251 i = 0;
2252 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2253 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2254 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2255 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2256 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2257 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
2258 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
2259 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
2260 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2261 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2262 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
2263 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
2264 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2265 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
2266
2267 if (clStatus != CL_SUCCESS)
2268 {
cristya22457d2013-12-07 14:03:06 +00002269 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002270 goto cleanup;
2271 }
2272 }
2273
2274 /* launch the kernel */
2275 {
2276 size_t gsize[2];
2277 size_t wsize[2];
2278
2279 gsize[0] = imageColumns;
2280 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
2281 wsize[0] = 1;
2282 wsize[1] = chunkSize;
2283
2284 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2285 if (clStatus != CL_SUCCESS)
2286 {
cristya22457d2013-12-07 14:03:06 +00002287 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002288 goto cleanup;
2289 }
2290 clFlush(queue);
2291 }
2292 }
2293 }
2294
2295 /* get result */
2296 if (ALIGNED(filteredPixels,CLPixelPacket))
2297 {
2298 length = inputImage->columns * inputImage->rows;
2299 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2300 }
2301 else
2302 {
2303 length = inputImage->columns * inputImage->rows;
2304 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2305 }
2306 if (clStatus != CL_SUCCESS)
2307 {
cristya22457d2013-12-07 14:03:06 +00002308 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002309 goto cleanup;
2310 }
2311
2312 outputReady = MagickTrue;
2313
2314cleanup:
cristya22457d2013-12-07 14:03:06 +00002315 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2316
cristyf034abb2013-11-24 14:16:14 +00002317 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
2318 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2319 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2320 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2321 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
2322 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
2323 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
2324 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2325 if (outputReady == MagickFalse)
2326 {
2327 if (filteredImage != NULL)
2328 {
2329 DestroyImage(filteredImage);
2330 filteredImage = NULL;
2331 }
2332 }
2333 return filteredImage;
2334}
2335
2336
2337/*
2338%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2339% %
2340% %
2341% %
2342% U n s h a r p M a s k I m a g e w i t h O p e n C L %
2343% %
2344% %
2345% %
2346%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2347%
2348% UnsharpMaskImage() sharpens one or more image channels. We convolve the
2349% image with a Gaussian operator of the given radius and standard deviation
2350% (sigma). For reasonable results, radius should be larger than sigma. Use a
2351% radius of 0 and UnsharpMaskImage() selects a suitable radius for you.
2352%
2353% The format of the UnsharpMaskImage method is:
2354%
2355% Image *UnsharpMaskImage(const Image *image,const double radius,
2356% const double sigma,const double amount,const double threshold,
2357% ExceptionInfo *exception)
2358% Image *UnsharpMaskImageChannel(const Image *image,
2359% const ChannelType channel,const double radius,const double sigma,
2360% const double gain,const double threshold,ExceptionInfo *exception)
2361%
2362% A description of each parameter follows:
2363%
2364% o image: the image.
2365%
2366% o channel: the channel type.
2367%
2368% o radius: the radius of the Gaussian, in pixels, not counting the center
2369% pixel.
2370%
2371% o sigma: the standard deviation of the Gaussian, in pixels.
2372%
2373% o gain: the percentage of the difference between the original and the
2374% blur image that is added back into the original.
2375%
2376% o threshold: the threshold in pixels needed to apply the diffence gain.
2377%
2378% o exception: return any errors or warnings in this structure.
2379%
2380*/
2381
2382
2383MagickExport
2384Image* AccelerateUnsharpMaskImage(const Image *image, const ChannelType channel,const double radius,const double sigma,
2385 const double gain,const double threshold,ExceptionInfo *exception)
2386{
2387 MagickBooleanType status;
2388 Image* filteredImage;
2389
2390
2391 assert(image != NULL);
2392 assert(exception != NULL);
2393
2394 status = checkOpenCLEnvironment(exception);
2395 if (status == MagickFalse)
2396 return NULL;
2397
dirk5dcb7622013-12-01 10:43:43 +00002398 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00002399 if (status == MagickFalse)
2400 return NULL;
2401
2402 if (splitImage(image) && (image->rows / 2 > radius))
2403 filteredImage = ComputeUnsharpMaskImageSection(image,channel,radius,sigma,gain,threshold,exception);
2404 else
2405 filteredImage = ComputeUnsharpMaskImage(image,channel,radius,sigma,gain,threshold,exception);
cristyf034abb2013-11-24 14:16:14 +00002406 return filteredImage;
2407
2408}
2409
cristyf034abb2013-11-24 14:16:14 +00002410static MagickBooleanType resizeHorizontalFilter(cl_mem inputImage
2411 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2412 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2413 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float xFactor
2414 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2415{
2416 MagickBooleanType status = MagickFalse;
2417
2418 float scale, support;
2419 unsigned int i;
2420 cl_kernel horizontalKernel = NULL;
2421 cl_int clStatus;
2422 size_t global_work_size[2];
2423 size_t local_work_size[2];
2424 int resizeFilterType, resizeWindowType;
2425 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2426 size_t totalLocalMemorySize;
2427 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2428 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2429 size_t deviceLocalMemorySize;
2430 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2431
2432 const unsigned int workgroupSize = 256;
2433 unsigned int pixelPerWorkgroup;
2434 unsigned int chunkSize;
2435
2436 /*
2437 Apply filter to resize vertically from image to resize image.
2438 */
cristye85d0f72013-11-27 02:25:43 +00002439 scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
cristyf034abb2013-11-24 14:16:14 +00002440 support=scale*GetResizeFilterSupport(resizeFilter);
2441 if (support < 0.5)
2442 {
2443 /*
2444 Support too small even for nearest neighbour: Reduce to point
2445 sampling.
2446 */
2447 support=(MagickRealType) 0.5;
2448 scale=1.0;
2449 }
2450 scale=PerceptibleReciprocal(scale);
2451
2452 if (resizedColumns < workgroupSize)
2453 {
2454 chunkSize = 32;
2455 pixelPerWorkgroup = 32;
2456 }
2457 else
2458 {
2459 chunkSize = workgroupSize;
2460 pixelPerWorkgroup = workgroupSize;
2461 }
2462
2463 /* get the local memory size supported by the device */
2464 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2465
dirke3c5f892013-12-10 06:04:40 +00002466DisableMSCWarning(4127)
cristyf034abb2013-11-24 14:16:14 +00002467 while(1)
dirke3c5f892013-12-10 06:04:40 +00002468RestoreMSCWarning
cristyf034abb2013-11-24 14:16:14 +00002469 {
2470 /* calculate the local memory size needed per workgroup */
2471 cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
2472 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
2473 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2474 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2475 totalLocalMemorySize = imageCacheLocalMemorySize;
2476
2477 /* local size for the pixel accumulator */
2478 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2479 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2480
2481 /* local memory size for the weight accumulator */
2482 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2483 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2484
2485 /* local memory size for the gamma accumulator */
2486 if (matte == 0)
2487 gammaAccumulatorLocalMemorySize = sizeof(float);
2488 else
2489 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2490 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2491
2492 if (totalLocalMemorySize <= deviceLocalMemorySize)
2493 break;
2494 else
2495 {
2496 pixelPerWorkgroup = pixelPerWorkgroup/2;
2497 chunkSize = chunkSize/2;
2498 if (pixelPerWorkgroup == 0
2499 || chunkSize == 0)
2500 {
2501 /* quit, fallback to CPU */
2502 goto cleanup;
2503 }
2504 }
2505 }
2506
2507 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2508 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2509
2510
2511 if (resizeFilterType == SincFastWeightingFunction
2512 && resizeWindowType == SincFastWeightingFunction)
2513 {
2514 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilterSinc");
2515 }
2516 else
2517 {
2518 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilter");
2519 }
2520 if (horizontalKernel == NULL)
2521 {
cristya22457d2013-12-07 14:03:06 +00002522 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002523 goto cleanup;
2524 }
2525
2526 i = 0;
2527 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2528 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2529 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2530 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2531 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
2532 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2533
2534 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2535 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2536
2537 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2538 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2539 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2540
2541 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2542 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2543
2544 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2545 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2546
2547 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2548 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2549
2550 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2551 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2552
2553
2554 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2555 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2556 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2557 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2558
2559
2560 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2561 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2562 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2563
2564 if (clStatus != CL_SUCCESS)
2565 {
cristya22457d2013-12-07 14:03:06 +00002566 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002567 goto cleanup;
2568 }
2569
2570 global_work_size[0] = (resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2571 global_work_size[1] = resizedRows;
2572
2573 local_work_size[0] = workgroupSize;
2574 local_work_size[1] = 1;
2575 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2576 if (clStatus != CL_SUCCESS)
2577 {
cristya22457d2013-12-07 14:03:06 +00002578 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002579 goto cleanup;
2580 }
2581 clFlush(queue);
2582 status = MagickTrue;
2583
2584
2585cleanup:
cristya22457d2013-12-07 14:03:06 +00002586 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2587
cristyf034abb2013-11-24 14:16:14 +00002588 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2589
2590 return status;
2591}
2592
2593
2594static MagickBooleanType resizeVerticalFilter(cl_mem inputImage
2595 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2596 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2597 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float yFactor
2598 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2599{
2600 MagickBooleanType status = MagickFalse;
2601
2602 float scale, support;
2603 unsigned int i;
2604 cl_kernel horizontalKernel = NULL;
2605 cl_int clStatus;
2606 size_t global_work_size[2];
2607 size_t local_work_size[2];
2608 int resizeFilterType, resizeWindowType;
2609 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2610 size_t totalLocalMemorySize;
2611 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2612 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2613 size_t deviceLocalMemorySize;
2614 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2615
2616 const unsigned int workgroupSize = 256;
2617 unsigned int pixelPerWorkgroup;
2618 unsigned int chunkSize;
2619
2620 /*
2621 Apply filter to resize vertically from image to resize image.
2622 */
cristye85d0f72013-11-27 02:25:43 +00002623 scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
cristyf034abb2013-11-24 14:16:14 +00002624 support=scale*GetResizeFilterSupport(resizeFilter);
2625 if (support < 0.5)
2626 {
2627 /*
2628 Support too small even for nearest neighbour: Reduce to point
2629 sampling.
2630 */
2631 support=(MagickRealType) 0.5;
2632 scale=1.0;
2633 }
2634 scale=PerceptibleReciprocal(scale);
2635
2636 if (resizedRows < workgroupSize)
2637 {
2638 chunkSize = 32;
2639 pixelPerWorkgroup = 32;
2640 }
2641 else
2642 {
2643 chunkSize = workgroupSize;
2644 pixelPerWorkgroup = workgroupSize;
2645 }
2646
2647 /* get the local memory size supported by the device */
2648 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2649
dirke3c5f892013-12-10 06:04:40 +00002650DisableMSCWarning(4127)
cristyf034abb2013-11-24 14:16:14 +00002651 while(1)
dirke3c5f892013-12-10 06:04:40 +00002652RestoreMSCWarning
cristyf034abb2013-11-24 14:16:14 +00002653 {
2654 /* calculate the local memory size needed per workgroup */
2655 cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
2656 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
2657 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2658 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2659 totalLocalMemorySize = imageCacheLocalMemorySize;
2660
2661 /* local size for the pixel accumulator */
2662 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2663 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2664
2665 /* local memory size for the weight accumulator */
2666 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2667 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2668
2669 /* local memory size for the gamma accumulator */
2670 if (matte == 0)
2671 gammaAccumulatorLocalMemorySize = sizeof(float);
2672 else
2673 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2674 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2675
2676 if (totalLocalMemorySize <= deviceLocalMemorySize)
2677 break;
2678 else
2679 {
2680 pixelPerWorkgroup = pixelPerWorkgroup/2;
2681 chunkSize = chunkSize/2;
2682 if (pixelPerWorkgroup == 0
2683 || chunkSize == 0)
2684 {
2685 /* quit, fallback to CPU */
2686 goto cleanup;
2687 }
2688 }
2689 }
2690
2691 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2692 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2693
2694 if (resizeFilterType == SincFastWeightingFunction
2695 && resizeWindowType == SincFastWeightingFunction)
2696 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilterSinc");
2697 else
2698 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilter");
2699
2700 if (horizontalKernel == NULL)
2701 {
cristya22457d2013-12-07 14:03:06 +00002702 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002703 goto cleanup;
2704 }
2705
2706 i = 0;
2707 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2708 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2709 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2710 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2711 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor);
2712 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2713
2714 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2715 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2716
2717 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2718 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2719 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2720
2721 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2722 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2723
2724 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2725 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2726
2727 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2728 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2729
2730 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2731 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2732
2733
2734 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2735 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2736 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2737 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2738
2739
2740 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2741 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2742 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2743
2744 if (clStatus != CL_SUCCESS)
2745 {
cristya22457d2013-12-07 14:03:06 +00002746 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002747 goto cleanup;
2748 }
2749
2750 global_work_size[0] = resizedColumns;
2751 global_work_size[1] = (resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2752
2753 local_work_size[0] = 1;
2754 local_work_size[1] = workgroupSize;
2755 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2756 if (clStatus != CL_SUCCESS)
2757 {
cristya22457d2013-12-07 14:03:06 +00002758 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002759 goto cleanup;
2760 }
2761 clFlush(queue);
2762 status = MagickTrue;
2763
2764
2765cleanup:
cristya22457d2013-12-07 14:03:06 +00002766 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2767
cristyf034abb2013-11-24 14:16:14 +00002768 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2769
2770 return status;
2771}
2772
2773
2774
2775static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedColumns, const size_t resizedRows
2776 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
2777{
2778
2779 MagickBooleanType outputReady = MagickFalse;
2780 Image* filteredImage = NULL;
2781 MagickCLEnv clEnv = NULL;
2782
2783 cl_int clStatus;
2784 MagickBooleanType status;
2785 const void *inputPixels;
2786 void* filteredPixels;
2787 void* hostPtr;
2788 const MagickRealType* resizeFilterCoefficient;
2789 float* mappedCoefficientBuffer;
2790 float xFactor, yFactor;
2791 MagickSizeType length;
2792
2793 cl_mem_flags mem_flags;
2794 cl_context context = NULL;
2795 cl_mem inputImageBuffer = NULL;
2796 cl_mem tempImageBuffer = NULL;
2797 cl_mem filteredImageBuffer = NULL;
2798 cl_mem cubicCoefficientsBuffer = NULL;
2799 cl_command_queue queue = NULL;
2800
2801 unsigned int i;
2802
2803 clEnv = GetDefaultOpenCLEnv();
2804 context = GetOpenCLContext(clEnv);
2805
2806 /* Create and initialize OpenCL buffers. */
2807 inputPixels = NULL;
2808 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2809 if (inputPixels == (const void *) NULL)
2810 {
cristya22457d2013-12-07 14:03:06 +00002811 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00002812 goto cleanup;
2813 }
2814
2815 /* If the host pointer is aligned to the size of CLPixelPacket,
2816 then use the host buffer directly from the GPU; otherwise,
2817 create a buffer on the GPU and copy the data over */
2818 if (ALIGNED(inputPixels,CLPixelPacket))
2819 {
2820 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2821 }
2822 else
2823 {
2824 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2825 }
2826 /* create a CL buffer from image pixel buffer */
2827 length = inputImage->columns * inputImage->rows;
2828 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2829 if (clStatus != CL_SUCCESS)
2830 {
cristya22457d2013-12-07 14:03:06 +00002831 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002832 goto cleanup;
2833 }
2834
2835 cubicCoefficientsBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
2836 if (clStatus != CL_SUCCESS)
2837 {
cristya22457d2013-12-07 14:03:06 +00002838 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002839 goto cleanup;
2840 }
2841 queue = AcquireOpenCLCommandQueue(clEnv);
2842 mappedCoefficientBuffer = (float*)clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float)
2843 , 0, NULL, NULL, &clStatus);
2844 if (clStatus != CL_SUCCESS)
2845 {
cristya22457d2013-12-07 14:03:06 +00002846 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002847 goto cleanup;
2848 }
2849 resizeFilterCoefficient = GetResizeFilterCoefficient(resizeFilter);
2850 for (i = 0; i < 7; i++)
2851 {
2852 mappedCoefficientBuffer[i] = (float) resizeFilterCoefficient[i];
2853 }
2854 clStatus = clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL);
2855 if (clStatus != CL_SUCCESS)
2856 {
cristya22457d2013-12-07 14:03:06 +00002857 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002858 goto cleanup;
2859 }
2860
2861 filteredImage = CloneImage(inputImage,resizedColumns,resizedRows,MagickTrue,exception);
2862 if (filteredImage == NULL)
2863 goto cleanup;
2864
dirke19d0cc2013-12-01 10:07:42 +00002865 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00002866 {
cristya22457d2013-12-07 14:03:06 +00002867 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002868 goto cleanup;
2869 }
2870 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2871 if (filteredPixels == (void *) NULL)
2872 {
cristya22457d2013-12-07 14:03:06 +00002873 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00002874 goto cleanup;
2875 }
2876
2877 if (ALIGNED(filteredPixels,CLPixelPacket))
2878 {
2879 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2880 hostPtr = filteredPixels;
2881 }
2882 else
2883 {
2884 mem_flags = CL_MEM_WRITE_ONLY;
2885 hostPtr = NULL;
2886 }
2887
2888 /* create a CL buffer from image pixel buffer */
2889 length = filteredImage->columns * filteredImage->rows;
2890 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2891 if (clStatus != CL_SUCCESS)
2892 {
cristya22457d2013-12-07 14:03:06 +00002893 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002894 goto cleanup;
2895 }
2896
2897 xFactor=(float) resizedColumns/(float) inputImage->columns;
2898 yFactor=(float) resizedRows/(float) inputImage->rows;
2899 if (xFactor > yFactor)
2900 {
2901
2902 length = resizedColumns*inputImage->rows;
2903 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2904 if (clStatus != CL_SUCCESS)
2905 {
cristya22457d2013-12-07 14:03:06 +00002906 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002907 goto cleanup;
2908 }
2909
dirke19d0cc2013-12-01 10:07:42 +00002910 status = resizeHorizontalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002911 , tempImageBuffer, resizedColumns, inputImage->rows
2912 , resizeFilter, cubicCoefficientsBuffer
2913 , xFactor, clEnv, queue, exception);
2914 if (status != MagickTrue)
2915 goto cleanup;
2916
dirke19d0cc2013-12-01 10:07:42 +00002917 status = resizeVerticalFilter(tempImageBuffer, resizedColumns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002918 , filteredImageBuffer, resizedColumns, resizedRows
2919 , resizeFilter, cubicCoefficientsBuffer
2920 , yFactor, clEnv, queue, exception);
2921 if (status != MagickTrue)
2922 goto cleanup;
2923 }
2924 else
2925 {
2926 length = inputImage->columns*resizedRows;
2927 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2928 if (clStatus != CL_SUCCESS)
2929 {
cristya22457d2013-12-07 14:03:06 +00002930 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00002931 goto cleanup;
2932 }
2933
dirke19d0cc2013-12-01 10:07:42 +00002934 status = resizeVerticalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002935 , tempImageBuffer, inputImage->columns, resizedRows
2936 , resizeFilter, cubicCoefficientsBuffer
2937 , yFactor, clEnv, queue, exception);
2938 if (status != MagickTrue)
2939 goto cleanup;
2940
dirke19d0cc2013-12-01 10:07:42 +00002941 status = resizeHorizontalFilter(tempImageBuffer, inputImage->columns, resizedRows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002942 , filteredImageBuffer, resizedColumns, resizedRows
2943 , resizeFilter, cubicCoefficientsBuffer
2944 , xFactor, clEnv, queue, exception);
2945 if (status != MagickTrue)
2946 goto cleanup;
2947 }
2948 length = resizedColumns*resizedRows;
2949 if (ALIGNED(filteredPixels,CLPixelPacket))
2950 {
2951 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2952 }
2953 else
2954 {
2955 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2956 }
2957 if (clStatus != CL_SUCCESS)
2958 {
cristya22457d2013-12-07 14:03:06 +00002959 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00002960 goto cleanup;
2961 }
2962 outputReady = MagickTrue;
2963
2964cleanup:
cristya22457d2013-12-07 14:03:06 +00002965 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2966
cristyf034abb2013-11-24 14:16:14 +00002967 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2968 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2969 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2970 if (cubicCoefficientsBuffer!=NULL) clReleaseMemObject(cubicCoefficientsBuffer);
2971 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2972 if (outputReady == MagickFalse)
2973 {
2974 if (filteredImage != NULL)
2975 {
2976 DestroyImage(filteredImage);
2977 filteredImage = NULL;
2978 }
2979 }
2980
2981 return filteredImage;
2982}
2983
2984const ResizeWeightingFunctionType supportedResizeWeighting[] =
2985{
2986 BoxWeightingFunction
2987 ,TriangleWeightingFunction
2988 ,HanningWeightingFunction
2989 ,HammingWeightingFunction
2990 ,BlackmanWeightingFunction
2991 ,CubicBCWeightingFunction
2992 ,SincWeightingFunction
2993 ,SincFastWeightingFunction
2994 ,LastWeightingFunction
2995};
2996
2997static MagickBooleanType gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)
2998{
2999 MagickBooleanType supported = MagickFalse;
3000 unsigned int i;
3001 for (i = 0; ;i++)
3002 {
3003 if (supportedResizeWeighting[i] == LastWeightingFunction)
3004 break;
3005 if (supportedResizeWeighting[i] == f)
3006 {
3007 supported = MagickTrue;
3008 break;
3009 }
3010 }
3011 return supported;
3012}
3013
3014
3015/*
3016%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3017% %
3018% %
3019% %
3020% A c c e l e r a t e R e s i z e I m a g e %
3021% %
3022% %
3023% %
3024%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3025%
3026% AccelerateResizeImage() is an OpenCL implementation of ResizeImage()
3027%
3028% AccelerateResizeImage() scales an image to the desired dimensions, using the given
3029% filter (see AcquireFilterInfo()).
3030%
3031% If an undefined filter is given the filter defaults to Mitchell for a
3032% colormapped image, a image with a matte channel, or if the image is
3033% enlarged. Otherwise the filter defaults to a Lanczos.
3034%
3035% AccelerateResizeImage() was inspired by Paul Heckbert's "zoom" program.
3036%
3037% The format of the AccelerateResizeImage method is:
3038%
3039% Image *ResizeImage(Image *image,const size_t columns,
3040% const size_t rows, const ResizeFilter* filter,
cristy3f6d1482010-01-20 21:01:21 +00003041% ExceptionInfo *exception)
3042%
3043% A description of each parameter follows:
3044%
3045% o image: the image.
3046%
cristyf034abb2013-11-24 14:16:14 +00003047% o columns: the number of columns in the scaled image.
cristy3f6d1482010-01-20 21:01:21 +00003048%
cristyf034abb2013-11-24 14:16:14 +00003049% o rows: the number of rows in the scaled image.
3050%
3051% o filter: Image filter to use.
cristy3f6d1482010-01-20 21:01:21 +00003052%
3053% o exception: return any errors or warnings in this structure.
3054%
3055*/
cristyd43a46b2010-01-21 02:13:41 +00003056
cristyf034abb2013-11-24 14:16:14 +00003057MagickExport
3058Image* AccelerateResizeImage(const Image* image, const size_t resizedColumns, const size_t resizedRows
3059 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
cristyd43a46b2010-01-21 02:13:41 +00003060{
cristyf034abb2013-11-24 14:16:14 +00003061 MagickBooleanType status;
3062 Image* filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003063
cristyf034abb2013-11-24 14:16:14 +00003064 assert(image != NULL);
3065 assert(resizeFilter != NULL);
cristyd43a46b2010-01-21 02:13:41 +00003066
cristyf034abb2013-11-24 14:16:14 +00003067 status = checkOpenCLEnvironment(exception);
3068 if (status == MagickFalse)
3069 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003070
dirk5dcb7622013-12-01 10:43:43 +00003071 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00003072 if (status == MagickFalse)
3073 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003074
cristyf034abb2013-11-24 14:16:14 +00003075 if (gpuSupportedResizeWeighting(GetResizeFilterWeightingType(resizeFilter)) == MagickFalse
3076 || gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(resizeFilter)) == MagickFalse)
3077 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003078
cristyf034abb2013-11-24 14:16:14 +00003079 filteredImage = ComputeResizeImage(image,resizedColumns,resizedRows,resizeFilter,exception);
cristyf034abb2013-11-24 14:16:14 +00003080 return filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003081
cristyd43a46b2010-01-21 02:13:41 +00003082}
3083
cristyd43a46b2010-01-21 02:13:41 +00003084
cristyf034abb2013-11-24 14:16:14 +00003085static MagickBooleanType ComputeContrastImage(Image *inputImage, const MagickBooleanType sharpen, ExceptionInfo *exception)
3086{
3087 MagickBooleanType outputReady = MagickFalse;
3088 MagickCLEnv clEnv = NULL;
3089
3090 cl_int clStatus;
3091 size_t global_work_size[2];
3092
3093 void *inputPixels = NULL;
3094 MagickSizeType length;
3095 unsigned int uSharpen;
3096 unsigned int i;
3097
3098 cl_mem_flags mem_flags;
3099 cl_context context = NULL;
3100 cl_mem inputImageBuffer = NULL;
3101 cl_kernel filterKernel = NULL;
3102 cl_command_queue queue = NULL;
3103
3104 clEnv = GetDefaultOpenCLEnv();
3105 context = GetOpenCLContext(clEnv);
3106
3107 /* Create and initialize OpenCL buffers. */
3108 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3109 if (inputPixels == (void *) NULL)
3110 {
cristya22457d2013-12-07 14:03:06 +00003111 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00003112 goto cleanup;
3113 }
3114
3115 /* If the host pointer is aligned to the size of CLPixelPacket,
3116 then use the host buffer directly from the GPU; otherwise,
3117 create a buffer on the GPU and copy the data over */
3118 if (ALIGNED(inputPixels,CLPixelPacket))
3119 {
3120 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3121 }
3122 else
3123 {
3124 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3125 }
3126 /* create a CL buffer from image pixel buffer */
3127 length = inputImage->columns * inputImage->rows;
3128 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3129 if (clStatus != CL_SUCCESS)
3130 {
cristya22457d2013-12-07 14:03:06 +00003131 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003132 goto cleanup;
3133 }
3134
3135 filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
3136 if (filterKernel == NULL)
3137 {
cristya22457d2013-12-07 14:03:06 +00003138 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003139 goto cleanup;
3140 }
3141
3142 i = 0;
3143 clStatus=clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3144
3145 uSharpen = (sharpen == MagickFalse)?0:1;
3146 clStatus|=clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
3147 if (clStatus != CL_SUCCESS)
3148 {
cristya22457d2013-12-07 14:03:06 +00003149 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003150 goto cleanup;
3151 }
3152
3153 global_work_size[0] = inputImage->columns;
3154 global_work_size[1] = inputImage->rows;
3155 /* launch the kernel */
3156 queue = AcquireOpenCLCommandQueue(clEnv);
3157 clStatus = clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3158 if (clStatus != CL_SUCCESS)
3159 {
cristya22457d2013-12-07 14:03:06 +00003160 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003161 goto cleanup;
3162 }
3163 clFlush(queue);
3164
3165 if (ALIGNED(inputPixels,CLPixelPacket))
3166 {
3167 length = inputImage->columns * inputImage->rows;
3168 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3169 }
3170 else
3171 {
3172 length = inputImage->columns * inputImage->rows;
3173 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3174 }
3175 if (clStatus != CL_SUCCESS)
3176 {
cristya22457d2013-12-07 14:03:06 +00003177 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003178 goto cleanup;
3179 }
3180 outputReady = MagickTrue;
3181
3182cleanup:
cristya22457d2013-12-07 14:03:06 +00003183 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +00003184
3185 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
3186 if (filterKernel!=NULL) RelinquishOpenCLKernel(clEnv, filterKernel);
3187 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
3188 return outputReady;
3189}
3190
3191/*
3192%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3193% %
3194% %
3195% %
3196% C o n t r a s t I m a g e w i t h O p e n C L %
3197% %
3198% %
3199% %
3200%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3201%
3202% ContrastImage() enhances the intensity differences between the lighter and
3203% darker elements of the image. Set sharpen to a MagickTrue to increase the
3204% image contrast otherwise the contrast is reduced.
3205%
3206% The format of the ContrastImage method is:
3207%
3208% MagickBooleanType ContrastImage(Image *image,
3209% const MagickBooleanType sharpen)
3210%
3211% A description of each parameter follows:
3212%
3213% o image: the image.
3214%
3215% o sharpen: Increase or decrease image contrast.
3216%
3217*/
3218
3219MagickExport
3220MagickBooleanType AccelerateContrastImage(Image* image, const MagickBooleanType sharpen, ExceptionInfo* exception)
3221{
3222 MagickBooleanType status;
3223
3224 assert(image != NULL);
3225 assert(exception != NULL);
3226
3227 status = checkOpenCLEnvironment(exception);
3228 if (status == MagickFalse)
3229 return MagickFalse;
3230
dirk5dcb7622013-12-01 10:43:43 +00003231 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00003232 if (status == MagickFalse)
3233 return MagickFalse;
3234
3235 status = ComputeContrastImage(image,sharpen,exception);
cristyf034abb2013-11-24 14:16:14 +00003236 return status;
3237}
3238
3239
3240
3241MagickBooleanType ComputeModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3242{
3243 register ssize_t
cristyd43a46b2010-01-21 02:13:41 +00003244 i;
3245
cristyf034abb2013-11-24 14:16:14 +00003246 cl_float
3247 bright,
3248 hue,
3249 saturation;
3250
3251 cl_int color;
3252
3253 MagickBooleanType outputReady;
3254
3255 MagickCLEnv clEnv;
3256
3257 void *inputPixels;
3258
3259 MagickSizeType length;
3260
3261 cl_context context;
3262 cl_command_queue queue;
3263 cl_kernel modulateKernel;
3264
3265 cl_mem inputImageBuffer;
3266 cl_mem_flags mem_flags;
3267
3268 cl_int clStatus;
3269
3270 Image * inputImage = image;
3271
3272 inputImageBuffer = NULL;
3273 modulateKernel = NULL;
3274
3275 assert(inputImage != (Image *) NULL);
3276 assert(inputImage->signature == MagickSignature);
3277 if (inputImage->debug != MagickFalse)
3278 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
cristyd43a46b2010-01-21 02:13:41 +00003279
3280 /*
cristyf034abb2013-11-24 14:16:14 +00003281 * initialize opencl env
3282 */
3283 clEnv = GetDefaultOpenCLEnv();
3284 context = GetOpenCLContext(clEnv);
3285 queue = AcquireOpenCLCommandQueue(clEnv);
cristyd43a46b2010-01-21 02:13:41 +00003286
cristyf034abb2013-11-24 14:16:14 +00003287 outputReady = MagickFalse;
cristyd43a46b2010-01-21 02:13:41 +00003288
cristyf034abb2013-11-24 14:16:14 +00003289 /* Create and initialize OpenCL buffers.
3290 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3291 assume this will get a writable image
3292 */
3293 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3294 if (inputPixels == (void *) NULL)
cristyd43a46b2010-01-21 02:13:41 +00003295 {
cristya22457d2013-12-07 14:03:06 +00003296 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00003297 goto cleanup;
cristyd43a46b2010-01-21 02:13:41 +00003298 }
cristyf034abb2013-11-24 14:16:14 +00003299
3300 /* If the host pointer is aligned to the size of CLPixelPacket,
3301 then use the host buffer directly from the GPU; otherwise,
3302 create a buffer on the GPU and copy the data over
3303 */
3304 if (ALIGNED(inputPixels,CLPixelPacket))
3305 {
3306 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3307 }
3308 else
3309 {
3310 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3311 }
3312 /* create a CL buffer from image pixel buffer */
3313 length = inputImage->columns * inputImage->rows;
3314 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3315 if (clStatus != CL_SUCCESS)
3316 {
cristya22457d2013-12-07 14:03:06 +00003317 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003318 goto cleanup;
3319 }
3320
3321 modulateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Modulate");
3322 if (modulateKernel == NULL)
3323 {
cristya22457d2013-12-07 14:03:06 +00003324 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003325 goto cleanup;
3326 }
3327
3328 bright=percent_brightness;
3329 hue=percent_hue;
3330 saturation=percent_saturation;
3331 color=colorspace;
3332
3333 i = 0;
3334 clStatus=clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3335 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3336 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3337 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3338 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3339 if (clStatus != CL_SUCCESS)
3340 {
cristya22457d2013-12-07 14:03:06 +00003341 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003342 printf("no kernel\n");
3343 goto cleanup;
3344 }
3345
3346 {
3347 size_t global_work_size[2];
3348 global_work_size[0] = inputImage->columns;
3349 global_work_size[1] = inputImage->rows;
3350 /* launch the kernel */
3351 clStatus = clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3352 if (clStatus != CL_SUCCESS)
3353 {
cristya22457d2013-12-07 14:03:06 +00003354 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003355 goto cleanup;
3356 }
3357 clFlush(queue);
3358 }
3359
3360 if (ALIGNED(inputPixels,CLPixelPacket))
3361 {
3362 length = inputImage->columns * inputImage->rows;
3363 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3364 }
3365 else
3366 {
3367 length = inputImage->columns * inputImage->rows;
3368 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3369 }
3370 if (clStatus != CL_SUCCESS)
3371 {
cristya22457d2013-12-07 14:03:06 +00003372 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003373 goto cleanup;
3374 }
3375
3376 outputReady = MagickTrue;
3377
3378cleanup:
cristya22457d2013-12-07 14:03:06 +00003379 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +00003380
3381 if (inputPixels) {
3382 //ReleasePixelCachePixels();
3383 inputPixels = NULL;
3384 }
3385
3386 if (inputImageBuffer!=NULL)
3387 clReleaseMemObject(inputImageBuffer);
3388 if (modulateKernel!=NULL)
3389 RelinquishOpenCLKernel(clEnv, modulateKernel);
3390 if (queue != NULL)
3391 RelinquishOpenCLCommandQueue(clEnv, queue);
3392
3393 return outputReady;
3394
cristy3f6d1482010-01-20 21:01:21 +00003395}
cristyf034abb2013-11-24 14:16:14 +00003396
3397/*
3398%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3399% %
3400% %
3401% %
3402% M o d u l a t e I m a g e w i t h O p e n C L %
3403% %
3404% %
3405% %
3406%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3407%
3408% ModulateImage() lets you control the brightness, saturation, and hue
3409% of an image. Modulate represents the brightness, saturation, and hue
3410% as one parameter (e.g. 90,150,100). If the image colorspace is HSL, the
3411% modulation is lightness, saturation, and hue. For HWB, use blackness,
3412% whiteness, and hue. And for HCL, use chrome, luma, and hue.
3413%
3414% The format of the ModulateImage method is:
3415%
3416% MagickBooleanType ModulateImage(Image *image,const char *modulate)
3417%
3418% A description of each parameter follows:
3419%
3420% o image: the image.
3421%
3422% o percent_*: Define the percent change in brightness, saturation, and
3423% hue.
3424%
3425*/
3426
3427MagickExport
3428MagickBooleanType AccelerateModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3429{
3430 MagickBooleanType status;
3431
3432 assert(image != NULL);
3433 assert(exception != NULL);
3434
3435 status = checkOpenCLEnvironment(exception);
3436 if (status == MagickFalse)
3437 return MagickFalse;
3438
dirk5dcb7622013-12-01 10:43:43 +00003439 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00003440 if (status == MagickFalse)
3441 return MagickFalse;
3442
3443 if ((colorspace != HSLColorspace && colorspace != UndefinedColorspace))
3444 return MagickFalse;
3445
3446
3447 status = ComputeModulateImage(image,percent_brightness, percent_hue, percent_saturation, colorspace, exception);
cristyf034abb2013-11-24 14:16:14 +00003448 return status;
3449}
3450
3451
3452MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const ChannelType channel, ExceptionInfo * _exception)
3453{
3454#define EqualizeImageTag "Equalize/Image"
3455
3456 ExceptionInfo
3457 *exception=_exception;
3458
3459 FloatPixelPacket
3460 white,
3461 black,
3462 intensity,
3463 *map;
3464
3465 cl_uint4
3466 *histogram;
3467
3468 PixelPacket
3469 *equalize_map;
3470
3471 register ssize_t
3472 i;
3473
3474 Image * image = inputImage;
3475
3476 MagickBooleanType outputReady;
3477 MagickCLEnv clEnv;
3478
3479 cl_int clStatus;
3480 size_t global_work_size[2];
3481
3482 void *inputPixels;
3483 cl_mem_flags mem_flags;
3484
3485 cl_context context;
3486 cl_mem inputImageBuffer;
3487 cl_mem histogramBuffer;
3488 cl_mem equalizeMapBuffer;
3489 cl_kernel histogramKernel;
3490 cl_kernel equalizeKernel;
3491 cl_command_queue queue;
3492 cl_int colorspace;
3493
3494 void* hostPtr;
3495
3496 MagickSizeType length;
3497
3498 inputPixels = NULL;
3499 inputImageBuffer = NULL;
3500 histogramBuffer = NULL;
3501 histogramKernel = NULL;
3502 equalizeKernel = NULL;
3503 context = NULL;
3504 queue = NULL;
3505 outputReady = MagickFalse;
3506
3507 assert(inputImage != (Image *) NULL);
3508 assert(inputImage->signature == MagickSignature);
3509 if (inputImage->debug != MagickFalse)
3510 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
3511
3512 /*
3513 Allocate and initialize histogram arrays.
3514 */
3515 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
3516 if (histogram == (cl_uint4 *) NULL)
3517 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3518
3519 /* reset histogram */
3520 (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
3521
3522 /*
3523 * initialize opencl env
3524 */
3525 clEnv = GetDefaultOpenCLEnv();
3526 context = GetOpenCLContext(clEnv);
3527 queue = AcquireOpenCLCommandQueue(clEnv);
3528
3529 /* Create and initialize OpenCL buffers. */
3530 /* inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); */
3531 /* assume this will get a writable image */
3532 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3533
3534 if (inputPixels == (void *) NULL)
3535 {
cristya22457d2013-12-07 14:03:06 +00003536 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00003537 goto cleanup;
3538 }
3539 /* If the host pointer is aligned to the size of CLPixelPacket,
3540 then use the host buffer directly from the GPU; otherwise,
3541 create a buffer on the GPU and copy the data over */
3542 if (ALIGNED(inputPixels,CLPixelPacket))
3543 {
3544 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3545 }
3546 else
3547 {
3548 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3549 }
3550 /* create a CL buffer from image pixel buffer */
3551 length = inputImage->columns * inputImage->rows;
3552 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3553 if (clStatus != CL_SUCCESS)
3554 {
cristya22457d2013-12-07 14:03:06 +00003555 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003556 goto cleanup;
3557 }
3558
3559 /* If the host pointer is aligned to the size of cl_uint,
3560 then use the host buffer directly from the GPU; otherwise,
3561 create a buffer on the GPU and copy the data over */
3562 if (ALIGNED(histogram,cl_uint4))
3563 {
3564 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3565 hostPtr = histogram;
3566 }
3567 else
3568 {
3569 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3570 hostPtr = histogram;
3571 }
3572 /* create a CL buffer for histogram */
3573 length = (MaxMap+1);
3574 histogramBuffer = clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
3575 if (clStatus != CL_SUCCESS)
3576 {
cristya22457d2013-12-07 14:03:06 +00003577 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003578 goto cleanup;
3579 }
3580
3581 switch (inputImage->colorspace)
3582 {
3583 case RGBColorspace:
3584 colorspace = 1;
3585 break;
3586 case sRGBColorspace:
3587 colorspace = 0;
3588 break;
3589 default:
3590 {
3591 /* something is wrong, as we checked in checkAccelerateCondition */
3592 }
3593 }
3594
3595 /* get the OpenCL kernel */
3596 histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
3597 if (histogramKernel == NULL)
3598 {
cristya22457d2013-12-07 14:03:06 +00003599 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003600 goto cleanup;
3601 }
3602
3603 /* set the kernel arguments */
3604 i = 0;
3605 clStatus=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3606 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
3607 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace);
3608 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
3609 if (clStatus != CL_SUCCESS)
3610 {
cristya22457d2013-12-07 14:03:06 +00003611 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003612 goto cleanup;
3613 }
3614
3615 /* launch the kernel */
3616 global_work_size[0] = inputImage->columns;
3617 global_work_size[1] = inputImage->rows;
3618
3619 clStatus = clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3620
3621 if (clStatus != CL_SUCCESS)
3622 {
cristya22457d2013-12-07 14:03:06 +00003623 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003624 goto cleanup;
3625 }
3626 clFlush(queue);
3627
3628 /* read from the kenel output */
3629 if (ALIGNED(histogram,cl_uint4))
3630 {
3631 length = (MaxMap+1);
3632 clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
3633 }
3634 else
3635 {
3636 length = (MaxMap+1);
3637 clStatus = clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
3638 }
3639 if (clStatus != CL_SUCCESS)
3640 {
cristya22457d2013-12-07 14:03:06 +00003641 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003642 goto cleanup;
3643 }
3644
3645 /* unmap, don't block gpu to use this buffer again. */
3646 if (ALIGNED(histogram,cl_uint4))
3647 {
3648 clStatus = clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
3649 if (clStatus != CL_SUCCESS)
3650 {
cristya22457d2013-12-07 14:03:06 +00003651 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003652 goto cleanup;
3653 }
3654 }
3655
3656 if (getenv("TEST")) {
3657 unsigned int i;
3658 for (i=0; i<(MaxMap+1UL); i++)
3659 {
3660 printf("histogram %d: red %d\n", i, histogram[i].s[2]);
3661 printf("histogram %d: green %d\n", i, histogram[i].s[1]);
3662 printf("histogram %d: blue %d\n", i, histogram[i].s[0]);
cristya22457d2013-12-07 14:03:06 +00003663 printf("histogram %d: alpha %d\n", i, histogram[i].s[3]);
cristyf034abb2013-11-24 14:16:14 +00003664 }
3665 }
3666
3667 /* cpu stuff */
3668 equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
3669 if (equalize_map == (PixelPacket *) NULL)
3670 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3671
3672 map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
3673 if (map == (FloatPixelPacket *) NULL)
3674 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3675
3676 /*
3677 Integrate the histogram to get the equalization map.
3678 */
3679 (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
3680 for (i=0; i <= (ssize_t) MaxMap; i++)
3681 {
3682 if ((channel & SyncChannels) != 0)
3683 {
3684 intensity.red+=histogram[i].s[2];
3685 map[i]=intensity;
3686 continue;
3687 }
3688 if ((channel & RedChannel) != 0)
3689 intensity.red+=histogram[i].s[2];
3690 if ((channel & GreenChannel) != 0)
3691 intensity.green+=histogram[i].s[1];
3692 if ((channel & BlueChannel) != 0)
3693 intensity.blue+=histogram[i].s[0];
3694 if ((channel & OpacityChannel) != 0)
dirke19d0cc2013-12-01 10:07:42 +00003695 intensity.alpha+=histogram[i].s[3];
cristyf034abb2013-11-24 14:16:14 +00003696 if (((channel & IndexChannel) != 0) &&
3697 (image->colorspace == CMYKColorspace))
3698 {
3699 printf("something here\n");
3700 /*intensity.index+=histogram[i].index; */
3701 }
3702 map[i]=intensity;
3703 }
3704 black=map[0];
3705 white=map[(int) MaxMap];
3706 (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
3707 for (i=0; i <= (ssize_t) MaxMap; i++)
3708 {
3709 if ((channel & SyncChannels) != 0)
3710 {
3711 if (white.red != black.red)
3712 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3713 (map[i].red-black.red))/(white.red-black.red)));
3714 continue;
3715 }
3716 if (((channel & RedChannel) != 0) && (white.red != black.red))
3717 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3718 (map[i].red-black.red))/(white.red-black.red)));
3719 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3720 equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3721 (map[i].green-black.green))/(white.green-black.green)));
3722 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3723 equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3724 (map[i].blue-black.blue))/(white.blue-black.blue)));
dirke19d0cc2013-12-01 10:07:42 +00003725 if (((channel & OpacityChannel) != 0) && (white.alpha != black.alpha))
3726 equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3727 (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
cristyf034abb2013-11-24 14:16:14 +00003728 /*
3729 if ((((channel & IndexChannel) != 0) &&
3730 (image->colorspace == CMYKColorspace)) &&
3731 (white.index != black.index))
3732 equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3733 (map[i].index-black.index))/(white.index-black.index)));
3734 */
3735 }
3736
3737 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
3738 map=(FloatPixelPacket *) RelinquishMagickMemory(map);
3739
3740 if (image->storage_class == PseudoClass)
3741 {
3742 /*
3743 Equalize colormap.
3744 */
3745 for (i=0; i < (ssize_t) image->colors; i++)
3746 {
3747 if ((channel & SyncChannels) != 0)
3748 {
3749 if (white.red != black.red)
3750 {
3751 image->colormap[i].red=equalize_map[
3752 ScaleQuantumToMap(image->colormap[i].red)].red;
3753 image->colormap[i].green=equalize_map[
3754 ScaleQuantumToMap(image->colormap[i].green)].red;
3755 image->colormap[i].blue=equalize_map[
3756 ScaleQuantumToMap(image->colormap[i].blue)].red;
dirke19d0cc2013-12-01 10:07:42 +00003757 image->colormap[i].alpha=equalize_map[
3758 ScaleQuantumToMap(image->colormap[i].alpha)].red;
cristyf034abb2013-11-24 14:16:14 +00003759 }
3760 continue;
3761 }
3762 if (((channel & RedChannel) != 0) && (white.red != black.red))
3763 image->colormap[i].red=equalize_map[
3764 ScaleQuantumToMap(image->colormap[i].red)].red;
3765 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3766 image->colormap[i].green=equalize_map[
3767 ScaleQuantumToMap(image->colormap[i].green)].green;
3768 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3769 image->colormap[i].blue=equalize_map[
3770 ScaleQuantumToMap(image->colormap[i].blue)].blue;
3771 if (((channel & OpacityChannel) != 0) &&
dirke19d0cc2013-12-01 10:07:42 +00003772 (white.alpha != black.alpha))
3773 image->colormap[i].alpha=equalize_map[
3774 ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
cristyf034abb2013-11-24 14:16:14 +00003775 }
3776 }
3777
3778 /*
3779 Equalize image.
3780 */
3781
3782 /* GPU can work on this again, image and equalize map as input
3783 image: uchar4 (CLPixelPacket)
3784 equalize_map: uchar4 (PixelPacket)
3785 black, white: float4 (FloatPixelPacket) */
3786
3787 if (inputImageBuffer!=NULL)
3788 clReleaseMemObject(inputImageBuffer);
3789
3790 /* If the host pointer is aligned to the size of CLPixelPacket,
3791 then use the host buffer directly from the GPU; otherwise,
3792 create a buffer on the GPU and copy the data over */
3793 if (ALIGNED(inputPixels,CLPixelPacket))
3794 {
3795 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3796 }
3797 else
3798 {
3799 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3800 }
3801 /* create a CL buffer from image pixel buffer */
3802 length = inputImage->columns * inputImage->rows;
3803 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3804 if (clStatus != CL_SUCCESS)
3805 {
cristya22457d2013-12-07 14:03:06 +00003806 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003807 goto cleanup;
3808 }
3809
3810 /* Create and initialize OpenCL buffers. */
3811 if (ALIGNED(equalize_map, PixelPacket))
3812 {
3813 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3814 hostPtr = equalize_map;
3815 }
3816 else
3817 {
3818 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3819 hostPtr = equalize_map;
3820 }
3821 /* create a CL buffer for eqaulize_map */
3822 length = (MaxMap+1);
3823 equalizeMapBuffer = clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
3824 if (clStatus != CL_SUCCESS)
3825 {
cristya22457d2013-12-07 14:03:06 +00003826 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00003827 goto cleanup;
3828 }
3829
3830 /* get the OpenCL kernel */
3831 equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize");
3832 if (equalizeKernel == NULL)
3833 {
cristya22457d2013-12-07 14:03:06 +00003834 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003835 goto cleanup;
3836 }
3837
3838 /* set the kernel arguments */
3839 i = 0;
3840 clStatus=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3841 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
3842 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
3843 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
3844 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
3845 if (clStatus != CL_SUCCESS)
3846 {
cristya22457d2013-12-07 14:03:06 +00003847 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003848 goto cleanup;
3849 }
3850
3851 /* launch the kernel */
3852 global_work_size[0] = inputImage->columns;
3853 global_work_size[1] = inputImage->rows;
3854
3855 clStatus = clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3856
3857 if (clStatus != CL_SUCCESS)
3858 {
cristya22457d2013-12-07 14:03:06 +00003859 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003860 goto cleanup;
3861 }
3862 clFlush(queue);
3863
3864 /* read the data back */
3865 if (ALIGNED(inputPixels,CLPixelPacket))
3866 {
3867 length = inputImage->columns * inputImage->rows;
3868 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3869 }
3870 else
3871 {
3872 length = inputImage->columns * inputImage->rows;
3873 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3874 }
3875 if (clStatus != CL_SUCCESS)
3876 {
cristya22457d2013-12-07 14:03:06 +00003877 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00003878 goto cleanup;
3879 }
3880
3881 outputReady = MagickTrue;
3882
3883 equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
3884
3885cleanup:
cristya22457d2013-12-07 14:03:06 +00003886 OpenCLLogException(__FUNCTION__,__LINE__,exception);
cristyf034abb2013-11-24 14:16:14 +00003887
3888 if (inputPixels) {
3889 /*ReleasePixelCachePixels();*/
3890 inputPixels = NULL;
3891 }
3892
3893 if (inputImageBuffer!=NULL)
3894 clReleaseMemObject(inputImageBuffer);
3895 if (histogramBuffer!=NULL)
3896 clReleaseMemObject(histogramBuffer);
3897 if (histogramKernel!=NULL)
3898 RelinquishOpenCLKernel(clEnv, histogramKernel);
3899 if (queue != NULL)
3900 RelinquishOpenCLCommandQueue(clEnv, queue);
3901
3902 return outputReady;
3903}
3904
3905/*
3906%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3907% %
3908% %
3909% %
3910% E q u a l i z e I m a g e w i t h O p e n C L %
3911% %
3912% %
3913% %
3914%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3915%
3916% EqualizeImage() applies a histogram equalization to the image.
3917%
3918% The format of the EqualizeImage method is:
3919%
3920% MagickBooleanType EqualizeImage(Image *image)
3921% MagickBooleanType EqualizeImageChannel(Image *image,
3922% const ChannelType channel)
3923%
3924% A description of each parameter follows:
3925%
3926% o image: the image.
3927%
3928% o channel: the channel.
3929%
3930*/
3931
3932
3933MagickExport
3934MagickBooleanType AccelerateEqualizeImage(Image* image, const ChannelType channel, ExceptionInfo* exception)
3935{
3936 MagickBooleanType status;
3937
3938 assert(image != NULL);
3939 assert(exception != NULL);
3940
3941 status = checkOpenCLEnvironment(exception);
3942 if (status == MagickFalse)
3943 return MagickFalse;
3944
dirk5dcb7622013-12-01 10:43:43 +00003945 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00003946 if (status == MagickFalse)
3947 return MagickFalse;
3948
3949 /* ensure this is the only pass get in for now. */
3950 if ((channel & SyncChannels) == 0)
3951 return MagickFalse;
3952
3953 if (image->colorspace != sRGBColorspace)
3954 return MagickFalse;
3955
3956 status = ComputeEqualizeImage(image,channel,exception);
cristyf034abb2013-11-24 14:16:14 +00003957 return status;
3958}
3959
3960
3961static Image* ComputeDespeckleImage(const Image* inputImage, ExceptionInfo* exception)
3962{
3963
3964 MagickBooleanType outputReady = MagickFalse;
3965 MagickCLEnv clEnv = NULL;
3966
3967 cl_int clStatus;
3968 size_t global_work_size[2];
3969
3970 const void *inputPixels = NULL;
3971 Image* filteredImage = NULL;
3972 void *filteredPixels = NULL;
3973 void *hostPtr;
3974 MagickSizeType length;
3975
3976 cl_mem_flags mem_flags;
3977 cl_context context = NULL;
3978 cl_mem inputImageBuffer = NULL;
3979 cl_mem tempImageBuffer[2];
3980 cl_mem filteredImageBuffer = NULL;
3981 cl_command_queue queue = NULL;
3982 cl_kernel hullPass1 = NULL;
3983 cl_kernel hullPass2 = NULL;
3984
3985 unsigned int imageWidth, imageHeight;
3986 int matte;
3987 int k;
3988
3989 static const int
3990 X[4] = {0, 1, 1,-1},
3991 Y[4] = {1, 0, 1, 1};
3992
3993 tempImageBuffer[0] = tempImageBuffer[1] = NULL;
3994 clEnv = GetDefaultOpenCLEnv();
3995 context = GetOpenCLContext(clEnv);
3996 queue = AcquireOpenCLCommandQueue(clEnv);
3997
3998 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3999 if (inputPixels == (void *) NULL)
4000 {
cristya22457d2013-12-07 14:03:06 +00004001 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristyf034abb2013-11-24 14:16:14 +00004002 goto cleanup;
4003 }
4004
4005 if (ALIGNED(inputPixels,CLPixelPacket))
4006 {
4007 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4008 }
4009 else
4010 {
4011 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4012 }
4013 /* create a CL buffer from image pixel buffer */
4014 length = inputImage->columns * inputImage->rows;
4015 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4016 if (clStatus != CL_SUCCESS)
4017 {
cristya22457d2013-12-07 14:03:06 +00004018 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00004019 goto cleanup;
4020 }
4021
4022 mem_flags = CL_MEM_READ_WRITE;
4023 length = inputImage->columns * inputImage->rows;
4024 for (k = 0; k < 2; k++)
4025 {
4026 tempImageBuffer[k] = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
4027 if (clStatus != CL_SUCCESS)
4028 {
cristya22457d2013-12-07 14:03:06 +00004029 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00004030 goto cleanup;
4031 }
4032 }
4033
4034 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4035 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00004036 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00004037 {
cristya22457d2013-12-07 14:03:06 +00004038 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004039 goto cleanup;
4040 }
4041 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4042 if (filteredPixels == (void *) NULL)
4043 {
cristya22457d2013-12-07 14:03:06 +00004044 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristyf034abb2013-11-24 14:16:14 +00004045 goto cleanup;
4046 }
4047
4048 if (ALIGNED(filteredPixels,CLPixelPacket))
4049 {
4050 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4051 hostPtr = filteredPixels;
4052 }
4053 else
4054 {
4055 mem_flags = CL_MEM_WRITE_ONLY;
4056 hostPtr = NULL;
4057 }
4058 /* create a CL buffer from image pixel buffer */
4059 length = inputImage->columns * inputImage->rows;
4060 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4061 if (clStatus != CL_SUCCESS)
4062 {
cristya22457d2013-12-07 14:03:06 +00004063 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristyf034abb2013-11-24 14:16:14 +00004064 goto cleanup;
4065 }
4066
4067 hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
4068 hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
4069
4070 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&inputImageBuffer);
4071 clStatus |=clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4072 imageWidth = inputImage->columns;
4073 clStatus |=clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
4074 imageHeight = inputImage->rows;
4075 clStatus |=clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
cristya22457d2013-12-07 14:03:06 +00004076 matte = (inputImage->matte==MagickFalse)?0:1;
cristyf034abb2013-11-24 14:16:14 +00004077 clStatus |=clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
4078 if (clStatus != CL_SUCCESS)
4079 {
cristya22457d2013-12-07 14:03:06 +00004080 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004081 goto cleanup;
4082 }
4083
4084 clStatus = clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4085 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
4086 imageWidth = inputImage->columns;
4087 clStatus |=clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
4088 imageHeight = inputImage->rows;
4089 clStatus |=clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
dirke19d0cc2013-12-01 10:07:42 +00004090 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +00004091 clStatus |=clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
4092 if (clStatus != CL_SUCCESS)
4093 {
cristya22457d2013-12-07 14:03:06 +00004094 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004095 goto cleanup;
4096 }
4097
4098
4099 global_work_size[0] = inputImage->columns;
4100 global_work_size[1] = inputImage->rows;
4101
4102
4103 for (k = 0; k < 4; k++)
4104 {
4105 cl_int2 offset;
4106 int polarity;
4107
4108
4109 offset.s[0] = X[k];
4110 offset.s[1] = Y[k];
4111 polarity = 1;
4112 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4113 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4114 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4115 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4116 if (clStatus != CL_SUCCESS)
4117 {
cristya22457d2013-12-07 14:03:06 +00004118 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004119 goto cleanup;
4120 }
4121 /* launch the kernel */
4122 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4123 if (clStatus != CL_SUCCESS)
4124 {
cristya22457d2013-12-07 14:03:06 +00004125 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004126 goto cleanup;
4127 }
4128 /* launch the kernel */
4129 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4130 if (clStatus != CL_SUCCESS)
4131 {
cristya22457d2013-12-07 14:03:06 +00004132 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004133 goto cleanup;
4134 }
4135
4136
4137 if (k == 0)
4138 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
4139 offset.s[0] = -X[k];
4140 offset.s[1] = -Y[k];
4141 polarity = 1;
4142 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4143 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4144 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4145 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4146 if (clStatus != CL_SUCCESS)
4147 {
cristya22457d2013-12-07 14:03:06 +00004148 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004149 goto cleanup;
4150 }
4151 /* launch the kernel */
4152 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4153 if (clStatus != CL_SUCCESS)
4154 {
cristya22457d2013-12-07 14:03:06 +00004155 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004156 goto cleanup;
4157 }
4158 /* launch the kernel */
4159 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4160 if (clStatus != CL_SUCCESS)
4161 {
cristya22457d2013-12-07 14:03:06 +00004162 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004163 goto cleanup;
4164 }
4165
4166 offset.s[0] = -X[k];
4167 offset.s[1] = -Y[k];
4168 polarity = -1;
4169 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4170 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4171 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4172 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4173 if (clStatus != CL_SUCCESS)
4174 {
cristya22457d2013-12-07 14:03:06 +00004175 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004176 goto cleanup;
4177 }
4178 /* launch the kernel */
4179 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4180 if (clStatus != CL_SUCCESS)
4181 {
cristya22457d2013-12-07 14:03:06 +00004182 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004183 goto cleanup;
4184 }
4185 /* launch the kernel */
4186 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4187 if (clStatus != CL_SUCCESS)
4188 {
cristya22457d2013-12-07 14:03:06 +00004189 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004190 goto cleanup;
4191 }
4192
4193 offset.s[0] = X[k];
4194 offset.s[1] = Y[k];
4195 polarity = -1;
4196 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4197 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4198 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4199 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4200
4201 if (k == 3)
4202 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
4203
4204 if (clStatus != CL_SUCCESS)
4205 {
cristya22457d2013-12-07 14:03:06 +00004206 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004207 goto cleanup;
4208 }
4209 /* launch the kernel */
4210 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4211 if (clStatus != CL_SUCCESS)
4212 {
cristya22457d2013-12-07 14:03:06 +00004213 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004214 goto cleanup;
4215 }
4216 /* launch the kernel */
4217 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4218 if (clStatus != CL_SUCCESS)
4219 {
cristya22457d2013-12-07 14:03:06 +00004220 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004221 goto cleanup;
4222 }
4223 }
4224
4225 if (ALIGNED(filteredPixels,CLPixelPacket))
4226 {
4227 length = inputImage->columns * inputImage->rows;
4228 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4229 }
4230 else
4231 {
4232 length = inputImage->columns * inputImage->rows;
4233 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4234 }
4235 if (clStatus != CL_SUCCESS)
4236 {
cristya22457d2013-12-07 14:03:06 +00004237 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristyf034abb2013-11-24 14:16:14 +00004238 goto cleanup;
4239 }
4240
4241 outputReady = MagickTrue;
4242
4243cleanup:
cristya22457d2013-12-07 14:03:06 +00004244 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4245
cristyf034abb2013-11-24 14:16:14 +00004246 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4247 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4248 for (k = 0; k < 2; k++)
4249 {
4250 if (tempImageBuffer[k]!=NULL) clReleaseMemObject(tempImageBuffer[k]);
4251 }
4252 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4253 if (hullPass1!=NULL) RelinquishOpenCLKernel(clEnv, hullPass1);
4254 if (hullPass2!=NULL) RelinquishOpenCLKernel(clEnv, hullPass2);
4255 if (outputReady == MagickFalse)
4256 {
4257 if (filteredImage != NULL)
4258 {
4259 DestroyImage(filteredImage);
4260 filteredImage = NULL;
4261 }
4262 }
4263 return filteredImage;
4264}
4265
4266/*
4267%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4268% %
4269% %
4270% %
4271% D e s p e c k l e I m a g e w i t h O p e n C L %
4272% %
4273% %
4274% %
4275%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4276%
4277% DespeckleImage() reduces the speckle noise in an image while perserving the
4278% edges of the original image. A speckle removing filter uses a complementary
4279% hulling technique (raising pixels that are darker than their surrounding
4280% neighbors, then complementarily lowering pixels that are brighter than their
4281% surrounding neighbors) to reduce the speckle index of that image (reference
4282% Crimmins speckle removal).
4283%
4284% The format of the DespeckleImage method is:
4285%
4286% Image *DespeckleImage(const Image *image,ExceptionInfo *exception)
4287%
4288% A description of each parameter follows:
4289%
4290% o image: the image.
4291%
4292% o exception: return any errors or warnings in this structure.
4293%
4294*/
4295
4296MagickExport
4297Image* AccelerateDespeckleImage(const Image* image, ExceptionInfo* exception)
4298{
4299 MagickBooleanType status;
4300 Image* newImage = NULL;
4301
4302 assert(image != NULL);
4303 assert(exception != NULL);
4304
4305 status = checkOpenCLEnvironment(exception);
4306 if (status == MagickFalse)
4307 return NULL;
4308
dirk5dcb7622013-12-01 10:43:43 +00004309 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00004310 if (status == MagickFalse)
4311 return NULL;
4312
4313 newImage = ComputeDespeckleImage(image,exception);
cristyf034abb2013-11-24 14:16:14 +00004314 return newImage;
4315}
4316
cristye85d0f72013-11-27 02:25:43 +00004317static Image* ComputeAddNoiseImage(const Image* inputImage,
4318 const ChannelType channel, const NoiseType noise_type,
4319 ExceptionInfo *exception)
4320{
4321 MagickBooleanType outputReady = MagickFalse;
4322 MagickCLEnv clEnv = NULL;
4323
4324 cl_int clStatus;
4325 size_t global_work_size[2];
4326
4327 const void *inputPixels = NULL;
4328 Image* filteredImage = NULL;
4329 void *filteredPixels = NULL;
4330 void *hostPtr;
4331 unsigned int inputColumns, inputRows;
4332 float attenuate;
4333 float *randomNumberBufferPtr = NULL;
4334 MagickSizeType length;
4335 unsigned int numRandomNumberPerPixel;
4336 unsigned int numRowsPerKernelLaunch;
4337 unsigned int numRandomNumberPerBuffer;
4338 unsigned int r;
4339 unsigned int k;
4340 int i;
4341
4342 RandomInfo **restrict random_info;
4343 const char *option;
4344#if defined(MAGICKCORE_OPENMP_SUPPORT)
4345 unsigned long key;
4346#endif
4347
4348 cl_mem_flags mem_flags;
4349 cl_context context = NULL;
4350 cl_mem inputImageBuffer = NULL;
4351 cl_mem randomNumberBuffer = NULL;
4352 cl_mem filteredImageBuffer = NULL;
4353 cl_command_queue queue = NULL;
4354 cl_kernel addNoiseKernel = NULL;
4355
4356
4357 clEnv = GetDefaultOpenCLEnv();
4358 context = GetOpenCLContext(clEnv);
4359 queue = AcquireOpenCLCommandQueue(clEnv);
4360
4361 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
4362 if (inputPixels == (void *) NULL)
4363 {
cristya22457d2013-12-07 14:03:06 +00004364 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristye85d0f72013-11-27 02:25:43 +00004365 goto cleanup;
4366 }
4367
4368 if (ALIGNED(inputPixels,CLPixelPacket))
4369 {
4370 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4371 }
4372 else
4373 {
4374 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4375 }
4376 /* create a CL buffer from image pixel buffer */
4377 length = inputImage->columns * inputImage->rows;
4378 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4379 if (clStatus != CL_SUCCESS)
4380 {
cristya22457d2013-12-07 14:03:06 +00004381 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004382 goto cleanup;
4383 }
4384
4385
4386 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4387 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00004388 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristye85d0f72013-11-27 02:25:43 +00004389 {
cristya22457d2013-12-07 14:03:06 +00004390 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristye85d0f72013-11-27 02:25:43 +00004391 goto cleanup;
4392 }
4393 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4394 if (filteredPixels == (void *) NULL)
4395 {
cristya22457d2013-12-07 14:03:06 +00004396 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristye85d0f72013-11-27 02:25:43 +00004397 goto cleanup;
4398 }
4399
4400 if (ALIGNED(filteredPixels,CLPixelPacket))
4401 {
4402 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4403 hostPtr = filteredPixels;
4404 }
4405 else
4406 {
4407 mem_flags = CL_MEM_WRITE_ONLY;
4408 hostPtr = NULL;
4409 }
4410 /* create a CL buffer from image pixel buffer */
4411 length = inputImage->columns * inputImage->rows;
4412 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4413 if (clStatus != CL_SUCCESS)
4414 {
cristya22457d2013-12-07 14:03:06 +00004415 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004416 goto cleanup;
4417 }
4418
4419 /* find out how many random numbers needed by pixel */
4420 numRandomNumberPerPixel = 0;
4421 {
4422 unsigned int numRandPerChannel = 0;
4423 switch (noise_type)
4424 {
4425 case UniformNoise:
4426 case ImpulseNoise:
4427 case LaplacianNoise:
4428 case RandomNoise:
4429 default:
4430 numRandPerChannel = 1;
4431 break;
4432 case GaussianNoise:
4433 case MultiplicativeGaussianNoise:
4434 case PoissonNoise:
4435 numRandPerChannel = 2;
4436 break;
4437 };
4438
4439 if ((channel & RedChannel) != 0)
4440 numRandomNumberPerPixel+=numRandPerChannel;
4441 if ((channel & GreenChannel) != 0)
4442 numRandomNumberPerPixel+=numRandPerChannel;
4443 if ((channel & BlueChannel) != 0)
4444 numRandomNumberPerPixel+=numRandPerChannel;
4445 if ((channel & OpacityChannel) != 0)
4446 numRandomNumberPerPixel+=numRandPerChannel;
4447 }
4448
4449 numRowsPerKernelLaunch = 512;
4450 /* create a buffer for random numbers */
4451 numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
4452 randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, numRandomNumberPerBuffer*sizeof(float)
4453 , NULL, &clStatus);
4454
4455
4456 /* set up the random number generators */
4457 attenuate=1.0;
4458 option=GetImageArtifact(inputImage,"attenuate");
4459 if (option != (char *) NULL)
4460 attenuate=StringToDouble(option,(char **) NULL);
4461 random_info=AcquireRandomInfoThreadSet();
4462#if defined(MAGICKCORE_OPENMP_SUPPORT)
4463 key=GetRandomSecretKey(random_info[0]);
4464#endif
4465
4466 addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
4467
4468 k = 0;
4469 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
4470 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4471 inputColumns = inputImage->columns;
4472 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
4473 inputRows = inputImage->rows;
4474 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
4475 clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
4476 clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
4477 attenuate=1.0f;
4478 option=GetImageArtifact(inputImage,"attenuate");
4479 if (option != (char *) NULL)
4480 attenuate=(float)StringToDouble(option,(char **) NULL);
4481 clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
4482 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4483 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
4484
4485 global_work_size[0] = inputColumns;
4486 for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch)
4487 {
4488 /* Generate random numbers in the buffer */
4489 randomNumberBufferPtr = (float*)clEnqueueMapBuffer(queue, randomNumberBuffer, CL_TRUE, CL_MAP_WRITE, 0
4490 , numRandomNumberPerBuffer*sizeof(float), 0, NULL, NULL, &clStatus);
4491 if (clStatus != CL_SUCCESS)
4492 {
cristya22457d2013-12-07 14:03:06 +00004493 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004494 goto cleanup;
4495 }
4496
4497#if defined(MAGICKCORE_OPENMP_SUPPORT)
4498 #pragma omp parallel for schedule(static,4) \
4499 num_threads((key == ~0UL) == 0 ? 1 : (size_t) GetMagickResourceLimit(ThreadResource))
4500#endif
4501 for (i = 0; i < numRandomNumberPerBuffer; i++)
4502 {
4503 const int id = GetOpenMPThreadId();
4504 randomNumberBufferPtr[i] = (float)GetPseudoRandomValue(random_info[id]);
4505 }
4506
4507 clStatus = clEnqueueUnmapMemObject(queue, randomNumberBuffer, randomNumberBufferPtr, 0, NULL, NULL);
4508 if (clStatus != CL_SUCCESS)
4509 {
cristya22457d2013-12-07 14:03:06 +00004510 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004511 goto cleanup;
4512 }
4513
4514 /* set the row offset */
4515 clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
4516 global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
4517 clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
4518 }
4519
4520 if (ALIGNED(filteredPixels,CLPixelPacket))
4521 {
4522 length = inputImage->columns * inputImage->rows;
4523 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4524 }
4525 else
4526 {
4527 length = inputImage->columns * inputImage->rows;
4528 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4529 }
4530 if (clStatus != CL_SUCCESS)
4531 {
cristya22457d2013-12-07 14:03:06 +00004532 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristye85d0f72013-11-27 02:25:43 +00004533 goto cleanup;
4534 }
4535
cristye85d0f72013-11-27 02:25:43 +00004536 outputReady = MagickTrue;
cristya22457d2013-12-07 14:03:06 +00004537
cristye85d0f72013-11-27 02:25:43 +00004538cleanup:
cristya22457d2013-12-07 14:03:06 +00004539 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4540
cristye85d0f72013-11-27 02:25:43 +00004541 if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4542 if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel);
4543 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4544 if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer);
4545 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4546 if (outputReady == MagickFalse
4547 && filteredImage != NULL)
4548 {
4549 DestroyImage(filteredImage);
4550 filteredImage = NULL;
4551 }
4552 return filteredImage;
4553}
4554
4555
4556static Image* ComputeAddNoiseImageOptRandomNum(const Image* inputImage,
4557 const ChannelType channel, const NoiseType noise_type,
4558 ExceptionInfo *exception)
4559{
4560 MagickBooleanType outputReady = MagickFalse;
4561 MagickCLEnv clEnv = NULL;
4562
4563 cl_int clStatus;
4564 size_t global_work_size[2];
4565 size_t random_work_size;
4566
4567 const void *inputPixels = NULL;
4568 Image* filteredImage = NULL;
4569 void *filteredPixels = NULL;
4570 void *hostPtr;
4571 unsigned int inputColumns, inputRows;
4572 float attenuate;
4573 MagickSizeType length;
4574 unsigned int numRandomNumberPerPixel;
4575 unsigned int numRowsPerKernelLaunch;
4576 unsigned int numRandomNumberPerBuffer;
4577 unsigned int numRandomNumberGenerators;
4578 unsigned int initRandom;
4579 float fNormalize;
4580 unsigned int r;
4581 unsigned int k;
4582 int i;
4583 const char *option;
4584
4585 cl_mem_flags mem_flags;
4586 cl_context context = NULL;
4587 cl_mem inputImageBuffer = NULL;
4588 cl_mem randomNumberBuffer = NULL;
4589 cl_mem filteredImageBuffer = NULL;
4590 cl_mem randomNumberSeedsBuffer = NULL;
4591 cl_command_queue queue = NULL;
4592 cl_kernel addNoiseKernel = NULL;
4593 cl_kernel randomNumberGeneratorKernel = NULL;
4594
4595
4596 clEnv = GetDefaultOpenCLEnv();
4597 context = GetOpenCLContext(clEnv);
4598 queue = AcquireOpenCLCommandQueue(clEnv);
4599
4600 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
4601 if (inputPixels == (void *) NULL)
4602 {
cristya22457d2013-12-07 14:03:06 +00004603 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
cristye85d0f72013-11-27 02:25:43 +00004604 goto cleanup;
4605 }
4606
4607 if (ALIGNED(inputPixels,CLPixelPacket))
4608 {
4609 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4610 }
4611 else
4612 {
4613 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4614 }
4615 /* create a CL buffer from image pixel buffer */
4616 length = inputImage->columns * inputImage->rows;
4617 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4618 if (clStatus != CL_SUCCESS)
4619 {
cristya22457d2013-12-07 14:03:06 +00004620 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004621 goto cleanup;
4622 }
4623
4624
4625 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4626 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00004627 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristye85d0f72013-11-27 02:25:43 +00004628 {
cristya22457d2013-12-07 14:03:06 +00004629 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
cristye85d0f72013-11-27 02:25:43 +00004630 goto cleanup;
4631 }
4632 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4633 if (filteredPixels == (void *) NULL)
4634 {
cristya22457d2013-12-07 14:03:06 +00004635 (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
cristye85d0f72013-11-27 02:25:43 +00004636 goto cleanup;
4637 }
4638
4639 if (ALIGNED(filteredPixels,CLPixelPacket))
4640 {
4641 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4642 hostPtr = filteredPixels;
4643 }
4644 else
4645 {
4646 mem_flags = CL_MEM_WRITE_ONLY;
4647 hostPtr = NULL;
4648 }
4649 /* create a CL buffer from image pixel buffer */
4650 length = inputImage->columns * inputImage->rows;
4651 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4652 if (clStatus != CL_SUCCESS)
4653 {
cristya22457d2013-12-07 14:03:06 +00004654 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004655 goto cleanup;
4656 }
4657
4658 /* find out how many random numbers needed by pixel */
4659 numRandomNumberPerPixel = 0;
4660 {
4661 unsigned int numRandPerChannel = 0;
4662 switch (noise_type)
4663 {
4664 case UniformNoise:
4665 case ImpulseNoise:
4666 case LaplacianNoise:
4667 case RandomNoise:
4668 default:
4669 numRandPerChannel = 1;
4670 break;
4671 case GaussianNoise:
4672 case MultiplicativeGaussianNoise:
4673 case PoissonNoise:
4674 numRandPerChannel = 2;
4675 break;
4676 };
4677
4678 if ((channel & RedChannel) != 0)
4679 numRandomNumberPerPixel+=numRandPerChannel;
4680 if ((channel & GreenChannel) != 0)
4681 numRandomNumberPerPixel+=numRandPerChannel;
4682 if ((channel & BlueChannel) != 0)
4683 numRandomNumberPerPixel+=numRandPerChannel;
4684 if ((channel & OpacityChannel) != 0)
4685 numRandomNumberPerPixel+=numRandPerChannel;
4686 }
4687
4688 numRowsPerKernelLaunch = 512;
4689
4690 /* create a buffer for random numbers */
4691 numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
4692 randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, numRandomNumberPerBuffer*sizeof(float)
4693 , NULL, &clStatus);
4694
4695 {
4696 /* setup the random number generators */
4697 unsigned long* seeds;
4698 numRandomNumberGenerators = 512;
4699 randomNumberSeedsBuffer = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR|CL_MEM_READ_WRITE
4700 , numRandomNumberGenerators * 4 * sizeof(unsigned long), NULL, &clStatus);
4701 if (clStatus != CL_SUCCESS)
4702 {
cristya22457d2013-12-07 14:03:06 +00004703 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004704 goto cleanup;
4705 }
4706 seeds = (unsigned long*) clEnqueueMapBuffer(queue, randomNumberSeedsBuffer, CL_TRUE, CL_MAP_WRITE, 0
4707 , numRandomNumberGenerators*4*sizeof(unsigned long), 0, NULL, NULL, &clStatus);
4708 if (clStatus != CL_SUCCESS)
4709 {
cristya22457d2013-12-07 14:03:06 +00004710 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004711 goto cleanup;
4712 }
4713
4714 for (i = 0; i < numRandomNumberGenerators; i++) {
4715 RandomInfo* randomInfo = AcquireRandomInfo();
4716 const unsigned long* s = GetRandomInfoSeed(randomInfo);
4717
4718 if (i == 0)
4719 fNormalize = GetRandomInfoNormalize(randomInfo);
4720
4721 seeds[i*4] = s[0];
4722 randomInfo = DestroyRandomInfo(randomInfo);
4723 }
4724
4725 clStatus = clEnqueueUnmapMemObject(queue, randomNumberSeedsBuffer, seeds, 0, NULL, NULL);
4726 if (clStatus != CL_SUCCESS)
4727 {
cristya22457d2013-12-07 14:03:06 +00004728 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
cristye85d0f72013-11-27 02:25:43 +00004729 goto cleanup;
4730 }
4731
4732 randomNumberGeneratorKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE
4733 ,"randomNumberGeneratorKernel");
4734
4735 k = 0;
4736 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberSeedsBuffer);
4737 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(float),(void *)&fNormalize);
4738 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4739 initRandom = 1;
4740 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&initRandom);
4741 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerBuffer);
4742
4743 random_work_size = numRandomNumberGenerators;
4744 }
4745
4746 addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
4747 k = 0;
4748 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
4749 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4750 inputColumns = inputImage->columns;
4751 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
4752 inputRows = inputImage->rows;
4753 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
4754 clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
4755 clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
4756 attenuate=1.0f;
4757 option=GetImageArtifact(inputImage,"attenuate");
4758 if (option != (char *) NULL)
4759 attenuate=(float)StringToDouble(option,(char **) NULL);
4760 clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
4761 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4762 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
4763
4764 global_work_size[0] = inputColumns;
4765 for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch)
4766 {
4767 size_t generator_local_size = 64;
4768 /* Generate random numbers in the buffer */
4769 clEnqueueNDRangeKernel(queue,randomNumberGeneratorKernel,1,NULL
4770 ,&random_work_size,&generator_local_size,0,NULL,NULL);
4771 if (initRandom != 0)
4772 {
4773 /* make sure we only do init once */
4774 initRandom = 0;
4775 clSetKernelArg(randomNumberGeneratorKernel,3,sizeof(unsigned int),(void *)&initRandom);
4776 }
4777
4778 /* set the row offset */
4779 clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
4780 global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
4781 clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
4782 }
4783
4784 if (ALIGNED(filteredPixels,CLPixelPacket))
4785 {
4786 length = inputImage->columns * inputImage->rows;
4787 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4788 }
4789 else
4790 {
4791 length = inputImage->columns * inputImage->rows;
4792 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4793 }
4794 if (clStatus != CL_SUCCESS)
4795 {
cristya22457d2013-12-07 14:03:06 +00004796 (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
cristye85d0f72013-11-27 02:25:43 +00004797 goto cleanup;
4798 }
4799
cristye85d0f72013-11-27 02:25:43 +00004800 outputReady = MagickTrue;
cristya22457d2013-12-07 14:03:06 +00004801
cristye85d0f72013-11-27 02:25:43 +00004802cleanup:
cristya22457d2013-12-07 14:03:06 +00004803 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4804
cristye85d0f72013-11-27 02:25:43 +00004805 if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4806 if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel);
4807 if (randomNumberGeneratorKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomNumberGeneratorKernel);
4808 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4809 if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer);
4810 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4811 if (randomNumberSeedsBuffer!=NULL) clReleaseMemObject(randomNumberSeedsBuffer);
4812 if (outputReady == MagickFalse
4813 && filteredImage != NULL)
4814 {
4815 DestroyImage(filteredImage);
4816 filteredImage = NULL;
4817 }
4818 return filteredImage;
4819}
4820
4821
4822
4823MagickExport
4824Image* AccelerateAddNoiseImage(const Image *image, const ChannelType channel,
4825 const NoiseType noise_type,ExceptionInfo *exception)
4826{
4827 MagickBooleanType status;
4828 Image* filteredImage = NULL;
4829
4830 assert(image != NULL);
4831 assert(exception != NULL);
4832
4833 status = checkOpenCLEnvironment(exception);
4834 if (status == MagickFalse)
4835 return NULL;
4836
dirk5dcb7622013-12-01 10:43:43 +00004837 status = checkAccelerateCondition(image, channel);
cristye85d0f72013-11-27 02:25:43 +00004838 if (status == MagickFalse)
4839 return NULL;
4840
dirke3c5f892013-12-10 06:04:40 +00004841DisableMSCWarning(4127)
cristye85d0f72013-11-27 02:25:43 +00004842 if (sizeof(unsigned long) == 4)
dirke3c5f892013-12-10 06:04:40 +00004843RestoreMSCWarning
cristye85d0f72013-11-27 02:25:43 +00004844 filteredImage = ComputeAddNoiseImageOptRandomNum(image,channel,noise_type,exception);
4845 else
4846 filteredImage = ComputeAddNoiseImage(image,channel,noise_type,exception);
4847
cristye85d0f72013-11-27 02:25:43 +00004848 return filteredImage;
4849}
4850
4851
cristyf034abb2013-11-24 14:16:14 +00004852#else /* MAGICKCORE_OPENCL_SUPPORT */
4853
4854MagickExport Image *AccelerateConvolveImageChannel(
4855 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4856 const KernelInfo *magick_unused(kernel),
4857 ExceptionInfo *magick_unused(exception))
4858{
4859 magick_unreferenced(image);
4860 magick_unreferenced(channel);
4861 magick_unreferenced(kernel);
4862 magick_unreferenced(exception);
4863
4864 return NULL;
4865}
4866
4867MagickExport MagickBooleanType AccelerateFunctionImage(
4868 Image *magick_unused(image),const ChannelType magick_unused(channel),
4869 const MagickFunction magick_unused(function),
4870 const size_t magick_unused(number_parameters),
4871 const double *magick_unused(parameters),
4872 ExceptionInfo *magick_unused(exception))
4873{
4874 magick_unreferenced(image);
4875 magick_unreferenced(channel);
4876 magick_unreferenced(function);
4877 magick_unreferenced(number_parameters);
4878 magick_unreferenced(parameters);
4879 magick_unreferenced(exception);
4880
4881 return MagickFalse;
4882}
4883
4884MagickExport Image *AccelerateBlurImage(const Image *magick_unused(image),
4885 const ChannelType magick_unused(channel),const double magick_unused(radius),
4886 const double magick_unused(sigma),ExceptionInfo *magick_unused(exception))
4887{
4888 magick_unreferenced(image);
4889 magick_unreferenced(channel);
4890 magick_unreferenced(radius);
4891 magick_unreferenced(sigma);
4892 magick_unreferenced(exception);
4893
4894 return NULL;
4895}
4896
4897MagickExport Image *AccelerateRadialBlurImage(
4898 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4899 const double magick_unused(angle),ExceptionInfo *magick_unused(exception))
4900{
4901 magick_unreferenced(image);
4902 magick_unreferenced(channel);
4903 magick_unreferenced(angle);
4904 magick_unreferenced(exception);
4905
4906 return NULL;
4907}
4908
4909
4910MagickExport Image *AccelerateUnsharpMaskImage(
4911 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4912 const double magick_unused(radius),const double magick_unused(sigma),
4913 const double magick_unused(gain),const double magick_unused(threshold),
4914 ExceptionInfo *magick_unused(exception))
4915{
4916 magick_unreferenced(image);
4917 magick_unreferenced(channel);
4918 magick_unreferenced(radius);
4919 magick_unreferenced(sigma);
4920 magick_unreferenced(gain);
4921 magick_unreferenced(threshold);
4922 magick_unreferenced(exception);
4923
4924 return NULL;
4925}
4926
4927
4928MagickExport MagickBooleanType AccelerateContrastImage(
4929 Image* magick_unused(image),const MagickBooleanType magick_unused(sharpen),
4930 ExceptionInfo* magick_unused(exception))
4931{
4932 magick_unreferenced(image);
4933 magick_unreferenced(sharpen);
4934 magick_unreferenced(exception);
4935
4936 return MagickFalse;
4937}
4938
4939MagickExport MagickBooleanType AccelerateEqualizeImage(
4940 Image* magick_unused(image), const ChannelType magick_unused(channel),
4941 ExceptionInfo* magick_unused(exception))
4942{
4943 magick_unreferenced(image);
4944 magick_unreferenced(channel);
4945 magick_unreferenced(exception);
4946
4947 return MagickFalse;
4948}
4949
4950MagickExport Image *AccelerateDespeckleImage(const Image* magick_unused(image),
4951 ExceptionInfo* magick_unused(exception))
4952{
4953 magick_unreferenced(image);
4954 magick_unreferenced(exception);
4955
4956 return NULL;
4957}
4958
4959MagickExport Image *AccelerateResizeImage(const Image* magick_unused(image),
4960 const size_t magick_unused(resizedColumns),
4961 const size_t magick_unused(resizedRows),
4962 const ResizeFilter* magick_unused(resizeFilter),
4963 ExceptionInfo *magick_unused(exception))
4964{
4965 magick_unreferenced(image);
4966 magick_unreferenced(resizedColumns);
4967 magick_unreferenced(resizedRows);
4968 magick_unreferenced(resizeFilter);
4969 magick_unreferenced(exception);
4970
4971 return NULL;
4972}
4973
4974
4975MagickExport
4976MagickBooleanType AccelerateModulateImage(
4977 Image* image, double percent_brightness, double percent_hue,
4978 double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
4979{
4980 magick_unreferenced(image);
4981 magick_unreferenced(percent_brightness);
4982 magick_unreferenced(percent_hue);
4983 magick_unreferenced(percent_saturation);
4984 magick_unreferenced(colorspace);
4985 magick_unreferenced(exception);
4986 return(MagickFalse);
4987}
4988
cristye85d0f72013-11-27 02:25:43 +00004989MagickExport Image *AccelerateAddNoiseImage(const Image *image,
4990 const ChannelType channel, const NoiseType noise_type,ExceptionInfo *exception)
4991{
4992 magick_unreferenced(image);
4993 magick_unreferenced(channel);
4994 magick_unreferenced(noise_type);
4995 magick_unreferenced(exception);
4996 return NULL;
4997}
cristyf034abb2013-11-24 14:16:14 +00004998
4999#endif /* MAGICKCORE_OPENCL_SUPPORT */
5000
5001MagickExport MagickBooleanType AccelerateConvolveImage(
5002 const Image *magick_unused(image),const KernelInfo *magick_unused(kernel),
5003 Image *magick_unused(convolve_image),ExceptionInfo *magick_unused(exception))
5004{
5005 magick_unreferenced(image);
5006 magick_unreferenced(kernel);
5007 magick_unreferenced(convolve_image);
5008 magick_unreferenced(exception);
5009
5010 /* legacy, do not use */
5011 return(MagickFalse);
5012}
5013