blob: 235de0f1cc1beb91b5271080d08ff2c12f441eec [file] [log] [blame]
cristy3f6d1482010-01-20 21:01:21 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7% A A C C E L E R R A A T E %
8% AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9% A A C C E L E R R A A T E %
10% A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11% %
12% %
13% MagickCore Acceleration Methods %
14% %
15% Software Design %
cristy0d127ab2010-05-14 23:29:46 +000016% John Cristy %
cristyf034abb2013-11-24 14:16:14 +000017% SiuChi Chan %
18% Guansong Zhang %
cristy3f6d1482010-01-20 21:01:21 +000019% January 2010 %
20% %
21% %
cristyfe676ee2013-11-18 13:03:38 +000022% Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization %
cristy3f6d1482010-01-20 21:01:21 +000023% dedicated to making software imaging solutions freely available. %
24% %
25% You may not use this file except in compliance with the License. You may %
26% obtain a copy of the License at %
27% %
28% http://www.imagemagick.org/script/license.php %
29% %
30% Unless required by applicable law or agreed to in writing, software %
31% distributed under the License is distributed on an "AS IS" BASIS, %
32% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
33% See the License for the specific language governing permissions and %
34% limitations under the License. %
35% %
36%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cristy3f6d1482010-01-20 21:01:21 +000037*/
cristyf034abb2013-11-24 14:16:14 +000038
cristy3f6d1482010-01-20 21:01:21 +000039/*
cristyf034abb2013-11-24 14:16:14 +000040Include declarations.
cristy3f6d1482010-01-20 21:01:21 +000041*/
cristy4c08aed2011-07-01 19:47:50 +000042#include "MagickCore/studio.h"
43#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000044#include "MagickCore/accelerate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000045#include "MagickCore/artifact.h"
cristy35f33492011-07-07 16:54:49 +000046#include "MagickCore/cache.h"
cristyd1dd6e42011-09-04 01:46:08 +000047#include "MagickCore/cache-private.h"
cristy4c08aed2011-07-01 19:47:50 +000048#include "MagickCore/cache-view.h"
49#include "MagickCore/color-private.h"
cristy7f866842013-07-11 01:15:58 +000050#include "MagickCore/delegate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000051#include "MagickCore/enhance.h"
52#include "MagickCore/exception.h"
53#include "MagickCore/exception-private.h"
54#include "MagickCore/gem.h"
55#include "MagickCore/hashmap.h"
56#include "MagickCore/image.h"
57#include "MagickCore/image-private.h"
58#include "MagickCore/list.h"
59#include "MagickCore/memory_.h"
60#include "MagickCore/monitor-private.h"
61#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000062#include "MagickCore/opencl.h"
63#include "MagickCore/opencl-private.h"
cristy4c08aed2011-07-01 19:47:50 +000064#include "MagickCore/option.h"
cristyf034abb2013-11-24 14:16:14 +000065#include "MagickCore/pixel-private.h"
cristy4c08aed2011-07-01 19:47:50 +000066#include "MagickCore/prepress.h"
67#include "MagickCore/quantize.h"
cristye85d0f72013-11-27 02:25:43 +000068#include "MagickCore/random_.h"
69#include "MagickCore/random-private.h"
cristy4c08aed2011-07-01 19:47:50 +000070#include "MagickCore/registry.h"
cristyf034abb2013-11-24 14:16:14 +000071#include "MagickCore/resize.h"
72#include "MagickCore/resize-private.h"
cristy4c08aed2011-07-01 19:47:50 +000073#include "MagickCore/semaphore.h"
74#include "MagickCore/splay-tree.h"
75#include "MagickCore/statistic.h"
76#include "MagickCore/string_.h"
77#include "MagickCore/string-private.h"
78#include "MagickCore/token.h"
cristyf034abb2013-11-24 14:16:14 +000079
80#ifdef MAGICKCORE_CLPERFMARKER
81#include "CLPerfMarker.h"
82#endif
83
cristye85d0f72013-11-27 02:25:43 +000084#define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
85#define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
86
cristyf034abb2013-11-24 14:16:14 +000087#if defined(MAGICKCORE_OPENCL_SUPPORT)
88
89#define ALIGNED(pointer,type) ((((long)(pointer)) & (sizeof(type)-1)) == 0)
90/*#define ALIGNED(pointer,type) (0) */
91
92static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception)
93{
94 MagickBooleanType flag;
95
96 MagickCLEnv clEnv;
97 clEnv = GetDefaultOpenCLEnv();
98
99 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED
100 , sizeof(MagickBooleanType), &flag, exception);
101 if (flag == MagickTrue)
102 return MagickFalse;
103
104 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_INITIALIZED
105 , sizeof(MagickBooleanType), &flag, exception);
106 if (flag == MagickFalse)
107 {
108 if(InitOpenCLEnv(clEnv, exception) == MagickFalse)
109 return MagickFalse;
110 }
111
112 return MagickTrue;
113}
114
115
116static MagickBooleanType checkAccelerateCondition(const Image* image, const ChannelType channel, ExceptionInfo *exception)
117{
118 /* check if the image's colorspace is supported */
119 if (image->colorspace != RGBColorspace
120 && image->colorspace != sRGBColorspace)
121 return MagickFalse;
122
123 /* check if the channel is supported */
124 if (((channel&RedChannel) == 0)
125 || ((channel&GreenChannel) == 0)
126 || ((channel&BlueChannel) == 0))
127 {
128 return MagickFalse;
129 }
130
131
132 /* check if if the virtual pixel method is compatible with the OpenCL implementation */
133 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod)&&
134 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
135 return MagickFalse;
136
137 return MagickTrue;
138}
139
140
141static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
142{
143 MagickBooleanType outputReady;
144 MagickCLEnv clEnv;
145
146 cl_int clStatus;
147 size_t global_work_size[2];
148 size_t localGroupSize[2];
149 size_t localMemoryRequirement;
150 Image* filteredImage;
151 MagickSizeType length;
152 const void *inputPixels;
153 void *filteredPixels;
154 cl_mem_flags mem_flags;
155 float* kernelBufferPtr;
156 unsigned kernelSize;
157 unsigned int i;
158 void *hostPtr;
159 unsigned int matte, filterWidth, filterHeight, imageWidth, imageHeight;
160
161 cl_context context;
162 cl_kernel clkernel;
163 cl_mem inputImageBuffer, filteredImageBuffer, convolutionKernel;
164 cl_ulong deviceLocalMemorySize;
165 cl_device_id device;
166
167 cl_command_queue queue;
168
169 /* intialize all CL objects to NULL */
170 context = NULL;
171 inputImageBuffer = NULL;
172 filteredImageBuffer = NULL;
173 convolutionKernel = NULL;
174 clkernel = NULL;
175 queue = NULL;
176 device = NULL;
177
178 filteredImage = NULL;
179 outputReady = MagickFalse;
180
181 clEnv = GetDefaultOpenCLEnv();
182 context = GetOpenCLContext(clEnv);
183
184 inputPixels = NULL;
185 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
186 if (inputPixels == (const void *) NULL)
187 {
188 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
189 goto cleanup;
190 }
191
192 /* Create and initialize OpenCL buffers. */
193
194 /* If the host pointer is aligned to the size of CLPixelPacket,
195 then use the host buffer directly from the GPU; otherwise,
196 create a buffer on the GPU and copy the data over */
197 if (ALIGNED(inputPixels,CLPixelPacket))
198 {
199 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
200 }
201 else
202 {
203 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
204 }
205 /* create a CL buffer from image pixel buffer */
206 length = inputImage->columns * inputImage->rows;
207 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
208 if (clStatus != CL_SUCCESS)
209 {
210 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
211 goto cleanup;
212 }
213
214 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
215 assert(filteredImage != NULL);
216 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
217 {
218 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
219 goto cleanup;
220 }
221 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
222 if (filteredPixels == (void *) NULL)
223 {
224 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
225 goto cleanup;
226 }
227
228 if (ALIGNED(filteredPixels,CLPixelPacket))
229 {
230 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
231 hostPtr = filteredPixels;
232 }
233 else
234 {
235 mem_flags = CL_MEM_WRITE_ONLY;
236 hostPtr = NULL;
237 }
238 /* create a CL buffer from image pixel buffer */
239 length = inputImage->columns * inputImage->rows;
240 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
241 if (clStatus != CL_SUCCESS)
242 {
243 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
244 goto cleanup;
245 }
246
247 kernelSize = kernel->width * kernel->height;
248 convolutionKernel = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
249 if (clStatus != CL_SUCCESS)
250 {
251 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
252 goto cleanup;
253 }
254
255 queue = AcquireOpenCLCommandQueue(clEnv);
256
257 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
258 , 0, NULL, NULL, &clStatus);
259 if (clStatus != CL_SUCCESS)
260 {
261 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
262 goto cleanup;
263 }
264 for (i = 0; i < kernelSize; i++)
265 {
266 kernelBufferPtr[i] = (float) kernel->values[i];
267 }
268 clStatus = clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
269 if (clStatus != CL_SUCCESS)
270 {
271 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
272 goto cleanup;
273 }
274 clFlush(queue);
275
276 /* Compute the local memory requirement for a 16x16 workgroup.
277 If it's larger than 16k, reduce the workgroup size to 8x8 */
278 localGroupSize[0] = 16;
279 localGroupSize[1] = 16;
280 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
281 + kernel->width*kernel->height*sizeof(float);
282 if (localMemoryRequirement > 16384)
283 {
284
285
286 localGroupSize[0] = 8;
287 localGroupSize[1] = 8;
288
289 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
290 + kernel->width*kernel->height*sizeof(float);
291 }
292
293 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_DEVICE, sizeof(cl_device_id), &device, exception);
294 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &deviceLocalMemorySize, NULL);
295 if (localMemoryRequirement <= deviceLocalMemorySize)
296 {
297 /* get the OpenCL kernel */
298 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
299 if (clkernel == NULL)
300 {
301 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
302 goto cleanup;
303 }
304
305 /* set the kernel arguments */
306 i = 0;
307 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
308 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
309 imageWidth = inputImage->columns;
310 imageHeight = inputImage->rows;
311 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
312 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
313 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
314 filterWidth = kernel->width;
315 filterHeight = kernel->height;
316 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
317 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
318 matte = (inputImage->matte==MagickTrue)?1:0;
319 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
320 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
321 clStatus|=clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
322 clStatus|=clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
323 if (clStatus != CL_SUCCESS)
324 {
325 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
326 goto cleanup;
327 }
328
329 /* pad the global size to a multiple of the local work size dimension */
330 global_work_size[0] = ((inputImage->columns + localGroupSize[0] - 1)/localGroupSize[0] ) * localGroupSize[0] ;
331 global_work_size[1] = ((inputImage->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
332
333 /* launch the kernel */
334 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
335 if (clStatus != CL_SUCCESS)
336 {
337 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
338 goto cleanup;
339 }
340 }
341 else
342 {
343 /* get the OpenCL kernel */
344 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
345 if (clkernel == NULL)
346 {
347 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
348 goto cleanup;
349 }
350
351 /* set the kernel arguments */
352 i = 0;
353 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
354 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
355 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
356 filterWidth = kernel->width;
357 filterHeight = kernel->height;
358 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
359 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
360 matte = (inputImage->matte==MagickTrue)?1:0;
361 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
362 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
363 if (clStatus != CL_SUCCESS)
364 {
365 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
366 goto cleanup;
367 }
368
369 global_work_size[0] = inputImage->columns;
370 global_work_size[1] = inputImage->rows;
371
372 /* launch the kernel */
373 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
374 if (clStatus != CL_SUCCESS)
375 {
376 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
377 goto cleanup;
378 }
379 }
380 clFlush(queue);
381
382 if (ALIGNED(filteredPixels,CLPixelPacket))
383 {
384 length = inputImage->columns * inputImage->rows;
385 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
386 }
387 else
388 {
389 length = inputImage->columns * inputImage->rows;
390 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
391 }
392 if (clStatus != CL_SUCCESS)
393 {
394 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
395 goto cleanup;
396 }
397
398 /* everything is fine! :) */
399 outputReady = MagickTrue;
400
401
402cleanup:
403
404 if (inputImageBuffer != NULL)
405 clReleaseMemObject(inputImageBuffer);
406
407 if (filteredImageBuffer != NULL)
408 clReleaseMemObject(filteredImageBuffer);
409
410 if (convolutionKernel != NULL)
411 clReleaseMemObject(convolutionKernel);
412
413 if (clkernel != NULL)
414 RelinquishOpenCLKernel(clEnv, clkernel);
415
416 if (queue != NULL)
417 RelinquishOpenCLCommandQueue(clEnv, queue);
418
419 if (outputReady == MagickFalse)
420 {
421 if (filteredImage != NULL)
422 {
423 DestroyImage(filteredImage);
424 filteredImage = NULL;
425 }
426 }
427
428 return filteredImage;
429}
430
cristy3f6d1482010-01-20 21:01:21 +0000431/*
432%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
433% %
434% %
435% %
cristyf034abb2013-11-24 14:16:14 +0000436% C o n v o l v e I m a g e w i t h O p e n C L %
cristy3f6d1482010-01-20 21:01:21 +0000437% %
438% %
439% %
440%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
441%
cristyf034abb2013-11-24 14:16:14 +0000442% ConvolveImage() applies a custom convolution kernel to the image.
cristy3f6d1482010-01-20 21:01:21 +0000443%
cristyf034abb2013-11-24 14:16:14 +0000444% The format of the ConvolveImage method is:
cristy3f6d1482010-01-20 21:01:21 +0000445%
cristyf034abb2013-11-24 14:16:14 +0000446% Image *ConvolveImage(const Image *image,const size_t order,
447% const double *kernel,ExceptionInfo *exception)
448% Image *ConvolveImageChannel(const Image *image,const ChannelType channel,
449% const size_t order,const double *kernel,ExceptionInfo *exception)
450%
451% A description of each parameter follows:
452%
453% o image: the image.
454%
455% o channel: the channel type.
456%
457% o kernel: kernel info.
458%
459% o exception: return any errors or warnings in this structure.
460%
461*/
462
463MagickExport Image* AccelerateConvolveImageChannel(const Image *image, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
464{
465 MagickBooleanType status;
466 Image* filteredImage = NULL;
467
468 assert(image != NULL);
469 assert(kernel != (KernelInfo *) NULL);
470 assert(exception != (ExceptionInfo *) NULL);
471
472 status = checkOpenCLEnvironment(exception);
473 if (status == MagickFalse)
474 return NULL;
475
476 status = checkAccelerateCondition(image, channel, exception);
477 if (status == MagickFalse)
478 return NULL;
479
480 filteredImage = ComputeConvolveImage(image, channel, kernel, exception);
481 OpenCLLogException(__FUNCTION__,__LINE__,exception);
482 return filteredImage;
483}
484
485static MagickBooleanType ComputeFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
486 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
487{
488 MagickBooleanType status;
489
490 MagickCLEnv clEnv;
491
492 MagickSizeType length;
493 void* pixels;
494 float* parametersBufferPtr;
495
496 cl_int clStatus;
497 cl_context context;
498 cl_kernel clkernel;
499 cl_command_queue queue;
500 cl_mem_flags mem_flags;
501 cl_mem imageBuffer;
502 cl_mem parametersBuffer;
503 size_t globalWorkSize[2];
504
505 unsigned int i;
506
507 status = MagickFalse;
508
509 context = NULL;
510 clkernel = NULL;
511 queue = NULL;
512 imageBuffer = NULL;
513 parametersBuffer = NULL;
514
515 clEnv = GetDefaultOpenCLEnv();
516 context = GetOpenCLContext(clEnv);
517
518 pixels = GetPixelCachePixels(image, &length, exception);
519 if (pixels == (void *) NULL)
520 {
521 (void) ThrowMagickException(exception, GetMagickModule(), CacheWarning,
522 "GetPixelCachePixels failed.",
523 "'%s'", image->filename);
524 goto cleanup;
525 }
526
527
528 if (ALIGNED(pixels,CLPixelPacket))
529 {
530 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
531 }
532 else
533 {
534 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
535 }
536 /* create a CL buffer from image pixel buffer */
537 length = image->columns * image->rows;
538 imageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus);
539 if (clStatus != CL_SUCCESS)
540 {
541 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
542 goto cleanup;
543 }
544
545 parametersBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus);
546 if (clStatus != CL_SUCCESS)
547 {
548 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
549 goto cleanup;
550 }
551
552 queue = AcquireOpenCLCommandQueue(clEnv);
553
554 parametersBufferPtr = (float*)clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float)
555 , 0, NULL, NULL, &clStatus);
556 if (clStatus != CL_SUCCESS)
557 {
558 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
559 goto cleanup;
560 }
561 for (i = 0; i < number_parameters; i++)
562 {
563 parametersBufferPtr[i] = (float)parameters[i];
564 }
565 clStatus = clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL);
566 if (clStatus != CL_SUCCESS)
567 {
568 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
569 goto cleanup;
570 }
571 clFlush(queue);
572
573 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "FunctionImage");
574 if (clkernel == NULL)
575 {
576 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
577 goto cleanup;
578 }
579
580 /* set the kernel arguments */
581 i = 0;
582 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
583 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
584 clStatus|=clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
585 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
586 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
587 if (clStatus != CL_SUCCESS)
588 {
589 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
590 goto cleanup;
591 }
592
593 globalWorkSize[0] = image->columns;
594 globalWorkSize[1] = image->rows;
595 /* launch the kernel */
596 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
597 if (clStatus != CL_SUCCESS)
598 {
599 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
600 goto cleanup;
601 }
602 clFlush(queue);
603
604
605 if (ALIGNED(pixels,CLPixelPacket))
606 {
607 length = image->columns * image->rows;
608 clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
609 }
610 else
611 {
612 length = image->columns * image->rows;
613 clStatus = clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL);
614 }
615 if (clStatus != CL_SUCCESS)
616 {
617 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
618 goto cleanup;
619 }
620 status = MagickTrue;
621
622cleanup:
623
624 if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel);
625 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
626 if (imageBuffer != NULL) clReleaseMemObject(imageBuffer);
627 if (parametersBuffer != NULL) clReleaseMemObject(parametersBuffer);
628
629 return status;
630}
631
632
633
634MagickExport MagickBooleanType
635 AccelerateFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
636 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
637{
638 MagickBooleanType status;
639
640 status = MagickFalse;
641
642 assert(image != NULL);
643 assert(exception != (ExceptionInfo *) NULL);
644
645 status = checkOpenCLEnvironment(exception);
646 if (status == MagickTrue)
647 {
648 status = checkAccelerateCondition(image, channel, exception);
649 if (status == MagickTrue)
650 {
651 status = ComputeFunctionImage(image, channel, function, number_parameters, parameters, exception);
652 OpenCLLogException(__FUNCTION__,__LINE__,exception);
653 }
654 }
655 return status;
656}
657
658
659static MagickBooleanType splitImage(const Image* inputImage)
660{
661 MagickBooleanType split;
662
663 MagickCLEnv clEnv;
664 unsigned long allocSize;
665 unsigned long tempSize;
666
667 clEnv = GetDefaultOpenCLEnv();
668
669 allocSize = GetOpenCLDeviceMaxMemAllocSize(clEnv);
670 tempSize = inputImage->columns * inputImage->rows * 4 * 4;
671
672 /*
673 printf("alloc size: %lu\n", allocSize);
674 printf("temp size: %lu\n", tempSize);
675 */
676
677 split = ((tempSize > allocSize) ? MagickTrue:MagickFalse);
678
679 return split;
680}
681
682static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
683{
684 MagickBooleanType outputReady;
685 Image* filteredImage;
686 MagickCLEnv clEnv;
687
688 cl_int clStatus;
689
690 const void *inputPixels;
691 void *filteredPixels;
692 cl_mem_flags mem_flags;
693
694 cl_context context;
695 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
696 cl_kernel blurRowKernel, blurColumnKernel;
697 cl_command_queue queue;
698
699 void* hostPtr;
700 float* kernelBufferPtr;
701 MagickSizeType length;
702
703 char geometry[MaxTextExtent];
704 KernelInfo* kernel = NULL;
705 unsigned int kernelWidth;
706 unsigned int imageColumns, imageRows;
707
708 unsigned int i;
709
710 context = NULL;
711 filteredImage = NULL;
712 inputImageBuffer = NULL;
713 tempImageBuffer = NULL;
714 filteredImageBuffer = NULL;
715 imageKernelBuffer = NULL;
716 blurRowKernel = NULL;
717 blurColumnKernel = NULL;
718 queue = NULL;
719
720 outputReady = MagickFalse;
721
722 clEnv = GetDefaultOpenCLEnv();
723 context = GetOpenCLContext(clEnv);
724 queue = AcquireOpenCLCommandQueue(clEnv);
725
726 /* Create and initialize OpenCL buffers. */
727 {
728 inputPixels = NULL;
729 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
730 if (inputPixels == (const void *) NULL)
731 {
732 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
733 goto cleanup;
734 }
735 /* If the host pointer is aligned to the size of CLPixelPacket,
736 then use the host buffer directly from the GPU; otherwise,
737 create a buffer on the GPU and copy the data over */
738 if (ALIGNED(inputPixels,CLPixelPacket))
739 {
740 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
741 }
742 else
743 {
744 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
745 }
746 /* create a CL buffer from image pixel buffer */
747 length = inputImage->columns * inputImage->rows;
748 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
749 if (clStatus != CL_SUCCESS)
750 {
751 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
752 goto cleanup;
753 }
754 }
755
756 /* create output */
757 {
758 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
759 assert(filteredImage != NULL);
760 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
761 {
762 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
763 goto cleanup;
764 }
765 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
766 if (filteredPixels == (void *) NULL)
767 {
768 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
769 goto cleanup;
770 }
771
772 if (ALIGNED(filteredPixels,CLPixelPacket))
773 {
774 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
775 hostPtr = filteredPixels;
776 }
777 else
778 {
779 mem_flags = CL_MEM_WRITE_ONLY;
780 hostPtr = NULL;
781 }
782 /* create a CL buffer from image pixel buffer */
783 length = inputImage->columns * inputImage->rows;
784 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
785 if (clStatus != CL_SUCCESS)
786 {
787 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
788 goto cleanup;
789 }
790 }
791
792 /* create processing kernel */
793 {
794 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
795 kernel=AcquireKernelInfo(geometry);
796 if (kernel == (KernelInfo *) NULL)
797 {
798 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
799 goto cleanup;
800 }
801
802 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
803 if (clStatus != CL_SUCCESS)
804 {
805 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
806 goto cleanup;
807 }
808 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
809 if (clStatus != CL_SUCCESS)
810 {
811 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
812 goto cleanup;
813 }
814
815 for (i = 0; i < kernel->width; i++)
816 {
817 kernelBufferPtr[i] = (float) kernel->values[i];
818 }
819
820 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
821 if (clStatus != CL_SUCCESS)
822 {
823 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
824 goto cleanup;
825 }
826 }
827
828 {
829
830 /* create temp buffer */
831 {
832 length = inputImage->columns * inputImage->rows;
833 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
834 if (clStatus != CL_SUCCESS)
835 {
836 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
837 goto cleanup;
838 }
839 }
840
841 /* get the OpenCL kernels */
842 {
843 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
844 if (blurRowKernel == NULL)
845 {
846 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
847 goto cleanup;
848 };
849
850 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumn");
851 if (blurColumnKernel == NULL)
852 {
853 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
854 goto cleanup;
855 };
856 }
857
858 {
859 /* need logic to decide this value */
860 int chunkSize = 256;
861
862 {
863 imageColumns = inputImage->columns;
864 imageRows = inputImage->rows;
865
866 /* set the kernel arguments */
867 i = 0;
868 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
869 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
870 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
871 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
872 kernelWidth = kernel->width;
873 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
874 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
875 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
876 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
877 if (clStatus != CL_SUCCESS)
878 {
879 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
880 goto cleanup;
881 }
882 }
883
884 /* launch the kernel */
885 {
886 size_t gsize[2];
887 size_t wsize[2];
888
889 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
890 gsize[1] = inputImage->rows;
891 wsize[0] = chunkSize;
892 wsize[1] = 1;
893
894 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
895 if (clStatus != CL_SUCCESS)
896 {
897 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
898 goto cleanup;
899 }
900 clFlush(queue);
901 }
902 }
903
904 {
905 /* need logic to decide this value */
906 int chunkSize = 256;
907
908 {
909 imageColumns = inputImage->columns;
910 imageRows = inputImage->rows;
911
912 /* set the kernel arguments */
913 i = 0;
914 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
915 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
916 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
917 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
918 kernelWidth = kernel->width;
919 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
920 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
921 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
922 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
923 if (clStatus != CL_SUCCESS)
924 {
925 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
926 goto cleanup;
927 }
928 }
929
930 /* launch the kernel */
931 {
932 size_t gsize[2];
933 size_t wsize[2];
934
935 gsize[0] = inputImage->columns;
936 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
937 wsize[0] = 1;
938 wsize[1] = chunkSize;
939
940 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
941 if (clStatus != CL_SUCCESS)
942 {
943 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
944 goto cleanup;
945 }
946 clFlush(queue);
947 }
948 }
949
950 }
951
952 /* get result */
953 if (ALIGNED(filteredPixels,CLPixelPacket))
954 {
955 length = inputImage->columns * inputImage->rows;
956 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
957 }
958 else
959 {
960 length = inputImage->columns * inputImage->rows;
961 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
962 }
963 if (clStatus != CL_SUCCESS)
964 {
965 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
966 goto cleanup;
967 }
968
969 outputReady = MagickTrue;
970
971cleanup:
972 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
973 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
974 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
975 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
976 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
977 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
978 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
979 if (kernel!=NULL) DestroyKernelInfo(kernel);
980 if (outputReady == MagickFalse)
981 {
982 if (filteredImage != NULL)
983 {
984 DestroyImage(filteredImage);
985 filteredImage = NULL;
986 }
987 }
988 return filteredImage;
989}
990
991static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
992{
993 MagickBooleanType outputReady;
994 Image* filteredImage;
995 MagickCLEnv clEnv;
996
997 cl_int clStatus;
998
999 const void *inputPixels;
1000 void *filteredPixels;
1001 cl_mem_flags mem_flags;
1002
1003 cl_context context;
1004 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
1005 cl_kernel blurRowKernel, blurColumnKernel;
1006 cl_command_queue queue;
1007
1008 void* hostPtr;
1009 float* kernelBufferPtr;
1010 MagickSizeType length;
1011
1012 char geometry[MaxTextExtent];
1013 KernelInfo* kernel = NULL;
1014 unsigned int kernelWidth;
1015 unsigned int imageColumns, imageRows;
1016
1017 unsigned int i;
1018
1019 context = NULL;
1020 filteredImage = NULL;
1021 inputImageBuffer = NULL;
1022 tempImageBuffer = NULL;
1023 filteredImageBuffer = NULL;
1024 imageKernelBuffer = NULL;
1025 blurRowKernel = NULL;
1026 blurColumnKernel = NULL;
1027 queue = NULL;
1028
1029 outputReady = MagickFalse;
1030
1031 clEnv = GetDefaultOpenCLEnv();
1032 context = GetOpenCLContext(clEnv);
1033 queue = AcquireOpenCLCommandQueue(clEnv);
1034
1035 /* Create and initialize OpenCL buffers. */
1036 {
1037 inputPixels = NULL;
1038 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1039 if (inputPixels == (const void *) NULL)
1040 {
1041 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
1042 goto cleanup;
1043 }
1044 /* If the host pointer is aligned to the size of CLPixelPacket,
1045 then use the host buffer directly from the GPU; otherwise,
1046 create a buffer on the GPU and copy the data over */
1047 if (ALIGNED(inputPixels,CLPixelPacket))
1048 {
1049 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1050 }
1051 else
1052 {
1053 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1054 }
1055 /* create a CL buffer from image pixel buffer */
1056 length = inputImage->columns * inputImage->rows;
1057 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1058 if (clStatus != CL_SUCCESS)
1059 {
1060 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1061 goto cleanup;
1062 }
1063 }
1064
1065 /* create output */
1066 {
1067 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1068 assert(filteredImage != NULL);
1069 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
1070 {
1071 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1072 goto cleanup;
1073 }
1074 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1075 if (filteredPixels == (void *) NULL)
1076 {
1077 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1078 goto cleanup;
1079 }
1080
1081 if (ALIGNED(filteredPixels,CLPixelPacket))
1082 {
1083 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1084 hostPtr = filteredPixels;
1085 }
1086 else
1087 {
1088 mem_flags = CL_MEM_WRITE_ONLY;
1089 hostPtr = NULL;
1090 }
1091 /* create a CL buffer from image pixel buffer */
1092 length = inputImage->columns * inputImage->rows;
1093 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1094 if (clStatus != CL_SUCCESS)
1095 {
1096 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1097 goto cleanup;
1098 }
1099 }
1100
1101 /* create processing kernel */
1102 {
1103 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1104 kernel=AcquireKernelInfo(geometry);
1105 if (kernel == (KernelInfo *) NULL)
1106 {
1107 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
1108 goto cleanup;
1109 }
1110
1111 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
1112 if (clStatus != CL_SUCCESS)
1113 {
1114 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1115 goto cleanup;
1116 }
1117 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1118 if (clStatus != CL_SUCCESS)
1119 {
1120 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
1121 goto cleanup;
1122 }
1123
1124 for (i = 0; i < kernel->width; i++)
1125 {
1126 kernelBufferPtr[i] = (float) kernel->values[i];
1127 }
1128
1129 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1130 if (clStatus != CL_SUCCESS)
1131 {
1132 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
1133 goto cleanup;
1134 }
1135 }
1136
1137 {
1138 unsigned int offsetRows;
1139 unsigned int sec;
1140
1141 /* create temp buffer */
1142 {
1143 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
1144 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1145 if (clStatus != CL_SUCCESS)
1146 {
1147 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1148 goto cleanup;
1149 }
1150 }
1151
1152 /* get the OpenCL kernels */
1153 {
1154 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
1155 if (blurRowKernel == NULL)
1156 {
1157 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1158 goto cleanup;
1159 };
1160
1161 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumnSection");
1162 if (blurColumnKernel == NULL)
1163 {
1164 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1165 goto cleanup;
1166 };
1167 }
1168
1169 for (sec = 0; sec < 2; sec++)
1170 {
1171 {
1172 /* need logic to decide this value */
1173 int chunkSize = 256;
1174
1175 {
1176 imageColumns = inputImage->columns;
1177 if (sec == 0)
1178 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
1179 else
1180 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
1181
1182 offsetRows = sec * inputImage->rows / 2;
1183
1184 kernelWidth = kernel->width;
1185
1186 /* set the kernel arguments */
1187 i = 0;
1188 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1189 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1190 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1191 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1192 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1193 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1194 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1195 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1196 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1197 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
1198 if (clStatus != CL_SUCCESS)
1199 {
1200 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1201 goto cleanup;
1202 }
1203 }
1204
1205 /* launch the kernel */
1206 {
1207 size_t gsize[2];
1208 size_t wsize[2];
1209
1210 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
1211 gsize[1] = imageRows;
1212 wsize[0] = chunkSize;
1213 wsize[1] = 1;
1214
1215 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1216 if (clStatus != CL_SUCCESS)
1217 {
1218 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1219 goto cleanup;
1220 }
1221 clFlush(queue);
1222 }
1223 }
1224
1225 {
1226 /* need logic to decide this value */
1227 int chunkSize = 256;
1228
1229 {
1230 imageColumns = inputImage->columns;
1231 if (sec == 0)
1232 imageRows = inputImage->rows / 2;
1233 else
1234 imageRows = (inputImage->rows - inputImage->rows / 2);
1235
1236 offsetRows = sec * inputImage->rows / 2;
1237
1238 kernelWidth = kernel->width;
1239
1240 /* set the kernel arguments */
1241 i = 0;
1242 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1243 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1244 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
1245 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1246 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1247 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1248 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1249 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
1250 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1251 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
1252 if (clStatus != CL_SUCCESS)
1253 {
1254 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1255 goto cleanup;
1256 }
1257 }
1258
1259 /* launch the kernel */
1260 {
1261 size_t gsize[2];
1262 size_t wsize[2];
1263
1264 gsize[0] = imageColumns;
1265 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
1266 wsize[0] = 1;
1267 wsize[1] = chunkSize;
1268
1269 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1270 if (clStatus != CL_SUCCESS)
1271 {
1272 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1273 goto cleanup;
1274 }
1275 clFlush(queue);
1276 }
1277 }
1278 }
1279
1280 }
1281
1282 /* get result */
1283 if (ALIGNED(filteredPixels,CLPixelPacket))
1284 {
1285 length = inputImage->columns * inputImage->rows;
1286 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1287 }
1288 else
1289 {
1290 length = inputImage->columns * inputImage->rows;
1291 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1292 }
1293 if (clStatus != CL_SUCCESS)
1294 {
1295 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
1296 goto cleanup;
1297 }
1298
1299 outputReady = MagickTrue;
1300
1301cleanup:
1302 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1303 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1304 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1305 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1306 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1307 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
1308 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1309 if (kernel!=NULL) DestroyKernelInfo(kernel);
1310 if (outputReady == MagickFalse)
1311 {
1312 if (filteredImage != NULL)
1313 {
1314 DestroyImage(filteredImage);
1315 filteredImage = NULL;
1316 }
1317 }
1318 return filteredImage;
1319}
1320
1321/*
1322%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1323% %
1324% %
1325% %
1326% B l u r I m a g e w i t h O p e n C L %
1327% %
1328% %
1329% %
1330%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1331%
1332% BlurImage() blurs an image. We convolve the image with a Gaussian operator
1333% of the given radius and standard deviation (sigma). For reasonable results,
1334% the radius should be larger than sigma. Use a radius of 0 and BlurImage()
1335% selects a suitable radius for you.
1336%
1337% The format of the BlurImage method is:
1338%
1339% Image *BlurImage(const Image *image,const double radius,
1340% const double sigma,ExceptionInfo *exception)
1341% Image *BlurImageChannel(const Image *image,const ChannelType channel,
1342% const double radius,const double sigma,ExceptionInfo *exception)
1343%
1344% A description of each parameter follows:
1345%
1346% o image: the image.
1347%
1348% o channel: the channel type.
1349%
1350% o radius: the radius of the Gaussian, in pixels, not counting the center
1351% pixel.
1352%
1353% o sigma: the standard deviation of the Gaussian, in pixels.
1354%
1355% o exception: return any errors or warnings in this structure.
1356%
1357*/
1358
1359MagickExport
1360Image* AccelerateBlurImage(const Image *image, const ChannelType channel, const double radius, const double sigma,ExceptionInfo *exception)
1361{
1362 MagickBooleanType status;
1363 Image* filteredImage = NULL;
1364
1365 assert(image != NULL);
1366 assert(exception != (ExceptionInfo *) NULL);
1367
1368 status = checkOpenCLEnvironment(exception);
1369 if (status == MagickFalse)
1370 return NULL;
1371
1372 status = checkAccelerateCondition(image, channel, exception);
1373 if (status == MagickFalse)
1374 return NULL;
1375
1376 if (splitImage(image) && (image->rows / 2 > radius))
1377 filteredImage = ComputeBlurImageSection(image, channel, radius, sigma, exception);
1378 else
1379 filteredImage = ComputeBlurImage(image, channel, radius, sigma, exception);
1380
1381 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1382 return filteredImage;
1383}
1384
1385
1386static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType channel, const double angle, ExceptionInfo *exception)
1387{
1388
1389 MagickBooleanType outputReady;
1390 Image* filteredImage;
1391 MagickCLEnv clEnv;
1392
1393 cl_int clStatus;
1394 size_t global_work_size[2];
1395
1396 cl_context context;
1397 cl_mem_flags mem_flags;
1398 cl_mem inputImageBuffer, filteredImageBuffer, sinThetaBuffer, cosThetaBuffer;
1399 cl_kernel radialBlurKernel;
1400 cl_command_queue queue;
1401
1402 const void *inputPixels;
1403 void *filteredPixels;
1404 void* hostPtr;
1405 float* sinThetaPtr;
1406 float* cosThetaPtr;
1407 MagickSizeType length;
1408 unsigned int matte;
1409 MagickPixelPacket bias;
1410 cl_float4 biasPixel;
1411 cl_float2 blurCenter;
1412 float blurRadius;
1413 unsigned int cossin_theta_size;
1414 float offset, theta;
1415
1416 unsigned int i;
1417
1418 outputReady = MagickFalse;
1419 context = NULL;
1420 filteredImage = NULL;
1421 inputImageBuffer = NULL;
1422 filteredImageBuffer = NULL;
1423 sinThetaBuffer = NULL;
1424 cosThetaBuffer = NULL;
1425 queue = NULL;
1426 radialBlurKernel = NULL;
1427
1428
1429 clEnv = GetDefaultOpenCLEnv();
1430 context = GetOpenCLContext(clEnv);
1431
1432
1433 /* Create and initialize OpenCL buffers. */
1434
1435 inputPixels = NULL;
1436 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1437 if (inputPixels == (const void *) NULL)
1438 {
1439 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
1440 goto cleanup;
1441 }
1442
1443 /* If the host pointer is aligned to the size of CLPixelPacket,
1444 then use the host buffer directly from the GPU; otherwise,
1445 create a buffer on the GPU and copy the data over */
1446 if (ALIGNED(inputPixels,CLPixelPacket))
1447 {
1448 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1449 }
1450 else
1451 {
1452 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1453 }
1454 /* create a CL buffer from image pixel buffer */
1455 length = inputImage->columns * inputImage->rows;
1456 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1457 if (clStatus != CL_SUCCESS)
1458 {
1459 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1460 goto cleanup;
1461 }
1462
1463
1464 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1465 assert(filteredImage != NULL);
1466 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
1467 {
1468 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1469 goto cleanup;
1470 }
1471 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1472 if (filteredPixels == (void *) NULL)
1473 {
1474 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1475 goto cleanup;
1476 }
1477
1478 if (ALIGNED(filteredPixels,CLPixelPacket))
1479 {
1480 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1481 hostPtr = filteredPixels;
1482 }
1483 else
1484 {
1485 mem_flags = CL_MEM_WRITE_ONLY;
1486 hostPtr = NULL;
1487 }
1488 /* create a CL buffer from image pixel buffer */
1489 length = inputImage->columns * inputImage->rows;
1490 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1491 if (clStatus != CL_SUCCESS)
1492 {
1493 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1494 goto cleanup;
1495 }
1496
1497 blurCenter.s[0] = (float) (inputImage->columns-1)/2.0;
1498 blurCenter.s[1] = (float) (inputImage->rows-1)/2.0;
1499 blurRadius=hypot(blurCenter.s[0],blurCenter.s[1]);
1500 cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
1501
1502 /* create a buffer for sin_theta and cos_theta */
1503 sinThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1504 if (clStatus != CL_SUCCESS)
1505 {
1506 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1507 goto cleanup;
1508 }
1509 cosThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1510 if (clStatus != CL_SUCCESS)
1511 {
1512 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1513 goto cleanup;
1514 }
1515
1516
1517 queue = AcquireOpenCLCommandQueue(clEnv);
1518 sinThetaPtr = (float*) clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1519 if (clStatus != CL_SUCCESS)
1520 {
1521 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
1522 goto cleanup;
1523 }
1524
1525 cosThetaPtr = (float*) clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1526 if (clStatus != CL_SUCCESS)
1527 {
1528 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
1529 goto cleanup;
1530 }
1531
1532 theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
1533 offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
1534 for (i=0; i < (ssize_t) cossin_theta_size; i++)
1535 {
1536 cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
1537 sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
1538 }
1539
1540 clStatus = clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
1541 clStatus |= clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
1542 if (clStatus != CL_SUCCESS)
1543 {
1544 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
1545 goto cleanup;
1546 }
1547
1548 /* get the OpenCL kernel */
1549 radialBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RadialBlur");
1550 if (radialBlurKernel == NULL)
1551 {
1552 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1553 goto cleanup;
1554 }
1555
1556
1557 /* set the kernel arguments */
1558 i = 0;
1559 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1560 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1561
1562 GetMagickPixelPacket(inputImage,&bias);
1563 biasPixel.s[0] = bias.red;
1564 biasPixel.s[1] = bias.green;
1565 biasPixel.s[2] = bias.blue;
1566 biasPixel.s[3] = bias.opacity;
1567 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel);
1568 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel);
1569
1570 matte = (inputImage->matte == MagickTrue)?1:0;
1571 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte);
1572
1573 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter);
1574
1575 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
1576 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
1577 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
1578 if (clStatus != CL_SUCCESS)
1579 {
1580 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1581 goto cleanup;
1582 }
1583
1584
1585 global_work_size[0] = inputImage->columns;
1586 global_work_size[1] = inputImage->rows;
1587 /* launch the kernel */
1588 clStatus = clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
1589 if (clStatus != CL_SUCCESS)
1590 {
1591 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1592 goto cleanup;
1593 }
1594 clFlush(queue);
1595
1596 if (ALIGNED(filteredPixels,CLPixelPacket))
1597 {
1598 length = inputImage->columns * inputImage->rows;
1599 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1600 }
1601 else
1602 {
1603 length = inputImage->columns * inputImage->rows;
1604 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1605 }
1606 if (clStatus != CL_SUCCESS)
1607 {
1608 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
1609 goto cleanup;
1610 }
1611 outputReady = MagickTrue;
1612
1613cleanup:
1614 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1615 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1616 if (sinThetaBuffer!=NULL) clReleaseMemObject(sinThetaBuffer);
1617 if (cosThetaBuffer!=NULL) clReleaseMemObject(cosThetaBuffer);
1618 if (radialBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, radialBlurKernel);
1619 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1620 if (outputReady == MagickFalse)
1621 {
1622 if (filteredImage != NULL)
1623 {
1624 DestroyImage(filteredImage);
1625 filteredImage = NULL;
1626 }
1627 }
1628 return filteredImage;
1629}
1630
1631/*
1632%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1633% %
1634% %
1635% %
1636% R a d i a l B l u r I m a g e w i t h O p e n C L %
1637% %
1638% %
1639% %
1640%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1641%
1642% RadialBlurImage() applies a radial blur to the image.
1643%
1644% Andrew Protano contributed this effect.
1645%
1646% The format of the RadialBlurImage method is:
1647%
1648% Image *RadialBlurImage(const Image *image,const double angle,
1649% ExceptionInfo *exception)
1650% Image *RadialBlurImageChannel(const Image *image,const ChannelType channel,
1651% const double angle,ExceptionInfo *exception)
1652%
1653% A description of each parameter follows:
1654%
1655% o image: the image.
1656%
1657% o channel: the channel type.
1658%
1659% o angle: the angle of the radial blur.
1660%
1661% o exception: return any errors or warnings in this structure.
1662%
1663*/
1664
1665MagickExport
1666Image* AccelerateRadialBlurImage(const Image *image, const ChannelType channel, const double angle, ExceptionInfo *exception)
1667{
1668 MagickBooleanType status;
1669 Image* filteredImage;
1670
1671
1672 assert(image != NULL);
1673 assert(exception != NULL);
1674
1675 status = checkOpenCLEnvironment(exception);
1676 if (status == MagickFalse)
1677 return NULL;
1678
1679 status = checkAccelerateCondition(image, channel, exception);
1680 if (status == MagickFalse)
1681 return NULL;
1682
1683 filteredImage = ComputeRadialBlurImage(image, channel, angle, exception);
1684 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1685 return filteredImage;
1686}
1687
1688
1689
1690static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
1691 const double gain,const double threshold,ExceptionInfo *exception)
1692{
1693 MagickBooleanType outputReady = MagickFalse;
1694 Image* filteredImage = NULL;
1695 MagickCLEnv clEnv = NULL;
1696
1697 cl_int clStatus;
1698
1699 const void *inputPixels;
1700 void *filteredPixels;
1701 cl_mem_flags mem_flags;
1702
1703 KernelInfo *kernel = NULL;
1704 char geometry[MaxTextExtent];
1705
1706 cl_context context = NULL;
1707 cl_mem inputImageBuffer = NULL;
1708 cl_mem filteredImageBuffer = NULL;
1709 cl_mem tempImageBuffer = NULL;
1710 cl_mem imageKernelBuffer = NULL;
1711 cl_kernel blurRowKernel = NULL;
1712 cl_kernel unsharpMaskBlurColumnKernel = NULL;
1713 cl_command_queue queue = NULL;
1714
1715 void* hostPtr;
1716 float* kernelBufferPtr;
1717 MagickSizeType length;
1718 unsigned int kernelWidth;
1719 float fGain;
1720 float fThreshold;
1721 unsigned int imageColumns, imageRows;
1722 int chunkSize;
1723 unsigned int i;
1724
1725 clEnv = GetDefaultOpenCLEnv();
1726 context = GetOpenCLContext(clEnv);
1727 queue = AcquireOpenCLCommandQueue(clEnv);
1728
1729 /* Create and initialize OpenCL buffers. */
1730 {
1731 inputPixels = NULL;
1732 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1733 if (inputPixels == (const void *) NULL)
1734 {
1735 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
1736 goto cleanup;
1737 }
1738
1739 /* If the host pointer is aligned to the size of CLPixelPacket,
1740 then use the host buffer directly from the GPU; otherwise,
1741 create a buffer on the GPU and copy the data over */
1742 if (ALIGNED(inputPixels,CLPixelPacket))
1743 {
1744 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1745 }
1746 else
1747 {
1748 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1749 }
1750 /* create a CL buffer from image pixel buffer */
1751 length = inputImage->columns * inputImage->rows;
1752 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1753 if (clStatus != CL_SUCCESS)
1754 {
1755 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1756 goto cleanup;
1757 }
1758 }
1759
1760 /* create output */
1761 {
1762 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1763 assert(filteredImage != NULL);
1764 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
1765 {
1766 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1767 goto cleanup;
1768 }
1769 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1770 if (filteredPixels == (void *) NULL)
1771 {
1772 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1773 goto cleanup;
1774 }
1775
1776 if (ALIGNED(filteredPixels,CLPixelPacket))
1777 {
1778 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1779 hostPtr = filteredPixels;
1780 }
1781 else
1782 {
1783 mem_flags = CL_MEM_WRITE_ONLY;
1784 hostPtr = NULL;
1785 }
1786
1787 /* create a CL buffer from image pixel buffer */
1788 length = inputImage->columns * inputImage->rows;
1789 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1790 if (clStatus != CL_SUCCESS)
1791 {
1792 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1793 goto cleanup;
1794 }
1795 }
1796
1797 /* create the blur kernel */
1798 {
1799 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1800 kernel=AcquireKernelInfo(geometry);
1801 if (kernel == (KernelInfo *) NULL)
1802 {
1803 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
1804 goto cleanup;
1805 }
1806
1807 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
1808 if (clStatus != CL_SUCCESS)
1809 {
1810 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1811 goto cleanup;
1812 }
1813
1814
1815 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1816 if (clStatus != CL_SUCCESS)
1817 {
1818 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
1819 goto cleanup;
1820 }
1821 for (i = 0; i < kernel->width; i++)
1822 {
1823 kernelBufferPtr[i] = (float) kernel->values[i];
1824 }
1825 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1826 if (clStatus != CL_SUCCESS)
1827 {
1828 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
1829 goto cleanup;
1830 }
1831 }
1832
1833 {
1834 /* create temp buffer */
1835 {
1836 length = inputImage->columns * inputImage->rows;
1837 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1838 if (clStatus != CL_SUCCESS)
1839 {
1840 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1841 goto cleanup;
1842 }
1843 }
1844
1845 /* get the opencl kernel */
1846 {
1847 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
1848 if (blurRowKernel == NULL)
1849 {
1850 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1851 goto cleanup;
1852 };
1853
1854 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
1855 if (unsharpMaskBlurColumnKernel == NULL)
1856 {
1857 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1858 goto cleanup;
1859 };
1860 }
1861
1862 {
1863 chunkSize = 256;
1864
1865 imageColumns = inputImage->columns;
1866 imageRows = inputImage->rows;
1867
1868 kernelWidth = kernel->width;
1869
1870 /* set the kernel arguments */
1871 i = 0;
1872 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1873 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1874 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1875 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1876 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1877 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1878 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1879 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1880 if (clStatus != CL_SUCCESS)
1881 {
1882 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1883 goto cleanup;
1884 }
1885 }
1886
1887 /* launch the kernel */
1888 {
1889 size_t gsize[2];
1890 size_t wsize[2];
1891
1892 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
1893 gsize[1] = inputImage->rows;
1894 wsize[0] = chunkSize;
1895 wsize[1] = 1;
1896
1897 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1898 if (clStatus != CL_SUCCESS)
1899 {
1900 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1901 goto cleanup;
1902 }
1903 clFlush(queue);
1904 }
1905
1906
1907 {
1908 chunkSize = 256;
1909 imageColumns = inputImage->columns;
1910 imageRows = inputImage->rows;
1911 kernelWidth = kernel->width;
1912 fGain = (float)gain;
1913 fThreshold = (float)threshold;
1914
1915 i = 0;
1916 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1917 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1918 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1919 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1920 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1921 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
1922 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
1923 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
1924 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1925 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1926 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
1927 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
1928
1929 if (clStatus != CL_SUCCESS)
1930 {
1931 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1932 goto cleanup;
1933 }
1934 }
1935
1936 /* launch the kernel */
1937 {
1938 size_t gsize[2];
1939 size_t wsize[2];
1940
1941 gsize[0] = inputImage->columns;
1942 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
1943 wsize[0] = 1;
1944 wsize[1] = chunkSize;
1945
1946 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1947 if (clStatus != CL_SUCCESS)
1948 {
1949 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1950 goto cleanup;
1951 }
1952 clFlush(queue);
1953 }
1954
1955 }
1956
1957 /* get result */
1958 if (ALIGNED(filteredPixels,CLPixelPacket))
1959 {
1960 length = inputImage->columns * inputImage->rows;
1961 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1962 }
1963 else
1964 {
1965 length = inputImage->columns * inputImage->rows;
1966 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1967 }
1968 if (clStatus != CL_SUCCESS)
1969 {
1970 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
1971 goto cleanup;
1972 }
1973
1974 outputReady = MagickTrue;
1975
1976cleanup:
1977 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
1978 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1979 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1980 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1981 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1982 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1983 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
1984 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1985 if (outputReady == MagickFalse)
1986 {
1987 if (filteredImage != NULL)
1988 {
1989 DestroyImage(filteredImage);
1990 filteredImage = NULL;
1991 }
1992 }
1993 return filteredImage;
1994}
1995
1996
1997static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
1998 const double gain,const double threshold,ExceptionInfo *exception)
1999{
2000 MagickBooleanType outputReady = MagickFalse;
2001 Image* filteredImage = NULL;
2002 MagickCLEnv clEnv = NULL;
2003
2004 cl_int clStatus;
2005
2006 const void *inputPixels;
2007 void *filteredPixels;
2008 cl_mem_flags mem_flags;
2009
2010 KernelInfo *kernel = NULL;
2011 char geometry[MaxTextExtent];
2012
2013 cl_context context = NULL;
2014 cl_mem inputImageBuffer = NULL;
2015 cl_mem filteredImageBuffer = NULL;
2016 cl_mem tempImageBuffer = NULL;
2017 cl_mem imageKernelBuffer = NULL;
2018 cl_kernel blurRowKernel = NULL;
2019 cl_kernel unsharpMaskBlurColumnKernel = NULL;
2020 cl_command_queue queue = NULL;
2021
2022 void* hostPtr;
2023 float* kernelBufferPtr;
2024 MagickSizeType length;
2025 unsigned int kernelWidth;
2026 float fGain;
2027 float fThreshold;
2028 unsigned int imageColumns, imageRows;
2029 int chunkSize;
2030 unsigned int i;
2031
2032 clEnv = GetDefaultOpenCLEnv();
2033 context = GetOpenCLContext(clEnv);
2034 queue = AcquireOpenCLCommandQueue(clEnv);
2035
2036 /* Create and initialize OpenCL buffers. */
2037 {
2038 inputPixels = NULL;
2039 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2040 if (inputPixels == (const void *) NULL)
2041 {
2042 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
2043 goto cleanup;
2044 }
2045
2046 /* If the host pointer is aligned to the size of CLPixelPacket,
2047 then use the host buffer directly from the GPU; otherwise,
2048 create a buffer on the GPU and copy the data over */
2049 if (ALIGNED(inputPixels,CLPixelPacket))
2050 {
2051 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2052 }
2053 else
2054 {
2055 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2056 }
2057 /* create a CL buffer from image pixel buffer */
2058 length = inputImage->columns * inputImage->rows;
2059 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2060 if (clStatus != CL_SUCCESS)
2061 {
2062 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2063 goto cleanup;
2064 }
2065 }
2066
2067 /* create output */
2068 {
2069 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
2070 assert(filteredImage != NULL);
2071 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
2072 {
2073 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2074 goto cleanup;
2075 }
2076 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2077 if (filteredPixels == (void *) NULL)
2078 {
2079 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2080 goto cleanup;
2081 }
2082
2083 if (ALIGNED(filteredPixels,CLPixelPacket))
2084 {
2085 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2086 hostPtr = filteredPixels;
2087 }
2088 else
2089 {
2090 mem_flags = CL_MEM_WRITE_ONLY;
2091 hostPtr = NULL;
2092 }
2093
2094 /* create a CL buffer from image pixel buffer */
2095 length = inputImage->columns * inputImage->rows;
2096 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2097 if (clStatus != CL_SUCCESS)
2098 {
2099 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2100 goto cleanup;
2101 }
2102 }
2103
2104 /* create the blur kernel */
2105 {
2106 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
2107 kernel=AcquireKernelInfo(geometry);
2108 if (kernel == (KernelInfo *) NULL)
2109 {
2110 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
2111 goto cleanup;
2112 }
2113
2114 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
2115 if (clStatus != CL_SUCCESS)
2116 {
2117 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2118 goto cleanup;
2119 }
2120
2121
2122 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
2123 if (clStatus != CL_SUCCESS)
2124 {
2125 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
2126 goto cleanup;
2127 }
2128 for (i = 0; i < kernel->width; i++)
2129 {
2130 kernelBufferPtr[i] = (float) kernel->values[i];
2131 }
2132 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
2133 if (clStatus != CL_SUCCESS)
2134 {
2135 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
2136 goto cleanup;
2137 }
2138 }
2139
2140 {
2141 unsigned int offsetRows;
2142 unsigned int sec;
2143
2144 /* create temp buffer */
2145 {
2146 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
2147 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
2148 if (clStatus != CL_SUCCESS)
2149 {
2150 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2151 goto cleanup;
2152 }
2153 }
2154
2155 /* get the opencl kernel */
2156 {
2157 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
2158 if (blurRowKernel == NULL)
2159 {
2160 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2161 goto cleanup;
2162 };
2163
2164 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumnSection");
2165 if (unsharpMaskBlurColumnKernel == NULL)
2166 {
2167 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2168 goto cleanup;
2169 };
2170 }
2171
2172 for (sec = 0; sec < 2; sec++)
2173 {
2174 {
2175 chunkSize = 256;
2176
2177 imageColumns = inputImage->columns;
2178 if (sec == 0)
2179 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
2180 else
2181 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
2182
2183 offsetRows = sec * inputImage->rows / 2;
2184
2185 kernelWidth = kernel->width;
2186
2187 /* set the kernel arguments */
2188 i = 0;
2189 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2190 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2191 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
2192 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2193 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2194 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2195 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2196 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
2197 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2198 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
2199 if (clStatus != CL_SUCCESS)
2200 {
2201 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2202 goto cleanup;
2203 }
2204 }
2205 /* launch the kernel */
2206 {
2207 size_t gsize[2];
2208 size_t wsize[2];
2209
2210 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
2211 gsize[1] = imageRows;
2212 wsize[0] = chunkSize;
2213 wsize[1] = 1;
2214
2215 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2216 if (clStatus != CL_SUCCESS)
2217 {
2218 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2219 goto cleanup;
2220 }
2221 clFlush(queue);
2222 }
2223
2224
2225 {
2226 chunkSize = 256;
2227
2228 imageColumns = inputImage->columns;
2229 if (sec == 0)
2230 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
2231 else
2232 imageRows = (inputImage->rows - inputImage->rows / 2);
2233
2234 offsetRows = sec * inputImage->rows / 2;
2235
2236 kernelWidth = kernel->width;
2237
2238 fGain = (float)gain;
2239 fThreshold = (float)threshold;
2240
2241 i = 0;
2242 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2243 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2244 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2245 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2246 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2247 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
2248 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
2249 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
2250 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2251 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2252 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
2253 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
2254 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2255 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
2256
2257 if (clStatus != CL_SUCCESS)
2258 {
2259 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2260 goto cleanup;
2261 }
2262 }
2263
2264 /* launch the kernel */
2265 {
2266 size_t gsize[2];
2267 size_t wsize[2];
2268
2269 gsize[0] = imageColumns;
2270 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
2271 wsize[0] = 1;
2272 wsize[1] = chunkSize;
2273
2274 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2275 if (clStatus != CL_SUCCESS)
2276 {
2277 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2278 goto cleanup;
2279 }
2280 clFlush(queue);
2281 }
2282 }
2283 }
2284
2285 /* get result */
2286 if (ALIGNED(filteredPixels,CLPixelPacket))
2287 {
2288 length = inputImage->columns * inputImage->rows;
2289 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2290 }
2291 else
2292 {
2293 length = inputImage->columns * inputImage->rows;
2294 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2295 }
2296 if (clStatus != CL_SUCCESS)
2297 {
2298 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
2299 goto cleanup;
2300 }
2301
2302 outputReady = MagickTrue;
2303
2304cleanup:
2305 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
2306 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2307 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2308 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2309 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
2310 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
2311 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
2312 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2313 if (outputReady == MagickFalse)
2314 {
2315 if (filteredImage != NULL)
2316 {
2317 DestroyImage(filteredImage);
2318 filteredImage = NULL;
2319 }
2320 }
2321 return filteredImage;
2322}
2323
2324
2325/*
2326%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2327% %
2328% %
2329% %
2330% U n s h a r p M a s k I m a g e w i t h O p e n C L %
2331% %
2332% %
2333% %
2334%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2335%
2336% UnsharpMaskImage() sharpens one or more image channels. We convolve the
2337% image with a Gaussian operator of the given radius and standard deviation
2338% (sigma). For reasonable results, radius should be larger than sigma. Use a
2339% radius of 0 and UnsharpMaskImage() selects a suitable radius for you.
2340%
2341% The format of the UnsharpMaskImage method is:
2342%
2343% Image *UnsharpMaskImage(const Image *image,const double radius,
2344% const double sigma,const double amount,const double threshold,
2345% ExceptionInfo *exception)
2346% Image *UnsharpMaskImageChannel(const Image *image,
2347% const ChannelType channel,const double radius,const double sigma,
2348% const double gain,const double threshold,ExceptionInfo *exception)
2349%
2350% A description of each parameter follows:
2351%
2352% o image: the image.
2353%
2354% o channel: the channel type.
2355%
2356% o radius: the radius of the Gaussian, in pixels, not counting the center
2357% pixel.
2358%
2359% o sigma: the standard deviation of the Gaussian, in pixels.
2360%
2361% o gain: the percentage of the difference between the original and the
2362% blur image that is added back into the original.
2363%
2364% o threshold: the threshold in pixels needed to apply the diffence gain.
2365%
2366% o exception: return any errors or warnings in this structure.
2367%
2368*/
2369
2370
2371MagickExport
2372Image* AccelerateUnsharpMaskImage(const Image *image, const ChannelType channel,const double radius,const double sigma,
2373 const double gain,const double threshold,ExceptionInfo *exception)
2374{
2375 MagickBooleanType status;
2376 Image* filteredImage;
2377
2378
2379 assert(image != NULL);
2380 assert(exception != NULL);
2381
2382 status = checkOpenCLEnvironment(exception);
2383 if (status == MagickFalse)
2384 return NULL;
2385
2386 status = checkAccelerateCondition(image, channel, exception);
2387 if (status == MagickFalse)
2388 return NULL;
2389
2390 if (splitImage(image) && (image->rows / 2 > radius))
2391 filteredImage = ComputeUnsharpMaskImageSection(image,channel,radius,sigma,gain,threshold,exception);
2392 else
2393 filteredImage = ComputeUnsharpMaskImage(image,channel,radius,sigma,gain,threshold,exception);
2394 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2395 return filteredImage;
2396
2397}
2398
cristyf034abb2013-11-24 14:16:14 +00002399static MagickBooleanType resizeHorizontalFilter(cl_mem inputImage
2400 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2401 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2402 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float xFactor
2403 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2404{
2405 MagickBooleanType status = MagickFalse;
2406
2407 float scale, support;
2408 unsigned int i;
2409 cl_kernel horizontalKernel = NULL;
2410 cl_int clStatus;
2411 size_t global_work_size[2];
2412 size_t local_work_size[2];
2413 int resizeFilterType, resizeWindowType;
2414 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2415 size_t totalLocalMemorySize;
2416 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2417 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2418 size_t deviceLocalMemorySize;
2419 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2420
2421 const unsigned int workgroupSize = 256;
2422 unsigned int pixelPerWorkgroup;
2423 unsigned int chunkSize;
2424
2425 /*
2426 Apply filter to resize vertically from image to resize image.
2427 */
cristye85d0f72013-11-27 02:25:43 +00002428 scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
cristyf034abb2013-11-24 14:16:14 +00002429 support=scale*GetResizeFilterSupport(resizeFilter);
2430 if (support < 0.5)
2431 {
2432 /*
2433 Support too small even for nearest neighbour: Reduce to point
2434 sampling.
2435 */
2436 support=(MagickRealType) 0.5;
2437 scale=1.0;
2438 }
2439 scale=PerceptibleReciprocal(scale);
2440
2441 if (resizedColumns < workgroupSize)
2442 {
2443 chunkSize = 32;
2444 pixelPerWorkgroup = 32;
2445 }
2446 else
2447 {
2448 chunkSize = workgroupSize;
2449 pixelPerWorkgroup = workgroupSize;
2450 }
2451
2452 /* get the local memory size supported by the device */
2453 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2454
2455 while(1)
2456 {
2457 /* calculate the local memory size needed per workgroup */
2458 cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
2459 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
2460 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2461 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2462 totalLocalMemorySize = imageCacheLocalMemorySize;
2463
2464 /* local size for the pixel accumulator */
2465 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2466 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2467
2468 /* local memory size for the weight accumulator */
2469 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2470 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2471
2472 /* local memory size for the gamma accumulator */
2473 if (matte == 0)
2474 gammaAccumulatorLocalMemorySize = sizeof(float);
2475 else
2476 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2477 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2478
2479 if (totalLocalMemorySize <= deviceLocalMemorySize)
2480 break;
2481 else
2482 {
2483 pixelPerWorkgroup = pixelPerWorkgroup/2;
2484 chunkSize = chunkSize/2;
2485 if (pixelPerWorkgroup == 0
2486 || chunkSize == 0)
2487 {
2488 /* quit, fallback to CPU */
2489 goto cleanup;
2490 }
2491 }
2492 }
2493
2494 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2495 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2496
2497
2498 if (resizeFilterType == SincFastWeightingFunction
2499 && resizeWindowType == SincFastWeightingFunction)
2500 {
2501 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilterSinc");
2502 }
2503 else
2504 {
2505 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilter");
2506 }
2507 if (horizontalKernel == NULL)
2508 {
2509 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2510 goto cleanup;
2511 }
2512
2513 i = 0;
2514 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2515 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2516 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2517 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2518 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
2519 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2520
2521 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2522 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2523
2524 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2525 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2526 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2527
2528 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2529 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2530
2531 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2532 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2533
2534 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2535 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2536
2537 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2538 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2539
2540
2541 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2542 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2543 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2544 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2545
2546
2547 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2548 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2549 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2550
2551 if (clStatus != CL_SUCCESS)
2552 {
2553 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2554 goto cleanup;
2555 }
2556
2557 global_work_size[0] = (resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2558 global_work_size[1] = resizedRows;
2559
2560 local_work_size[0] = workgroupSize;
2561 local_work_size[1] = 1;
2562 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2563 if (clStatus != CL_SUCCESS)
2564 {
2565 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2566 goto cleanup;
2567 }
2568 clFlush(queue);
2569 status = MagickTrue;
2570
2571
2572cleanup:
2573 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2574
2575 return status;
2576}
2577
2578
2579static MagickBooleanType resizeVerticalFilter(cl_mem inputImage
2580 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2581 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2582 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float yFactor
2583 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2584{
2585 MagickBooleanType status = MagickFalse;
2586
2587 float scale, support;
2588 unsigned int i;
2589 cl_kernel horizontalKernel = NULL;
2590 cl_int clStatus;
2591 size_t global_work_size[2];
2592 size_t local_work_size[2];
2593 int resizeFilterType, resizeWindowType;
2594 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2595 size_t totalLocalMemorySize;
2596 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2597 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2598 size_t deviceLocalMemorySize;
2599 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2600
2601 const unsigned int workgroupSize = 256;
2602 unsigned int pixelPerWorkgroup;
2603 unsigned int chunkSize;
2604
2605 /*
2606 Apply filter to resize vertically from image to resize image.
2607 */
cristye85d0f72013-11-27 02:25:43 +00002608 scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
cristyf034abb2013-11-24 14:16:14 +00002609 support=scale*GetResizeFilterSupport(resizeFilter);
2610 if (support < 0.5)
2611 {
2612 /*
2613 Support too small even for nearest neighbour: Reduce to point
2614 sampling.
2615 */
2616 support=(MagickRealType) 0.5;
2617 scale=1.0;
2618 }
2619 scale=PerceptibleReciprocal(scale);
2620
2621 if (resizedRows < workgroupSize)
2622 {
2623 chunkSize = 32;
2624 pixelPerWorkgroup = 32;
2625 }
2626 else
2627 {
2628 chunkSize = workgroupSize;
2629 pixelPerWorkgroup = workgroupSize;
2630 }
2631
2632 /* get the local memory size supported by the device */
2633 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2634
2635 while(1)
2636 {
2637 /* calculate the local memory size needed per workgroup */
2638 cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
2639 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
2640 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2641 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2642 totalLocalMemorySize = imageCacheLocalMemorySize;
2643
2644 /* local size for the pixel accumulator */
2645 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2646 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2647
2648 /* local memory size for the weight accumulator */
2649 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2650 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2651
2652 /* local memory size for the gamma accumulator */
2653 if (matte == 0)
2654 gammaAccumulatorLocalMemorySize = sizeof(float);
2655 else
2656 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2657 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2658
2659 if (totalLocalMemorySize <= deviceLocalMemorySize)
2660 break;
2661 else
2662 {
2663 pixelPerWorkgroup = pixelPerWorkgroup/2;
2664 chunkSize = chunkSize/2;
2665 if (pixelPerWorkgroup == 0
2666 || chunkSize == 0)
2667 {
2668 /* quit, fallback to CPU */
2669 goto cleanup;
2670 }
2671 }
2672 }
2673
2674 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2675 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2676
2677 if (resizeFilterType == SincFastWeightingFunction
2678 && resizeWindowType == SincFastWeightingFunction)
2679 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilterSinc");
2680 else
2681 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilter");
2682
2683 if (horizontalKernel == NULL)
2684 {
2685 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2686 goto cleanup;
2687 }
2688
2689 i = 0;
2690 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2691 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2692 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2693 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2694 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor);
2695 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2696
2697 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2698 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2699
2700 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2701 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2702 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2703
2704 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2705 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2706
2707 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2708 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2709
2710 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2711 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2712
2713 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2714 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2715
2716
2717 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2718 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2719 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2720 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2721
2722
2723 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2724 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2725 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2726
2727 if (clStatus != CL_SUCCESS)
2728 {
2729 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2730 goto cleanup;
2731 }
2732
2733 global_work_size[0] = resizedColumns;
2734 global_work_size[1] = (resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2735
2736 local_work_size[0] = 1;
2737 local_work_size[1] = workgroupSize;
2738 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2739 if (clStatus != CL_SUCCESS)
2740 {
2741 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2742 goto cleanup;
2743 }
2744 clFlush(queue);
2745 status = MagickTrue;
2746
2747
2748cleanup:
2749 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2750
2751 return status;
2752}
2753
2754
2755
2756static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedColumns, const size_t resizedRows
2757 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
2758{
2759
2760 MagickBooleanType outputReady = MagickFalse;
2761 Image* filteredImage = NULL;
2762 MagickCLEnv clEnv = NULL;
2763
2764 cl_int clStatus;
2765 MagickBooleanType status;
2766 const void *inputPixels;
2767 void* filteredPixels;
2768 void* hostPtr;
2769 const MagickRealType* resizeFilterCoefficient;
2770 float* mappedCoefficientBuffer;
2771 float xFactor, yFactor;
2772 MagickSizeType length;
2773
2774 cl_mem_flags mem_flags;
2775 cl_context context = NULL;
2776 cl_mem inputImageBuffer = NULL;
2777 cl_mem tempImageBuffer = NULL;
2778 cl_mem filteredImageBuffer = NULL;
2779 cl_mem cubicCoefficientsBuffer = NULL;
2780 cl_command_queue queue = NULL;
2781
2782 unsigned int i;
2783
2784 clEnv = GetDefaultOpenCLEnv();
2785 context = GetOpenCLContext(clEnv);
2786
2787 /* Create and initialize OpenCL buffers. */
2788 inputPixels = NULL;
2789 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2790 if (inputPixels == (const void *) NULL)
2791 {
2792 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
2793 goto cleanup;
2794 }
2795
2796 /* If the host pointer is aligned to the size of CLPixelPacket,
2797 then use the host buffer directly from the GPU; otherwise,
2798 create a buffer on the GPU and copy the data over */
2799 if (ALIGNED(inputPixels,CLPixelPacket))
2800 {
2801 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2802 }
2803 else
2804 {
2805 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2806 }
2807 /* create a CL buffer from image pixel buffer */
2808 length = inputImage->columns * inputImage->rows;
2809 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2810 if (clStatus != CL_SUCCESS)
2811 {
2812 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2813 goto cleanup;
2814 }
2815
2816 cubicCoefficientsBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
2817 if (clStatus != CL_SUCCESS)
2818 {
2819 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2820 goto cleanup;
2821 }
2822 queue = AcquireOpenCLCommandQueue(clEnv);
2823 mappedCoefficientBuffer = (float*)clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float)
2824 , 0, NULL, NULL, &clStatus);
2825 if (clStatus != CL_SUCCESS)
2826 {
2827 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
2828 goto cleanup;
2829 }
2830 resizeFilterCoefficient = GetResizeFilterCoefficient(resizeFilter);
2831 for (i = 0; i < 7; i++)
2832 {
2833 mappedCoefficientBuffer[i] = (float) resizeFilterCoefficient[i];
2834 }
2835 clStatus = clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL);
2836 if (clStatus != CL_SUCCESS)
2837 {
2838 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
2839 goto cleanup;
2840 }
2841
2842 filteredImage = CloneImage(inputImage,resizedColumns,resizedRows,MagickTrue,exception);
2843 if (filteredImage == NULL)
2844 goto cleanup;
2845
2846 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
2847 {
2848 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2849 goto cleanup;
2850 }
2851 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2852 if (filteredPixels == (void *) NULL)
2853 {
2854 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2855 goto cleanup;
2856 }
2857
2858 if (ALIGNED(filteredPixels,CLPixelPacket))
2859 {
2860 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2861 hostPtr = filteredPixels;
2862 }
2863 else
2864 {
2865 mem_flags = CL_MEM_WRITE_ONLY;
2866 hostPtr = NULL;
2867 }
2868
2869 /* create a CL buffer from image pixel buffer */
2870 length = filteredImage->columns * filteredImage->rows;
2871 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2872 if (clStatus != CL_SUCCESS)
2873 {
2874 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2875 goto cleanup;
2876 }
2877
2878 xFactor=(float) resizedColumns/(float) inputImage->columns;
2879 yFactor=(float) resizedRows/(float) inputImage->rows;
2880 if (xFactor > yFactor)
2881 {
2882
2883 length = resizedColumns*inputImage->rows;
2884 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2885 if (clStatus != CL_SUCCESS)
2886 {
2887 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2888 goto cleanup;
2889 }
2890
2891 status = resizeHorizontalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->matte == MagickTrue)?1:0
2892 , tempImageBuffer, resizedColumns, inputImage->rows
2893 , resizeFilter, cubicCoefficientsBuffer
2894 , xFactor, clEnv, queue, exception);
2895 if (status != MagickTrue)
2896 goto cleanup;
2897
2898 status = resizeVerticalFilter(tempImageBuffer, resizedColumns, inputImage->rows, (inputImage->matte == MagickTrue)?1:0
2899 , filteredImageBuffer, resizedColumns, resizedRows
2900 , resizeFilter, cubicCoefficientsBuffer
2901 , yFactor, clEnv, queue, exception);
2902 if (status != MagickTrue)
2903 goto cleanup;
2904 }
2905 else
2906 {
2907 length = inputImage->columns*resizedRows;
2908 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2909 if (clStatus != CL_SUCCESS)
2910 {
2911 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2912 goto cleanup;
2913 }
2914
2915 status = resizeVerticalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->matte == MagickTrue)?1:0
2916 , tempImageBuffer, inputImage->columns, resizedRows
2917 , resizeFilter, cubicCoefficientsBuffer
2918 , yFactor, clEnv, queue, exception);
2919 if (status != MagickTrue)
2920 goto cleanup;
2921
2922 status = resizeHorizontalFilter(tempImageBuffer, inputImage->columns, resizedRows, (inputImage->matte == MagickTrue)?1:0
2923 , filteredImageBuffer, resizedColumns, resizedRows
2924 , resizeFilter, cubicCoefficientsBuffer
2925 , xFactor, clEnv, queue, exception);
2926 if (status != MagickTrue)
2927 goto cleanup;
2928 }
2929 length = resizedColumns*resizedRows;
2930 if (ALIGNED(filteredPixels,CLPixelPacket))
2931 {
2932 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2933 }
2934 else
2935 {
2936 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2937 }
2938 if (clStatus != CL_SUCCESS)
2939 {
2940 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
2941 goto cleanup;
2942 }
2943 outputReady = MagickTrue;
2944
2945cleanup:
2946 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2947 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2948 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2949 if (cubicCoefficientsBuffer!=NULL) clReleaseMemObject(cubicCoefficientsBuffer);
2950 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2951 if (outputReady == MagickFalse)
2952 {
2953 if (filteredImage != NULL)
2954 {
2955 DestroyImage(filteredImage);
2956 filteredImage = NULL;
2957 }
2958 }
2959
2960 return filteredImage;
2961}
2962
2963const ResizeWeightingFunctionType supportedResizeWeighting[] =
2964{
2965 BoxWeightingFunction
2966 ,TriangleWeightingFunction
2967 ,HanningWeightingFunction
2968 ,HammingWeightingFunction
2969 ,BlackmanWeightingFunction
2970 ,CubicBCWeightingFunction
2971 ,SincWeightingFunction
2972 ,SincFastWeightingFunction
2973 ,LastWeightingFunction
2974};
2975
2976static MagickBooleanType gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)
2977{
2978 MagickBooleanType supported = MagickFalse;
2979 unsigned int i;
2980 for (i = 0; ;i++)
2981 {
2982 if (supportedResizeWeighting[i] == LastWeightingFunction)
2983 break;
2984 if (supportedResizeWeighting[i] == f)
2985 {
2986 supported = MagickTrue;
2987 break;
2988 }
2989 }
2990 return supported;
2991}
2992
2993
2994/*
2995%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2996% %
2997% %
2998% %
2999% A c c e l e r a t e R e s i z e I m a g e %
3000% %
3001% %
3002% %
3003%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3004%
3005% AccelerateResizeImage() is an OpenCL implementation of ResizeImage()
3006%
3007% AccelerateResizeImage() scales an image to the desired dimensions, using the given
3008% filter (see AcquireFilterInfo()).
3009%
3010% If an undefined filter is given the filter defaults to Mitchell for a
3011% colormapped image, a image with a matte channel, or if the image is
3012% enlarged. Otherwise the filter defaults to a Lanczos.
3013%
3014% AccelerateResizeImage() was inspired by Paul Heckbert's "zoom" program.
3015%
3016% The format of the AccelerateResizeImage method is:
3017%
3018% Image *ResizeImage(Image *image,const size_t columns,
3019% const size_t rows, const ResizeFilter* filter,
cristy3f6d1482010-01-20 21:01:21 +00003020% ExceptionInfo *exception)
3021%
3022% A description of each parameter follows:
3023%
3024% o image: the image.
3025%
cristyf034abb2013-11-24 14:16:14 +00003026% o columns: the number of columns in the scaled image.
cristy3f6d1482010-01-20 21:01:21 +00003027%
cristyf034abb2013-11-24 14:16:14 +00003028% o rows: the number of rows in the scaled image.
3029%
3030% o filter: Image filter to use.
cristy3f6d1482010-01-20 21:01:21 +00003031%
3032% o exception: return any errors or warnings in this structure.
3033%
3034*/
cristyd43a46b2010-01-21 02:13:41 +00003035
cristyf034abb2013-11-24 14:16:14 +00003036MagickExport
3037Image* AccelerateResizeImage(const Image* image, const size_t resizedColumns, const size_t resizedRows
3038 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
cristyd43a46b2010-01-21 02:13:41 +00003039{
cristyf034abb2013-11-24 14:16:14 +00003040 MagickBooleanType status;
3041 Image* filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003042
cristyf034abb2013-11-24 14:16:14 +00003043 assert(image != NULL);
3044 assert(resizeFilter != NULL);
cristyd43a46b2010-01-21 02:13:41 +00003045
cristyf034abb2013-11-24 14:16:14 +00003046 status = checkOpenCLEnvironment(exception);
3047 if (status == MagickFalse)
3048 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003049
cristyf034abb2013-11-24 14:16:14 +00003050 status = checkAccelerateCondition(image, AllChannels, exception);
3051 if (status == MagickFalse)
3052 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003053
cristyf034abb2013-11-24 14:16:14 +00003054 if (gpuSupportedResizeWeighting(GetResizeFilterWeightingType(resizeFilter)) == MagickFalse
3055 || gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(resizeFilter)) == MagickFalse)
3056 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003057
cristyf034abb2013-11-24 14:16:14 +00003058 filteredImage = ComputeResizeImage(image,resizedColumns,resizedRows,resizeFilter,exception);
3059 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3060 return filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003061
cristyd43a46b2010-01-21 02:13:41 +00003062}
3063
cristyd43a46b2010-01-21 02:13:41 +00003064
cristyf034abb2013-11-24 14:16:14 +00003065static MagickBooleanType ComputeContrastImage(Image *inputImage, const MagickBooleanType sharpen, ExceptionInfo *exception)
3066{
3067 MagickBooleanType outputReady = MagickFalse;
3068 MagickCLEnv clEnv = NULL;
3069
3070 cl_int clStatus;
3071 size_t global_work_size[2];
3072
3073 void *inputPixels = NULL;
3074 MagickSizeType length;
3075 unsigned int uSharpen;
3076 unsigned int i;
3077
3078 cl_mem_flags mem_flags;
3079 cl_context context = NULL;
3080 cl_mem inputImageBuffer = NULL;
3081 cl_kernel filterKernel = NULL;
3082 cl_command_queue queue = NULL;
3083
3084 clEnv = GetDefaultOpenCLEnv();
3085 context = GetOpenCLContext(clEnv);
3086
3087 /* Create and initialize OpenCL buffers. */
3088 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3089 if (inputPixels == (void *) NULL)
3090 {
3091 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3092 goto cleanup;
3093 }
3094
3095 /* If the host pointer is aligned to the size of CLPixelPacket,
3096 then use the host buffer directly from the GPU; otherwise,
3097 create a buffer on the GPU and copy the data over */
3098 if (ALIGNED(inputPixels,CLPixelPacket))
3099 {
3100 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3101 }
3102 else
3103 {
3104 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3105 }
3106 /* create a CL buffer from image pixel buffer */
3107 length = inputImage->columns * inputImage->rows;
3108 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3109 if (clStatus != CL_SUCCESS)
3110 {
3111 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3112 goto cleanup;
3113 }
3114
3115 filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
3116 if (filterKernel == NULL)
3117 {
3118 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3119 goto cleanup;
3120 }
3121
3122 i = 0;
3123 clStatus=clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3124
3125 uSharpen = (sharpen == MagickFalse)?0:1;
3126 clStatus|=clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
3127 if (clStatus != CL_SUCCESS)
3128 {
3129 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3130 goto cleanup;
3131 }
3132
3133 global_work_size[0] = inputImage->columns;
3134 global_work_size[1] = inputImage->rows;
3135 /* launch the kernel */
3136 queue = AcquireOpenCLCommandQueue(clEnv);
3137 clStatus = clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3138 if (clStatus != CL_SUCCESS)
3139 {
3140 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3141 goto cleanup;
3142 }
3143 clFlush(queue);
3144
3145 if (ALIGNED(inputPixels,CLPixelPacket))
3146 {
3147 length = inputImage->columns * inputImage->rows;
3148 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3149 }
3150 else
3151 {
3152 length = inputImage->columns * inputImage->rows;
3153 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3154 }
3155 if (clStatus != CL_SUCCESS)
3156 {
3157 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3158 goto cleanup;
3159 }
3160 outputReady = MagickTrue;
3161
3162cleanup:
3163
3164 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
3165 if (filterKernel!=NULL) RelinquishOpenCLKernel(clEnv, filterKernel);
3166 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
3167 return outputReady;
3168}
3169
3170/*
3171%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3172% %
3173% %
3174% %
3175% C o n t r a s t I m a g e w i t h O p e n C L %
3176% %
3177% %
3178% %
3179%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3180%
3181% ContrastImage() enhances the intensity differences between the lighter and
3182% darker elements of the image. Set sharpen to a MagickTrue to increase the
3183% image contrast otherwise the contrast is reduced.
3184%
3185% The format of the ContrastImage method is:
3186%
3187% MagickBooleanType ContrastImage(Image *image,
3188% const MagickBooleanType sharpen)
3189%
3190% A description of each parameter follows:
3191%
3192% o image: the image.
3193%
3194% o sharpen: Increase or decrease image contrast.
3195%
3196*/
3197
3198MagickExport
3199MagickBooleanType AccelerateContrastImage(Image* image, const MagickBooleanType sharpen, ExceptionInfo* exception)
3200{
3201 MagickBooleanType status;
3202
3203 assert(image != NULL);
3204 assert(exception != NULL);
3205
3206 status = checkOpenCLEnvironment(exception);
3207 if (status == MagickFalse)
3208 return MagickFalse;
3209
3210 status = checkAccelerateCondition(image, AllChannels, exception);
3211 if (status == MagickFalse)
3212 return MagickFalse;
3213
3214 status = ComputeContrastImage(image,sharpen,exception);
3215 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3216 return status;
3217}
3218
3219
3220
3221MagickBooleanType ComputeModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3222{
3223 register ssize_t
cristyd43a46b2010-01-21 02:13:41 +00003224 i;
3225
cristyf034abb2013-11-24 14:16:14 +00003226 cl_float
3227 bright,
3228 hue,
3229 saturation;
3230
3231 cl_int color;
3232
3233 MagickBooleanType outputReady;
3234
3235 MagickCLEnv clEnv;
3236
3237 void *inputPixels;
3238
3239 MagickSizeType length;
3240
3241 cl_context context;
3242 cl_command_queue queue;
3243 cl_kernel modulateKernel;
3244
3245 cl_mem inputImageBuffer;
3246 cl_mem_flags mem_flags;
3247
3248 cl_int clStatus;
3249
3250 Image * inputImage = image;
3251
3252 inputImageBuffer = NULL;
3253 modulateKernel = NULL;
3254
3255 assert(inputImage != (Image *) NULL);
3256 assert(inputImage->signature == MagickSignature);
3257 if (inputImage->debug != MagickFalse)
3258 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
cristyd43a46b2010-01-21 02:13:41 +00003259
3260 /*
cristyf034abb2013-11-24 14:16:14 +00003261 * initialize opencl env
3262 */
3263 clEnv = GetDefaultOpenCLEnv();
3264 context = GetOpenCLContext(clEnv);
3265 queue = AcquireOpenCLCommandQueue(clEnv);
cristyd43a46b2010-01-21 02:13:41 +00003266
cristyf034abb2013-11-24 14:16:14 +00003267 outputReady = MagickFalse;
cristyd43a46b2010-01-21 02:13:41 +00003268
cristyf034abb2013-11-24 14:16:14 +00003269 /* Create and initialize OpenCL buffers.
3270 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3271 assume this will get a writable image
3272 */
3273 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3274 if (inputPixels == (void *) NULL)
cristyd43a46b2010-01-21 02:13:41 +00003275 {
cristyf034abb2013-11-24 14:16:14 +00003276 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3277 goto cleanup;
cristyd43a46b2010-01-21 02:13:41 +00003278 }
cristyf034abb2013-11-24 14:16:14 +00003279
3280 /* If the host pointer is aligned to the size of CLPixelPacket,
3281 then use the host buffer directly from the GPU; otherwise,
3282 create a buffer on the GPU and copy the data over
3283 */
3284 if (ALIGNED(inputPixels,CLPixelPacket))
3285 {
3286 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3287 }
3288 else
3289 {
3290 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3291 }
3292 /* create a CL buffer from image pixel buffer */
3293 length = inputImage->columns * inputImage->rows;
3294 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3295 if (clStatus != CL_SUCCESS)
3296 {
3297 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3298 goto cleanup;
3299 }
3300
3301 modulateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Modulate");
3302 if (modulateKernel == NULL)
3303 {
3304 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3305 goto cleanup;
3306 }
3307
3308 bright=percent_brightness;
3309 hue=percent_hue;
3310 saturation=percent_saturation;
3311 color=colorspace;
3312
3313 i = 0;
3314 clStatus=clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3315 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3316 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3317 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3318 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3319 if (clStatus != CL_SUCCESS)
3320 {
3321 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3322 printf("no kernel\n");
3323 goto cleanup;
3324 }
3325
3326 {
3327 size_t global_work_size[2];
3328 global_work_size[0] = inputImage->columns;
3329 global_work_size[1] = inputImage->rows;
3330 /* launch the kernel */
3331 clStatus = clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3332 if (clStatus != CL_SUCCESS)
3333 {
3334 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3335 goto cleanup;
3336 }
3337 clFlush(queue);
3338 }
3339
3340 if (ALIGNED(inputPixels,CLPixelPacket))
3341 {
3342 length = inputImage->columns * inputImage->rows;
3343 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3344 }
3345 else
3346 {
3347 length = inputImage->columns * inputImage->rows;
3348 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3349 }
3350 if (clStatus != CL_SUCCESS)
3351 {
3352 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3353 goto cleanup;
3354 }
3355
3356 outputReady = MagickTrue;
3357
3358cleanup:
3359
3360 if (inputPixels) {
3361 //ReleasePixelCachePixels();
3362 inputPixels = NULL;
3363 }
3364
3365 if (inputImageBuffer!=NULL)
3366 clReleaseMemObject(inputImageBuffer);
3367 if (modulateKernel!=NULL)
3368 RelinquishOpenCLKernel(clEnv, modulateKernel);
3369 if (queue != NULL)
3370 RelinquishOpenCLCommandQueue(clEnv, queue);
3371
3372 return outputReady;
3373
cristy3f6d1482010-01-20 21:01:21 +00003374}
cristyf034abb2013-11-24 14:16:14 +00003375
3376/*
3377%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3378% %
3379% %
3380% %
3381% M o d u l a t e I m a g e w i t h O p e n C L %
3382% %
3383% %
3384% %
3385%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3386%
3387% ModulateImage() lets you control the brightness, saturation, and hue
3388% of an image. Modulate represents the brightness, saturation, and hue
3389% as one parameter (e.g. 90,150,100). If the image colorspace is HSL, the
3390% modulation is lightness, saturation, and hue. For HWB, use blackness,
3391% whiteness, and hue. And for HCL, use chrome, luma, and hue.
3392%
3393% The format of the ModulateImage method is:
3394%
3395% MagickBooleanType ModulateImage(Image *image,const char *modulate)
3396%
3397% A description of each parameter follows:
3398%
3399% o image: the image.
3400%
3401% o percent_*: Define the percent change in brightness, saturation, and
3402% hue.
3403%
3404*/
3405
3406MagickExport
3407MagickBooleanType AccelerateModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3408{
3409 MagickBooleanType status;
3410
3411 assert(image != NULL);
3412 assert(exception != NULL);
3413
3414 status = checkOpenCLEnvironment(exception);
3415 if (status == MagickFalse)
3416 return MagickFalse;
3417
3418 status = checkAccelerateCondition(image, AllChannels, exception);
3419 if (status == MagickFalse)
3420 return MagickFalse;
3421
3422 if ((colorspace != HSLColorspace && colorspace != UndefinedColorspace))
3423 return MagickFalse;
3424
3425
3426 status = ComputeModulateImage(image,percent_brightness, percent_hue, percent_saturation, colorspace, exception);
3427 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3428 return status;
3429}
3430
3431
3432MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const ChannelType channel, ExceptionInfo * _exception)
3433{
3434#define EqualizeImageTag "Equalize/Image"
3435
3436 ExceptionInfo
3437 *exception=_exception;
3438
3439 FloatPixelPacket
3440 white,
3441 black,
3442 intensity,
3443 *map;
3444
3445 cl_uint4
3446 *histogram;
3447
3448 PixelPacket
3449 *equalize_map;
3450
3451 register ssize_t
3452 i;
3453
3454 Image * image = inputImage;
3455
3456 MagickBooleanType outputReady;
3457 MagickCLEnv clEnv;
3458
3459 cl_int clStatus;
3460 size_t global_work_size[2];
3461
3462 void *inputPixels;
3463 cl_mem_flags mem_flags;
3464
3465 cl_context context;
3466 cl_mem inputImageBuffer;
3467 cl_mem histogramBuffer;
3468 cl_mem equalizeMapBuffer;
3469 cl_kernel histogramKernel;
3470 cl_kernel equalizeKernel;
3471 cl_command_queue queue;
3472 cl_int colorspace;
3473
3474 void* hostPtr;
3475
3476 MagickSizeType length;
3477
3478 inputPixels = NULL;
3479 inputImageBuffer = NULL;
3480 histogramBuffer = NULL;
3481 histogramKernel = NULL;
3482 equalizeKernel = NULL;
3483 context = NULL;
3484 queue = NULL;
3485 outputReady = MagickFalse;
3486
3487 assert(inputImage != (Image *) NULL);
3488 assert(inputImage->signature == MagickSignature);
3489 if (inputImage->debug != MagickFalse)
3490 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
3491
3492 /*
3493 Allocate and initialize histogram arrays.
3494 */
3495 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
3496 if (histogram == (cl_uint4 *) NULL)
3497 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3498
3499 /* reset histogram */
3500 (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
3501
3502 /*
3503 * initialize opencl env
3504 */
3505 clEnv = GetDefaultOpenCLEnv();
3506 context = GetOpenCLContext(clEnv);
3507 queue = AcquireOpenCLCommandQueue(clEnv);
3508
3509 /* Create and initialize OpenCL buffers. */
3510 /* inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); */
3511 /* assume this will get a writable image */
3512 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3513
3514 if (inputPixels == (void *) NULL)
3515 {
3516 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3517 goto cleanup;
3518 }
3519 /* If the host pointer is aligned to the size of CLPixelPacket,
3520 then use the host buffer directly from the GPU; otherwise,
3521 create a buffer on the GPU and copy the data over */
3522 if (ALIGNED(inputPixels,CLPixelPacket))
3523 {
3524 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3525 }
3526 else
3527 {
3528 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3529 }
3530 /* create a CL buffer from image pixel buffer */
3531 length = inputImage->columns * inputImage->rows;
3532 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3533 if (clStatus != CL_SUCCESS)
3534 {
3535 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3536 goto cleanup;
3537 }
3538
3539 /* If the host pointer is aligned to the size of cl_uint,
3540 then use the host buffer directly from the GPU; otherwise,
3541 create a buffer on the GPU and copy the data over */
3542 if (ALIGNED(histogram,cl_uint4))
3543 {
3544 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3545 hostPtr = histogram;
3546 }
3547 else
3548 {
3549 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3550 hostPtr = histogram;
3551 }
3552 /* create a CL buffer for histogram */
3553 length = (MaxMap+1);
3554 histogramBuffer = clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
3555 if (clStatus != CL_SUCCESS)
3556 {
3557 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3558 goto cleanup;
3559 }
3560
3561 switch (inputImage->colorspace)
3562 {
3563 case RGBColorspace:
3564 colorspace = 1;
3565 break;
3566 case sRGBColorspace:
3567 colorspace = 0;
3568 break;
3569 default:
3570 {
3571 /* something is wrong, as we checked in checkAccelerateCondition */
3572 }
3573 }
3574
3575 /* get the OpenCL kernel */
3576 histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
3577 if (histogramKernel == NULL)
3578 {
3579 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3580 goto cleanup;
3581 }
3582
3583 /* set the kernel arguments */
3584 i = 0;
3585 clStatus=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3586 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
3587 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace);
3588 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
3589 if (clStatus != CL_SUCCESS)
3590 {
3591 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3592 goto cleanup;
3593 }
3594
3595 /* launch the kernel */
3596 global_work_size[0] = inputImage->columns;
3597 global_work_size[1] = inputImage->rows;
3598
3599 clStatus = clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3600
3601 if (clStatus != CL_SUCCESS)
3602 {
3603 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3604 goto cleanup;
3605 }
3606 clFlush(queue);
3607
3608 /* read from the kenel output */
3609 if (ALIGNED(histogram,cl_uint4))
3610 {
3611 length = (MaxMap+1);
3612 clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
3613 }
3614 else
3615 {
3616 length = (MaxMap+1);
3617 clStatus = clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
3618 }
3619 if (clStatus != CL_SUCCESS)
3620 {
3621 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3622 goto cleanup;
3623 }
3624
3625 /* unmap, don't block gpu to use this buffer again. */
3626 if (ALIGNED(histogram,cl_uint4))
3627 {
3628 clStatus = clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
3629 if (clStatus != CL_SUCCESS)
3630 {
3631 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
3632 goto cleanup;
3633 }
3634 }
3635
3636 if (getenv("TEST")) {
3637 unsigned int i;
3638 for (i=0; i<(MaxMap+1UL); i++)
3639 {
3640 printf("histogram %d: red %d\n", i, histogram[i].s[2]);
3641 printf("histogram %d: green %d\n", i, histogram[i].s[1]);
3642 printf("histogram %d: blue %d\n", i, histogram[i].s[0]);
3643 printf("histogram %d: opacity %d\n", i, histogram[i].s[3]);
3644 }
3645 }
3646
3647 /* cpu stuff */
3648 equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
3649 if (equalize_map == (PixelPacket *) NULL)
3650 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3651
3652 map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
3653 if (map == (FloatPixelPacket *) NULL)
3654 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3655
3656 /*
3657 Integrate the histogram to get the equalization map.
3658 */
3659 (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
3660 for (i=0; i <= (ssize_t) MaxMap; i++)
3661 {
3662 if ((channel & SyncChannels) != 0)
3663 {
3664 intensity.red+=histogram[i].s[2];
3665 map[i]=intensity;
3666 continue;
3667 }
3668 if ((channel & RedChannel) != 0)
3669 intensity.red+=histogram[i].s[2];
3670 if ((channel & GreenChannel) != 0)
3671 intensity.green+=histogram[i].s[1];
3672 if ((channel & BlueChannel) != 0)
3673 intensity.blue+=histogram[i].s[0];
3674 if ((channel & OpacityChannel) != 0)
3675 intensity.opacity+=histogram[i].s[3];
3676 if (((channel & IndexChannel) != 0) &&
3677 (image->colorspace == CMYKColorspace))
3678 {
3679 printf("something here\n");
3680 /*intensity.index+=histogram[i].index; */
3681 }
3682 map[i]=intensity;
3683 }
3684 black=map[0];
3685 white=map[(int) MaxMap];
3686 (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
3687 for (i=0; i <= (ssize_t) MaxMap; i++)
3688 {
3689 if ((channel & SyncChannels) != 0)
3690 {
3691 if (white.red != black.red)
3692 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3693 (map[i].red-black.red))/(white.red-black.red)));
3694 continue;
3695 }
3696 if (((channel & RedChannel) != 0) && (white.red != black.red))
3697 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3698 (map[i].red-black.red))/(white.red-black.red)));
3699 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3700 equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3701 (map[i].green-black.green))/(white.green-black.green)));
3702 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3703 equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3704 (map[i].blue-black.blue))/(white.blue-black.blue)));
3705 if (((channel & OpacityChannel) != 0) && (white.opacity != black.opacity))
3706 equalize_map[i].opacity=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3707 (map[i].opacity-black.opacity))/(white.opacity-black.opacity)));
3708 /*
3709 if ((((channel & IndexChannel) != 0) &&
3710 (image->colorspace == CMYKColorspace)) &&
3711 (white.index != black.index))
3712 equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3713 (map[i].index-black.index))/(white.index-black.index)));
3714 */
3715 }
3716
3717 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
3718 map=(FloatPixelPacket *) RelinquishMagickMemory(map);
3719
3720 if (image->storage_class == PseudoClass)
3721 {
3722 /*
3723 Equalize colormap.
3724 */
3725 for (i=0; i < (ssize_t) image->colors; i++)
3726 {
3727 if ((channel & SyncChannels) != 0)
3728 {
3729 if (white.red != black.red)
3730 {
3731 image->colormap[i].red=equalize_map[
3732 ScaleQuantumToMap(image->colormap[i].red)].red;
3733 image->colormap[i].green=equalize_map[
3734 ScaleQuantumToMap(image->colormap[i].green)].red;
3735 image->colormap[i].blue=equalize_map[
3736 ScaleQuantumToMap(image->colormap[i].blue)].red;
3737 image->colormap[i].opacity=equalize_map[
3738 ScaleQuantumToMap(image->colormap[i].opacity)].red;
3739 }
3740 continue;
3741 }
3742 if (((channel & RedChannel) != 0) && (white.red != black.red))
3743 image->colormap[i].red=equalize_map[
3744 ScaleQuantumToMap(image->colormap[i].red)].red;
3745 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3746 image->colormap[i].green=equalize_map[
3747 ScaleQuantumToMap(image->colormap[i].green)].green;
3748 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3749 image->colormap[i].blue=equalize_map[
3750 ScaleQuantumToMap(image->colormap[i].blue)].blue;
3751 if (((channel & OpacityChannel) != 0) &&
3752 (white.opacity != black.opacity))
3753 image->colormap[i].opacity=equalize_map[
3754 ScaleQuantumToMap(image->colormap[i].opacity)].opacity;
3755 }
3756 }
3757
3758 /*
3759 Equalize image.
3760 */
3761
3762 /* GPU can work on this again, image and equalize map as input
3763 image: uchar4 (CLPixelPacket)
3764 equalize_map: uchar4 (PixelPacket)
3765 black, white: float4 (FloatPixelPacket) */
3766
3767 if (inputImageBuffer!=NULL)
3768 clReleaseMemObject(inputImageBuffer);
3769
3770 /* If the host pointer is aligned to the size of CLPixelPacket,
3771 then use the host buffer directly from the GPU; otherwise,
3772 create a buffer on the GPU and copy the data over */
3773 if (ALIGNED(inputPixels,CLPixelPacket))
3774 {
3775 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3776 }
3777 else
3778 {
3779 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3780 }
3781 /* create a CL buffer from image pixel buffer */
3782 length = inputImage->columns * inputImage->rows;
3783 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3784 if (clStatus != CL_SUCCESS)
3785 {
3786 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3787 goto cleanup;
3788 }
3789
3790 /* Create and initialize OpenCL buffers. */
3791 if (ALIGNED(equalize_map, PixelPacket))
3792 {
3793 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3794 hostPtr = equalize_map;
3795 }
3796 else
3797 {
3798 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3799 hostPtr = equalize_map;
3800 }
3801 /* create a CL buffer for eqaulize_map */
3802 length = (MaxMap+1);
3803 equalizeMapBuffer = clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
3804 if (clStatus != CL_SUCCESS)
3805 {
3806 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3807 goto cleanup;
3808 }
3809
3810 /* get the OpenCL kernel */
3811 equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize");
3812 if (equalizeKernel == NULL)
3813 {
3814 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3815 goto cleanup;
3816 }
3817
3818 /* set the kernel arguments */
3819 i = 0;
3820 clStatus=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3821 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
3822 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
3823 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
3824 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
3825 if (clStatus != CL_SUCCESS)
3826 {
3827 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3828 goto cleanup;
3829 }
3830
3831 /* launch the kernel */
3832 global_work_size[0] = inputImage->columns;
3833 global_work_size[1] = inputImage->rows;
3834
3835 clStatus = clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3836
3837 if (clStatus != CL_SUCCESS)
3838 {
3839 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3840 goto cleanup;
3841 }
3842 clFlush(queue);
3843
3844 /* read the data back */
3845 if (ALIGNED(inputPixels,CLPixelPacket))
3846 {
3847 length = inputImage->columns * inputImage->rows;
3848 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3849 }
3850 else
3851 {
3852 length = inputImage->columns * inputImage->rows;
3853 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3854 }
3855 if (clStatus != CL_SUCCESS)
3856 {
3857 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3858 goto cleanup;
3859 }
3860
3861 outputReady = MagickTrue;
3862
3863 equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
3864
3865cleanup:
3866
3867 if (inputPixels) {
3868 /*ReleasePixelCachePixels();*/
3869 inputPixels = NULL;
3870 }
3871
3872 if (inputImageBuffer!=NULL)
3873 clReleaseMemObject(inputImageBuffer);
3874 if (histogramBuffer!=NULL)
3875 clReleaseMemObject(histogramBuffer);
3876 if (histogramKernel!=NULL)
3877 RelinquishOpenCLKernel(clEnv, histogramKernel);
3878 if (queue != NULL)
3879 RelinquishOpenCLCommandQueue(clEnv, queue);
3880
3881 return outputReady;
3882}
3883
3884/*
3885%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3886% %
3887% %
3888% %
3889% E q u a l i z e I m a g e w i t h O p e n C L %
3890% %
3891% %
3892% %
3893%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3894%
3895% EqualizeImage() applies a histogram equalization to the image.
3896%
3897% The format of the EqualizeImage method is:
3898%
3899% MagickBooleanType EqualizeImage(Image *image)
3900% MagickBooleanType EqualizeImageChannel(Image *image,
3901% const ChannelType channel)
3902%
3903% A description of each parameter follows:
3904%
3905% o image: the image.
3906%
3907% o channel: the channel.
3908%
3909*/
3910
3911
3912MagickExport
3913MagickBooleanType AccelerateEqualizeImage(Image* image, const ChannelType channel, ExceptionInfo* exception)
3914{
3915 MagickBooleanType status;
3916
3917 assert(image != NULL);
3918 assert(exception != NULL);
3919
3920 status = checkOpenCLEnvironment(exception);
3921 if (status == MagickFalse)
3922 return MagickFalse;
3923
3924 status = checkAccelerateCondition(image, channel, exception);
3925 if (status == MagickFalse)
3926 return MagickFalse;
3927
3928 /* ensure this is the only pass get in for now. */
3929 if ((channel & SyncChannels) == 0)
3930 return MagickFalse;
3931
3932 if (image->colorspace != sRGBColorspace)
3933 return MagickFalse;
3934
3935 status = ComputeEqualizeImage(image,channel,exception);
3936 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3937 return status;
3938}
3939
3940
3941static Image* ComputeDespeckleImage(const Image* inputImage, ExceptionInfo* exception)
3942{
3943
3944 MagickBooleanType outputReady = MagickFalse;
3945 MagickCLEnv clEnv = NULL;
3946
3947 cl_int clStatus;
3948 size_t global_work_size[2];
3949
3950 const void *inputPixels = NULL;
3951 Image* filteredImage = NULL;
3952 void *filteredPixels = NULL;
3953 void *hostPtr;
3954 MagickSizeType length;
3955
3956 cl_mem_flags mem_flags;
3957 cl_context context = NULL;
3958 cl_mem inputImageBuffer = NULL;
3959 cl_mem tempImageBuffer[2];
3960 cl_mem filteredImageBuffer = NULL;
3961 cl_command_queue queue = NULL;
3962 cl_kernel hullPass1 = NULL;
3963 cl_kernel hullPass2 = NULL;
3964
3965 unsigned int imageWidth, imageHeight;
3966 int matte;
3967 int k;
3968
3969 static const int
3970 X[4] = {0, 1, 1,-1},
3971 Y[4] = {1, 0, 1, 1};
3972
3973 tempImageBuffer[0] = tempImageBuffer[1] = NULL;
3974 clEnv = GetDefaultOpenCLEnv();
3975 context = GetOpenCLContext(clEnv);
3976 queue = AcquireOpenCLCommandQueue(clEnv);
3977
3978 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3979 if (inputPixels == (void *) NULL)
3980 {
3981 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3982 goto cleanup;
3983 }
3984
3985 if (ALIGNED(inputPixels,CLPixelPacket))
3986 {
3987 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3988 }
3989 else
3990 {
3991 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3992 }
3993 /* create a CL buffer from image pixel buffer */
3994 length = inputImage->columns * inputImage->rows;
3995 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3996 if (clStatus != CL_SUCCESS)
3997 {
3998 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3999 goto cleanup;
4000 }
4001
4002 mem_flags = CL_MEM_READ_WRITE;
4003 length = inputImage->columns * inputImage->rows;
4004 for (k = 0; k < 2; k++)
4005 {
4006 tempImageBuffer[k] = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
4007 if (clStatus != CL_SUCCESS)
4008 {
4009 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4010 goto cleanup;
4011 }
4012 }
4013
4014 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4015 assert(filteredImage != NULL);
4016 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
4017 {
4018 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
4019 goto cleanup;
4020 }
4021 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4022 if (filteredPixels == (void *) NULL)
4023 {
4024 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
4025 goto cleanup;
4026 }
4027
4028 if (ALIGNED(filteredPixels,CLPixelPacket))
4029 {
4030 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4031 hostPtr = filteredPixels;
4032 }
4033 else
4034 {
4035 mem_flags = CL_MEM_WRITE_ONLY;
4036 hostPtr = NULL;
4037 }
4038 /* create a CL buffer from image pixel buffer */
4039 length = inputImage->columns * inputImage->rows;
4040 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4041 if (clStatus != CL_SUCCESS)
4042 {
4043 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4044 goto cleanup;
4045 }
4046
4047 hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
4048 hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
4049
4050 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&inputImageBuffer);
4051 clStatus |=clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4052 imageWidth = inputImage->columns;
4053 clStatus |=clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
4054 imageHeight = inputImage->rows;
4055 clStatus |=clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
4056 matte = (inputImage->matte==MagickFalse)?0:1;
4057 clStatus |=clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
4058 if (clStatus != CL_SUCCESS)
4059 {
4060 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4061 goto cleanup;
4062 }
4063
4064 clStatus = clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4065 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
4066 imageWidth = inputImage->columns;
4067 clStatus |=clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
4068 imageHeight = inputImage->rows;
4069 clStatus |=clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
4070 matte = (inputImage->matte==MagickFalse)?0:1;
4071 clStatus |=clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
4072 if (clStatus != CL_SUCCESS)
4073 {
4074 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4075 goto cleanup;
4076 }
4077
4078
4079 global_work_size[0] = inputImage->columns;
4080 global_work_size[1] = inputImage->rows;
4081
4082
4083 for (k = 0; k < 4; k++)
4084 {
4085 cl_int2 offset;
4086 int polarity;
4087
4088
4089 offset.s[0] = X[k];
4090 offset.s[1] = Y[k];
4091 polarity = 1;
4092 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4093 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4094 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4095 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4096 if (clStatus != CL_SUCCESS)
4097 {
4098 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4099 goto cleanup;
4100 }
4101 /* launch the kernel */
4102 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4103 if (clStatus != CL_SUCCESS)
4104 {
4105 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4106 goto cleanup;
4107 }
4108 /* launch the kernel */
4109 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4110 if (clStatus != CL_SUCCESS)
4111 {
4112 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4113 goto cleanup;
4114 }
4115
4116
4117 if (k == 0)
4118 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
4119 offset.s[0] = -X[k];
4120 offset.s[1] = -Y[k];
4121 polarity = 1;
4122 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4123 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4124 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4125 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4126 if (clStatus != CL_SUCCESS)
4127 {
4128 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4129 goto cleanup;
4130 }
4131 /* launch the kernel */
4132 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4133 if (clStatus != CL_SUCCESS)
4134 {
4135 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4136 goto cleanup;
4137 }
4138 /* launch the kernel */
4139 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4140 if (clStatus != CL_SUCCESS)
4141 {
4142 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4143 goto cleanup;
4144 }
4145
4146 offset.s[0] = -X[k];
4147 offset.s[1] = -Y[k];
4148 polarity = -1;
4149 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4150 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4151 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4152 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4153 if (clStatus != CL_SUCCESS)
4154 {
4155 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4156 goto cleanup;
4157 }
4158 /* launch the kernel */
4159 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4160 if (clStatus != CL_SUCCESS)
4161 {
4162 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4163 goto cleanup;
4164 }
4165 /* launch the kernel */
4166 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4167 if (clStatus != CL_SUCCESS)
4168 {
4169 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4170 goto cleanup;
4171 }
4172
4173 offset.s[0] = X[k];
4174 offset.s[1] = Y[k];
4175 polarity = -1;
4176 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4177 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4178 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4179 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4180
4181 if (k == 3)
4182 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
4183
4184 if (clStatus != CL_SUCCESS)
4185 {
4186 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4187 goto cleanup;
4188 }
4189 /* launch the kernel */
4190 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4191 if (clStatus != CL_SUCCESS)
4192 {
4193 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4194 goto cleanup;
4195 }
4196 /* launch the kernel */
4197 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4198 if (clStatus != CL_SUCCESS)
4199 {
4200 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4201 goto cleanup;
4202 }
4203 }
4204
4205 if (ALIGNED(filteredPixels,CLPixelPacket))
4206 {
4207 length = inputImage->columns * inputImage->rows;
4208 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4209 }
4210 else
4211 {
4212 length = inputImage->columns * inputImage->rows;
4213 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4214 }
4215 if (clStatus != CL_SUCCESS)
4216 {
4217 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
4218 goto cleanup;
4219 }
4220
4221 outputReady = MagickTrue;
4222
4223cleanup:
4224 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4225 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4226 for (k = 0; k < 2; k++)
4227 {
4228 if (tempImageBuffer[k]!=NULL) clReleaseMemObject(tempImageBuffer[k]);
4229 }
4230 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4231 if (hullPass1!=NULL) RelinquishOpenCLKernel(clEnv, hullPass1);
4232 if (hullPass2!=NULL) RelinquishOpenCLKernel(clEnv, hullPass2);
4233 if (outputReady == MagickFalse)
4234 {
4235 if (filteredImage != NULL)
4236 {
4237 DestroyImage(filteredImage);
4238 filteredImage = NULL;
4239 }
4240 }
4241 return filteredImage;
4242}
4243
4244/*
4245%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4246% %
4247% %
4248% %
4249% D e s p e c k l e I m a g e w i t h O p e n C L %
4250% %
4251% %
4252% %
4253%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4254%
4255% DespeckleImage() reduces the speckle noise in an image while perserving the
4256% edges of the original image. A speckle removing filter uses a complementary
4257% hulling technique (raising pixels that are darker than their surrounding
4258% neighbors, then complementarily lowering pixels that are brighter than their
4259% surrounding neighbors) to reduce the speckle index of that image (reference
4260% Crimmins speckle removal).
4261%
4262% The format of the DespeckleImage method is:
4263%
4264% Image *DespeckleImage(const Image *image,ExceptionInfo *exception)
4265%
4266% A description of each parameter follows:
4267%
4268% o image: the image.
4269%
4270% o exception: return any errors or warnings in this structure.
4271%
4272*/
4273
4274MagickExport
4275Image* AccelerateDespeckleImage(const Image* image, ExceptionInfo* exception)
4276{
4277 MagickBooleanType status;
4278 Image* newImage = NULL;
4279
4280 assert(image != NULL);
4281 assert(exception != NULL);
4282
4283 status = checkOpenCLEnvironment(exception);
4284 if (status == MagickFalse)
4285 return NULL;
4286
4287 status = checkAccelerateCondition(image, AllChannels, exception);
4288 if (status == MagickFalse)
4289 return NULL;
4290
4291 newImage = ComputeDespeckleImage(image,exception);
4292 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4293 return newImage;
4294}
4295
cristye85d0f72013-11-27 02:25:43 +00004296static Image* ComputeAddNoiseImage(const Image* inputImage,
4297 const ChannelType channel, const NoiseType noise_type,
4298 ExceptionInfo *exception)
4299{
4300 MagickBooleanType outputReady = MagickFalse;
4301 MagickCLEnv clEnv = NULL;
4302
4303 cl_int clStatus;
4304 size_t global_work_size[2];
4305
4306 const void *inputPixels = NULL;
4307 Image* filteredImage = NULL;
4308 void *filteredPixels = NULL;
4309 void *hostPtr;
4310 unsigned int inputColumns, inputRows;
4311 float attenuate;
4312 float *randomNumberBufferPtr = NULL;
4313 MagickSizeType length;
4314 unsigned int numRandomNumberPerPixel;
4315 unsigned int numRowsPerKernelLaunch;
4316 unsigned int numRandomNumberPerBuffer;
4317 unsigned int r;
4318 unsigned int k;
4319 int i;
4320
4321 RandomInfo **restrict random_info;
4322 const char *option;
4323#if defined(MAGICKCORE_OPENMP_SUPPORT)
4324 unsigned long key;
4325#endif
4326
4327 cl_mem_flags mem_flags;
4328 cl_context context = NULL;
4329 cl_mem inputImageBuffer = NULL;
4330 cl_mem randomNumberBuffer = NULL;
4331 cl_mem filteredImageBuffer = NULL;
4332 cl_command_queue queue = NULL;
4333 cl_kernel addNoiseKernel = NULL;
4334
4335
4336 clEnv = GetDefaultOpenCLEnv();
4337 context = GetOpenCLContext(clEnv);
4338 queue = AcquireOpenCLCommandQueue(clEnv);
4339
4340 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
4341 if (inputPixels == (void *) NULL)
4342 {
4343 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
4344 goto cleanup;
4345 }
4346
4347 if (ALIGNED(inputPixels,CLPixelPacket))
4348 {
4349 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4350 }
4351 else
4352 {
4353 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4354 }
4355 /* create a CL buffer from image pixel buffer */
4356 length = inputImage->columns * inputImage->rows;
4357 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4358 if (clStatus != CL_SUCCESS)
4359 {
4360 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4361 goto cleanup;
4362 }
4363
4364
4365 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4366 assert(filteredImage != NULL);
4367 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
4368 {
4369 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
4370 goto cleanup;
4371 }
4372 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4373 if (filteredPixels == (void *) NULL)
4374 {
4375 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
4376 goto cleanup;
4377 }
4378
4379 if (ALIGNED(filteredPixels,CLPixelPacket))
4380 {
4381 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4382 hostPtr = filteredPixels;
4383 }
4384 else
4385 {
4386 mem_flags = CL_MEM_WRITE_ONLY;
4387 hostPtr = NULL;
4388 }
4389 /* create a CL buffer from image pixel buffer */
4390 length = inputImage->columns * inputImage->rows;
4391 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4392 if (clStatus != CL_SUCCESS)
4393 {
4394 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4395 goto cleanup;
4396 }
4397
4398 /* find out how many random numbers needed by pixel */
4399 numRandomNumberPerPixel = 0;
4400 {
4401 unsigned int numRandPerChannel = 0;
4402 switch (noise_type)
4403 {
4404 case UniformNoise:
4405 case ImpulseNoise:
4406 case LaplacianNoise:
4407 case RandomNoise:
4408 default:
4409 numRandPerChannel = 1;
4410 break;
4411 case GaussianNoise:
4412 case MultiplicativeGaussianNoise:
4413 case PoissonNoise:
4414 numRandPerChannel = 2;
4415 break;
4416 };
4417
4418 if ((channel & RedChannel) != 0)
4419 numRandomNumberPerPixel+=numRandPerChannel;
4420 if ((channel & GreenChannel) != 0)
4421 numRandomNumberPerPixel+=numRandPerChannel;
4422 if ((channel & BlueChannel) != 0)
4423 numRandomNumberPerPixel+=numRandPerChannel;
4424 if ((channel & OpacityChannel) != 0)
4425 numRandomNumberPerPixel+=numRandPerChannel;
4426 }
4427
4428 numRowsPerKernelLaunch = 512;
4429 /* create a buffer for random numbers */
4430 numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
4431 randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, numRandomNumberPerBuffer*sizeof(float)
4432 , NULL, &clStatus);
4433
4434
4435 /* set up the random number generators */
4436 attenuate=1.0;
4437 option=GetImageArtifact(inputImage,"attenuate");
4438 if (option != (char *) NULL)
4439 attenuate=StringToDouble(option,(char **) NULL);
4440 random_info=AcquireRandomInfoThreadSet();
4441#if defined(MAGICKCORE_OPENMP_SUPPORT)
4442 key=GetRandomSecretKey(random_info[0]);
4443#endif
4444
4445 addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
4446
4447 k = 0;
4448 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
4449 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4450 inputColumns = inputImage->columns;
4451 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
4452 inputRows = inputImage->rows;
4453 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
4454 clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
4455 clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
4456 attenuate=1.0f;
4457 option=GetImageArtifact(inputImage,"attenuate");
4458 if (option != (char *) NULL)
4459 attenuate=(float)StringToDouble(option,(char **) NULL);
4460 clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
4461 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4462 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
4463
4464 global_work_size[0] = inputColumns;
4465 for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch)
4466 {
4467 /* Generate random numbers in the buffer */
4468 randomNumberBufferPtr = (float*)clEnqueueMapBuffer(queue, randomNumberBuffer, CL_TRUE, CL_MAP_WRITE, 0
4469 , numRandomNumberPerBuffer*sizeof(float), 0, NULL, NULL, &clStatus);
4470 if (clStatus != CL_SUCCESS)
4471 {
4472 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
4473 goto cleanup;
4474 }
4475
4476#if defined(MAGICKCORE_OPENMP_SUPPORT)
4477 #pragma omp parallel for schedule(static,4) \
4478 num_threads((key == ~0UL) == 0 ? 1 : (size_t) GetMagickResourceLimit(ThreadResource))
4479#endif
4480 for (i = 0; i < numRandomNumberPerBuffer; i++)
4481 {
4482 const int id = GetOpenMPThreadId();
4483 randomNumberBufferPtr[i] = (float)GetPseudoRandomValue(random_info[id]);
4484 }
4485
4486 clStatus = clEnqueueUnmapMemObject(queue, randomNumberBuffer, randomNumberBufferPtr, 0, NULL, NULL);
4487 if (clStatus != CL_SUCCESS)
4488 {
4489 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
4490 goto cleanup;
4491 }
4492
4493 /* set the row offset */
4494 clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
4495 global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
4496 clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
4497 }
4498
4499 if (ALIGNED(filteredPixels,CLPixelPacket))
4500 {
4501 length = inputImage->columns * inputImage->rows;
4502 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4503 }
4504 else
4505 {
4506 length = inputImage->columns * inputImage->rows;
4507 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4508 }
4509 if (clStatus != CL_SUCCESS)
4510 {
4511 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
4512 goto cleanup;
4513 }
4514
4515
4516 outputReady = MagickTrue;
4517cleanup:
4518 if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4519 if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel);
4520 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4521 if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer);
4522 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4523 if (outputReady == MagickFalse
4524 && filteredImage != NULL)
4525 {
4526 DestroyImage(filteredImage);
4527 filteredImage = NULL;
4528 }
4529 return filteredImage;
4530}
4531
4532
4533static Image* ComputeAddNoiseImageOptRandomNum(const Image* inputImage,
4534 const ChannelType channel, const NoiseType noise_type,
4535 ExceptionInfo *exception)
4536{
4537 MagickBooleanType outputReady = MagickFalse;
4538 MagickCLEnv clEnv = NULL;
4539
4540 cl_int clStatus;
4541 size_t global_work_size[2];
4542 size_t random_work_size;
4543
4544 const void *inputPixels = NULL;
4545 Image* filteredImage = NULL;
4546 void *filteredPixels = NULL;
4547 void *hostPtr;
4548 unsigned int inputColumns, inputRows;
4549 float attenuate;
4550 MagickSizeType length;
4551 unsigned int numRandomNumberPerPixel;
4552 unsigned int numRowsPerKernelLaunch;
4553 unsigned int numRandomNumberPerBuffer;
4554 unsigned int numRandomNumberGenerators;
4555 unsigned int initRandom;
4556 float fNormalize;
4557 unsigned int r;
4558 unsigned int k;
4559 int i;
4560 const char *option;
4561
4562 cl_mem_flags mem_flags;
4563 cl_context context = NULL;
4564 cl_mem inputImageBuffer = NULL;
4565 cl_mem randomNumberBuffer = NULL;
4566 cl_mem filteredImageBuffer = NULL;
4567 cl_mem randomNumberSeedsBuffer = NULL;
4568 cl_command_queue queue = NULL;
4569 cl_kernel addNoiseKernel = NULL;
4570 cl_kernel randomNumberGeneratorKernel = NULL;
4571
4572
4573 clEnv = GetDefaultOpenCLEnv();
4574 context = GetOpenCLContext(clEnv);
4575 queue = AcquireOpenCLCommandQueue(clEnv);
4576
4577 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
4578 if (inputPixels == (void *) NULL)
4579 {
4580 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
4581 goto cleanup;
4582 }
4583
4584 if (ALIGNED(inputPixels,CLPixelPacket))
4585 {
4586 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4587 }
4588 else
4589 {
4590 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4591 }
4592 /* create a CL buffer from image pixel buffer */
4593 length = inputImage->columns * inputImage->rows;
4594 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4595 if (clStatus != CL_SUCCESS)
4596 {
4597 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4598 goto cleanup;
4599 }
4600
4601
4602 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4603 assert(filteredImage != NULL);
4604 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
4605 {
4606 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
4607 goto cleanup;
4608 }
4609 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4610 if (filteredPixels == (void *) NULL)
4611 {
4612 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
4613 goto cleanup;
4614 }
4615
4616 if (ALIGNED(filteredPixels,CLPixelPacket))
4617 {
4618 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4619 hostPtr = filteredPixels;
4620 }
4621 else
4622 {
4623 mem_flags = CL_MEM_WRITE_ONLY;
4624 hostPtr = NULL;
4625 }
4626 /* create a CL buffer from image pixel buffer */
4627 length = inputImage->columns * inputImage->rows;
4628 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4629 if (clStatus != CL_SUCCESS)
4630 {
4631 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4632 goto cleanup;
4633 }
4634
4635 /* find out how many random numbers needed by pixel */
4636 numRandomNumberPerPixel = 0;
4637 {
4638 unsigned int numRandPerChannel = 0;
4639 switch (noise_type)
4640 {
4641 case UniformNoise:
4642 case ImpulseNoise:
4643 case LaplacianNoise:
4644 case RandomNoise:
4645 default:
4646 numRandPerChannel = 1;
4647 break;
4648 case GaussianNoise:
4649 case MultiplicativeGaussianNoise:
4650 case PoissonNoise:
4651 numRandPerChannel = 2;
4652 break;
4653 };
4654
4655 if ((channel & RedChannel) != 0)
4656 numRandomNumberPerPixel+=numRandPerChannel;
4657 if ((channel & GreenChannel) != 0)
4658 numRandomNumberPerPixel+=numRandPerChannel;
4659 if ((channel & BlueChannel) != 0)
4660 numRandomNumberPerPixel+=numRandPerChannel;
4661 if ((channel & OpacityChannel) != 0)
4662 numRandomNumberPerPixel+=numRandPerChannel;
4663 }
4664
4665 numRowsPerKernelLaunch = 512;
4666
4667 /* create a buffer for random numbers */
4668 numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
4669 randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, numRandomNumberPerBuffer*sizeof(float)
4670 , NULL, &clStatus);
4671
4672 {
4673 /* setup the random number generators */
4674 unsigned long* seeds;
4675 numRandomNumberGenerators = 512;
4676 randomNumberSeedsBuffer = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR|CL_MEM_READ_WRITE
4677 , numRandomNumberGenerators * 4 * sizeof(unsigned long), NULL, &clStatus);
4678 if (clStatus != CL_SUCCESS)
4679 {
4680 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4681 goto cleanup;
4682 }
4683 seeds = (unsigned long*) clEnqueueMapBuffer(queue, randomNumberSeedsBuffer, CL_TRUE, CL_MAP_WRITE, 0
4684 , numRandomNumberGenerators*4*sizeof(unsigned long), 0, NULL, NULL, &clStatus);
4685 if (clStatus != CL_SUCCESS)
4686 {
4687 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
4688 goto cleanup;
4689 }
4690
4691 for (i = 0; i < numRandomNumberGenerators; i++) {
4692 RandomInfo* randomInfo = AcquireRandomInfo();
4693 const unsigned long* s = GetRandomInfoSeed(randomInfo);
4694
4695 if (i == 0)
4696 fNormalize = GetRandomInfoNormalize(randomInfo);
4697
4698 seeds[i*4] = s[0];
4699 randomInfo = DestroyRandomInfo(randomInfo);
4700 }
4701
4702 clStatus = clEnqueueUnmapMemObject(queue, randomNumberSeedsBuffer, seeds, 0, NULL, NULL);
4703 if (clStatus != CL_SUCCESS)
4704 {
4705 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
4706 goto cleanup;
4707 }
4708
4709 randomNumberGeneratorKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE
4710 ,"randomNumberGeneratorKernel");
4711
4712 k = 0;
4713 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberSeedsBuffer);
4714 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(float),(void *)&fNormalize);
4715 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4716 initRandom = 1;
4717 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&initRandom);
4718 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerBuffer);
4719
4720 random_work_size = numRandomNumberGenerators;
4721 }
4722
4723 addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
4724 k = 0;
4725 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
4726 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4727 inputColumns = inputImage->columns;
4728 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
4729 inputRows = inputImage->rows;
4730 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
4731 clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
4732 clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
4733 attenuate=1.0f;
4734 option=GetImageArtifact(inputImage,"attenuate");
4735 if (option != (char *) NULL)
4736 attenuate=(float)StringToDouble(option,(char **) NULL);
4737 clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
4738 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4739 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
4740
4741 global_work_size[0] = inputColumns;
4742 for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch)
4743 {
4744 size_t generator_local_size = 64;
4745 /* Generate random numbers in the buffer */
4746 clEnqueueNDRangeKernel(queue,randomNumberGeneratorKernel,1,NULL
4747 ,&random_work_size,&generator_local_size,0,NULL,NULL);
4748 if (initRandom != 0)
4749 {
4750 /* make sure we only do init once */
4751 initRandom = 0;
4752 clSetKernelArg(randomNumberGeneratorKernel,3,sizeof(unsigned int),(void *)&initRandom);
4753 }
4754
4755 /* set the row offset */
4756 clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
4757 global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
4758 clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
4759 }
4760
4761 if (ALIGNED(filteredPixels,CLPixelPacket))
4762 {
4763 length = inputImage->columns * inputImage->rows;
4764 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4765 }
4766 else
4767 {
4768 length = inputImage->columns * inputImage->rows;
4769 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4770 }
4771 if (clStatus != CL_SUCCESS)
4772 {
4773 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
4774 goto cleanup;
4775 }
4776
4777
4778 outputReady = MagickTrue;
4779cleanup:
4780 if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4781 if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel);
4782 if (randomNumberGeneratorKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomNumberGeneratorKernel);
4783 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4784 if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer);
4785 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4786 if (randomNumberSeedsBuffer!=NULL) clReleaseMemObject(randomNumberSeedsBuffer);
4787 if (outputReady == MagickFalse
4788 && filteredImage != NULL)
4789 {
4790 DestroyImage(filteredImage);
4791 filteredImage = NULL;
4792 }
4793 return filteredImage;
4794}
4795
4796
4797
4798MagickExport
4799Image* AccelerateAddNoiseImage(const Image *image, const ChannelType channel,
4800 const NoiseType noise_type,ExceptionInfo *exception)
4801{
4802 MagickBooleanType status;
4803 Image* filteredImage = NULL;
4804
4805 assert(image != NULL);
4806 assert(exception != NULL);
4807
4808 status = checkOpenCLEnvironment(exception);
4809 if (status == MagickFalse)
4810 return NULL;
4811
4812 status = checkAccelerateCondition(image, channel, exception);
4813 if (status == MagickFalse)
4814 return NULL;
4815
4816 if (sizeof(unsigned long) == 4)
4817 filteredImage = ComputeAddNoiseImageOptRandomNum(image,channel,noise_type,exception);
4818 else
4819 filteredImage = ComputeAddNoiseImage(image,channel,noise_type,exception);
4820
4821 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4822 return filteredImage;
4823}
4824
4825
cristyf034abb2013-11-24 14:16:14 +00004826#else /* MAGICKCORE_OPENCL_SUPPORT */
4827
4828MagickExport Image *AccelerateConvolveImageChannel(
4829 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4830 const KernelInfo *magick_unused(kernel),
4831 ExceptionInfo *magick_unused(exception))
4832{
4833 magick_unreferenced(image);
4834 magick_unreferenced(channel);
4835 magick_unreferenced(kernel);
4836 magick_unreferenced(exception);
4837
4838 return NULL;
4839}
4840
4841MagickExport MagickBooleanType AccelerateFunctionImage(
4842 Image *magick_unused(image),const ChannelType magick_unused(channel),
4843 const MagickFunction magick_unused(function),
4844 const size_t magick_unused(number_parameters),
4845 const double *magick_unused(parameters),
4846 ExceptionInfo *magick_unused(exception))
4847{
4848 magick_unreferenced(image);
4849 magick_unreferenced(channel);
4850 magick_unreferenced(function);
4851 magick_unreferenced(number_parameters);
4852 magick_unreferenced(parameters);
4853 magick_unreferenced(exception);
4854
4855 return MagickFalse;
4856}
4857
4858MagickExport Image *AccelerateBlurImage(const Image *magick_unused(image),
4859 const ChannelType magick_unused(channel),const double magick_unused(radius),
4860 const double magick_unused(sigma),ExceptionInfo *magick_unused(exception))
4861{
4862 magick_unreferenced(image);
4863 magick_unreferenced(channel);
4864 magick_unreferenced(radius);
4865 magick_unreferenced(sigma);
4866 magick_unreferenced(exception);
4867
4868 return NULL;
4869}
4870
4871MagickExport Image *AccelerateRadialBlurImage(
4872 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4873 const double magick_unused(angle),ExceptionInfo *magick_unused(exception))
4874{
4875 magick_unreferenced(image);
4876 magick_unreferenced(channel);
4877 magick_unreferenced(angle);
4878 magick_unreferenced(exception);
4879
4880 return NULL;
4881}
4882
4883
4884MagickExport Image *AccelerateUnsharpMaskImage(
4885 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4886 const double magick_unused(radius),const double magick_unused(sigma),
4887 const double magick_unused(gain),const double magick_unused(threshold),
4888 ExceptionInfo *magick_unused(exception))
4889{
4890 magick_unreferenced(image);
4891 magick_unreferenced(channel);
4892 magick_unreferenced(radius);
4893 magick_unreferenced(sigma);
4894 magick_unreferenced(gain);
4895 magick_unreferenced(threshold);
4896 magick_unreferenced(exception);
4897
4898 return NULL;
4899}
4900
4901
4902MagickExport MagickBooleanType AccelerateContrastImage(
4903 Image* magick_unused(image),const MagickBooleanType magick_unused(sharpen),
4904 ExceptionInfo* magick_unused(exception))
4905{
4906 magick_unreferenced(image);
4907 magick_unreferenced(sharpen);
4908 magick_unreferenced(exception);
4909
4910 return MagickFalse;
4911}
4912
4913MagickExport MagickBooleanType AccelerateEqualizeImage(
4914 Image* magick_unused(image), const ChannelType magick_unused(channel),
4915 ExceptionInfo* magick_unused(exception))
4916{
4917 magick_unreferenced(image);
4918 magick_unreferenced(channel);
4919 magick_unreferenced(exception);
4920
4921 return MagickFalse;
4922}
4923
4924MagickExport Image *AccelerateDespeckleImage(const Image* magick_unused(image),
4925 ExceptionInfo* magick_unused(exception))
4926{
4927 magick_unreferenced(image);
4928 magick_unreferenced(exception);
4929
4930 return NULL;
4931}
4932
4933MagickExport Image *AccelerateResizeImage(const Image* magick_unused(image),
4934 const size_t magick_unused(resizedColumns),
4935 const size_t magick_unused(resizedRows),
4936 const ResizeFilter* magick_unused(resizeFilter),
4937 ExceptionInfo *magick_unused(exception))
4938{
4939 magick_unreferenced(image);
4940 magick_unreferenced(resizedColumns);
4941 magick_unreferenced(resizedRows);
4942 magick_unreferenced(resizeFilter);
4943 magick_unreferenced(exception);
4944
4945 return NULL;
4946}
4947
4948
4949MagickExport
4950MagickBooleanType AccelerateModulateImage(
4951 Image* image, double percent_brightness, double percent_hue,
4952 double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
4953{
4954 magick_unreferenced(image);
4955 magick_unreferenced(percent_brightness);
4956 magick_unreferenced(percent_hue);
4957 magick_unreferenced(percent_saturation);
4958 magick_unreferenced(colorspace);
4959 magick_unreferenced(exception);
4960 return(MagickFalse);
4961}
4962
cristye85d0f72013-11-27 02:25:43 +00004963MagickExport Image *AccelerateAddNoiseImage(const Image *image,
4964 const ChannelType channel, const NoiseType noise_type,ExceptionInfo *exception)
4965{
4966 magick_unreferenced(image);
4967 magick_unreferenced(channel);
4968 magick_unreferenced(noise_type);
4969 magick_unreferenced(exception);
4970 return NULL;
4971}
cristyf034abb2013-11-24 14:16:14 +00004972
4973#endif /* MAGICKCORE_OPENCL_SUPPORT */
4974
4975MagickExport MagickBooleanType AccelerateConvolveImage(
4976 const Image *magick_unused(image),const KernelInfo *magick_unused(kernel),
4977 Image *magick_unused(convolve_image),ExceptionInfo *magick_unused(exception))
4978{
4979 magick_unreferenced(image);
4980 magick_unreferenced(kernel);
4981 magick_unreferenced(convolve_image);
4982 magick_unreferenced(exception);
4983
4984 /* legacy, do not use */
4985 return(MagickFalse);
4986}
4987