blob: 6b5de7bc5c46273357fb54069b381e65d4cb984e [file] [log] [blame]
cristy3f6d1482010-01-20 21:01:21 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7% A A C C E L E R R A A T E %
8% AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9% A A C C E L E R R A A T E %
10% A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11% %
12% %
13% MagickCore Acceleration Methods %
14% %
15% Software Design %
cristy0d127ab2010-05-14 23:29:46 +000016% John Cristy %
cristyf034abb2013-11-24 14:16:14 +000017% SiuChi Chan %
18% Guansong Zhang %
cristy3f6d1482010-01-20 21:01:21 +000019% January 2010 %
20% %
21% %
cristyfe676ee2013-11-18 13:03:38 +000022% Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization %
cristy3f6d1482010-01-20 21:01:21 +000023% dedicated to making software imaging solutions freely available. %
24% %
25% You may not use this file except in compliance with the License. You may %
26% obtain a copy of the License at %
27% %
28% http://www.imagemagick.org/script/license.php %
29% %
30% Unless required by applicable law or agreed to in writing, software %
31% distributed under the License is distributed on an "AS IS" BASIS, %
32% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
33% See the License for the specific language governing permissions and %
34% limitations under the License. %
35% %
36%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cristy3f6d1482010-01-20 21:01:21 +000037*/
cristyf034abb2013-11-24 14:16:14 +000038
cristy3f6d1482010-01-20 21:01:21 +000039/*
cristyf034abb2013-11-24 14:16:14 +000040Include declarations.
cristy3f6d1482010-01-20 21:01:21 +000041*/
cristy4c08aed2011-07-01 19:47:50 +000042#include "MagickCore/studio.h"
43#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000044#include "MagickCore/accelerate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000045#include "MagickCore/artifact.h"
cristy35f33492011-07-07 16:54:49 +000046#include "MagickCore/cache.h"
cristyd1dd6e42011-09-04 01:46:08 +000047#include "MagickCore/cache-private.h"
cristy4c08aed2011-07-01 19:47:50 +000048#include "MagickCore/cache-view.h"
49#include "MagickCore/color-private.h"
cristy7f866842013-07-11 01:15:58 +000050#include "MagickCore/delegate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000051#include "MagickCore/enhance.h"
52#include "MagickCore/exception.h"
53#include "MagickCore/exception-private.h"
54#include "MagickCore/gem.h"
55#include "MagickCore/hashmap.h"
56#include "MagickCore/image.h"
57#include "MagickCore/image-private.h"
58#include "MagickCore/list.h"
59#include "MagickCore/memory_.h"
60#include "MagickCore/monitor-private.h"
61#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000062#include "MagickCore/opencl.h"
63#include "MagickCore/opencl-private.h"
cristy4c08aed2011-07-01 19:47:50 +000064#include "MagickCore/option.h"
cristyf034abb2013-11-24 14:16:14 +000065#include "MagickCore/pixel-private.h"
cristy4c08aed2011-07-01 19:47:50 +000066#include "MagickCore/prepress.h"
67#include "MagickCore/quantize.h"
68#include "MagickCore/registry.h"
cristyf034abb2013-11-24 14:16:14 +000069#include "MagickCore/resize.h"
70#include "MagickCore/resize-private.h"
cristy4c08aed2011-07-01 19:47:50 +000071#include "MagickCore/semaphore.h"
72#include "MagickCore/splay-tree.h"
73#include "MagickCore/statistic.h"
74#include "MagickCore/string_.h"
75#include "MagickCore/string-private.h"
76#include "MagickCore/token.h"
cristyf034abb2013-11-24 14:16:14 +000077
78#ifdef MAGICKCORE_CLPERFMARKER
79#include "CLPerfMarker.h"
80#endif
81
82#if defined(MAGICKCORE_OPENCL_SUPPORT)
83
84#define ALIGNED(pointer,type) ((((long)(pointer)) & (sizeof(type)-1)) == 0)
85/*#define ALIGNED(pointer,type) (0) */
86
87static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception)
88{
89 MagickBooleanType flag;
90
91 MagickCLEnv clEnv;
92 clEnv = GetDefaultOpenCLEnv();
93
94 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED
95 , sizeof(MagickBooleanType), &flag, exception);
96 if (flag == MagickTrue)
97 return MagickFalse;
98
99 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_INITIALIZED
100 , sizeof(MagickBooleanType), &flag, exception);
101 if (flag == MagickFalse)
102 {
103 if(InitOpenCLEnv(clEnv, exception) == MagickFalse)
104 return MagickFalse;
105 }
106
107 return MagickTrue;
108}
109
110
111static MagickBooleanType checkAccelerateCondition(const Image* image, const ChannelType channel, ExceptionInfo *exception)
112{
113 /* check if the image's colorspace is supported */
114 if (image->colorspace != RGBColorspace
115 && image->colorspace != sRGBColorspace)
116 return MagickFalse;
117
118 /* check if the channel is supported */
119 if (((channel&RedChannel) == 0)
120 || ((channel&GreenChannel) == 0)
121 || ((channel&BlueChannel) == 0))
122 {
123 return MagickFalse;
124 }
125
126
127 /* check if if the virtual pixel method is compatible with the OpenCL implementation */
128 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod)&&
129 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
130 return MagickFalse;
131
132 return MagickTrue;
133}
134
135
136static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
137{
138 MagickBooleanType outputReady;
139 MagickCLEnv clEnv;
140
141 cl_int clStatus;
142 size_t global_work_size[2];
143 size_t localGroupSize[2];
144 size_t localMemoryRequirement;
145 Image* filteredImage;
146 MagickSizeType length;
147 const void *inputPixels;
148 void *filteredPixels;
149 cl_mem_flags mem_flags;
150 float* kernelBufferPtr;
151 unsigned kernelSize;
152 unsigned int i;
153 void *hostPtr;
154 unsigned int matte, filterWidth, filterHeight, imageWidth, imageHeight;
155
156 cl_context context;
157 cl_kernel clkernel;
158 cl_mem inputImageBuffer, filteredImageBuffer, convolutionKernel;
159 cl_ulong deviceLocalMemorySize;
160 cl_device_id device;
161
162 cl_command_queue queue;
163
164 /* intialize all CL objects to NULL */
165 context = NULL;
166 inputImageBuffer = NULL;
167 filteredImageBuffer = NULL;
168 convolutionKernel = NULL;
169 clkernel = NULL;
170 queue = NULL;
171 device = NULL;
172
173 filteredImage = NULL;
174 outputReady = MagickFalse;
175
176 clEnv = GetDefaultOpenCLEnv();
177 context = GetOpenCLContext(clEnv);
178
179 inputPixels = NULL;
180 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
181 if (inputPixels == (const void *) NULL)
182 {
183 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
184 goto cleanup;
185 }
186
187 /* Create and initialize OpenCL buffers. */
188
189 /* If the host pointer is aligned to the size of CLPixelPacket,
190 then use the host buffer directly from the GPU; otherwise,
191 create a buffer on the GPU and copy the data over */
192 if (ALIGNED(inputPixels,CLPixelPacket))
193 {
194 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
195 }
196 else
197 {
198 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
199 }
200 /* create a CL buffer from image pixel buffer */
201 length = inputImage->columns * inputImage->rows;
202 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
203 if (clStatus != CL_SUCCESS)
204 {
205 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
206 goto cleanup;
207 }
208
209 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
210 assert(filteredImage != NULL);
211 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
212 {
213 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
214 goto cleanup;
215 }
216 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
217 if (filteredPixels == (void *) NULL)
218 {
219 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
220 goto cleanup;
221 }
222
223 if (ALIGNED(filteredPixels,CLPixelPacket))
224 {
225 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
226 hostPtr = filteredPixels;
227 }
228 else
229 {
230 mem_flags = CL_MEM_WRITE_ONLY;
231 hostPtr = NULL;
232 }
233 /* create a CL buffer from image pixel buffer */
234 length = inputImage->columns * inputImage->rows;
235 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
236 if (clStatus != CL_SUCCESS)
237 {
238 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
239 goto cleanup;
240 }
241
242 kernelSize = kernel->width * kernel->height;
243 convolutionKernel = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
244 if (clStatus != CL_SUCCESS)
245 {
246 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
247 goto cleanup;
248 }
249
250 queue = AcquireOpenCLCommandQueue(clEnv);
251
252 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
253 , 0, NULL, NULL, &clStatus);
254 if (clStatus != CL_SUCCESS)
255 {
256 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
257 goto cleanup;
258 }
259 for (i = 0; i < kernelSize; i++)
260 {
261 kernelBufferPtr[i] = (float) kernel->values[i];
262 }
263 clStatus = clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
264 if (clStatus != CL_SUCCESS)
265 {
266 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
267 goto cleanup;
268 }
269 clFlush(queue);
270
271 /* Compute the local memory requirement for a 16x16 workgroup.
272 If it's larger than 16k, reduce the workgroup size to 8x8 */
273 localGroupSize[0] = 16;
274 localGroupSize[1] = 16;
275 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
276 + kernel->width*kernel->height*sizeof(float);
277 if (localMemoryRequirement > 16384)
278 {
279
280
281 localGroupSize[0] = 8;
282 localGroupSize[1] = 8;
283
284 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
285 + kernel->width*kernel->height*sizeof(float);
286 }
287
288 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_DEVICE, sizeof(cl_device_id), &device, exception);
289 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &deviceLocalMemorySize, NULL);
290 if (localMemoryRequirement <= deviceLocalMemorySize)
291 {
292 /* get the OpenCL kernel */
293 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
294 if (clkernel == NULL)
295 {
296 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
297 goto cleanup;
298 }
299
300 /* set the kernel arguments */
301 i = 0;
302 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
303 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
304 imageWidth = inputImage->columns;
305 imageHeight = inputImage->rows;
306 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
307 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
308 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
309 filterWidth = kernel->width;
310 filterHeight = kernel->height;
311 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
312 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
313 matte = (inputImage->matte==MagickTrue)?1:0;
314 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
315 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
316 clStatus|=clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
317 clStatus|=clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
318 if (clStatus != CL_SUCCESS)
319 {
320 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
321 goto cleanup;
322 }
323
324 /* pad the global size to a multiple of the local work size dimension */
325 global_work_size[0] = ((inputImage->columns + localGroupSize[0] - 1)/localGroupSize[0] ) * localGroupSize[0] ;
326 global_work_size[1] = ((inputImage->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
327
328 /* launch the kernel */
329 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
330 if (clStatus != CL_SUCCESS)
331 {
332 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
333 goto cleanup;
334 }
335 }
336 else
337 {
338 /* get the OpenCL kernel */
339 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
340 if (clkernel == NULL)
341 {
342 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
343 goto cleanup;
344 }
345
346 /* set the kernel arguments */
347 i = 0;
348 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
349 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
350 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
351 filterWidth = kernel->width;
352 filterHeight = kernel->height;
353 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
354 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
355 matte = (inputImage->matte==MagickTrue)?1:0;
356 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
357 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
358 if (clStatus != CL_SUCCESS)
359 {
360 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
361 goto cleanup;
362 }
363
364 global_work_size[0] = inputImage->columns;
365 global_work_size[1] = inputImage->rows;
366
367 /* launch the kernel */
368 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
369 if (clStatus != CL_SUCCESS)
370 {
371 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
372 goto cleanup;
373 }
374 }
375 clFlush(queue);
376
377 if (ALIGNED(filteredPixels,CLPixelPacket))
378 {
379 length = inputImage->columns * inputImage->rows;
380 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
381 }
382 else
383 {
384 length = inputImage->columns * inputImage->rows;
385 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
386 }
387 if (clStatus != CL_SUCCESS)
388 {
389 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
390 goto cleanup;
391 }
392
393 /* everything is fine! :) */
394 outputReady = MagickTrue;
395
396
397cleanup:
398
399 if (inputImageBuffer != NULL)
400 clReleaseMemObject(inputImageBuffer);
401
402 if (filteredImageBuffer != NULL)
403 clReleaseMemObject(filteredImageBuffer);
404
405 if (convolutionKernel != NULL)
406 clReleaseMemObject(convolutionKernel);
407
408 if (clkernel != NULL)
409 RelinquishOpenCLKernel(clEnv, clkernel);
410
411 if (queue != NULL)
412 RelinquishOpenCLCommandQueue(clEnv, queue);
413
414 if (outputReady == MagickFalse)
415 {
416 if (filteredImage != NULL)
417 {
418 DestroyImage(filteredImage);
419 filteredImage = NULL;
420 }
421 }
422
423 return filteredImage;
424}
425
cristy3f6d1482010-01-20 21:01:21 +0000426/*
427%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
428% %
429% %
430% %
cristyf034abb2013-11-24 14:16:14 +0000431% C o n v o l v e I m a g e w i t h O p e n C L %
cristy3f6d1482010-01-20 21:01:21 +0000432% %
433% %
434% %
435%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
436%
cristyf034abb2013-11-24 14:16:14 +0000437% ConvolveImage() applies a custom convolution kernel to the image.
cristy3f6d1482010-01-20 21:01:21 +0000438%
cristyf034abb2013-11-24 14:16:14 +0000439% The format of the ConvolveImage method is:
cristy3f6d1482010-01-20 21:01:21 +0000440%
cristyf034abb2013-11-24 14:16:14 +0000441% Image *ConvolveImage(const Image *image,const size_t order,
442% const double *kernel,ExceptionInfo *exception)
443% Image *ConvolveImageChannel(const Image *image,const ChannelType channel,
444% const size_t order,const double *kernel,ExceptionInfo *exception)
445%
446% A description of each parameter follows:
447%
448% o image: the image.
449%
450% o channel: the channel type.
451%
452% o kernel: kernel info.
453%
454% o exception: return any errors or warnings in this structure.
455%
456*/
457
458MagickExport Image* AccelerateConvolveImageChannel(const Image *image, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
459{
460 MagickBooleanType status;
461 Image* filteredImage = NULL;
462
463 assert(image != NULL);
464 assert(kernel != (KernelInfo *) NULL);
465 assert(exception != (ExceptionInfo *) NULL);
466
467 status = checkOpenCLEnvironment(exception);
468 if (status == MagickFalse)
469 return NULL;
470
471 status = checkAccelerateCondition(image, channel, exception);
472 if (status == MagickFalse)
473 return NULL;
474
475 filteredImage = ComputeConvolveImage(image, channel, kernel, exception);
476 OpenCLLogException(__FUNCTION__,__LINE__,exception);
477 return filteredImage;
478}
479
480static MagickBooleanType ComputeFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
481 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
482{
483 MagickBooleanType status;
484
485 MagickCLEnv clEnv;
486
487 MagickSizeType length;
488 void* pixels;
489 float* parametersBufferPtr;
490
491 cl_int clStatus;
492 cl_context context;
493 cl_kernel clkernel;
494 cl_command_queue queue;
495 cl_mem_flags mem_flags;
496 cl_mem imageBuffer;
497 cl_mem parametersBuffer;
498 size_t globalWorkSize[2];
499
500 unsigned int i;
501
502 status = MagickFalse;
503
504 context = NULL;
505 clkernel = NULL;
506 queue = NULL;
507 imageBuffer = NULL;
508 parametersBuffer = NULL;
509
510 clEnv = GetDefaultOpenCLEnv();
511 context = GetOpenCLContext(clEnv);
512
513 pixels = GetPixelCachePixels(image, &length, exception);
514 if (pixels == (void *) NULL)
515 {
516 (void) ThrowMagickException(exception, GetMagickModule(), CacheWarning,
517 "GetPixelCachePixels failed.",
518 "'%s'", image->filename);
519 goto cleanup;
520 }
521
522
523 if (ALIGNED(pixels,CLPixelPacket))
524 {
525 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
526 }
527 else
528 {
529 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
530 }
531 /* create a CL buffer from image pixel buffer */
532 length = image->columns * image->rows;
533 imageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus);
534 if (clStatus != CL_SUCCESS)
535 {
536 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
537 goto cleanup;
538 }
539
540 parametersBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus);
541 if (clStatus != CL_SUCCESS)
542 {
543 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
544 goto cleanup;
545 }
546
547 queue = AcquireOpenCLCommandQueue(clEnv);
548
549 parametersBufferPtr = (float*)clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float)
550 , 0, NULL, NULL, &clStatus);
551 if (clStatus != CL_SUCCESS)
552 {
553 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
554 goto cleanup;
555 }
556 for (i = 0; i < number_parameters; i++)
557 {
558 parametersBufferPtr[i] = (float)parameters[i];
559 }
560 clStatus = clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL);
561 if (clStatus != CL_SUCCESS)
562 {
563 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
564 goto cleanup;
565 }
566 clFlush(queue);
567
568 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "FunctionImage");
569 if (clkernel == NULL)
570 {
571 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
572 goto cleanup;
573 }
574
575 /* set the kernel arguments */
576 i = 0;
577 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
578 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
579 clStatus|=clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
580 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
581 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
582 if (clStatus != CL_SUCCESS)
583 {
584 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
585 goto cleanup;
586 }
587
588 globalWorkSize[0] = image->columns;
589 globalWorkSize[1] = image->rows;
590 /* launch the kernel */
591 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
592 if (clStatus != CL_SUCCESS)
593 {
594 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
595 goto cleanup;
596 }
597 clFlush(queue);
598
599
600 if (ALIGNED(pixels,CLPixelPacket))
601 {
602 length = image->columns * image->rows;
603 clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
604 }
605 else
606 {
607 length = image->columns * image->rows;
608 clStatus = clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL);
609 }
610 if (clStatus != CL_SUCCESS)
611 {
612 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
613 goto cleanup;
614 }
615 status = MagickTrue;
616
617cleanup:
618
619 if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel);
620 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
621 if (imageBuffer != NULL) clReleaseMemObject(imageBuffer);
622 if (parametersBuffer != NULL) clReleaseMemObject(parametersBuffer);
623
624 return status;
625}
626
627
628
629MagickExport MagickBooleanType
630 AccelerateFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
631 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
632{
633 MagickBooleanType status;
634
635 status = MagickFalse;
636
637 assert(image != NULL);
638 assert(exception != (ExceptionInfo *) NULL);
639
640 status = checkOpenCLEnvironment(exception);
641 if (status == MagickTrue)
642 {
643 status = checkAccelerateCondition(image, channel, exception);
644 if (status == MagickTrue)
645 {
646 status = ComputeFunctionImage(image, channel, function, number_parameters, parameters, exception);
647 OpenCLLogException(__FUNCTION__,__LINE__,exception);
648 }
649 }
650 return status;
651}
652
653
654static MagickBooleanType splitImage(const Image* inputImage)
655{
656 MagickBooleanType split;
657
658 MagickCLEnv clEnv;
659 unsigned long allocSize;
660 unsigned long tempSize;
661
662 clEnv = GetDefaultOpenCLEnv();
663
664 allocSize = GetOpenCLDeviceMaxMemAllocSize(clEnv);
665 tempSize = inputImage->columns * inputImage->rows * 4 * 4;
666
667 /*
668 printf("alloc size: %lu\n", allocSize);
669 printf("temp size: %lu\n", tempSize);
670 */
671
672 split = ((tempSize > allocSize) ? MagickTrue:MagickFalse);
673
674 return split;
675}
676
677static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
678{
679 MagickBooleanType outputReady;
680 Image* filteredImage;
681 MagickCLEnv clEnv;
682
683 cl_int clStatus;
684
685 const void *inputPixels;
686 void *filteredPixels;
687 cl_mem_flags mem_flags;
688
689 cl_context context;
690 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
691 cl_kernel blurRowKernel, blurColumnKernel;
692 cl_command_queue queue;
693
694 void* hostPtr;
695 float* kernelBufferPtr;
696 MagickSizeType length;
697
698 char geometry[MaxTextExtent];
699 KernelInfo* kernel = NULL;
700 unsigned int kernelWidth;
701 unsigned int imageColumns, imageRows;
702
703 unsigned int i;
704
705 context = NULL;
706 filteredImage = NULL;
707 inputImageBuffer = NULL;
708 tempImageBuffer = NULL;
709 filteredImageBuffer = NULL;
710 imageKernelBuffer = NULL;
711 blurRowKernel = NULL;
712 blurColumnKernel = NULL;
713 queue = NULL;
714
715 outputReady = MagickFalse;
716
717 clEnv = GetDefaultOpenCLEnv();
718 context = GetOpenCLContext(clEnv);
719 queue = AcquireOpenCLCommandQueue(clEnv);
720
721 /* Create and initialize OpenCL buffers. */
722 {
723 inputPixels = NULL;
724 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
725 if (inputPixels == (const void *) NULL)
726 {
727 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
728 goto cleanup;
729 }
730 /* If the host pointer is aligned to the size of CLPixelPacket,
731 then use the host buffer directly from the GPU; otherwise,
732 create a buffer on the GPU and copy the data over */
733 if (ALIGNED(inputPixels,CLPixelPacket))
734 {
735 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
736 }
737 else
738 {
739 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
740 }
741 /* create a CL buffer from image pixel buffer */
742 length = inputImage->columns * inputImage->rows;
743 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
744 if (clStatus != CL_SUCCESS)
745 {
746 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
747 goto cleanup;
748 }
749 }
750
751 /* create output */
752 {
753 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
754 assert(filteredImage != NULL);
755 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
756 {
757 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
758 goto cleanup;
759 }
760 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
761 if (filteredPixels == (void *) NULL)
762 {
763 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
764 goto cleanup;
765 }
766
767 if (ALIGNED(filteredPixels,CLPixelPacket))
768 {
769 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
770 hostPtr = filteredPixels;
771 }
772 else
773 {
774 mem_flags = CL_MEM_WRITE_ONLY;
775 hostPtr = NULL;
776 }
777 /* create a CL buffer from image pixel buffer */
778 length = inputImage->columns * inputImage->rows;
779 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
780 if (clStatus != CL_SUCCESS)
781 {
782 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
783 goto cleanup;
784 }
785 }
786
787 /* create processing kernel */
788 {
789 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
790 kernel=AcquireKernelInfo(geometry);
791 if (kernel == (KernelInfo *) NULL)
792 {
793 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
794 goto cleanup;
795 }
796
797 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
798 if (clStatus != CL_SUCCESS)
799 {
800 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
801 goto cleanup;
802 }
803 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
804 if (clStatus != CL_SUCCESS)
805 {
806 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
807 goto cleanup;
808 }
809
810 for (i = 0; i < kernel->width; i++)
811 {
812 kernelBufferPtr[i] = (float) kernel->values[i];
813 }
814
815 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
816 if (clStatus != CL_SUCCESS)
817 {
818 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
819 goto cleanup;
820 }
821 }
822
823 {
824
825 /* create temp buffer */
826 {
827 length = inputImage->columns * inputImage->rows;
828 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
829 if (clStatus != CL_SUCCESS)
830 {
831 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
832 goto cleanup;
833 }
834 }
835
836 /* get the OpenCL kernels */
837 {
838 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
839 if (blurRowKernel == NULL)
840 {
841 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
842 goto cleanup;
843 };
844
845 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumn");
846 if (blurColumnKernel == NULL)
847 {
848 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
849 goto cleanup;
850 };
851 }
852
853 {
854 /* need logic to decide this value */
855 int chunkSize = 256;
856
857 {
858 imageColumns = inputImage->columns;
859 imageRows = inputImage->rows;
860
861 /* set the kernel arguments */
862 i = 0;
863 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
864 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
865 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
866 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
867 kernelWidth = kernel->width;
868 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
869 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
870 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
871 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
872 if (clStatus != CL_SUCCESS)
873 {
874 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
875 goto cleanup;
876 }
877 }
878
879 /* launch the kernel */
880 {
881 size_t gsize[2];
882 size_t wsize[2];
883
884 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
885 gsize[1] = inputImage->rows;
886 wsize[0] = chunkSize;
887 wsize[1] = 1;
888
889 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
890 if (clStatus != CL_SUCCESS)
891 {
892 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
893 goto cleanup;
894 }
895 clFlush(queue);
896 }
897 }
898
899 {
900 /* need logic to decide this value */
901 int chunkSize = 256;
902
903 {
904 imageColumns = inputImage->columns;
905 imageRows = inputImage->rows;
906
907 /* set the kernel arguments */
908 i = 0;
909 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
910 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
911 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
912 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
913 kernelWidth = kernel->width;
914 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
915 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
916 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
917 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
918 if (clStatus != CL_SUCCESS)
919 {
920 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
921 goto cleanup;
922 }
923 }
924
925 /* launch the kernel */
926 {
927 size_t gsize[2];
928 size_t wsize[2];
929
930 gsize[0] = inputImage->columns;
931 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
932 wsize[0] = 1;
933 wsize[1] = chunkSize;
934
935 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
936 if (clStatus != CL_SUCCESS)
937 {
938 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
939 goto cleanup;
940 }
941 clFlush(queue);
942 }
943 }
944
945 }
946
947 /* get result */
948 if (ALIGNED(filteredPixels,CLPixelPacket))
949 {
950 length = inputImage->columns * inputImage->rows;
951 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
952 }
953 else
954 {
955 length = inputImage->columns * inputImage->rows;
956 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
957 }
958 if (clStatus != CL_SUCCESS)
959 {
960 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
961 goto cleanup;
962 }
963
964 outputReady = MagickTrue;
965
966cleanup:
967 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
968 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
969 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
970 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
971 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
972 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
973 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
974 if (kernel!=NULL) DestroyKernelInfo(kernel);
975 if (outputReady == MagickFalse)
976 {
977 if (filteredImage != NULL)
978 {
979 DestroyImage(filteredImage);
980 filteredImage = NULL;
981 }
982 }
983 return filteredImage;
984}
985
986static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
987{
988 MagickBooleanType outputReady;
989 Image* filteredImage;
990 MagickCLEnv clEnv;
991
992 cl_int clStatus;
993
994 const void *inputPixels;
995 void *filteredPixels;
996 cl_mem_flags mem_flags;
997
998 cl_context context;
999 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
1000 cl_kernel blurRowKernel, blurColumnKernel;
1001 cl_command_queue queue;
1002
1003 void* hostPtr;
1004 float* kernelBufferPtr;
1005 MagickSizeType length;
1006
1007 char geometry[MaxTextExtent];
1008 KernelInfo* kernel = NULL;
1009 unsigned int kernelWidth;
1010 unsigned int imageColumns, imageRows;
1011
1012 unsigned int i;
1013
1014 context = NULL;
1015 filteredImage = NULL;
1016 inputImageBuffer = NULL;
1017 tempImageBuffer = NULL;
1018 filteredImageBuffer = NULL;
1019 imageKernelBuffer = NULL;
1020 blurRowKernel = NULL;
1021 blurColumnKernel = NULL;
1022 queue = NULL;
1023
1024 outputReady = MagickFalse;
1025
1026 clEnv = GetDefaultOpenCLEnv();
1027 context = GetOpenCLContext(clEnv);
1028 queue = AcquireOpenCLCommandQueue(clEnv);
1029
1030 /* Create and initialize OpenCL buffers. */
1031 {
1032 inputPixels = NULL;
1033 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1034 if (inputPixels == (const void *) NULL)
1035 {
1036 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
1037 goto cleanup;
1038 }
1039 /* If the host pointer is aligned to the size of CLPixelPacket,
1040 then use the host buffer directly from the GPU; otherwise,
1041 create a buffer on the GPU and copy the data over */
1042 if (ALIGNED(inputPixels,CLPixelPacket))
1043 {
1044 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1045 }
1046 else
1047 {
1048 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1049 }
1050 /* create a CL buffer from image pixel buffer */
1051 length = inputImage->columns * inputImage->rows;
1052 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1053 if (clStatus != CL_SUCCESS)
1054 {
1055 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1056 goto cleanup;
1057 }
1058 }
1059
1060 /* create output */
1061 {
1062 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1063 assert(filteredImage != NULL);
1064 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
1065 {
1066 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1067 goto cleanup;
1068 }
1069 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1070 if (filteredPixels == (void *) NULL)
1071 {
1072 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1073 goto cleanup;
1074 }
1075
1076 if (ALIGNED(filteredPixels,CLPixelPacket))
1077 {
1078 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1079 hostPtr = filteredPixels;
1080 }
1081 else
1082 {
1083 mem_flags = CL_MEM_WRITE_ONLY;
1084 hostPtr = NULL;
1085 }
1086 /* create a CL buffer from image pixel buffer */
1087 length = inputImage->columns * inputImage->rows;
1088 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1089 if (clStatus != CL_SUCCESS)
1090 {
1091 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1092 goto cleanup;
1093 }
1094 }
1095
1096 /* create processing kernel */
1097 {
1098 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1099 kernel=AcquireKernelInfo(geometry);
1100 if (kernel == (KernelInfo *) NULL)
1101 {
1102 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
1103 goto cleanup;
1104 }
1105
1106 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
1107 if (clStatus != CL_SUCCESS)
1108 {
1109 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1110 goto cleanup;
1111 }
1112 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1113 if (clStatus != CL_SUCCESS)
1114 {
1115 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
1116 goto cleanup;
1117 }
1118
1119 for (i = 0; i < kernel->width; i++)
1120 {
1121 kernelBufferPtr[i] = (float) kernel->values[i];
1122 }
1123
1124 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1125 if (clStatus != CL_SUCCESS)
1126 {
1127 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
1128 goto cleanup;
1129 }
1130 }
1131
1132 {
1133 unsigned int offsetRows;
1134 unsigned int sec;
1135
1136 /* create temp buffer */
1137 {
1138 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
1139 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1140 if (clStatus != CL_SUCCESS)
1141 {
1142 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1143 goto cleanup;
1144 }
1145 }
1146
1147 /* get the OpenCL kernels */
1148 {
1149 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
1150 if (blurRowKernel == NULL)
1151 {
1152 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1153 goto cleanup;
1154 };
1155
1156 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumnSection");
1157 if (blurColumnKernel == NULL)
1158 {
1159 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1160 goto cleanup;
1161 };
1162 }
1163
1164 for (sec = 0; sec < 2; sec++)
1165 {
1166 {
1167 /* need logic to decide this value */
1168 int chunkSize = 256;
1169
1170 {
1171 imageColumns = inputImage->columns;
1172 if (sec == 0)
1173 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
1174 else
1175 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
1176
1177 offsetRows = sec * inputImage->rows / 2;
1178
1179 kernelWidth = kernel->width;
1180
1181 /* set the kernel arguments */
1182 i = 0;
1183 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1184 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1185 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1186 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1187 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1188 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1189 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1190 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1191 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1192 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
1193 if (clStatus != CL_SUCCESS)
1194 {
1195 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1196 goto cleanup;
1197 }
1198 }
1199
1200 /* launch the kernel */
1201 {
1202 size_t gsize[2];
1203 size_t wsize[2];
1204
1205 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
1206 gsize[1] = imageRows;
1207 wsize[0] = chunkSize;
1208 wsize[1] = 1;
1209
1210 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1211 if (clStatus != CL_SUCCESS)
1212 {
1213 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1214 goto cleanup;
1215 }
1216 clFlush(queue);
1217 }
1218 }
1219
1220 {
1221 /* need logic to decide this value */
1222 int chunkSize = 256;
1223
1224 {
1225 imageColumns = inputImage->columns;
1226 if (sec == 0)
1227 imageRows = inputImage->rows / 2;
1228 else
1229 imageRows = (inputImage->rows - inputImage->rows / 2);
1230
1231 offsetRows = sec * inputImage->rows / 2;
1232
1233 kernelWidth = kernel->width;
1234
1235 /* set the kernel arguments */
1236 i = 0;
1237 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1238 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1239 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
1240 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1241 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1242 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1243 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1244 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
1245 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1246 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
1247 if (clStatus != CL_SUCCESS)
1248 {
1249 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1250 goto cleanup;
1251 }
1252 }
1253
1254 /* launch the kernel */
1255 {
1256 size_t gsize[2];
1257 size_t wsize[2];
1258
1259 gsize[0] = imageColumns;
1260 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
1261 wsize[0] = 1;
1262 wsize[1] = chunkSize;
1263
1264 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1265 if (clStatus != CL_SUCCESS)
1266 {
1267 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1268 goto cleanup;
1269 }
1270 clFlush(queue);
1271 }
1272 }
1273 }
1274
1275 }
1276
1277 /* get result */
1278 if (ALIGNED(filteredPixels,CLPixelPacket))
1279 {
1280 length = inputImage->columns * inputImage->rows;
1281 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1282 }
1283 else
1284 {
1285 length = inputImage->columns * inputImage->rows;
1286 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1287 }
1288 if (clStatus != CL_SUCCESS)
1289 {
1290 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
1291 goto cleanup;
1292 }
1293
1294 outputReady = MagickTrue;
1295
1296cleanup:
1297 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1298 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1299 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1300 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1301 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1302 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
1303 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1304 if (kernel!=NULL) DestroyKernelInfo(kernel);
1305 if (outputReady == MagickFalse)
1306 {
1307 if (filteredImage != NULL)
1308 {
1309 DestroyImage(filteredImage);
1310 filteredImage = NULL;
1311 }
1312 }
1313 return filteredImage;
1314}
1315
1316/*
1317%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1318% %
1319% %
1320% %
1321% B l u r I m a g e w i t h O p e n C L %
1322% %
1323% %
1324% %
1325%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1326%
1327% BlurImage() blurs an image. We convolve the image with a Gaussian operator
1328% of the given radius and standard deviation (sigma). For reasonable results,
1329% the radius should be larger than sigma. Use a radius of 0 and BlurImage()
1330% selects a suitable radius for you.
1331%
1332% The format of the BlurImage method is:
1333%
1334% Image *BlurImage(const Image *image,const double radius,
1335% const double sigma,ExceptionInfo *exception)
1336% Image *BlurImageChannel(const Image *image,const ChannelType channel,
1337% const double radius,const double sigma,ExceptionInfo *exception)
1338%
1339% A description of each parameter follows:
1340%
1341% o image: the image.
1342%
1343% o channel: the channel type.
1344%
1345% o radius: the radius of the Gaussian, in pixels, not counting the center
1346% pixel.
1347%
1348% o sigma: the standard deviation of the Gaussian, in pixels.
1349%
1350% o exception: return any errors or warnings in this structure.
1351%
1352*/
1353
1354MagickExport
1355Image* AccelerateBlurImage(const Image *image, const ChannelType channel, const double radius, const double sigma,ExceptionInfo *exception)
1356{
1357 MagickBooleanType status;
1358 Image* filteredImage = NULL;
1359
1360 assert(image != NULL);
1361 assert(exception != (ExceptionInfo *) NULL);
1362
1363 status = checkOpenCLEnvironment(exception);
1364 if (status == MagickFalse)
1365 return NULL;
1366
1367 status = checkAccelerateCondition(image, channel, exception);
1368 if (status == MagickFalse)
1369 return NULL;
1370
1371 if (splitImage(image) && (image->rows / 2 > radius))
1372 filteredImage = ComputeBlurImageSection(image, channel, radius, sigma, exception);
1373 else
1374 filteredImage = ComputeBlurImage(image, channel, radius, sigma, exception);
1375
1376 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1377 return filteredImage;
1378}
1379
1380
1381static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType channel, const double angle, ExceptionInfo *exception)
1382{
1383
1384 MagickBooleanType outputReady;
1385 Image* filteredImage;
1386 MagickCLEnv clEnv;
1387
1388 cl_int clStatus;
1389 size_t global_work_size[2];
1390
1391 cl_context context;
1392 cl_mem_flags mem_flags;
1393 cl_mem inputImageBuffer, filteredImageBuffer, sinThetaBuffer, cosThetaBuffer;
1394 cl_kernel radialBlurKernel;
1395 cl_command_queue queue;
1396
1397 const void *inputPixels;
1398 void *filteredPixels;
1399 void* hostPtr;
1400 float* sinThetaPtr;
1401 float* cosThetaPtr;
1402 MagickSizeType length;
1403 unsigned int matte;
1404 MagickPixelPacket bias;
1405 cl_float4 biasPixel;
1406 cl_float2 blurCenter;
1407 float blurRadius;
1408 unsigned int cossin_theta_size;
1409 float offset, theta;
1410
1411 unsigned int i;
1412
1413 outputReady = MagickFalse;
1414 context = NULL;
1415 filteredImage = NULL;
1416 inputImageBuffer = NULL;
1417 filteredImageBuffer = NULL;
1418 sinThetaBuffer = NULL;
1419 cosThetaBuffer = NULL;
1420 queue = NULL;
1421 radialBlurKernel = NULL;
1422
1423
1424 clEnv = GetDefaultOpenCLEnv();
1425 context = GetOpenCLContext(clEnv);
1426
1427
1428 /* Create and initialize OpenCL buffers. */
1429
1430 inputPixels = NULL;
1431 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1432 if (inputPixels == (const void *) NULL)
1433 {
1434 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
1435 goto cleanup;
1436 }
1437
1438 /* If the host pointer is aligned to the size of CLPixelPacket,
1439 then use the host buffer directly from the GPU; otherwise,
1440 create a buffer on the GPU and copy the data over */
1441 if (ALIGNED(inputPixels,CLPixelPacket))
1442 {
1443 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1444 }
1445 else
1446 {
1447 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1448 }
1449 /* create a CL buffer from image pixel buffer */
1450 length = inputImage->columns * inputImage->rows;
1451 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1452 if (clStatus != CL_SUCCESS)
1453 {
1454 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1455 goto cleanup;
1456 }
1457
1458
1459 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1460 assert(filteredImage != NULL);
1461 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
1462 {
1463 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1464 goto cleanup;
1465 }
1466 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1467 if (filteredPixels == (void *) NULL)
1468 {
1469 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1470 goto cleanup;
1471 }
1472
1473 if (ALIGNED(filteredPixels,CLPixelPacket))
1474 {
1475 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1476 hostPtr = filteredPixels;
1477 }
1478 else
1479 {
1480 mem_flags = CL_MEM_WRITE_ONLY;
1481 hostPtr = NULL;
1482 }
1483 /* create a CL buffer from image pixel buffer */
1484 length = inputImage->columns * inputImage->rows;
1485 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1486 if (clStatus != CL_SUCCESS)
1487 {
1488 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1489 goto cleanup;
1490 }
1491
1492 blurCenter.s[0] = (float) (inputImage->columns-1)/2.0;
1493 blurCenter.s[1] = (float) (inputImage->rows-1)/2.0;
1494 blurRadius=hypot(blurCenter.s[0],blurCenter.s[1]);
1495 cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
1496
1497 /* create a buffer for sin_theta and cos_theta */
1498 sinThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1499 if (clStatus != CL_SUCCESS)
1500 {
1501 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1502 goto cleanup;
1503 }
1504 cosThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1505 if (clStatus != CL_SUCCESS)
1506 {
1507 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1508 goto cleanup;
1509 }
1510
1511
1512 queue = AcquireOpenCLCommandQueue(clEnv);
1513 sinThetaPtr = (float*) clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1514 if (clStatus != CL_SUCCESS)
1515 {
1516 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
1517 goto cleanup;
1518 }
1519
1520 cosThetaPtr = (float*) clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1521 if (clStatus != CL_SUCCESS)
1522 {
1523 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
1524 goto cleanup;
1525 }
1526
1527 theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
1528 offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
1529 for (i=0; i < (ssize_t) cossin_theta_size; i++)
1530 {
1531 cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
1532 sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
1533 }
1534
1535 clStatus = clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
1536 clStatus |= clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
1537 if (clStatus != CL_SUCCESS)
1538 {
1539 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
1540 goto cleanup;
1541 }
1542
1543 /* get the OpenCL kernel */
1544 radialBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RadialBlur");
1545 if (radialBlurKernel == NULL)
1546 {
1547 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1548 goto cleanup;
1549 }
1550
1551
1552 /* set the kernel arguments */
1553 i = 0;
1554 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1555 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1556
1557 GetMagickPixelPacket(inputImage,&bias);
1558 biasPixel.s[0] = bias.red;
1559 biasPixel.s[1] = bias.green;
1560 biasPixel.s[2] = bias.blue;
1561 biasPixel.s[3] = bias.opacity;
1562 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel);
1563 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel);
1564
1565 matte = (inputImage->matte == MagickTrue)?1:0;
1566 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte);
1567
1568 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter);
1569
1570 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
1571 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
1572 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
1573 if (clStatus != CL_SUCCESS)
1574 {
1575 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1576 goto cleanup;
1577 }
1578
1579
1580 global_work_size[0] = inputImage->columns;
1581 global_work_size[1] = inputImage->rows;
1582 /* launch the kernel */
1583 clStatus = clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
1584 if (clStatus != CL_SUCCESS)
1585 {
1586 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1587 goto cleanup;
1588 }
1589 clFlush(queue);
1590
1591 if (ALIGNED(filteredPixels,CLPixelPacket))
1592 {
1593 length = inputImage->columns * inputImage->rows;
1594 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1595 }
1596 else
1597 {
1598 length = inputImage->columns * inputImage->rows;
1599 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1600 }
1601 if (clStatus != CL_SUCCESS)
1602 {
1603 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
1604 goto cleanup;
1605 }
1606 outputReady = MagickTrue;
1607
1608cleanup:
1609 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1610 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1611 if (sinThetaBuffer!=NULL) clReleaseMemObject(sinThetaBuffer);
1612 if (cosThetaBuffer!=NULL) clReleaseMemObject(cosThetaBuffer);
1613 if (radialBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, radialBlurKernel);
1614 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1615 if (outputReady == MagickFalse)
1616 {
1617 if (filteredImage != NULL)
1618 {
1619 DestroyImage(filteredImage);
1620 filteredImage = NULL;
1621 }
1622 }
1623 return filteredImage;
1624}
1625
1626/*
1627%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1628% %
1629% %
1630% %
1631% R a d i a l B l u r I m a g e w i t h O p e n C L %
1632% %
1633% %
1634% %
1635%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1636%
1637% RadialBlurImage() applies a radial blur to the image.
1638%
1639% Andrew Protano contributed this effect.
1640%
1641% The format of the RadialBlurImage method is:
1642%
1643% Image *RadialBlurImage(const Image *image,const double angle,
1644% ExceptionInfo *exception)
1645% Image *RadialBlurImageChannel(const Image *image,const ChannelType channel,
1646% const double angle,ExceptionInfo *exception)
1647%
1648% A description of each parameter follows:
1649%
1650% o image: the image.
1651%
1652% o channel: the channel type.
1653%
1654% o angle: the angle of the radial blur.
1655%
1656% o exception: return any errors or warnings in this structure.
1657%
1658*/
1659
1660MagickExport
1661Image* AccelerateRadialBlurImage(const Image *image, const ChannelType channel, const double angle, ExceptionInfo *exception)
1662{
1663 MagickBooleanType status;
1664 Image* filteredImage;
1665
1666
1667 assert(image != NULL);
1668 assert(exception != NULL);
1669
1670 status = checkOpenCLEnvironment(exception);
1671 if (status == MagickFalse)
1672 return NULL;
1673
1674 status = checkAccelerateCondition(image, channel, exception);
1675 if (status == MagickFalse)
1676 return NULL;
1677
1678 filteredImage = ComputeRadialBlurImage(image, channel, angle, exception);
1679 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1680 return filteredImage;
1681}
1682
1683
1684
1685static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
1686 const double gain,const double threshold,ExceptionInfo *exception)
1687{
1688 MagickBooleanType outputReady = MagickFalse;
1689 Image* filteredImage = NULL;
1690 MagickCLEnv clEnv = NULL;
1691
1692 cl_int clStatus;
1693
1694 const void *inputPixels;
1695 void *filteredPixels;
1696 cl_mem_flags mem_flags;
1697
1698 KernelInfo *kernel = NULL;
1699 char geometry[MaxTextExtent];
1700
1701 cl_context context = NULL;
1702 cl_mem inputImageBuffer = NULL;
1703 cl_mem filteredImageBuffer = NULL;
1704 cl_mem tempImageBuffer = NULL;
1705 cl_mem imageKernelBuffer = NULL;
1706 cl_kernel blurRowKernel = NULL;
1707 cl_kernel unsharpMaskBlurColumnKernel = NULL;
1708 cl_command_queue queue = NULL;
1709
1710 void* hostPtr;
1711 float* kernelBufferPtr;
1712 MagickSizeType length;
1713 unsigned int kernelWidth;
1714 float fGain;
1715 float fThreshold;
1716 unsigned int imageColumns, imageRows;
1717 int chunkSize;
1718 unsigned int i;
1719
1720 clEnv = GetDefaultOpenCLEnv();
1721 context = GetOpenCLContext(clEnv);
1722 queue = AcquireOpenCLCommandQueue(clEnv);
1723
1724 /* Create and initialize OpenCL buffers. */
1725 {
1726 inputPixels = NULL;
1727 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1728 if (inputPixels == (const void *) NULL)
1729 {
1730 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
1731 goto cleanup;
1732 }
1733
1734 /* If the host pointer is aligned to the size of CLPixelPacket,
1735 then use the host buffer directly from the GPU; otherwise,
1736 create a buffer on the GPU and copy the data over */
1737 if (ALIGNED(inputPixels,CLPixelPacket))
1738 {
1739 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1740 }
1741 else
1742 {
1743 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1744 }
1745 /* create a CL buffer from image pixel buffer */
1746 length = inputImage->columns * inputImage->rows;
1747 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1748 if (clStatus != CL_SUCCESS)
1749 {
1750 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1751 goto cleanup;
1752 }
1753 }
1754
1755 /* create output */
1756 {
1757 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1758 assert(filteredImage != NULL);
1759 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
1760 {
1761 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1762 goto cleanup;
1763 }
1764 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1765 if (filteredPixels == (void *) NULL)
1766 {
1767 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1768 goto cleanup;
1769 }
1770
1771 if (ALIGNED(filteredPixels,CLPixelPacket))
1772 {
1773 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1774 hostPtr = filteredPixels;
1775 }
1776 else
1777 {
1778 mem_flags = CL_MEM_WRITE_ONLY;
1779 hostPtr = NULL;
1780 }
1781
1782 /* create a CL buffer from image pixel buffer */
1783 length = inputImage->columns * inputImage->rows;
1784 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1785 if (clStatus != CL_SUCCESS)
1786 {
1787 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1788 goto cleanup;
1789 }
1790 }
1791
1792 /* create the blur kernel */
1793 {
1794 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1795 kernel=AcquireKernelInfo(geometry);
1796 if (kernel == (KernelInfo *) NULL)
1797 {
1798 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
1799 goto cleanup;
1800 }
1801
1802 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
1803 if (clStatus != CL_SUCCESS)
1804 {
1805 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1806 goto cleanup;
1807 }
1808
1809
1810 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1811 if (clStatus != CL_SUCCESS)
1812 {
1813 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
1814 goto cleanup;
1815 }
1816 for (i = 0; i < kernel->width; i++)
1817 {
1818 kernelBufferPtr[i] = (float) kernel->values[i];
1819 }
1820 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1821 if (clStatus != CL_SUCCESS)
1822 {
1823 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
1824 goto cleanup;
1825 }
1826 }
1827
1828 {
1829 /* create temp buffer */
1830 {
1831 length = inputImage->columns * inputImage->rows;
1832 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1833 if (clStatus != CL_SUCCESS)
1834 {
1835 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1836 goto cleanup;
1837 }
1838 }
1839
1840 /* get the opencl kernel */
1841 {
1842 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
1843 if (blurRowKernel == NULL)
1844 {
1845 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1846 goto cleanup;
1847 };
1848
1849 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
1850 if (unsharpMaskBlurColumnKernel == NULL)
1851 {
1852 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1853 goto cleanup;
1854 };
1855 }
1856
1857 {
1858 chunkSize = 256;
1859
1860 imageColumns = inputImage->columns;
1861 imageRows = inputImage->rows;
1862
1863 kernelWidth = kernel->width;
1864
1865 /* set the kernel arguments */
1866 i = 0;
1867 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1868 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1869 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1870 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1871 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1872 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1873 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1874 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1875 if (clStatus != CL_SUCCESS)
1876 {
1877 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1878 goto cleanup;
1879 }
1880 }
1881
1882 /* launch the kernel */
1883 {
1884 size_t gsize[2];
1885 size_t wsize[2];
1886
1887 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
1888 gsize[1] = inputImage->rows;
1889 wsize[0] = chunkSize;
1890 wsize[1] = 1;
1891
1892 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1893 if (clStatus != CL_SUCCESS)
1894 {
1895 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1896 goto cleanup;
1897 }
1898 clFlush(queue);
1899 }
1900
1901
1902 {
1903 chunkSize = 256;
1904 imageColumns = inputImage->columns;
1905 imageRows = inputImage->rows;
1906 kernelWidth = kernel->width;
1907 fGain = (float)gain;
1908 fThreshold = (float)threshold;
1909
1910 i = 0;
1911 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1912 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1913 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1914 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1915 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1916 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
1917 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
1918 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
1919 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1920 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1921 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
1922 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
1923
1924 if (clStatus != CL_SUCCESS)
1925 {
1926 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1927 goto cleanup;
1928 }
1929 }
1930
1931 /* launch the kernel */
1932 {
1933 size_t gsize[2];
1934 size_t wsize[2];
1935
1936 gsize[0] = inputImage->columns;
1937 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
1938 wsize[0] = 1;
1939 wsize[1] = chunkSize;
1940
1941 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1942 if (clStatus != CL_SUCCESS)
1943 {
1944 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1945 goto cleanup;
1946 }
1947 clFlush(queue);
1948 }
1949
1950 }
1951
1952 /* get result */
1953 if (ALIGNED(filteredPixels,CLPixelPacket))
1954 {
1955 length = inputImage->columns * inputImage->rows;
1956 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1957 }
1958 else
1959 {
1960 length = inputImage->columns * inputImage->rows;
1961 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1962 }
1963 if (clStatus != CL_SUCCESS)
1964 {
1965 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
1966 goto cleanup;
1967 }
1968
1969 outputReady = MagickTrue;
1970
1971cleanup:
1972 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
1973 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1974 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1975 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1976 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1977 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1978 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
1979 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1980 if (outputReady == MagickFalse)
1981 {
1982 if (filteredImage != NULL)
1983 {
1984 DestroyImage(filteredImage);
1985 filteredImage = NULL;
1986 }
1987 }
1988 return filteredImage;
1989}
1990
1991
1992static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
1993 const double gain,const double threshold,ExceptionInfo *exception)
1994{
1995 MagickBooleanType outputReady = MagickFalse;
1996 Image* filteredImage = NULL;
1997 MagickCLEnv clEnv = NULL;
1998
1999 cl_int clStatus;
2000
2001 const void *inputPixels;
2002 void *filteredPixels;
2003 cl_mem_flags mem_flags;
2004
2005 KernelInfo *kernel = NULL;
2006 char geometry[MaxTextExtent];
2007
2008 cl_context context = NULL;
2009 cl_mem inputImageBuffer = NULL;
2010 cl_mem filteredImageBuffer = NULL;
2011 cl_mem tempImageBuffer = NULL;
2012 cl_mem imageKernelBuffer = NULL;
2013 cl_kernel blurRowKernel = NULL;
2014 cl_kernel unsharpMaskBlurColumnKernel = NULL;
2015 cl_command_queue queue = NULL;
2016
2017 void* hostPtr;
2018 float* kernelBufferPtr;
2019 MagickSizeType length;
2020 unsigned int kernelWidth;
2021 float fGain;
2022 float fThreshold;
2023 unsigned int imageColumns, imageRows;
2024 int chunkSize;
2025 unsigned int i;
2026
2027 clEnv = GetDefaultOpenCLEnv();
2028 context = GetOpenCLContext(clEnv);
2029 queue = AcquireOpenCLCommandQueue(clEnv);
2030
2031 /* Create and initialize OpenCL buffers. */
2032 {
2033 inputPixels = NULL;
2034 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2035 if (inputPixels == (const void *) NULL)
2036 {
2037 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
2038 goto cleanup;
2039 }
2040
2041 /* If the host pointer is aligned to the size of CLPixelPacket,
2042 then use the host buffer directly from the GPU; otherwise,
2043 create a buffer on the GPU and copy the data over */
2044 if (ALIGNED(inputPixels,CLPixelPacket))
2045 {
2046 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2047 }
2048 else
2049 {
2050 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2051 }
2052 /* create a CL buffer from image pixel buffer */
2053 length = inputImage->columns * inputImage->rows;
2054 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2055 if (clStatus != CL_SUCCESS)
2056 {
2057 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2058 goto cleanup;
2059 }
2060 }
2061
2062 /* create output */
2063 {
2064 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
2065 assert(filteredImage != NULL);
2066 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
2067 {
2068 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2069 goto cleanup;
2070 }
2071 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2072 if (filteredPixels == (void *) NULL)
2073 {
2074 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2075 goto cleanup;
2076 }
2077
2078 if (ALIGNED(filteredPixels,CLPixelPacket))
2079 {
2080 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2081 hostPtr = filteredPixels;
2082 }
2083 else
2084 {
2085 mem_flags = CL_MEM_WRITE_ONLY;
2086 hostPtr = NULL;
2087 }
2088
2089 /* create a CL buffer from image pixel buffer */
2090 length = inputImage->columns * inputImage->rows;
2091 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2092 if (clStatus != CL_SUCCESS)
2093 {
2094 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2095 goto cleanup;
2096 }
2097 }
2098
2099 /* create the blur kernel */
2100 {
2101 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
2102 kernel=AcquireKernelInfo(geometry);
2103 if (kernel == (KernelInfo *) NULL)
2104 {
2105 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
2106 goto cleanup;
2107 }
2108
2109 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
2110 if (clStatus != CL_SUCCESS)
2111 {
2112 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2113 goto cleanup;
2114 }
2115
2116
2117 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
2118 if (clStatus != CL_SUCCESS)
2119 {
2120 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
2121 goto cleanup;
2122 }
2123 for (i = 0; i < kernel->width; i++)
2124 {
2125 kernelBufferPtr[i] = (float) kernel->values[i];
2126 }
2127 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
2128 if (clStatus != CL_SUCCESS)
2129 {
2130 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
2131 goto cleanup;
2132 }
2133 }
2134
2135 {
2136 unsigned int offsetRows;
2137 unsigned int sec;
2138
2139 /* create temp buffer */
2140 {
2141 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
2142 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
2143 if (clStatus != CL_SUCCESS)
2144 {
2145 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2146 goto cleanup;
2147 }
2148 }
2149
2150 /* get the opencl kernel */
2151 {
2152 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
2153 if (blurRowKernel == NULL)
2154 {
2155 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2156 goto cleanup;
2157 };
2158
2159 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumnSection");
2160 if (unsharpMaskBlurColumnKernel == NULL)
2161 {
2162 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2163 goto cleanup;
2164 };
2165 }
2166
2167 for (sec = 0; sec < 2; sec++)
2168 {
2169 {
2170 chunkSize = 256;
2171
2172 imageColumns = inputImage->columns;
2173 if (sec == 0)
2174 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
2175 else
2176 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
2177
2178 offsetRows = sec * inputImage->rows / 2;
2179
2180 kernelWidth = kernel->width;
2181
2182 /* set the kernel arguments */
2183 i = 0;
2184 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2185 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2186 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
2187 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2188 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2189 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2190 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2191 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
2192 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2193 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
2194 if (clStatus != CL_SUCCESS)
2195 {
2196 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2197 goto cleanup;
2198 }
2199 }
2200 /* launch the kernel */
2201 {
2202 size_t gsize[2];
2203 size_t wsize[2];
2204
2205 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
2206 gsize[1] = imageRows;
2207 wsize[0] = chunkSize;
2208 wsize[1] = 1;
2209
2210 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2211 if (clStatus != CL_SUCCESS)
2212 {
2213 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2214 goto cleanup;
2215 }
2216 clFlush(queue);
2217 }
2218
2219
2220 {
2221 chunkSize = 256;
2222
2223 imageColumns = inputImage->columns;
2224 if (sec == 0)
2225 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
2226 else
2227 imageRows = (inputImage->rows - inputImage->rows / 2);
2228
2229 offsetRows = sec * inputImage->rows / 2;
2230
2231 kernelWidth = kernel->width;
2232
2233 fGain = (float)gain;
2234 fThreshold = (float)threshold;
2235
2236 i = 0;
2237 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2238 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2239 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2240 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2241 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2242 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
2243 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
2244 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
2245 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2246 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2247 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
2248 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
2249 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2250 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
2251
2252 if (clStatus != CL_SUCCESS)
2253 {
2254 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2255 goto cleanup;
2256 }
2257 }
2258
2259 /* launch the kernel */
2260 {
2261 size_t gsize[2];
2262 size_t wsize[2];
2263
2264 gsize[0] = imageColumns;
2265 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
2266 wsize[0] = 1;
2267 wsize[1] = chunkSize;
2268
2269 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2270 if (clStatus != CL_SUCCESS)
2271 {
2272 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2273 goto cleanup;
2274 }
2275 clFlush(queue);
2276 }
2277 }
2278 }
2279
2280 /* get result */
2281 if (ALIGNED(filteredPixels,CLPixelPacket))
2282 {
2283 length = inputImage->columns * inputImage->rows;
2284 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2285 }
2286 else
2287 {
2288 length = inputImage->columns * inputImage->rows;
2289 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2290 }
2291 if (clStatus != CL_SUCCESS)
2292 {
2293 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
2294 goto cleanup;
2295 }
2296
2297 outputReady = MagickTrue;
2298
2299cleanup:
2300 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
2301 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2302 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2303 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2304 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
2305 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
2306 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
2307 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2308 if (outputReady == MagickFalse)
2309 {
2310 if (filteredImage != NULL)
2311 {
2312 DestroyImage(filteredImage);
2313 filteredImage = NULL;
2314 }
2315 }
2316 return filteredImage;
2317}
2318
2319
2320/*
2321%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2322% %
2323% %
2324% %
2325% U n s h a r p M a s k I m a g e w i t h O p e n C L %
2326% %
2327% %
2328% %
2329%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2330%
2331% UnsharpMaskImage() sharpens one or more image channels. We convolve the
2332% image with a Gaussian operator of the given radius and standard deviation
2333% (sigma). For reasonable results, radius should be larger than sigma. Use a
2334% radius of 0 and UnsharpMaskImage() selects a suitable radius for you.
2335%
2336% The format of the UnsharpMaskImage method is:
2337%
2338% Image *UnsharpMaskImage(const Image *image,const double radius,
2339% const double sigma,const double amount,const double threshold,
2340% ExceptionInfo *exception)
2341% Image *UnsharpMaskImageChannel(const Image *image,
2342% const ChannelType channel,const double radius,const double sigma,
2343% const double gain,const double threshold,ExceptionInfo *exception)
2344%
2345% A description of each parameter follows:
2346%
2347% o image: the image.
2348%
2349% o channel: the channel type.
2350%
2351% o radius: the radius of the Gaussian, in pixels, not counting the center
2352% pixel.
2353%
2354% o sigma: the standard deviation of the Gaussian, in pixels.
2355%
2356% o gain: the percentage of the difference between the original and the
2357% blur image that is added back into the original.
2358%
2359% o threshold: the threshold in pixels needed to apply the diffence gain.
2360%
2361% o exception: return any errors or warnings in this structure.
2362%
2363*/
2364
2365
2366MagickExport
2367Image* AccelerateUnsharpMaskImage(const Image *image, const ChannelType channel,const double radius,const double sigma,
2368 const double gain,const double threshold,ExceptionInfo *exception)
2369{
2370 MagickBooleanType status;
2371 Image* filteredImage;
2372
2373
2374 assert(image != NULL);
2375 assert(exception != NULL);
2376
2377 status = checkOpenCLEnvironment(exception);
2378 if (status == MagickFalse)
2379 return NULL;
2380
2381 status = checkAccelerateCondition(image, channel, exception);
2382 if (status == MagickFalse)
2383 return NULL;
2384
2385 if (splitImage(image) && (image->rows / 2 > radius))
2386 filteredImage = ComputeUnsharpMaskImageSection(image,channel,radius,sigma,gain,threshold,exception);
2387 else
2388 filteredImage = ComputeUnsharpMaskImage(image,channel,radius,sigma,gain,threshold,exception);
2389 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2390 return filteredImage;
2391
2392}
2393
2394
2395static inline double MagickMax(const double x,const double y)
2396{
2397 if (x > y)
2398 return(x);
2399 return(y);
2400}
2401
2402static inline double MagickMin(const double x,const double y)
2403{
2404 if (x < y)
2405 return(x);
2406 return(y);
2407}
2408
2409
2410static MagickBooleanType resizeHorizontalFilter(cl_mem inputImage
2411 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2412 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2413 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float xFactor
2414 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2415{
2416 MagickBooleanType status = MagickFalse;
2417
2418 float scale, support;
2419 unsigned int i;
2420 cl_kernel horizontalKernel = NULL;
2421 cl_int clStatus;
2422 size_t global_work_size[2];
2423 size_t local_work_size[2];
2424 int resizeFilterType, resizeWindowType;
2425 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2426 size_t totalLocalMemorySize;
2427 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2428 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2429 size_t deviceLocalMemorySize;
2430 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2431
2432 const unsigned int workgroupSize = 256;
2433 unsigned int pixelPerWorkgroup;
2434 unsigned int chunkSize;
2435
2436 /*
2437 Apply filter to resize vertically from image to resize image.
2438 */
2439 scale=MagickMax(1.0/xFactor+MagickEpsilon,1.0);
2440 support=scale*GetResizeFilterSupport(resizeFilter);
2441 if (support < 0.5)
2442 {
2443 /*
2444 Support too small even for nearest neighbour: Reduce to point
2445 sampling.
2446 */
2447 support=(MagickRealType) 0.5;
2448 scale=1.0;
2449 }
2450 scale=PerceptibleReciprocal(scale);
2451
2452 if (resizedColumns < workgroupSize)
2453 {
2454 chunkSize = 32;
2455 pixelPerWorkgroup = 32;
2456 }
2457 else
2458 {
2459 chunkSize = workgroupSize;
2460 pixelPerWorkgroup = workgroupSize;
2461 }
2462
2463 /* get the local memory size supported by the device */
2464 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2465
2466 while(1)
2467 {
2468 /* calculate the local memory size needed per workgroup */
2469 cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
2470 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
2471 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2472 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2473 totalLocalMemorySize = imageCacheLocalMemorySize;
2474
2475 /* local size for the pixel accumulator */
2476 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2477 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2478
2479 /* local memory size for the weight accumulator */
2480 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2481 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2482
2483 /* local memory size for the gamma accumulator */
2484 if (matte == 0)
2485 gammaAccumulatorLocalMemorySize = sizeof(float);
2486 else
2487 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2488 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2489
2490 if (totalLocalMemorySize <= deviceLocalMemorySize)
2491 break;
2492 else
2493 {
2494 pixelPerWorkgroup = pixelPerWorkgroup/2;
2495 chunkSize = chunkSize/2;
2496 if (pixelPerWorkgroup == 0
2497 || chunkSize == 0)
2498 {
2499 /* quit, fallback to CPU */
2500 goto cleanup;
2501 }
2502 }
2503 }
2504
2505 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2506 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2507
2508
2509 if (resizeFilterType == SincFastWeightingFunction
2510 && resizeWindowType == SincFastWeightingFunction)
2511 {
2512 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilterSinc");
2513 }
2514 else
2515 {
2516 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilter");
2517 }
2518 if (horizontalKernel == NULL)
2519 {
2520 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2521 goto cleanup;
2522 }
2523
2524 i = 0;
2525 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2526 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2527 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2528 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2529 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
2530 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2531
2532 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2533 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2534
2535 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2536 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2537 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2538
2539 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2540 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2541
2542 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2543 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2544
2545 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2546 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2547
2548 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2549 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2550
2551
2552 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2553 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2554 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2555 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2556
2557
2558 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2559 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2560 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2561
2562 if (clStatus != CL_SUCCESS)
2563 {
2564 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2565 goto cleanup;
2566 }
2567
2568 global_work_size[0] = (resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2569 global_work_size[1] = resizedRows;
2570
2571 local_work_size[0] = workgroupSize;
2572 local_work_size[1] = 1;
2573 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2574 if (clStatus != CL_SUCCESS)
2575 {
2576 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2577 goto cleanup;
2578 }
2579 clFlush(queue);
2580 status = MagickTrue;
2581
2582
2583cleanup:
2584 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2585
2586 return status;
2587}
2588
2589
2590static MagickBooleanType resizeVerticalFilter(cl_mem inputImage
2591 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2592 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2593 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float yFactor
2594 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2595{
2596 MagickBooleanType status = MagickFalse;
2597
2598 float scale, support;
2599 unsigned int i;
2600 cl_kernel horizontalKernel = NULL;
2601 cl_int clStatus;
2602 size_t global_work_size[2];
2603 size_t local_work_size[2];
2604 int resizeFilterType, resizeWindowType;
2605 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2606 size_t totalLocalMemorySize;
2607 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2608 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2609 size_t deviceLocalMemorySize;
2610 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2611
2612 const unsigned int workgroupSize = 256;
2613 unsigned int pixelPerWorkgroup;
2614 unsigned int chunkSize;
2615
2616 /*
2617 Apply filter to resize vertically from image to resize image.
2618 */
2619 scale=MagickMax(1.0/yFactor+MagickEpsilon,1.0);
2620 support=scale*GetResizeFilterSupport(resizeFilter);
2621 if (support < 0.5)
2622 {
2623 /*
2624 Support too small even for nearest neighbour: Reduce to point
2625 sampling.
2626 */
2627 support=(MagickRealType) 0.5;
2628 scale=1.0;
2629 }
2630 scale=PerceptibleReciprocal(scale);
2631
2632 if (resizedRows < workgroupSize)
2633 {
2634 chunkSize = 32;
2635 pixelPerWorkgroup = 32;
2636 }
2637 else
2638 {
2639 chunkSize = workgroupSize;
2640 pixelPerWorkgroup = workgroupSize;
2641 }
2642
2643 /* get the local memory size supported by the device */
2644 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2645
2646 while(1)
2647 {
2648 /* calculate the local memory size needed per workgroup */
2649 cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
2650 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
2651 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2652 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2653 totalLocalMemorySize = imageCacheLocalMemorySize;
2654
2655 /* local size for the pixel accumulator */
2656 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2657 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2658
2659 /* local memory size for the weight accumulator */
2660 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2661 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2662
2663 /* local memory size for the gamma accumulator */
2664 if (matte == 0)
2665 gammaAccumulatorLocalMemorySize = sizeof(float);
2666 else
2667 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2668 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2669
2670 if (totalLocalMemorySize <= deviceLocalMemorySize)
2671 break;
2672 else
2673 {
2674 pixelPerWorkgroup = pixelPerWorkgroup/2;
2675 chunkSize = chunkSize/2;
2676 if (pixelPerWorkgroup == 0
2677 || chunkSize == 0)
2678 {
2679 /* quit, fallback to CPU */
2680 goto cleanup;
2681 }
2682 }
2683 }
2684
2685 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2686 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2687
2688 if (resizeFilterType == SincFastWeightingFunction
2689 && resizeWindowType == SincFastWeightingFunction)
2690 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilterSinc");
2691 else
2692 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilter");
2693
2694 if (horizontalKernel == NULL)
2695 {
2696 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2697 goto cleanup;
2698 }
2699
2700 i = 0;
2701 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2702 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2703 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2704 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2705 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor);
2706 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2707
2708 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2709 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2710
2711 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2712 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2713 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2714
2715 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2716 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2717
2718 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2719 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2720
2721 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2722 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2723
2724 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2725 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2726
2727
2728 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2729 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2730 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2731 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2732
2733
2734 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2735 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2736 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2737
2738 if (clStatus != CL_SUCCESS)
2739 {
2740 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2741 goto cleanup;
2742 }
2743
2744 global_work_size[0] = resizedColumns;
2745 global_work_size[1] = (resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2746
2747 local_work_size[0] = 1;
2748 local_work_size[1] = workgroupSize;
2749 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2750 if (clStatus != CL_SUCCESS)
2751 {
2752 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2753 goto cleanup;
2754 }
2755 clFlush(queue);
2756 status = MagickTrue;
2757
2758
2759cleanup:
2760 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2761
2762 return status;
2763}
2764
2765
2766
2767static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedColumns, const size_t resizedRows
2768 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
2769{
2770
2771 MagickBooleanType outputReady = MagickFalse;
2772 Image* filteredImage = NULL;
2773 MagickCLEnv clEnv = NULL;
2774
2775 cl_int clStatus;
2776 MagickBooleanType status;
2777 const void *inputPixels;
2778 void* filteredPixels;
2779 void* hostPtr;
2780 const MagickRealType* resizeFilterCoefficient;
2781 float* mappedCoefficientBuffer;
2782 float xFactor, yFactor;
2783 MagickSizeType length;
2784
2785 cl_mem_flags mem_flags;
2786 cl_context context = NULL;
2787 cl_mem inputImageBuffer = NULL;
2788 cl_mem tempImageBuffer = NULL;
2789 cl_mem filteredImageBuffer = NULL;
2790 cl_mem cubicCoefficientsBuffer = NULL;
2791 cl_command_queue queue = NULL;
2792
2793 unsigned int i;
2794
2795 clEnv = GetDefaultOpenCLEnv();
2796 context = GetOpenCLContext(clEnv);
2797
2798 /* Create and initialize OpenCL buffers. */
2799 inputPixels = NULL;
2800 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2801 if (inputPixels == (const void *) NULL)
2802 {
2803 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
2804 goto cleanup;
2805 }
2806
2807 /* If the host pointer is aligned to the size of CLPixelPacket,
2808 then use the host buffer directly from the GPU; otherwise,
2809 create a buffer on the GPU and copy the data over */
2810 if (ALIGNED(inputPixels,CLPixelPacket))
2811 {
2812 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2813 }
2814 else
2815 {
2816 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2817 }
2818 /* create a CL buffer from image pixel buffer */
2819 length = inputImage->columns * inputImage->rows;
2820 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2821 if (clStatus != CL_SUCCESS)
2822 {
2823 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2824 goto cleanup;
2825 }
2826
2827 cubicCoefficientsBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
2828 if (clStatus != CL_SUCCESS)
2829 {
2830 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2831 goto cleanup;
2832 }
2833 queue = AcquireOpenCLCommandQueue(clEnv);
2834 mappedCoefficientBuffer = (float*)clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float)
2835 , 0, NULL, NULL, &clStatus);
2836 if (clStatus != CL_SUCCESS)
2837 {
2838 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
2839 goto cleanup;
2840 }
2841 resizeFilterCoefficient = GetResizeFilterCoefficient(resizeFilter);
2842 for (i = 0; i < 7; i++)
2843 {
2844 mappedCoefficientBuffer[i] = (float) resizeFilterCoefficient[i];
2845 }
2846 clStatus = clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL);
2847 if (clStatus != CL_SUCCESS)
2848 {
2849 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
2850 goto cleanup;
2851 }
2852
2853 filteredImage = CloneImage(inputImage,resizedColumns,resizedRows,MagickTrue,exception);
2854 if (filteredImage == NULL)
2855 goto cleanup;
2856
2857 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
2858 {
2859 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2860 goto cleanup;
2861 }
2862 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2863 if (filteredPixels == (void *) NULL)
2864 {
2865 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2866 goto cleanup;
2867 }
2868
2869 if (ALIGNED(filteredPixels,CLPixelPacket))
2870 {
2871 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2872 hostPtr = filteredPixels;
2873 }
2874 else
2875 {
2876 mem_flags = CL_MEM_WRITE_ONLY;
2877 hostPtr = NULL;
2878 }
2879
2880 /* create a CL buffer from image pixel buffer */
2881 length = filteredImage->columns * filteredImage->rows;
2882 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2883 if (clStatus != CL_SUCCESS)
2884 {
2885 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2886 goto cleanup;
2887 }
2888
2889 xFactor=(float) resizedColumns/(float) inputImage->columns;
2890 yFactor=(float) resizedRows/(float) inputImage->rows;
2891 if (xFactor > yFactor)
2892 {
2893
2894 length = resizedColumns*inputImage->rows;
2895 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2896 if (clStatus != CL_SUCCESS)
2897 {
2898 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2899 goto cleanup;
2900 }
2901
2902 status = resizeHorizontalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->matte == MagickTrue)?1:0
2903 , tempImageBuffer, resizedColumns, inputImage->rows
2904 , resizeFilter, cubicCoefficientsBuffer
2905 , xFactor, clEnv, queue, exception);
2906 if (status != MagickTrue)
2907 goto cleanup;
2908
2909 status = resizeVerticalFilter(tempImageBuffer, resizedColumns, inputImage->rows, (inputImage->matte == MagickTrue)?1:0
2910 , filteredImageBuffer, resizedColumns, resizedRows
2911 , resizeFilter, cubicCoefficientsBuffer
2912 , yFactor, clEnv, queue, exception);
2913 if (status != MagickTrue)
2914 goto cleanup;
2915 }
2916 else
2917 {
2918 length = inputImage->columns*resizedRows;
2919 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2920 if (clStatus != CL_SUCCESS)
2921 {
2922 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2923 goto cleanup;
2924 }
2925
2926 status = resizeVerticalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->matte == MagickTrue)?1:0
2927 , tempImageBuffer, inputImage->columns, resizedRows
2928 , resizeFilter, cubicCoefficientsBuffer
2929 , yFactor, clEnv, queue, exception);
2930 if (status != MagickTrue)
2931 goto cleanup;
2932
2933 status = resizeHorizontalFilter(tempImageBuffer, inputImage->columns, resizedRows, (inputImage->matte == MagickTrue)?1:0
2934 , filteredImageBuffer, resizedColumns, resizedRows
2935 , resizeFilter, cubicCoefficientsBuffer
2936 , xFactor, clEnv, queue, exception);
2937 if (status != MagickTrue)
2938 goto cleanup;
2939 }
2940 length = resizedColumns*resizedRows;
2941 if (ALIGNED(filteredPixels,CLPixelPacket))
2942 {
2943 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2944 }
2945 else
2946 {
2947 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2948 }
2949 if (clStatus != CL_SUCCESS)
2950 {
2951 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
2952 goto cleanup;
2953 }
2954 outputReady = MagickTrue;
2955
2956cleanup:
2957 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2958 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2959 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2960 if (cubicCoefficientsBuffer!=NULL) clReleaseMemObject(cubicCoefficientsBuffer);
2961 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2962 if (outputReady == MagickFalse)
2963 {
2964 if (filteredImage != NULL)
2965 {
2966 DestroyImage(filteredImage);
2967 filteredImage = NULL;
2968 }
2969 }
2970
2971 return filteredImage;
2972}
2973
2974const ResizeWeightingFunctionType supportedResizeWeighting[] =
2975{
2976 BoxWeightingFunction
2977 ,TriangleWeightingFunction
2978 ,HanningWeightingFunction
2979 ,HammingWeightingFunction
2980 ,BlackmanWeightingFunction
2981 ,CubicBCWeightingFunction
2982 ,SincWeightingFunction
2983 ,SincFastWeightingFunction
2984 ,LastWeightingFunction
2985};
2986
2987static MagickBooleanType gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)
2988{
2989 MagickBooleanType supported = MagickFalse;
2990 unsigned int i;
2991 for (i = 0; ;i++)
2992 {
2993 if (supportedResizeWeighting[i] == LastWeightingFunction)
2994 break;
2995 if (supportedResizeWeighting[i] == f)
2996 {
2997 supported = MagickTrue;
2998 break;
2999 }
3000 }
3001 return supported;
3002}
3003
3004
3005/*
3006%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3007% %
3008% %
3009% %
3010% A c c e l e r a t e R e s i z e I m a g e %
3011% %
3012% %
3013% %
3014%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3015%
3016% AccelerateResizeImage() is an OpenCL implementation of ResizeImage()
3017%
3018% AccelerateResizeImage() scales an image to the desired dimensions, using the given
3019% filter (see AcquireFilterInfo()).
3020%
3021% If an undefined filter is given the filter defaults to Mitchell for a
3022% colormapped image, a image with a matte channel, or if the image is
3023% enlarged. Otherwise the filter defaults to a Lanczos.
3024%
3025% AccelerateResizeImage() was inspired by Paul Heckbert's "zoom" program.
3026%
3027% The format of the AccelerateResizeImage method is:
3028%
3029% Image *ResizeImage(Image *image,const size_t columns,
3030% const size_t rows, const ResizeFilter* filter,
cristy3f6d1482010-01-20 21:01:21 +00003031% ExceptionInfo *exception)
3032%
3033% A description of each parameter follows:
3034%
3035% o image: the image.
3036%
cristyf034abb2013-11-24 14:16:14 +00003037% o columns: the number of columns in the scaled image.
cristy3f6d1482010-01-20 21:01:21 +00003038%
cristyf034abb2013-11-24 14:16:14 +00003039% o rows: the number of rows in the scaled image.
3040%
3041% o filter: Image filter to use.
cristy3f6d1482010-01-20 21:01:21 +00003042%
3043% o exception: return any errors or warnings in this structure.
3044%
3045*/
cristyd43a46b2010-01-21 02:13:41 +00003046
cristyf034abb2013-11-24 14:16:14 +00003047MagickExport
3048Image* AccelerateResizeImage(const Image* image, const size_t resizedColumns, const size_t resizedRows
3049 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
cristyd43a46b2010-01-21 02:13:41 +00003050{
cristyf034abb2013-11-24 14:16:14 +00003051 MagickBooleanType status;
3052 Image* filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003053
cristyf034abb2013-11-24 14:16:14 +00003054 assert(image != NULL);
3055 assert(resizeFilter != NULL);
cristyd43a46b2010-01-21 02:13:41 +00003056
cristyf034abb2013-11-24 14:16:14 +00003057 status = checkOpenCLEnvironment(exception);
3058 if (status == MagickFalse)
3059 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003060
cristyf034abb2013-11-24 14:16:14 +00003061 status = checkAccelerateCondition(image, AllChannels, exception);
3062 if (status == MagickFalse)
3063 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003064
cristyf034abb2013-11-24 14:16:14 +00003065 if (gpuSupportedResizeWeighting(GetResizeFilterWeightingType(resizeFilter)) == MagickFalse
3066 || gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(resizeFilter)) == MagickFalse)
3067 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003068
cristyf034abb2013-11-24 14:16:14 +00003069 filteredImage = ComputeResizeImage(image,resizedColumns,resizedRows,resizeFilter,exception);
3070 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3071 return filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003072
cristyd43a46b2010-01-21 02:13:41 +00003073}
3074
cristyd43a46b2010-01-21 02:13:41 +00003075
cristyf034abb2013-11-24 14:16:14 +00003076static MagickBooleanType ComputeContrastImage(Image *inputImage, const MagickBooleanType sharpen, ExceptionInfo *exception)
3077{
3078 MagickBooleanType outputReady = MagickFalse;
3079 MagickCLEnv clEnv = NULL;
3080
3081 cl_int clStatus;
3082 size_t global_work_size[2];
3083
3084 void *inputPixels = NULL;
3085 MagickSizeType length;
3086 unsigned int uSharpen;
3087 unsigned int i;
3088
3089 cl_mem_flags mem_flags;
3090 cl_context context = NULL;
3091 cl_mem inputImageBuffer = NULL;
3092 cl_kernel filterKernel = NULL;
3093 cl_command_queue queue = NULL;
3094
3095 clEnv = GetDefaultOpenCLEnv();
3096 context = GetOpenCLContext(clEnv);
3097
3098 /* Create and initialize OpenCL buffers. */
3099 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3100 if (inputPixels == (void *) NULL)
3101 {
3102 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3103 goto cleanup;
3104 }
3105
3106 /* If the host pointer is aligned to the size of CLPixelPacket,
3107 then use the host buffer directly from the GPU; otherwise,
3108 create a buffer on the GPU and copy the data over */
3109 if (ALIGNED(inputPixels,CLPixelPacket))
3110 {
3111 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3112 }
3113 else
3114 {
3115 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3116 }
3117 /* create a CL buffer from image pixel buffer */
3118 length = inputImage->columns * inputImage->rows;
3119 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3120 if (clStatus != CL_SUCCESS)
3121 {
3122 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3123 goto cleanup;
3124 }
3125
3126 filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
3127 if (filterKernel == NULL)
3128 {
3129 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3130 goto cleanup;
3131 }
3132
3133 i = 0;
3134 clStatus=clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3135
3136 uSharpen = (sharpen == MagickFalse)?0:1;
3137 clStatus|=clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
3138 if (clStatus != CL_SUCCESS)
3139 {
3140 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3141 goto cleanup;
3142 }
3143
3144 global_work_size[0] = inputImage->columns;
3145 global_work_size[1] = inputImage->rows;
3146 /* launch the kernel */
3147 queue = AcquireOpenCLCommandQueue(clEnv);
3148 clStatus = clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3149 if (clStatus != CL_SUCCESS)
3150 {
3151 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3152 goto cleanup;
3153 }
3154 clFlush(queue);
3155
3156 if (ALIGNED(inputPixels,CLPixelPacket))
3157 {
3158 length = inputImage->columns * inputImage->rows;
3159 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3160 }
3161 else
3162 {
3163 length = inputImage->columns * inputImage->rows;
3164 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3165 }
3166 if (clStatus != CL_SUCCESS)
3167 {
3168 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3169 goto cleanup;
3170 }
3171 outputReady = MagickTrue;
3172
3173cleanup:
3174
3175 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
3176 if (filterKernel!=NULL) RelinquishOpenCLKernel(clEnv, filterKernel);
3177 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
3178 return outputReady;
3179}
3180
3181/*
3182%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3183% %
3184% %
3185% %
3186% C o n t r a s t I m a g e w i t h O p e n C L %
3187% %
3188% %
3189% %
3190%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3191%
3192% ContrastImage() enhances the intensity differences between the lighter and
3193% darker elements of the image. Set sharpen to a MagickTrue to increase the
3194% image contrast otherwise the contrast is reduced.
3195%
3196% The format of the ContrastImage method is:
3197%
3198% MagickBooleanType ContrastImage(Image *image,
3199% const MagickBooleanType sharpen)
3200%
3201% A description of each parameter follows:
3202%
3203% o image: the image.
3204%
3205% o sharpen: Increase or decrease image contrast.
3206%
3207*/
3208
3209MagickExport
3210MagickBooleanType AccelerateContrastImage(Image* image, const MagickBooleanType sharpen, ExceptionInfo* exception)
3211{
3212 MagickBooleanType status;
3213
3214 assert(image != NULL);
3215 assert(exception != NULL);
3216
3217 status = checkOpenCLEnvironment(exception);
3218 if (status == MagickFalse)
3219 return MagickFalse;
3220
3221 status = checkAccelerateCondition(image, AllChannels, exception);
3222 if (status == MagickFalse)
3223 return MagickFalse;
3224
3225 status = ComputeContrastImage(image,sharpen,exception);
3226 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3227 return status;
3228}
3229
3230
3231
3232MagickBooleanType ComputeModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3233{
3234 register ssize_t
cristyd43a46b2010-01-21 02:13:41 +00003235 i;
3236
cristyf034abb2013-11-24 14:16:14 +00003237 cl_float
3238 bright,
3239 hue,
3240 saturation;
3241
3242 cl_int color;
3243
3244 MagickBooleanType outputReady;
3245
3246 MagickCLEnv clEnv;
3247
3248 void *inputPixels;
3249
3250 MagickSizeType length;
3251
3252 cl_context context;
3253 cl_command_queue queue;
3254 cl_kernel modulateKernel;
3255
3256 cl_mem inputImageBuffer;
3257 cl_mem_flags mem_flags;
3258
3259 cl_int clStatus;
3260
3261 Image * inputImage = image;
3262
3263 inputImageBuffer = NULL;
3264 modulateKernel = NULL;
3265
3266 assert(inputImage != (Image *) NULL);
3267 assert(inputImage->signature == MagickSignature);
3268 if (inputImage->debug != MagickFalse)
3269 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
cristyd43a46b2010-01-21 02:13:41 +00003270
3271 /*
cristyf034abb2013-11-24 14:16:14 +00003272 * initialize opencl env
3273 */
3274 clEnv = GetDefaultOpenCLEnv();
3275 context = GetOpenCLContext(clEnv);
3276 queue = AcquireOpenCLCommandQueue(clEnv);
cristyd43a46b2010-01-21 02:13:41 +00003277
cristyf034abb2013-11-24 14:16:14 +00003278 outputReady = MagickFalse;
cristyd43a46b2010-01-21 02:13:41 +00003279
cristyf034abb2013-11-24 14:16:14 +00003280 /* Create and initialize OpenCL buffers.
3281 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3282 assume this will get a writable image
3283 */
3284 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3285 if (inputPixels == (void *) NULL)
cristyd43a46b2010-01-21 02:13:41 +00003286 {
cristyf034abb2013-11-24 14:16:14 +00003287 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3288 goto cleanup;
cristyd43a46b2010-01-21 02:13:41 +00003289 }
cristyf034abb2013-11-24 14:16:14 +00003290
3291 /* If the host pointer is aligned to the size of CLPixelPacket,
3292 then use the host buffer directly from the GPU; otherwise,
3293 create a buffer on the GPU and copy the data over
3294 */
3295 if (ALIGNED(inputPixels,CLPixelPacket))
3296 {
3297 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3298 }
3299 else
3300 {
3301 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3302 }
3303 /* create a CL buffer from image pixel buffer */
3304 length = inputImage->columns * inputImage->rows;
3305 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3306 if (clStatus != CL_SUCCESS)
3307 {
3308 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3309 goto cleanup;
3310 }
3311
3312 modulateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Modulate");
3313 if (modulateKernel == NULL)
3314 {
3315 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3316 goto cleanup;
3317 }
3318
3319 bright=percent_brightness;
3320 hue=percent_hue;
3321 saturation=percent_saturation;
3322 color=colorspace;
3323
3324 i = 0;
3325 clStatus=clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3326 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3327 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3328 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3329 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3330 if (clStatus != CL_SUCCESS)
3331 {
3332 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3333 printf("no kernel\n");
3334 goto cleanup;
3335 }
3336
3337 {
3338 size_t global_work_size[2];
3339 global_work_size[0] = inputImage->columns;
3340 global_work_size[1] = inputImage->rows;
3341 /* launch the kernel */
3342 clStatus = clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3343 if (clStatus != CL_SUCCESS)
3344 {
3345 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3346 goto cleanup;
3347 }
3348 clFlush(queue);
3349 }
3350
3351 if (ALIGNED(inputPixels,CLPixelPacket))
3352 {
3353 length = inputImage->columns * inputImage->rows;
3354 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3355 }
3356 else
3357 {
3358 length = inputImage->columns * inputImage->rows;
3359 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3360 }
3361 if (clStatus != CL_SUCCESS)
3362 {
3363 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3364 goto cleanup;
3365 }
3366
3367 outputReady = MagickTrue;
3368
3369cleanup:
3370
3371 if (inputPixels) {
3372 //ReleasePixelCachePixels();
3373 inputPixels = NULL;
3374 }
3375
3376 if (inputImageBuffer!=NULL)
3377 clReleaseMemObject(inputImageBuffer);
3378 if (modulateKernel!=NULL)
3379 RelinquishOpenCLKernel(clEnv, modulateKernel);
3380 if (queue != NULL)
3381 RelinquishOpenCLCommandQueue(clEnv, queue);
3382
3383 return outputReady;
3384
cristy3f6d1482010-01-20 21:01:21 +00003385}
cristyf034abb2013-11-24 14:16:14 +00003386
3387/*
3388%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3389% %
3390% %
3391% %
3392% M o d u l a t e I m a g e w i t h O p e n C L %
3393% %
3394% %
3395% %
3396%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3397%
3398% ModulateImage() lets you control the brightness, saturation, and hue
3399% of an image. Modulate represents the brightness, saturation, and hue
3400% as one parameter (e.g. 90,150,100). If the image colorspace is HSL, the
3401% modulation is lightness, saturation, and hue. For HWB, use blackness,
3402% whiteness, and hue. And for HCL, use chrome, luma, and hue.
3403%
3404% The format of the ModulateImage method is:
3405%
3406% MagickBooleanType ModulateImage(Image *image,const char *modulate)
3407%
3408% A description of each parameter follows:
3409%
3410% o image: the image.
3411%
3412% o percent_*: Define the percent change in brightness, saturation, and
3413% hue.
3414%
3415*/
3416
3417MagickExport
3418MagickBooleanType AccelerateModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3419{
3420 MagickBooleanType status;
3421
3422 assert(image != NULL);
3423 assert(exception != NULL);
3424
3425 status = checkOpenCLEnvironment(exception);
3426 if (status == MagickFalse)
3427 return MagickFalse;
3428
3429 status = checkAccelerateCondition(image, AllChannels, exception);
3430 if (status == MagickFalse)
3431 return MagickFalse;
3432
3433 if ((colorspace != HSLColorspace && colorspace != UndefinedColorspace))
3434 return MagickFalse;
3435
3436
3437 status = ComputeModulateImage(image,percent_brightness, percent_hue, percent_saturation, colorspace, exception);
3438 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3439 return status;
3440}
3441
3442
3443MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const ChannelType channel, ExceptionInfo * _exception)
3444{
3445#define EqualizeImageTag "Equalize/Image"
3446
3447 ExceptionInfo
3448 *exception=_exception;
3449
3450 FloatPixelPacket
3451 white,
3452 black,
3453 intensity,
3454 *map;
3455
3456 cl_uint4
3457 *histogram;
3458
3459 PixelPacket
3460 *equalize_map;
3461
3462 register ssize_t
3463 i;
3464
3465 Image * image = inputImage;
3466
3467 MagickBooleanType outputReady;
3468 MagickCLEnv clEnv;
3469
3470 cl_int clStatus;
3471 size_t global_work_size[2];
3472
3473 void *inputPixels;
3474 cl_mem_flags mem_flags;
3475
3476 cl_context context;
3477 cl_mem inputImageBuffer;
3478 cl_mem histogramBuffer;
3479 cl_mem equalizeMapBuffer;
3480 cl_kernel histogramKernel;
3481 cl_kernel equalizeKernel;
3482 cl_command_queue queue;
3483 cl_int colorspace;
3484
3485 void* hostPtr;
3486
3487 MagickSizeType length;
3488
3489 inputPixels = NULL;
3490 inputImageBuffer = NULL;
3491 histogramBuffer = NULL;
3492 histogramKernel = NULL;
3493 equalizeKernel = NULL;
3494 context = NULL;
3495 queue = NULL;
3496 outputReady = MagickFalse;
3497
3498 assert(inputImage != (Image *) NULL);
3499 assert(inputImage->signature == MagickSignature);
3500 if (inputImage->debug != MagickFalse)
3501 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
3502
3503 /*
3504 Allocate and initialize histogram arrays.
3505 */
3506 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
3507 if (histogram == (cl_uint4 *) NULL)
3508 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3509
3510 /* reset histogram */
3511 (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
3512
3513 /*
3514 * initialize opencl env
3515 */
3516 clEnv = GetDefaultOpenCLEnv();
3517 context = GetOpenCLContext(clEnv);
3518 queue = AcquireOpenCLCommandQueue(clEnv);
3519
3520 /* Create and initialize OpenCL buffers. */
3521 /* inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); */
3522 /* assume this will get a writable image */
3523 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3524
3525 if (inputPixels == (void *) NULL)
3526 {
3527 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3528 goto cleanup;
3529 }
3530 /* If the host pointer is aligned to the size of CLPixelPacket,
3531 then use the host buffer directly from the GPU; otherwise,
3532 create a buffer on the GPU and copy the data over */
3533 if (ALIGNED(inputPixels,CLPixelPacket))
3534 {
3535 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3536 }
3537 else
3538 {
3539 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3540 }
3541 /* create a CL buffer from image pixel buffer */
3542 length = inputImage->columns * inputImage->rows;
3543 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3544 if (clStatus != CL_SUCCESS)
3545 {
3546 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3547 goto cleanup;
3548 }
3549
3550 /* If the host pointer is aligned to the size of cl_uint,
3551 then use the host buffer directly from the GPU; otherwise,
3552 create a buffer on the GPU and copy the data over */
3553 if (ALIGNED(histogram,cl_uint4))
3554 {
3555 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3556 hostPtr = histogram;
3557 }
3558 else
3559 {
3560 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3561 hostPtr = histogram;
3562 }
3563 /* create a CL buffer for histogram */
3564 length = (MaxMap+1);
3565 histogramBuffer = clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
3566 if (clStatus != CL_SUCCESS)
3567 {
3568 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3569 goto cleanup;
3570 }
3571
3572 switch (inputImage->colorspace)
3573 {
3574 case RGBColorspace:
3575 colorspace = 1;
3576 break;
3577 case sRGBColorspace:
3578 colorspace = 0;
3579 break;
3580 default:
3581 {
3582 /* something is wrong, as we checked in checkAccelerateCondition */
3583 }
3584 }
3585
3586 /* get the OpenCL kernel */
3587 histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
3588 if (histogramKernel == NULL)
3589 {
3590 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3591 goto cleanup;
3592 }
3593
3594 /* set the kernel arguments */
3595 i = 0;
3596 clStatus=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3597 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
3598 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace);
3599 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
3600 if (clStatus != CL_SUCCESS)
3601 {
3602 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3603 goto cleanup;
3604 }
3605
3606 /* launch the kernel */
3607 global_work_size[0] = inputImage->columns;
3608 global_work_size[1] = inputImage->rows;
3609
3610 clStatus = clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3611
3612 if (clStatus != CL_SUCCESS)
3613 {
3614 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3615 goto cleanup;
3616 }
3617 clFlush(queue);
3618
3619 /* read from the kenel output */
3620 if (ALIGNED(histogram,cl_uint4))
3621 {
3622 length = (MaxMap+1);
3623 clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
3624 }
3625 else
3626 {
3627 length = (MaxMap+1);
3628 clStatus = clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
3629 }
3630 if (clStatus != CL_SUCCESS)
3631 {
3632 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3633 goto cleanup;
3634 }
3635
3636 /* unmap, don't block gpu to use this buffer again. */
3637 if (ALIGNED(histogram,cl_uint4))
3638 {
3639 clStatus = clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
3640 if (clStatus != CL_SUCCESS)
3641 {
3642 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
3643 goto cleanup;
3644 }
3645 }
3646
3647 if (getenv("TEST")) {
3648 unsigned int i;
3649 for (i=0; i<(MaxMap+1UL); i++)
3650 {
3651 printf("histogram %d: red %d\n", i, histogram[i].s[2]);
3652 printf("histogram %d: green %d\n", i, histogram[i].s[1]);
3653 printf("histogram %d: blue %d\n", i, histogram[i].s[0]);
3654 printf("histogram %d: opacity %d\n", i, histogram[i].s[3]);
3655 }
3656 }
3657
3658 /* cpu stuff */
3659 equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
3660 if (equalize_map == (PixelPacket *) NULL)
3661 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3662
3663 map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
3664 if (map == (FloatPixelPacket *) NULL)
3665 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3666
3667 /*
3668 Integrate the histogram to get the equalization map.
3669 */
3670 (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
3671 for (i=0; i <= (ssize_t) MaxMap; i++)
3672 {
3673 if ((channel & SyncChannels) != 0)
3674 {
3675 intensity.red+=histogram[i].s[2];
3676 map[i]=intensity;
3677 continue;
3678 }
3679 if ((channel & RedChannel) != 0)
3680 intensity.red+=histogram[i].s[2];
3681 if ((channel & GreenChannel) != 0)
3682 intensity.green+=histogram[i].s[1];
3683 if ((channel & BlueChannel) != 0)
3684 intensity.blue+=histogram[i].s[0];
3685 if ((channel & OpacityChannel) != 0)
3686 intensity.opacity+=histogram[i].s[3];
3687 if (((channel & IndexChannel) != 0) &&
3688 (image->colorspace == CMYKColorspace))
3689 {
3690 printf("something here\n");
3691 /*intensity.index+=histogram[i].index; */
3692 }
3693 map[i]=intensity;
3694 }
3695 black=map[0];
3696 white=map[(int) MaxMap];
3697 (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
3698 for (i=0; i <= (ssize_t) MaxMap; i++)
3699 {
3700 if ((channel & SyncChannels) != 0)
3701 {
3702 if (white.red != black.red)
3703 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3704 (map[i].red-black.red))/(white.red-black.red)));
3705 continue;
3706 }
3707 if (((channel & RedChannel) != 0) && (white.red != black.red))
3708 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3709 (map[i].red-black.red))/(white.red-black.red)));
3710 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3711 equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3712 (map[i].green-black.green))/(white.green-black.green)));
3713 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3714 equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3715 (map[i].blue-black.blue))/(white.blue-black.blue)));
3716 if (((channel & OpacityChannel) != 0) && (white.opacity != black.opacity))
3717 equalize_map[i].opacity=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3718 (map[i].opacity-black.opacity))/(white.opacity-black.opacity)));
3719 /*
3720 if ((((channel & IndexChannel) != 0) &&
3721 (image->colorspace == CMYKColorspace)) &&
3722 (white.index != black.index))
3723 equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3724 (map[i].index-black.index))/(white.index-black.index)));
3725 */
3726 }
3727
3728 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
3729 map=(FloatPixelPacket *) RelinquishMagickMemory(map);
3730
3731 if (image->storage_class == PseudoClass)
3732 {
3733 /*
3734 Equalize colormap.
3735 */
3736 for (i=0; i < (ssize_t) image->colors; i++)
3737 {
3738 if ((channel & SyncChannels) != 0)
3739 {
3740 if (white.red != black.red)
3741 {
3742 image->colormap[i].red=equalize_map[
3743 ScaleQuantumToMap(image->colormap[i].red)].red;
3744 image->colormap[i].green=equalize_map[
3745 ScaleQuantumToMap(image->colormap[i].green)].red;
3746 image->colormap[i].blue=equalize_map[
3747 ScaleQuantumToMap(image->colormap[i].blue)].red;
3748 image->colormap[i].opacity=equalize_map[
3749 ScaleQuantumToMap(image->colormap[i].opacity)].red;
3750 }
3751 continue;
3752 }
3753 if (((channel & RedChannel) != 0) && (white.red != black.red))
3754 image->colormap[i].red=equalize_map[
3755 ScaleQuantumToMap(image->colormap[i].red)].red;
3756 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3757 image->colormap[i].green=equalize_map[
3758 ScaleQuantumToMap(image->colormap[i].green)].green;
3759 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3760 image->colormap[i].blue=equalize_map[
3761 ScaleQuantumToMap(image->colormap[i].blue)].blue;
3762 if (((channel & OpacityChannel) != 0) &&
3763 (white.opacity != black.opacity))
3764 image->colormap[i].opacity=equalize_map[
3765 ScaleQuantumToMap(image->colormap[i].opacity)].opacity;
3766 }
3767 }
3768
3769 /*
3770 Equalize image.
3771 */
3772
3773 /* GPU can work on this again, image and equalize map as input
3774 image: uchar4 (CLPixelPacket)
3775 equalize_map: uchar4 (PixelPacket)
3776 black, white: float4 (FloatPixelPacket) */
3777
3778 if (inputImageBuffer!=NULL)
3779 clReleaseMemObject(inputImageBuffer);
3780
3781 /* If the host pointer is aligned to the size of CLPixelPacket,
3782 then use the host buffer directly from the GPU; otherwise,
3783 create a buffer on the GPU and copy the data over */
3784 if (ALIGNED(inputPixels,CLPixelPacket))
3785 {
3786 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3787 }
3788 else
3789 {
3790 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3791 }
3792 /* create a CL buffer from image pixel buffer */
3793 length = inputImage->columns * inputImage->rows;
3794 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3795 if (clStatus != CL_SUCCESS)
3796 {
3797 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3798 goto cleanup;
3799 }
3800
3801 /* Create and initialize OpenCL buffers. */
3802 if (ALIGNED(equalize_map, PixelPacket))
3803 {
3804 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3805 hostPtr = equalize_map;
3806 }
3807 else
3808 {
3809 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3810 hostPtr = equalize_map;
3811 }
3812 /* create a CL buffer for eqaulize_map */
3813 length = (MaxMap+1);
3814 equalizeMapBuffer = clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
3815 if (clStatus != CL_SUCCESS)
3816 {
3817 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3818 goto cleanup;
3819 }
3820
3821 /* get the OpenCL kernel */
3822 equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize");
3823 if (equalizeKernel == NULL)
3824 {
3825 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3826 goto cleanup;
3827 }
3828
3829 /* set the kernel arguments */
3830 i = 0;
3831 clStatus=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3832 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
3833 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
3834 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
3835 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
3836 if (clStatus != CL_SUCCESS)
3837 {
3838 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3839 goto cleanup;
3840 }
3841
3842 /* launch the kernel */
3843 global_work_size[0] = inputImage->columns;
3844 global_work_size[1] = inputImage->rows;
3845
3846 clStatus = clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3847
3848 if (clStatus != CL_SUCCESS)
3849 {
3850 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3851 goto cleanup;
3852 }
3853 clFlush(queue);
3854
3855 /* read the data back */
3856 if (ALIGNED(inputPixels,CLPixelPacket))
3857 {
3858 length = inputImage->columns * inputImage->rows;
3859 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3860 }
3861 else
3862 {
3863 length = inputImage->columns * inputImage->rows;
3864 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3865 }
3866 if (clStatus != CL_SUCCESS)
3867 {
3868 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3869 goto cleanup;
3870 }
3871
3872 outputReady = MagickTrue;
3873
3874 equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
3875
3876cleanup:
3877
3878 if (inputPixels) {
3879 /*ReleasePixelCachePixels();*/
3880 inputPixels = NULL;
3881 }
3882
3883 if (inputImageBuffer!=NULL)
3884 clReleaseMemObject(inputImageBuffer);
3885 if (histogramBuffer!=NULL)
3886 clReleaseMemObject(histogramBuffer);
3887 if (histogramKernel!=NULL)
3888 RelinquishOpenCLKernel(clEnv, histogramKernel);
3889 if (queue != NULL)
3890 RelinquishOpenCLCommandQueue(clEnv, queue);
3891
3892 return outputReady;
3893}
3894
3895/*
3896%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3897% %
3898% %
3899% %
3900% E q u a l i z e I m a g e w i t h O p e n C L %
3901% %
3902% %
3903% %
3904%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3905%
3906% EqualizeImage() applies a histogram equalization to the image.
3907%
3908% The format of the EqualizeImage method is:
3909%
3910% MagickBooleanType EqualizeImage(Image *image)
3911% MagickBooleanType EqualizeImageChannel(Image *image,
3912% const ChannelType channel)
3913%
3914% A description of each parameter follows:
3915%
3916% o image: the image.
3917%
3918% o channel: the channel.
3919%
3920*/
3921
3922
3923MagickExport
3924MagickBooleanType AccelerateEqualizeImage(Image* image, const ChannelType channel, ExceptionInfo* exception)
3925{
3926 MagickBooleanType status;
3927
3928 assert(image != NULL);
3929 assert(exception != NULL);
3930
3931 status = checkOpenCLEnvironment(exception);
3932 if (status == MagickFalse)
3933 return MagickFalse;
3934
3935 status = checkAccelerateCondition(image, channel, exception);
3936 if (status == MagickFalse)
3937 return MagickFalse;
3938
3939 /* ensure this is the only pass get in for now. */
3940 if ((channel & SyncChannels) == 0)
3941 return MagickFalse;
3942
3943 if (image->colorspace != sRGBColorspace)
3944 return MagickFalse;
3945
3946 status = ComputeEqualizeImage(image,channel,exception);
3947 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3948 return status;
3949}
3950
3951
3952static Image* ComputeDespeckleImage(const Image* inputImage, ExceptionInfo* exception)
3953{
3954
3955 MagickBooleanType outputReady = MagickFalse;
3956 MagickCLEnv clEnv = NULL;
3957
3958 cl_int clStatus;
3959 size_t global_work_size[2];
3960
3961 const void *inputPixels = NULL;
3962 Image* filteredImage = NULL;
3963 void *filteredPixels = NULL;
3964 void *hostPtr;
3965 MagickSizeType length;
3966
3967 cl_mem_flags mem_flags;
3968 cl_context context = NULL;
3969 cl_mem inputImageBuffer = NULL;
3970 cl_mem tempImageBuffer[2];
3971 cl_mem filteredImageBuffer = NULL;
3972 cl_command_queue queue = NULL;
3973 cl_kernel hullPass1 = NULL;
3974 cl_kernel hullPass2 = NULL;
3975
3976 unsigned int imageWidth, imageHeight;
3977 int matte;
3978 int k;
3979
3980 static const int
3981 X[4] = {0, 1, 1,-1},
3982 Y[4] = {1, 0, 1, 1};
3983
3984 tempImageBuffer[0] = tempImageBuffer[1] = NULL;
3985 clEnv = GetDefaultOpenCLEnv();
3986 context = GetOpenCLContext(clEnv);
3987 queue = AcquireOpenCLCommandQueue(clEnv);
3988
3989 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3990 if (inputPixels == (void *) NULL)
3991 {
3992 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3993 goto cleanup;
3994 }
3995
3996 if (ALIGNED(inputPixels,CLPixelPacket))
3997 {
3998 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3999 }
4000 else
4001 {
4002 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4003 }
4004 /* create a CL buffer from image pixel buffer */
4005 length = inputImage->columns * inputImage->rows;
4006 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4007 if (clStatus != CL_SUCCESS)
4008 {
4009 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4010 goto cleanup;
4011 }
4012
4013 mem_flags = CL_MEM_READ_WRITE;
4014 length = inputImage->columns * inputImage->rows;
4015 for (k = 0; k < 2; k++)
4016 {
4017 tempImageBuffer[k] = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
4018 if (clStatus != CL_SUCCESS)
4019 {
4020 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4021 goto cleanup;
4022 }
4023 }
4024
4025 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4026 assert(filteredImage != NULL);
4027 if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
4028 {
4029 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
4030 goto cleanup;
4031 }
4032 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4033 if (filteredPixels == (void *) NULL)
4034 {
4035 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
4036 goto cleanup;
4037 }
4038
4039 if (ALIGNED(filteredPixels,CLPixelPacket))
4040 {
4041 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4042 hostPtr = filteredPixels;
4043 }
4044 else
4045 {
4046 mem_flags = CL_MEM_WRITE_ONLY;
4047 hostPtr = NULL;
4048 }
4049 /* create a CL buffer from image pixel buffer */
4050 length = inputImage->columns * inputImage->rows;
4051 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4052 if (clStatus != CL_SUCCESS)
4053 {
4054 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4055 goto cleanup;
4056 }
4057
4058 hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
4059 hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
4060
4061 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&inputImageBuffer);
4062 clStatus |=clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4063 imageWidth = inputImage->columns;
4064 clStatus |=clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
4065 imageHeight = inputImage->rows;
4066 clStatus |=clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
4067 matte = (inputImage->matte==MagickFalse)?0:1;
4068 clStatus |=clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
4069 if (clStatus != CL_SUCCESS)
4070 {
4071 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4072 goto cleanup;
4073 }
4074
4075 clStatus = clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4076 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
4077 imageWidth = inputImage->columns;
4078 clStatus |=clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
4079 imageHeight = inputImage->rows;
4080 clStatus |=clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
4081 matte = (inputImage->matte==MagickFalse)?0:1;
4082 clStatus |=clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
4083 if (clStatus != CL_SUCCESS)
4084 {
4085 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4086 goto cleanup;
4087 }
4088
4089
4090 global_work_size[0] = inputImage->columns;
4091 global_work_size[1] = inputImage->rows;
4092
4093
4094 for (k = 0; k < 4; k++)
4095 {
4096 cl_int2 offset;
4097 int polarity;
4098
4099
4100 offset.s[0] = X[k];
4101 offset.s[1] = Y[k];
4102 polarity = 1;
4103 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4104 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4105 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4106 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4107 if (clStatus != CL_SUCCESS)
4108 {
4109 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4110 goto cleanup;
4111 }
4112 /* launch the kernel */
4113 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4114 if (clStatus != CL_SUCCESS)
4115 {
4116 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4117 goto cleanup;
4118 }
4119 /* launch the kernel */
4120 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4121 if (clStatus != CL_SUCCESS)
4122 {
4123 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4124 goto cleanup;
4125 }
4126
4127
4128 if (k == 0)
4129 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
4130 offset.s[0] = -X[k];
4131 offset.s[1] = -Y[k];
4132 polarity = 1;
4133 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4134 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4135 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4136 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4137 if (clStatus != CL_SUCCESS)
4138 {
4139 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4140 goto cleanup;
4141 }
4142 /* launch the kernel */
4143 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4144 if (clStatus != CL_SUCCESS)
4145 {
4146 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4147 goto cleanup;
4148 }
4149 /* launch the kernel */
4150 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4151 if (clStatus != CL_SUCCESS)
4152 {
4153 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4154 goto cleanup;
4155 }
4156
4157 offset.s[0] = -X[k];
4158 offset.s[1] = -Y[k];
4159 polarity = -1;
4160 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4161 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4162 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4163 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4164 if (clStatus != CL_SUCCESS)
4165 {
4166 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4167 goto cleanup;
4168 }
4169 /* launch the kernel */
4170 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4171 if (clStatus != CL_SUCCESS)
4172 {
4173 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4174 goto cleanup;
4175 }
4176 /* launch the kernel */
4177 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4178 if (clStatus != CL_SUCCESS)
4179 {
4180 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4181 goto cleanup;
4182 }
4183
4184 offset.s[0] = X[k];
4185 offset.s[1] = Y[k];
4186 polarity = -1;
4187 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4188 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4189 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4190 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4191
4192 if (k == 3)
4193 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
4194
4195 if (clStatus != CL_SUCCESS)
4196 {
4197 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4198 goto cleanup;
4199 }
4200 /* launch the kernel */
4201 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4202 if (clStatus != CL_SUCCESS)
4203 {
4204 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4205 goto cleanup;
4206 }
4207 /* launch the kernel */
4208 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4209 if (clStatus != CL_SUCCESS)
4210 {
4211 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4212 goto cleanup;
4213 }
4214 }
4215
4216 if (ALIGNED(filteredPixels,CLPixelPacket))
4217 {
4218 length = inputImage->columns * inputImage->rows;
4219 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4220 }
4221 else
4222 {
4223 length = inputImage->columns * inputImage->rows;
4224 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4225 }
4226 if (clStatus != CL_SUCCESS)
4227 {
4228 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
4229 goto cleanup;
4230 }
4231
4232 outputReady = MagickTrue;
4233
4234cleanup:
4235 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4236 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4237 for (k = 0; k < 2; k++)
4238 {
4239 if (tempImageBuffer[k]!=NULL) clReleaseMemObject(tempImageBuffer[k]);
4240 }
4241 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4242 if (hullPass1!=NULL) RelinquishOpenCLKernel(clEnv, hullPass1);
4243 if (hullPass2!=NULL) RelinquishOpenCLKernel(clEnv, hullPass2);
4244 if (outputReady == MagickFalse)
4245 {
4246 if (filteredImage != NULL)
4247 {
4248 DestroyImage(filteredImage);
4249 filteredImage = NULL;
4250 }
4251 }
4252 return filteredImage;
4253}
4254
4255/*
4256%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4257% %
4258% %
4259% %
4260% D e s p e c k l e I m a g e w i t h O p e n C L %
4261% %
4262% %
4263% %
4264%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4265%
4266% DespeckleImage() reduces the speckle noise in an image while perserving the
4267% edges of the original image. A speckle removing filter uses a complementary
4268% hulling technique (raising pixels that are darker than their surrounding
4269% neighbors, then complementarily lowering pixels that are brighter than their
4270% surrounding neighbors) to reduce the speckle index of that image (reference
4271% Crimmins speckle removal).
4272%
4273% The format of the DespeckleImage method is:
4274%
4275% Image *DespeckleImage(const Image *image,ExceptionInfo *exception)
4276%
4277% A description of each parameter follows:
4278%
4279% o image: the image.
4280%
4281% o exception: return any errors or warnings in this structure.
4282%
4283*/
4284
4285MagickExport
4286Image* AccelerateDespeckleImage(const Image* image, ExceptionInfo* exception)
4287{
4288 MagickBooleanType status;
4289 Image* newImage = NULL;
4290
4291 assert(image != NULL);
4292 assert(exception != NULL);
4293
4294 status = checkOpenCLEnvironment(exception);
4295 if (status == MagickFalse)
4296 return NULL;
4297
4298 status = checkAccelerateCondition(image, AllChannels, exception);
4299 if (status == MagickFalse)
4300 return NULL;
4301
4302 newImage = ComputeDespeckleImage(image,exception);
4303 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4304 return newImage;
4305}
4306
4307#else /* MAGICKCORE_OPENCL_SUPPORT */
4308
4309MagickExport Image *AccelerateConvolveImageChannel(
4310 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4311 const KernelInfo *magick_unused(kernel),
4312 ExceptionInfo *magick_unused(exception))
4313{
4314 magick_unreferenced(image);
4315 magick_unreferenced(channel);
4316 magick_unreferenced(kernel);
4317 magick_unreferenced(exception);
4318
4319 return NULL;
4320}
4321
4322MagickExport MagickBooleanType AccelerateFunctionImage(
4323 Image *magick_unused(image),const ChannelType magick_unused(channel),
4324 const MagickFunction magick_unused(function),
4325 const size_t magick_unused(number_parameters),
4326 const double *magick_unused(parameters),
4327 ExceptionInfo *magick_unused(exception))
4328{
4329 magick_unreferenced(image);
4330 magick_unreferenced(channel);
4331 magick_unreferenced(function);
4332 magick_unreferenced(number_parameters);
4333 magick_unreferenced(parameters);
4334 magick_unreferenced(exception);
4335
4336 return MagickFalse;
4337}
4338
4339MagickExport Image *AccelerateBlurImage(const Image *magick_unused(image),
4340 const ChannelType magick_unused(channel),const double magick_unused(radius),
4341 const double magick_unused(sigma),ExceptionInfo *magick_unused(exception))
4342{
4343 magick_unreferenced(image);
4344 magick_unreferenced(channel);
4345 magick_unreferenced(radius);
4346 magick_unreferenced(sigma);
4347 magick_unreferenced(exception);
4348
4349 return NULL;
4350}
4351
4352MagickExport Image *AccelerateRadialBlurImage(
4353 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4354 const double magick_unused(angle),ExceptionInfo *magick_unused(exception))
4355{
4356 magick_unreferenced(image);
4357 magick_unreferenced(channel);
4358 magick_unreferenced(angle);
4359 magick_unreferenced(exception);
4360
4361 return NULL;
4362}
4363
4364
4365MagickExport Image *AccelerateUnsharpMaskImage(
4366 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4367 const double magick_unused(radius),const double magick_unused(sigma),
4368 const double magick_unused(gain),const double magick_unused(threshold),
4369 ExceptionInfo *magick_unused(exception))
4370{
4371 magick_unreferenced(image);
4372 magick_unreferenced(channel);
4373 magick_unreferenced(radius);
4374 magick_unreferenced(sigma);
4375 magick_unreferenced(gain);
4376 magick_unreferenced(threshold);
4377 magick_unreferenced(exception);
4378
4379 return NULL;
4380}
4381
4382
4383MagickExport MagickBooleanType AccelerateContrastImage(
4384 Image* magick_unused(image),const MagickBooleanType magick_unused(sharpen),
4385 ExceptionInfo* magick_unused(exception))
4386{
4387 magick_unreferenced(image);
4388 magick_unreferenced(sharpen);
4389 magick_unreferenced(exception);
4390
4391 return MagickFalse;
4392}
4393
4394MagickExport MagickBooleanType AccelerateEqualizeImage(
4395 Image* magick_unused(image), const ChannelType magick_unused(channel),
4396 ExceptionInfo* magick_unused(exception))
4397{
4398 magick_unreferenced(image);
4399 magick_unreferenced(channel);
4400 magick_unreferenced(exception);
4401
4402 return MagickFalse;
4403}
4404
4405MagickExport Image *AccelerateDespeckleImage(const Image* magick_unused(image),
4406 ExceptionInfo* magick_unused(exception))
4407{
4408 magick_unreferenced(image);
4409 magick_unreferenced(exception);
4410
4411 return NULL;
4412}
4413
4414MagickExport Image *AccelerateResizeImage(const Image* magick_unused(image),
4415 const size_t magick_unused(resizedColumns),
4416 const size_t magick_unused(resizedRows),
4417 const ResizeFilter* magick_unused(resizeFilter),
4418 ExceptionInfo *magick_unused(exception))
4419{
4420 magick_unreferenced(image);
4421 magick_unreferenced(resizedColumns);
4422 magick_unreferenced(resizedRows);
4423 magick_unreferenced(resizeFilter);
4424 magick_unreferenced(exception);
4425
4426 return NULL;
4427}
4428
4429
4430MagickExport
4431MagickBooleanType AccelerateModulateImage(
4432 Image* image, double percent_brightness, double percent_hue,
4433 double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
4434{
4435 magick_unreferenced(image);
4436 magick_unreferenced(percent_brightness);
4437 magick_unreferenced(percent_hue);
4438 magick_unreferenced(percent_saturation);
4439 magick_unreferenced(colorspace);
4440 magick_unreferenced(exception);
4441 return(MagickFalse);
4442}
4443
4444
4445#endif /* MAGICKCORE_OPENCL_SUPPORT */
4446
4447MagickExport MagickBooleanType AccelerateConvolveImage(
4448 const Image *magick_unused(image),const KernelInfo *magick_unused(kernel),
4449 Image *magick_unused(convolve_image),ExceptionInfo *magick_unused(exception))
4450{
4451 magick_unreferenced(image);
4452 magick_unreferenced(kernel);
4453 magick_unreferenced(convolve_image);
4454 magick_unreferenced(exception);
4455
4456 /* legacy, do not use */
4457 return(MagickFalse);
4458}
4459