blob: 5f022d4ae6b19531b0460946f82de2f553885eb4 [file] [log] [blame]
cristy3f6d1482010-01-20 21:01:21 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7% A A C C E L E R R A A T E %
8% AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9% A A C C E L E R R A A T E %
10% A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11% %
12% %
13% MagickCore Acceleration Methods %
14% %
15% Software Design %
cristyde984cd2013-12-01 14:49:27 +000016% Cristy %
cristyf034abb2013-11-24 14:16:14 +000017% SiuChi Chan %
18% Guansong Zhang %
cristy3f6d1482010-01-20 21:01:21 +000019% January 2010 %
20% %
21% %
cristyfe676ee2013-11-18 13:03:38 +000022% Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization %
cristy3f6d1482010-01-20 21:01:21 +000023% dedicated to making software imaging solutions freely available. %
24% %
25% You may not use this file except in compliance with the License. You may %
26% obtain a copy of the License at %
27% %
28% http://www.imagemagick.org/script/license.php %
29% %
30% Unless required by applicable law or agreed to in writing, software %
31% distributed under the License is distributed on an "AS IS" BASIS, %
32% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
33% See the License for the specific language governing permissions and %
34% limitations under the License. %
35% %
36%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cristy3f6d1482010-01-20 21:01:21 +000037*/
cristyf034abb2013-11-24 14:16:14 +000038
cristy3f6d1482010-01-20 21:01:21 +000039/*
cristyf034abb2013-11-24 14:16:14 +000040Include declarations.
cristy3f6d1482010-01-20 21:01:21 +000041*/
cristy4c08aed2011-07-01 19:47:50 +000042#include "MagickCore/studio.h"
43#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000044#include "MagickCore/accelerate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000045#include "MagickCore/artifact.h"
cristy35f33492011-07-07 16:54:49 +000046#include "MagickCore/cache.h"
cristyd1dd6e42011-09-04 01:46:08 +000047#include "MagickCore/cache-private.h"
cristy4c08aed2011-07-01 19:47:50 +000048#include "MagickCore/cache-view.h"
49#include "MagickCore/color-private.h"
cristy7f866842013-07-11 01:15:58 +000050#include "MagickCore/delegate-private.h"
cristy4c08aed2011-07-01 19:47:50 +000051#include "MagickCore/enhance.h"
52#include "MagickCore/exception.h"
53#include "MagickCore/exception-private.h"
54#include "MagickCore/gem.h"
55#include "MagickCore/hashmap.h"
56#include "MagickCore/image.h"
57#include "MagickCore/image-private.h"
58#include "MagickCore/list.h"
59#include "MagickCore/memory_.h"
60#include "MagickCore/monitor-private.h"
61#include "MagickCore/accelerate.h"
cristyf034abb2013-11-24 14:16:14 +000062#include "MagickCore/opencl.h"
63#include "MagickCore/opencl-private.h"
cristy4c08aed2011-07-01 19:47:50 +000064#include "MagickCore/option.h"
cristyf034abb2013-11-24 14:16:14 +000065#include "MagickCore/pixel-private.h"
cristy4c08aed2011-07-01 19:47:50 +000066#include "MagickCore/prepress.h"
67#include "MagickCore/quantize.h"
cristye85d0f72013-11-27 02:25:43 +000068#include "MagickCore/random_.h"
69#include "MagickCore/random-private.h"
cristy4c08aed2011-07-01 19:47:50 +000070#include "MagickCore/registry.h"
cristyf034abb2013-11-24 14:16:14 +000071#include "MagickCore/resize.h"
72#include "MagickCore/resize-private.h"
cristy4c08aed2011-07-01 19:47:50 +000073#include "MagickCore/semaphore.h"
74#include "MagickCore/splay-tree.h"
75#include "MagickCore/statistic.h"
76#include "MagickCore/string_.h"
77#include "MagickCore/string-private.h"
78#include "MagickCore/token.h"
cristyf034abb2013-11-24 14:16:14 +000079
80#ifdef MAGICKCORE_CLPERFMARKER
81#include "CLPerfMarker.h"
82#endif
83
cristye85d0f72013-11-27 02:25:43 +000084#define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
85#define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
86
cristyf034abb2013-11-24 14:16:14 +000087#if defined(MAGICKCORE_OPENCL_SUPPORT)
88
89#define ALIGNED(pointer,type) ((((long)(pointer)) & (sizeof(type)-1)) == 0)
90/*#define ALIGNED(pointer,type) (0) */
91
92static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception)
93{
94 MagickBooleanType flag;
95
96 MagickCLEnv clEnv;
97 clEnv = GetDefaultOpenCLEnv();
98
99 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED
100 , sizeof(MagickBooleanType), &flag, exception);
101 if (flag == MagickTrue)
102 return MagickFalse;
103
104 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_INITIALIZED
105 , sizeof(MagickBooleanType), &flag, exception);
106 if (flag == MagickFalse)
107 {
108 if(InitOpenCLEnv(clEnv, exception) == MagickFalse)
109 return MagickFalse;
110 }
111
112 return MagickTrue;
113}
114
115
dirk5dcb7622013-12-01 10:43:43 +0000116static MagickBooleanType checkAccelerateCondition(const Image* image, const ChannelType channel)
cristyf034abb2013-11-24 14:16:14 +0000117{
118 /* check if the image's colorspace is supported */
119 if (image->colorspace != RGBColorspace
120 && image->colorspace != sRGBColorspace)
121 return MagickFalse;
122
123 /* check if the channel is supported */
124 if (((channel&RedChannel) == 0)
125 || ((channel&GreenChannel) == 0)
126 || ((channel&BlueChannel) == 0))
127 {
128 return MagickFalse;
129 }
130
131
132 /* check if if the virtual pixel method is compatible with the OpenCL implementation */
133 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod)&&
134 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
135 return MagickFalse;
136
137 return MagickTrue;
138}
139
140
141static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
142{
143 MagickBooleanType outputReady;
144 MagickCLEnv clEnv;
145
146 cl_int clStatus;
147 size_t global_work_size[2];
148 size_t localGroupSize[2];
149 size_t localMemoryRequirement;
150 Image* filteredImage;
151 MagickSizeType length;
152 const void *inputPixels;
153 void *filteredPixels;
154 cl_mem_flags mem_flags;
155 float* kernelBufferPtr;
156 unsigned kernelSize;
157 unsigned int i;
158 void *hostPtr;
159 unsigned int matte, filterWidth, filterHeight, imageWidth, imageHeight;
160
161 cl_context context;
162 cl_kernel clkernel;
163 cl_mem inputImageBuffer, filteredImageBuffer, convolutionKernel;
164 cl_ulong deviceLocalMemorySize;
165 cl_device_id device;
166
167 cl_command_queue queue;
168
169 /* intialize all CL objects to NULL */
170 context = NULL;
171 inputImageBuffer = NULL;
172 filteredImageBuffer = NULL;
173 convolutionKernel = NULL;
174 clkernel = NULL;
175 queue = NULL;
176 device = NULL;
177
178 filteredImage = NULL;
179 outputReady = MagickFalse;
180
181 clEnv = GetDefaultOpenCLEnv();
182 context = GetOpenCLContext(clEnv);
183
184 inputPixels = NULL;
185 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
186 if (inputPixels == (const void *) NULL)
187 {
188 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
189 goto cleanup;
190 }
191
192 /* Create and initialize OpenCL buffers. */
193
194 /* If the host pointer is aligned to the size of CLPixelPacket,
195 then use the host buffer directly from the GPU; otherwise,
196 create a buffer on the GPU and copy the data over */
197 if (ALIGNED(inputPixels,CLPixelPacket))
198 {
199 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
200 }
201 else
202 {
203 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
204 }
205 /* create a CL buffer from image pixel buffer */
206 length = inputImage->columns * inputImage->rows;
207 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
208 if (clStatus != CL_SUCCESS)
209 {
210 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
211 goto cleanup;
212 }
213
214 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
215 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +0000216 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +0000217 {
218 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
219 goto cleanup;
220 }
221 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
222 if (filteredPixels == (void *) NULL)
223 {
224 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
225 goto cleanup;
226 }
227
228 if (ALIGNED(filteredPixels,CLPixelPacket))
229 {
230 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
231 hostPtr = filteredPixels;
232 }
233 else
234 {
235 mem_flags = CL_MEM_WRITE_ONLY;
236 hostPtr = NULL;
237 }
238 /* create a CL buffer from image pixel buffer */
239 length = inputImage->columns * inputImage->rows;
240 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
241 if (clStatus != CL_SUCCESS)
242 {
243 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
244 goto cleanup;
245 }
246
247 kernelSize = kernel->width * kernel->height;
248 convolutionKernel = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
249 if (clStatus != CL_SUCCESS)
250 {
251 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
252 goto cleanup;
253 }
254
255 queue = AcquireOpenCLCommandQueue(clEnv);
256
257 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
258 , 0, NULL, NULL, &clStatus);
259 if (clStatus != CL_SUCCESS)
260 {
261 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
262 goto cleanup;
263 }
264 for (i = 0; i < kernelSize; i++)
265 {
266 kernelBufferPtr[i] = (float) kernel->values[i];
267 }
268 clStatus = clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
269 if (clStatus != CL_SUCCESS)
270 {
271 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
272 goto cleanup;
273 }
274 clFlush(queue);
275
276 /* Compute the local memory requirement for a 16x16 workgroup.
277 If it's larger than 16k, reduce the workgroup size to 8x8 */
278 localGroupSize[0] = 16;
279 localGroupSize[1] = 16;
280 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
281 + kernel->width*kernel->height*sizeof(float);
282 if (localMemoryRequirement > 16384)
283 {
284
285
286 localGroupSize[0] = 8;
287 localGroupSize[1] = 8;
288
289 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
290 + kernel->width*kernel->height*sizeof(float);
291 }
292
293 GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_DEVICE, sizeof(cl_device_id), &device, exception);
294 clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &deviceLocalMemorySize, NULL);
295 if (localMemoryRequirement <= deviceLocalMemorySize)
296 {
297 /* get the OpenCL kernel */
298 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
299 if (clkernel == NULL)
300 {
301 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
302 goto cleanup;
303 }
304
305 /* set the kernel arguments */
306 i = 0;
307 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
308 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
309 imageWidth = inputImage->columns;
310 imageHeight = inputImage->rows;
311 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
312 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
313 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
314 filterWidth = kernel->width;
315 filterHeight = kernel->height;
316 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
317 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
dirke19d0cc2013-12-01 10:07:42 +0000318 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +0000319 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
320 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
321 clStatus|=clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
322 clStatus|=clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
323 if (clStatus != CL_SUCCESS)
324 {
325 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
326 goto cleanup;
327 }
328
329 /* pad the global size to a multiple of the local work size dimension */
330 global_work_size[0] = ((inputImage->columns + localGroupSize[0] - 1)/localGroupSize[0] ) * localGroupSize[0] ;
331 global_work_size[1] = ((inputImage->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
332
333 /* launch the kernel */
334 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
335 if (clStatus != CL_SUCCESS)
336 {
337 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
338 goto cleanup;
339 }
340 }
341 else
342 {
343 /* get the OpenCL kernel */
344 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
345 if (clkernel == NULL)
346 {
347 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
348 goto cleanup;
349 }
350
351 /* set the kernel arguments */
352 i = 0;
353 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
354 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
355 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
356 filterWidth = kernel->width;
357 filterHeight = kernel->height;
358 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
359 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
dirke19d0cc2013-12-01 10:07:42 +0000360 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +0000361 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
362 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
363 if (clStatus != CL_SUCCESS)
364 {
365 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
366 goto cleanup;
367 }
368
369 global_work_size[0] = inputImage->columns;
370 global_work_size[1] = inputImage->rows;
371
372 /* launch the kernel */
373 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
374 if (clStatus != CL_SUCCESS)
375 {
376 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
377 goto cleanup;
378 }
379 }
380 clFlush(queue);
381
382 if (ALIGNED(filteredPixels,CLPixelPacket))
383 {
384 length = inputImage->columns * inputImage->rows;
385 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
386 }
387 else
388 {
389 length = inputImage->columns * inputImage->rows;
390 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
391 }
392 if (clStatus != CL_SUCCESS)
393 {
394 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
395 goto cleanup;
396 }
397
398 /* everything is fine! :) */
399 outputReady = MagickTrue;
400
401
402cleanup:
403
404 if (inputImageBuffer != NULL)
405 clReleaseMemObject(inputImageBuffer);
406
407 if (filteredImageBuffer != NULL)
408 clReleaseMemObject(filteredImageBuffer);
409
410 if (convolutionKernel != NULL)
411 clReleaseMemObject(convolutionKernel);
412
413 if (clkernel != NULL)
414 RelinquishOpenCLKernel(clEnv, clkernel);
415
416 if (queue != NULL)
417 RelinquishOpenCLCommandQueue(clEnv, queue);
418
419 if (outputReady == MagickFalse)
420 {
421 if (filteredImage != NULL)
422 {
423 DestroyImage(filteredImage);
424 filteredImage = NULL;
425 }
426 }
427
428 return filteredImage;
429}
430
cristy3f6d1482010-01-20 21:01:21 +0000431/*
432%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
433% %
434% %
435% %
cristyf034abb2013-11-24 14:16:14 +0000436% C o n v o l v e I m a g e w i t h O p e n C L %
cristy3f6d1482010-01-20 21:01:21 +0000437% %
438% %
439% %
440%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
441%
cristyf034abb2013-11-24 14:16:14 +0000442% ConvolveImage() applies a custom convolution kernel to the image.
cristy3f6d1482010-01-20 21:01:21 +0000443%
cristyf034abb2013-11-24 14:16:14 +0000444% The format of the ConvolveImage method is:
cristy3f6d1482010-01-20 21:01:21 +0000445%
cristyf034abb2013-11-24 14:16:14 +0000446% Image *ConvolveImage(const Image *image,const size_t order,
447% const double *kernel,ExceptionInfo *exception)
448% Image *ConvolveImageChannel(const Image *image,const ChannelType channel,
449% const size_t order,const double *kernel,ExceptionInfo *exception)
450%
451% A description of each parameter follows:
452%
453% o image: the image.
454%
455% o channel: the channel type.
456%
457% o kernel: kernel info.
458%
459% o exception: return any errors or warnings in this structure.
460%
461*/
462
463MagickExport Image* AccelerateConvolveImageChannel(const Image *image, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
464{
465 MagickBooleanType status;
466 Image* filteredImage = NULL;
467
468 assert(image != NULL);
469 assert(kernel != (KernelInfo *) NULL);
470 assert(exception != (ExceptionInfo *) NULL);
471
472 status = checkOpenCLEnvironment(exception);
473 if (status == MagickFalse)
474 return NULL;
475
dirk5dcb7622013-12-01 10:43:43 +0000476 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +0000477 if (status == MagickFalse)
478 return NULL;
479
480 filteredImage = ComputeConvolveImage(image, channel, kernel, exception);
481 OpenCLLogException(__FUNCTION__,__LINE__,exception);
482 return filteredImage;
483}
484
485static MagickBooleanType ComputeFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
486 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
487{
488 MagickBooleanType status;
489
490 MagickCLEnv clEnv;
491
492 MagickSizeType length;
493 void* pixels;
494 float* parametersBufferPtr;
495
496 cl_int clStatus;
497 cl_context context;
498 cl_kernel clkernel;
499 cl_command_queue queue;
500 cl_mem_flags mem_flags;
501 cl_mem imageBuffer;
502 cl_mem parametersBuffer;
503 size_t globalWorkSize[2];
504
505 unsigned int i;
506
507 status = MagickFalse;
508
509 context = NULL;
510 clkernel = NULL;
511 queue = NULL;
512 imageBuffer = NULL;
513 parametersBuffer = NULL;
514
515 clEnv = GetDefaultOpenCLEnv();
516 context = GetOpenCLContext(clEnv);
517
518 pixels = GetPixelCachePixels(image, &length, exception);
519 if (pixels == (void *) NULL)
520 {
521 (void) ThrowMagickException(exception, GetMagickModule(), CacheWarning,
522 "GetPixelCachePixels failed.",
523 "'%s'", image->filename);
524 goto cleanup;
525 }
526
527
528 if (ALIGNED(pixels,CLPixelPacket))
529 {
530 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
531 }
532 else
533 {
534 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
535 }
536 /* create a CL buffer from image pixel buffer */
537 length = image->columns * image->rows;
538 imageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus);
539 if (clStatus != CL_SUCCESS)
540 {
541 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
542 goto cleanup;
543 }
544
545 parametersBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus);
546 if (clStatus != CL_SUCCESS)
547 {
548 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
549 goto cleanup;
550 }
551
552 queue = AcquireOpenCLCommandQueue(clEnv);
553
554 parametersBufferPtr = (float*)clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float)
555 , 0, NULL, NULL, &clStatus);
556 if (clStatus != CL_SUCCESS)
557 {
558 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
559 goto cleanup;
560 }
561 for (i = 0; i < number_parameters; i++)
562 {
563 parametersBufferPtr[i] = (float)parameters[i];
564 }
565 clStatus = clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL);
566 if (clStatus != CL_SUCCESS)
567 {
568 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
569 goto cleanup;
570 }
571 clFlush(queue);
572
573 clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "FunctionImage");
574 if (clkernel == NULL)
575 {
576 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
577 goto cleanup;
578 }
579
580 /* set the kernel arguments */
581 i = 0;
582 clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
583 clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
584 clStatus|=clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
585 clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
586 clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
587 if (clStatus != CL_SUCCESS)
588 {
589 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
590 goto cleanup;
591 }
592
593 globalWorkSize[0] = image->columns;
594 globalWorkSize[1] = image->rows;
595 /* launch the kernel */
596 clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
597 if (clStatus != CL_SUCCESS)
598 {
599 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
600 goto cleanup;
601 }
602 clFlush(queue);
603
604
605 if (ALIGNED(pixels,CLPixelPacket))
606 {
607 length = image->columns * image->rows;
608 clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
609 }
610 else
611 {
612 length = image->columns * image->rows;
613 clStatus = clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL);
614 }
615 if (clStatus != CL_SUCCESS)
616 {
617 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
618 goto cleanup;
619 }
620 status = MagickTrue;
621
622cleanup:
623
624 if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel);
625 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
626 if (imageBuffer != NULL) clReleaseMemObject(imageBuffer);
627 if (parametersBuffer != NULL) clReleaseMemObject(parametersBuffer);
628
629 return status;
630}
631
632
633
634MagickExport MagickBooleanType
635 AccelerateFunctionImage(Image *image, const ChannelType channel,const MagickFunction function,
636 const size_t number_parameters,const double *parameters, ExceptionInfo *exception)
637{
638 MagickBooleanType status;
639
640 status = MagickFalse;
641
642 assert(image != NULL);
643 assert(exception != (ExceptionInfo *) NULL);
644
645 status = checkOpenCLEnvironment(exception);
646 if (status == MagickTrue)
647 {
dirk5dcb7622013-12-01 10:43:43 +0000648 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +0000649 if (status == MagickTrue)
650 {
651 status = ComputeFunctionImage(image, channel, function, number_parameters, parameters, exception);
652 OpenCLLogException(__FUNCTION__,__LINE__,exception);
653 }
654 }
655 return status;
656}
657
658
659static MagickBooleanType splitImage(const Image* inputImage)
660{
661 MagickBooleanType split;
662
663 MagickCLEnv clEnv;
664 unsigned long allocSize;
665 unsigned long tempSize;
666
667 clEnv = GetDefaultOpenCLEnv();
668
669 allocSize = GetOpenCLDeviceMaxMemAllocSize(clEnv);
670 tempSize = inputImage->columns * inputImage->rows * 4 * 4;
671
672 /*
673 printf("alloc size: %lu\n", allocSize);
674 printf("temp size: %lu\n", tempSize);
675 */
676
677 split = ((tempSize > allocSize) ? MagickTrue:MagickFalse);
678
679 return split;
680}
681
682static Image* ComputeBlurImage(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
683{
684 MagickBooleanType outputReady;
685 Image* filteredImage;
686 MagickCLEnv clEnv;
687
688 cl_int clStatus;
689
690 const void *inputPixels;
691 void *filteredPixels;
692 cl_mem_flags mem_flags;
693
694 cl_context context;
695 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
696 cl_kernel blurRowKernel, blurColumnKernel;
697 cl_command_queue queue;
698
699 void* hostPtr;
700 float* kernelBufferPtr;
701 MagickSizeType length;
702
703 char geometry[MaxTextExtent];
704 KernelInfo* kernel = NULL;
705 unsigned int kernelWidth;
706 unsigned int imageColumns, imageRows;
707
708 unsigned int i;
709
710 context = NULL;
711 filteredImage = NULL;
712 inputImageBuffer = NULL;
713 tempImageBuffer = NULL;
714 filteredImageBuffer = NULL;
715 imageKernelBuffer = NULL;
716 blurRowKernel = NULL;
717 blurColumnKernel = NULL;
718 queue = NULL;
719
720 outputReady = MagickFalse;
721
722 clEnv = GetDefaultOpenCLEnv();
723 context = GetOpenCLContext(clEnv);
724 queue = AcquireOpenCLCommandQueue(clEnv);
725
726 /* Create and initialize OpenCL buffers. */
727 {
728 inputPixels = NULL;
729 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
730 if (inputPixels == (const void *) NULL)
731 {
732 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
733 goto cleanup;
734 }
735 /* If the host pointer is aligned to the size of CLPixelPacket,
736 then use the host buffer directly from the GPU; otherwise,
737 create a buffer on the GPU and copy the data over */
738 if (ALIGNED(inputPixels,CLPixelPacket))
739 {
740 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
741 }
742 else
743 {
744 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
745 }
746 /* create a CL buffer from image pixel buffer */
747 length = inputImage->columns * inputImage->rows;
748 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
749 if (clStatus != CL_SUCCESS)
750 {
751 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
752 goto cleanup;
753 }
754 }
755
756 /* create output */
757 {
758 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
759 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +0000760 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +0000761 {
762 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
763 goto cleanup;
764 }
765 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
766 if (filteredPixels == (void *) NULL)
767 {
768 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
769 goto cleanup;
770 }
771
772 if (ALIGNED(filteredPixels,CLPixelPacket))
773 {
774 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
775 hostPtr = filteredPixels;
776 }
777 else
778 {
779 mem_flags = CL_MEM_WRITE_ONLY;
780 hostPtr = NULL;
781 }
782 /* create a CL buffer from image pixel buffer */
783 length = inputImage->columns * inputImage->rows;
784 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
785 if (clStatus != CL_SUCCESS)
786 {
787 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
788 goto cleanup;
789 }
790 }
791
792 /* create processing kernel */
793 {
794 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
795 kernel=AcquireKernelInfo(geometry);
796 if (kernel == (KernelInfo *) NULL)
797 {
798 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
799 goto cleanup;
800 }
801
802 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
803 if (clStatus != CL_SUCCESS)
804 {
805 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
806 goto cleanup;
807 }
808 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
809 if (clStatus != CL_SUCCESS)
810 {
811 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
812 goto cleanup;
813 }
814
815 for (i = 0; i < kernel->width; i++)
816 {
817 kernelBufferPtr[i] = (float) kernel->values[i];
818 }
819
820 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
821 if (clStatus != CL_SUCCESS)
822 {
823 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
824 goto cleanup;
825 }
826 }
827
828 {
829
830 /* create temp buffer */
831 {
832 length = inputImage->columns * inputImage->rows;
833 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
834 if (clStatus != CL_SUCCESS)
835 {
836 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
837 goto cleanup;
838 }
839 }
840
841 /* get the OpenCL kernels */
842 {
843 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
844 if (blurRowKernel == NULL)
845 {
846 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
847 goto cleanup;
848 };
849
850 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumn");
851 if (blurColumnKernel == NULL)
852 {
853 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
854 goto cleanup;
855 };
856 }
857
858 {
859 /* need logic to decide this value */
860 int chunkSize = 256;
861
862 {
863 imageColumns = inputImage->columns;
864 imageRows = inputImage->rows;
865
866 /* set the kernel arguments */
867 i = 0;
868 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
869 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
870 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
871 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
872 kernelWidth = kernel->width;
873 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
874 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
875 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
876 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
877 if (clStatus != CL_SUCCESS)
878 {
879 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
880 goto cleanup;
881 }
882 }
883
884 /* launch the kernel */
885 {
886 size_t gsize[2];
887 size_t wsize[2];
888
889 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
890 gsize[1] = inputImage->rows;
891 wsize[0] = chunkSize;
892 wsize[1] = 1;
893
894 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
895 if (clStatus != CL_SUCCESS)
896 {
897 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
898 goto cleanup;
899 }
900 clFlush(queue);
901 }
902 }
903
904 {
905 /* need logic to decide this value */
906 int chunkSize = 256;
907
908 {
909 imageColumns = inputImage->columns;
910 imageRows = inputImage->rows;
911
912 /* set the kernel arguments */
913 i = 0;
914 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
915 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
916 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
917 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
918 kernelWidth = kernel->width;
919 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
920 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
921 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
922 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
923 if (clStatus != CL_SUCCESS)
924 {
925 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
926 goto cleanup;
927 }
928 }
929
930 /* launch the kernel */
931 {
932 size_t gsize[2];
933 size_t wsize[2];
934
935 gsize[0] = inputImage->columns;
936 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
937 wsize[0] = 1;
938 wsize[1] = chunkSize;
939
940 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
941 if (clStatus != CL_SUCCESS)
942 {
943 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
944 goto cleanup;
945 }
946 clFlush(queue);
947 }
948 }
949
950 }
951
952 /* get result */
953 if (ALIGNED(filteredPixels,CLPixelPacket))
954 {
955 length = inputImage->columns * inputImage->rows;
956 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
957 }
958 else
959 {
960 length = inputImage->columns * inputImage->rows;
961 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
962 }
963 if (clStatus != CL_SUCCESS)
964 {
965 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
966 goto cleanup;
967 }
968
969 outputReady = MagickTrue;
970
971cleanup:
972 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
973 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
974 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
975 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
976 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
977 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
978 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
979 if (kernel!=NULL) DestroyKernelInfo(kernel);
980 if (outputReady == MagickFalse)
981 {
982 if (filteredImage != NULL)
983 {
984 DestroyImage(filteredImage);
985 filteredImage = NULL;
986 }
987 }
988 return filteredImage;
989}
990
991static Image* ComputeBlurImageSection(const Image* inputImage, const ChannelType channel, const double radius, const double sigma, ExceptionInfo *exception)
992{
993 MagickBooleanType outputReady;
994 Image* filteredImage;
995 MagickCLEnv clEnv;
996
997 cl_int clStatus;
998
999 const void *inputPixels;
1000 void *filteredPixels;
1001 cl_mem_flags mem_flags;
1002
1003 cl_context context;
1004 cl_mem inputImageBuffer, tempImageBuffer, filteredImageBuffer, imageKernelBuffer;
1005 cl_kernel blurRowKernel, blurColumnKernel;
1006 cl_command_queue queue;
1007
1008 void* hostPtr;
1009 float* kernelBufferPtr;
1010 MagickSizeType length;
1011
1012 char geometry[MaxTextExtent];
1013 KernelInfo* kernel = NULL;
1014 unsigned int kernelWidth;
1015 unsigned int imageColumns, imageRows;
1016
1017 unsigned int i;
1018
1019 context = NULL;
1020 filteredImage = NULL;
1021 inputImageBuffer = NULL;
1022 tempImageBuffer = NULL;
1023 filteredImageBuffer = NULL;
1024 imageKernelBuffer = NULL;
1025 blurRowKernel = NULL;
1026 blurColumnKernel = NULL;
1027 queue = NULL;
1028
1029 outputReady = MagickFalse;
1030
1031 clEnv = GetDefaultOpenCLEnv();
1032 context = GetOpenCLContext(clEnv);
1033 queue = AcquireOpenCLCommandQueue(clEnv);
1034
1035 /* Create and initialize OpenCL buffers. */
1036 {
1037 inputPixels = NULL;
1038 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1039 if (inputPixels == (const void *) NULL)
1040 {
1041 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
1042 goto cleanup;
1043 }
1044 /* If the host pointer is aligned to the size of CLPixelPacket,
1045 then use the host buffer directly from the GPU; otherwise,
1046 create a buffer on the GPU and copy the data over */
1047 if (ALIGNED(inputPixels,CLPixelPacket))
1048 {
1049 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1050 }
1051 else
1052 {
1053 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1054 }
1055 /* create a CL buffer from image pixel buffer */
1056 length = inputImage->columns * inputImage->rows;
1057 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1058 if (clStatus != CL_SUCCESS)
1059 {
1060 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1061 goto cleanup;
1062 }
1063 }
1064
1065 /* create output */
1066 {
1067 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1068 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00001069 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00001070 {
1071 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1072 goto cleanup;
1073 }
1074 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1075 if (filteredPixels == (void *) NULL)
1076 {
1077 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1078 goto cleanup;
1079 }
1080
1081 if (ALIGNED(filteredPixels,CLPixelPacket))
1082 {
1083 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1084 hostPtr = filteredPixels;
1085 }
1086 else
1087 {
1088 mem_flags = CL_MEM_WRITE_ONLY;
1089 hostPtr = NULL;
1090 }
1091 /* create a CL buffer from image pixel buffer */
1092 length = inputImage->columns * inputImage->rows;
1093 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1094 if (clStatus != CL_SUCCESS)
1095 {
1096 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1097 goto cleanup;
1098 }
1099 }
1100
1101 /* create processing kernel */
1102 {
1103 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1104 kernel=AcquireKernelInfo(geometry);
1105 if (kernel == (KernelInfo *) NULL)
1106 {
1107 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
1108 goto cleanup;
1109 }
1110
1111 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
1112 if (clStatus != CL_SUCCESS)
1113 {
1114 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1115 goto cleanup;
1116 }
1117 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1118 if (clStatus != CL_SUCCESS)
1119 {
1120 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
1121 goto cleanup;
1122 }
1123
1124 for (i = 0; i < kernel->width; i++)
1125 {
1126 kernelBufferPtr[i] = (float) kernel->values[i];
1127 }
1128
1129 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1130 if (clStatus != CL_SUCCESS)
1131 {
1132 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
1133 goto cleanup;
1134 }
1135 }
1136
1137 {
1138 unsigned int offsetRows;
1139 unsigned int sec;
1140
1141 /* create temp buffer */
1142 {
1143 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
1144 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1145 if (clStatus != CL_SUCCESS)
1146 {
1147 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1148 goto cleanup;
1149 }
1150 }
1151
1152 /* get the OpenCL kernels */
1153 {
1154 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
1155 if (blurRowKernel == NULL)
1156 {
1157 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1158 goto cleanup;
1159 };
1160
1161 blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumnSection");
1162 if (blurColumnKernel == NULL)
1163 {
1164 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1165 goto cleanup;
1166 };
1167 }
1168
1169 for (sec = 0; sec < 2; sec++)
1170 {
1171 {
1172 /* need logic to decide this value */
1173 int chunkSize = 256;
1174
1175 {
1176 imageColumns = inputImage->columns;
1177 if (sec == 0)
1178 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
1179 else
1180 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
1181
1182 offsetRows = sec * inputImage->rows / 2;
1183
1184 kernelWidth = kernel->width;
1185
1186 /* set the kernel arguments */
1187 i = 0;
1188 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1189 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1190 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1191 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1192 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1193 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1194 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1195 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1196 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1197 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
1198 if (clStatus != CL_SUCCESS)
1199 {
1200 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1201 goto cleanup;
1202 }
1203 }
1204
1205 /* launch the kernel */
1206 {
1207 size_t gsize[2];
1208 size_t wsize[2];
1209
1210 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
1211 gsize[1] = imageRows;
1212 wsize[0] = chunkSize;
1213 wsize[1] = 1;
1214
1215 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1216 if (clStatus != CL_SUCCESS)
1217 {
1218 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1219 goto cleanup;
1220 }
1221 clFlush(queue);
1222 }
1223 }
1224
1225 {
1226 /* need logic to decide this value */
1227 int chunkSize = 256;
1228
1229 {
1230 imageColumns = inputImage->columns;
1231 if (sec == 0)
1232 imageRows = inputImage->rows / 2;
1233 else
1234 imageRows = (inputImage->rows - inputImage->rows / 2);
1235
1236 offsetRows = sec * inputImage->rows / 2;
1237
1238 kernelWidth = kernel->width;
1239
1240 /* set the kernel arguments */
1241 i = 0;
1242 clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1243 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1244 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
1245 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1246 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1247 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1248 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1249 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
1250 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1251 clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
1252 if (clStatus != CL_SUCCESS)
1253 {
1254 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1255 goto cleanup;
1256 }
1257 }
1258
1259 /* launch the kernel */
1260 {
1261 size_t gsize[2];
1262 size_t wsize[2];
1263
1264 gsize[0] = imageColumns;
1265 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
1266 wsize[0] = 1;
1267 wsize[1] = chunkSize;
1268
1269 clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1270 if (clStatus != CL_SUCCESS)
1271 {
1272 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1273 goto cleanup;
1274 }
1275 clFlush(queue);
1276 }
1277 }
1278 }
1279
1280 }
1281
1282 /* get result */
1283 if (ALIGNED(filteredPixels,CLPixelPacket))
1284 {
1285 length = inputImage->columns * inputImage->rows;
1286 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1287 }
1288 else
1289 {
1290 length = inputImage->columns * inputImage->rows;
1291 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1292 }
1293 if (clStatus != CL_SUCCESS)
1294 {
1295 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
1296 goto cleanup;
1297 }
1298
1299 outputReady = MagickTrue;
1300
1301cleanup:
1302 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1303 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1304 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1305 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1306 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1307 if (blurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurColumnKernel);
1308 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1309 if (kernel!=NULL) DestroyKernelInfo(kernel);
1310 if (outputReady == MagickFalse)
1311 {
1312 if (filteredImage != NULL)
1313 {
1314 DestroyImage(filteredImage);
1315 filteredImage = NULL;
1316 }
1317 }
1318 return filteredImage;
1319}
1320
1321/*
1322%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1323% %
1324% %
1325% %
1326% B l u r I m a g e w i t h O p e n C L %
1327% %
1328% %
1329% %
1330%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1331%
1332% BlurImage() blurs an image. We convolve the image with a Gaussian operator
1333% of the given radius and standard deviation (sigma). For reasonable results,
1334% the radius should be larger than sigma. Use a radius of 0 and BlurImage()
1335% selects a suitable radius for you.
1336%
1337% The format of the BlurImage method is:
1338%
1339% Image *BlurImage(const Image *image,const double radius,
1340% const double sigma,ExceptionInfo *exception)
1341% Image *BlurImageChannel(const Image *image,const ChannelType channel,
1342% const double radius,const double sigma,ExceptionInfo *exception)
1343%
1344% A description of each parameter follows:
1345%
1346% o image: the image.
1347%
1348% o channel: the channel type.
1349%
1350% o radius: the radius of the Gaussian, in pixels, not counting the center
1351% pixel.
1352%
1353% o sigma: the standard deviation of the Gaussian, in pixels.
1354%
1355% o exception: return any errors or warnings in this structure.
1356%
1357*/
1358
1359MagickExport
1360Image* AccelerateBlurImage(const Image *image, const ChannelType channel, const double radius, const double sigma,ExceptionInfo *exception)
1361{
1362 MagickBooleanType status;
1363 Image* filteredImage = NULL;
1364
1365 assert(image != NULL);
1366 assert(exception != (ExceptionInfo *) NULL);
1367
1368 status = checkOpenCLEnvironment(exception);
1369 if (status == MagickFalse)
1370 return NULL;
1371
dirk5dcb7622013-12-01 10:43:43 +00001372 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00001373 if (status == MagickFalse)
1374 return NULL;
1375
1376 if (splitImage(image) && (image->rows / 2 > radius))
1377 filteredImage = ComputeBlurImageSection(image, channel, radius, sigma, exception);
1378 else
1379 filteredImage = ComputeBlurImage(image, channel, radius, sigma, exception);
1380
1381 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1382 return filteredImage;
1383}
1384
1385
1386static Image* ComputeRadialBlurImage(const Image *inputImage, const ChannelType channel, const double angle, ExceptionInfo *exception)
1387{
1388
1389 MagickBooleanType outputReady;
1390 Image* filteredImage;
1391 MagickCLEnv clEnv;
1392
1393 cl_int clStatus;
1394 size_t global_work_size[2];
1395
1396 cl_context context;
1397 cl_mem_flags mem_flags;
1398 cl_mem inputImageBuffer, filteredImageBuffer, sinThetaBuffer, cosThetaBuffer;
1399 cl_kernel radialBlurKernel;
1400 cl_command_queue queue;
1401
1402 const void *inputPixels;
1403 void *filteredPixels;
1404 void* hostPtr;
1405 float* sinThetaPtr;
1406 float* cosThetaPtr;
1407 MagickSizeType length;
1408 unsigned int matte;
dirke19d0cc2013-12-01 10:07:42 +00001409 PixelInfo bias;
cristyf034abb2013-11-24 14:16:14 +00001410 cl_float4 biasPixel;
1411 cl_float2 blurCenter;
1412 float blurRadius;
1413 unsigned int cossin_theta_size;
1414 float offset, theta;
1415
1416 unsigned int i;
1417
1418 outputReady = MagickFalse;
1419 context = NULL;
1420 filteredImage = NULL;
1421 inputImageBuffer = NULL;
1422 filteredImageBuffer = NULL;
1423 sinThetaBuffer = NULL;
1424 cosThetaBuffer = NULL;
1425 queue = NULL;
1426 radialBlurKernel = NULL;
1427
1428
1429 clEnv = GetDefaultOpenCLEnv();
1430 context = GetOpenCLContext(clEnv);
1431
1432
1433 /* Create and initialize OpenCL buffers. */
1434
1435 inputPixels = NULL;
1436 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1437 if (inputPixels == (const void *) NULL)
1438 {
1439 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
1440 goto cleanup;
1441 }
1442
1443 /* If the host pointer is aligned to the size of CLPixelPacket,
1444 then use the host buffer directly from the GPU; otherwise,
1445 create a buffer on the GPU and copy the data over */
1446 if (ALIGNED(inputPixels,CLPixelPacket))
1447 {
1448 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1449 }
1450 else
1451 {
1452 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1453 }
1454 /* create a CL buffer from image pixel buffer */
1455 length = inputImage->columns * inputImage->rows;
1456 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1457 if (clStatus != CL_SUCCESS)
1458 {
1459 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1460 goto cleanup;
1461 }
1462
1463
1464 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1465 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00001466 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00001467 {
1468 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1469 goto cleanup;
1470 }
1471 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1472 if (filteredPixels == (void *) NULL)
1473 {
1474 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1475 goto cleanup;
1476 }
1477
1478 if (ALIGNED(filteredPixels,CLPixelPacket))
1479 {
1480 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1481 hostPtr = filteredPixels;
1482 }
1483 else
1484 {
1485 mem_flags = CL_MEM_WRITE_ONLY;
1486 hostPtr = NULL;
1487 }
1488 /* create a CL buffer from image pixel buffer */
1489 length = inputImage->columns * inputImage->rows;
1490 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1491 if (clStatus != CL_SUCCESS)
1492 {
1493 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1494 goto cleanup;
1495 }
1496
1497 blurCenter.s[0] = (float) (inputImage->columns-1)/2.0;
1498 blurCenter.s[1] = (float) (inputImage->rows-1)/2.0;
1499 blurRadius=hypot(blurCenter.s[0],blurCenter.s[1]);
1500 cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
1501
1502 /* create a buffer for sin_theta and cos_theta */
1503 sinThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1504 if (clStatus != CL_SUCCESS)
1505 {
1506 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1507 goto cleanup;
1508 }
1509 cosThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1510 if (clStatus != CL_SUCCESS)
1511 {
1512 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1513 goto cleanup;
1514 }
1515
1516
1517 queue = AcquireOpenCLCommandQueue(clEnv);
1518 sinThetaPtr = (float*) clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1519 if (clStatus != CL_SUCCESS)
1520 {
1521 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
1522 goto cleanup;
1523 }
1524
1525 cosThetaPtr = (float*) clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1526 if (clStatus != CL_SUCCESS)
1527 {
1528 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
1529 goto cleanup;
1530 }
1531
1532 theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
1533 offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
1534 for (i=0; i < (ssize_t) cossin_theta_size; i++)
1535 {
1536 cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
1537 sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
1538 }
1539
1540 clStatus = clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
1541 clStatus |= clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
1542 if (clStatus != CL_SUCCESS)
1543 {
1544 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
1545 goto cleanup;
1546 }
1547
1548 /* get the OpenCL kernel */
1549 radialBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RadialBlur");
1550 if (radialBlurKernel == NULL)
1551 {
1552 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1553 goto cleanup;
1554 }
1555
1556
1557 /* set the kernel arguments */
1558 i = 0;
1559 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1560 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1561
dirke19d0cc2013-12-01 10:07:42 +00001562 GetPixelInfo(inputImage,&bias);
cristyf034abb2013-11-24 14:16:14 +00001563 biasPixel.s[0] = bias.red;
1564 biasPixel.s[1] = bias.green;
1565 biasPixel.s[2] = bias.blue;
dirke19d0cc2013-12-01 10:07:42 +00001566 biasPixel.s[3] = bias.alpha;
cristyf034abb2013-11-24 14:16:14 +00001567 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel);
1568 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel);
1569
dirke19d0cc2013-12-01 10:07:42 +00001570 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +00001571 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte);
1572
1573 clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter);
1574
1575 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
1576 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
1577 clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
1578 if (clStatus != CL_SUCCESS)
1579 {
1580 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1581 goto cleanup;
1582 }
1583
1584
1585 global_work_size[0] = inputImage->columns;
1586 global_work_size[1] = inputImage->rows;
1587 /* launch the kernel */
1588 clStatus = clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
1589 if (clStatus != CL_SUCCESS)
1590 {
1591 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1592 goto cleanup;
1593 }
1594 clFlush(queue);
1595
1596 if (ALIGNED(filteredPixels,CLPixelPacket))
1597 {
1598 length = inputImage->columns * inputImage->rows;
1599 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1600 }
1601 else
1602 {
1603 length = inputImage->columns * inputImage->rows;
1604 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1605 }
1606 if (clStatus != CL_SUCCESS)
1607 {
1608 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
1609 goto cleanup;
1610 }
1611 outputReady = MagickTrue;
1612
1613cleanup:
1614 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1615 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1616 if (sinThetaBuffer!=NULL) clReleaseMemObject(sinThetaBuffer);
1617 if (cosThetaBuffer!=NULL) clReleaseMemObject(cosThetaBuffer);
1618 if (radialBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, radialBlurKernel);
1619 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1620 if (outputReady == MagickFalse)
1621 {
1622 if (filteredImage != NULL)
1623 {
1624 DestroyImage(filteredImage);
1625 filteredImage = NULL;
1626 }
1627 }
1628 return filteredImage;
1629}
1630
1631/*
1632%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1633% %
1634% %
1635% %
1636% R a d i a l B l u r I m a g e w i t h O p e n C L %
1637% %
1638% %
1639% %
1640%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1641%
1642% RadialBlurImage() applies a radial blur to the image.
1643%
1644% Andrew Protano contributed this effect.
1645%
1646% The format of the RadialBlurImage method is:
1647%
1648% Image *RadialBlurImage(const Image *image,const double angle,
1649% ExceptionInfo *exception)
1650% Image *RadialBlurImageChannel(const Image *image,const ChannelType channel,
1651% const double angle,ExceptionInfo *exception)
1652%
1653% A description of each parameter follows:
1654%
1655% o image: the image.
1656%
1657% o channel: the channel type.
1658%
1659% o angle: the angle of the radial blur.
1660%
1661% o exception: return any errors or warnings in this structure.
1662%
1663*/
1664
1665MagickExport
1666Image* AccelerateRadialBlurImage(const Image *image, const ChannelType channel, const double angle, ExceptionInfo *exception)
1667{
1668 MagickBooleanType status;
1669 Image* filteredImage;
1670
1671
1672 assert(image != NULL);
1673 assert(exception != NULL);
1674
1675 status = checkOpenCLEnvironment(exception);
1676 if (status == MagickFalse)
1677 return NULL;
1678
dirk5dcb7622013-12-01 10:43:43 +00001679 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00001680 if (status == MagickFalse)
1681 return NULL;
1682
1683 filteredImage = ComputeRadialBlurImage(image, channel, angle, exception);
1684 OpenCLLogException(__FUNCTION__,__LINE__,exception);
1685 return filteredImage;
1686}
1687
1688
1689
1690static Image* ComputeUnsharpMaskImage(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
1691 const double gain,const double threshold,ExceptionInfo *exception)
1692{
1693 MagickBooleanType outputReady = MagickFalse;
1694 Image* filteredImage = NULL;
1695 MagickCLEnv clEnv = NULL;
1696
1697 cl_int clStatus;
1698
1699 const void *inputPixels;
1700 void *filteredPixels;
1701 cl_mem_flags mem_flags;
1702
1703 KernelInfo *kernel = NULL;
1704 char geometry[MaxTextExtent];
1705
1706 cl_context context = NULL;
1707 cl_mem inputImageBuffer = NULL;
1708 cl_mem filteredImageBuffer = NULL;
1709 cl_mem tempImageBuffer = NULL;
1710 cl_mem imageKernelBuffer = NULL;
1711 cl_kernel blurRowKernel = NULL;
1712 cl_kernel unsharpMaskBlurColumnKernel = NULL;
1713 cl_command_queue queue = NULL;
1714
1715 void* hostPtr;
1716 float* kernelBufferPtr;
1717 MagickSizeType length;
1718 unsigned int kernelWidth;
1719 float fGain;
1720 float fThreshold;
1721 unsigned int imageColumns, imageRows;
1722 int chunkSize;
1723 unsigned int i;
1724
1725 clEnv = GetDefaultOpenCLEnv();
1726 context = GetOpenCLContext(clEnv);
1727 queue = AcquireOpenCLCommandQueue(clEnv);
1728
1729 /* Create and initialize OpenCL buffers. */
1730 {
1731 inputPixels = NULL;
1732 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
1733 if (inputPixels == (const void *) NULL)
1734 {
1735 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
1736 goto cleanup;
1737 }
1738
1739 /* If the host pointer is aligned to the size of CLPixelPacket,
1740 then use the host buffer directly from the GPU; otherwise,
1741 create a buffer on the GPU and copy the data over */
1742 if (ALIGNED(inputPixels,CLPixelPacket))
1743 {
1744 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1745 }
1746 else
1747 {
1748 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1749 }
1750 /* create a CL buffer from image pixel buffer */
1751 length = inputImage->columns * inputImage->rows;
1752 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1753 if (clStatus != CL_SUCCESS)
1754 {
1755 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1756 goto cleanup;
1757 }
1758 }
1759
1760 /* create output */
1761 {
1762 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
1763 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00001764 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00001765 {
1766 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1767 goto cleanup;
1768 }
1769 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
1770 if (filteredPixels == (void *) NULL)
1771 {
1772 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1773 goto cleanup;
1774 }
1775
1776 if (ALIGNED(filteredPixels,CLPixelPacket))
1777 {
1778 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1779 hostPtr = filteredPixels;
1780 }
1781 else
1782 {
1783 mem_flags = CL_MEM_WRITE_ONLY;
1784 hostPtr = NULL;
1785 }
1786
1787 /* create a CL buffer from image pixel buffer */
1788 length = inputImage->columns * inputImage->rows;
1789 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1790 if (clStatus != CL_SUCCESS)
1791 {
1792 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1793 goto cleanup;
1794 }
1795 }
1796
1797 /* create the blur kernel */
1798 {
1799 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1800 kernel=AcquireKernelInfo(geometry);
1801 if (kernel == (KernelInfo *) NULL)
1802 {
1803 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
1804 goto cleanup;
1805 }
1806
1807 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
1808 if (clStatus != CL_SUCCESS)
1809 {
1810 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1811 goto cleanup;
1812 }
1813
1814
1815 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1816 if (clStatus != CL_SUCCESS)
1817 {
1818 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
1819 goto cleanup;
1820 }
1821 for (i = 0; i < kernel->width; i++)
1822 {
1823 kernelBufferPtr[i] = (float) kernel->values[i];
1824 }
1825 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1826 if (clStatus != CL_SUCCESS)
1827 {
1828 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
1829 goto cleanup;
1830 }
1831 }
1832
1833 {
1834 /* create temp buffer */
1835 {
1836 length = inputImage->columns * inputImage->rows;
1837 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1838 if (clStatus != CL_SUCCESS)
1839 {
1840 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
1841 goto cleanup;
1842 }
1843 }
1844
1845 /* get the opencl kernel */
1846 {
1847 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
1848 if (blurRowKernel == NULL)
1849 {
1850 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1851 goto cleanup;
1852 };
1853
1854 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
1855 if (unsharpMaskBlurColumnKernel == NULL)
1856 {
1857 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
1858 goto cleanup;
1859 };
1860 }
1861
1862 {
1863 chunkSize = 256;
1864
1865 imageColumns = inputImage->columns;
1866 imageRows = inputImage->rows;
1867
1868 kernelWidth = kernel->width;
1869
1870 /* set the kernel arguments */
1871 i = 0;
1872 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1873 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1874 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1875 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1876 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1877 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1878 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1879 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1880 if (clStatus != CL_SUCCESS)
1881 {
1882 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1883 goto cleanup;
1884 }
1885 }
1886
1887 /* launch the kernel */
1888 {
1889 size_t gsize[2];
1890 size_t wsize[2];
1891
1892 gsize[0] = chunkSize*((inputImage->columns+chunkSize-1)/chunkSize);
1893 gsize[1] = inputImage->rows;
1894 wsize[0] = chunkSize;
1895 wsize[1] = 1;
1896
1897 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1898 if (clStatus != CL_SUCCESS)
1899 {
1900 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1901 goto cleanup;
1902 }
1903 clFlush(queue);
1904 }
1905
1906
1907 {
1908 chunkSize = 256;
1909 imageColumns = inputImage->columns;
1910 imageRows = inputImage->rows;
1911 kernelWidth = kernel->width;
1912 fGain = (float)gain;
1913 fThreshold = (float)threshold;
1914
1915 i = 0;
1916 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
1917 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1918 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1919 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1920 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1921 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
1922 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
1923 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
1924 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1925 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1926 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
1927 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
1928
1929 if (clStatus != CL_SUCCESS)
1930 {
1931 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
1932 goto cleanup;
1933 }
1934 }
1935
1936 /* launch the kernel */
1937 {
1938 size_t gsize[2];
1939 size_t wsize[2];
1940
1941 gsize[0] = inputImage->columns;
1942 gsize[1] = chunkSize*((inputImage->rows+chunkSize-1)/chunkSize);
1943 wsize[0] = 1;
1944 wsize[1] = chunkSize;
1945
1946 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1947 if (clStatus != CL_SUCCESS)
1948 {
1949 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
1950 goto cleanup;
1951 }
1952 clFlush(queue);
1953 }
1954
1955 }
1956
1957 /* get result */
1958 if (ALIGNED(filteredPixels,CLPixelPacket))
1959 {
1960 length = inputImage->columns * inputImage->rows;
1961 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1962 }
1963 else
1964 {
1965 length = inputImage->columns * inputImage->rows;
1966 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1967 }
1968 if (clStatus != CL_SUCCESS)
1969 {
1970 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
1971 goto cleanup;
1972 }
1973
1974 outputReady = MagickTrue;
1975
1976cleanup:
1977 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
1978 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
1979 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
1980 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
1981 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
1982 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
1983 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
1984 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
1985 if (outputReady == MagickFalse)
1986 {
1987 if (filteredImage != NULL)
1988 {
1989 DestroyImage(filteredImage);
1990 filteredImage = NULL;
1991 }
1992 }
1993 return filteredImage;
1994}
1995
1996
1997static Image* ComputeUnsharpMaskImageSection(const Image *inputImage, const ChannelType channel,const double radius,const double sigma,
1998 const double gain,const double threshold,ExceptionInfo *exception)
1999{
2000 MagickBooleanType outputReady = MagickFalse;
2001 Image* filteredImage = NULL;
2002 MagickCLEnv clEnv = NULL;
2003
2004 cl_int clStatus;
2005
2006 const void *inputPixels;
2007 void *filteredPixels;
2008 cl_mem_flags mem_flags;
2009
2010 KernelInfo *kernel = NULL;
2011 char geometry[MaxTextExtent];
2012
2013 cl_context context = NULL;
2014 cl_mem inputImageBuffer = NULL;
2015 cl_mem filteredImageBuffer = NULL;
2016 cl_mem tempImageBuffer = NULL;
2017 cl_mem imageKernelBuffer = NULL;
2018 cl_kernel blurRowKernel = NULL;
2019 cl_kernel unsharpMaskBlurColumnKernel = NULL;
2020 cl_command_queue queue = NULL;
2021
2022 void* hostPtr;
2023 float* kernelBufferPtr;
2024 MagickSizeType length;
2025 unsigned int kernelWidth;
2026 float fGain;
2027 float fThreshold;
2028 unsigned int imageColumns, imageRows;
2029 int chunkSize;
2030 unsigned int i;
2031
2032 clEnv = GetDefaultOpenCLEnv();
2033 context = GetOpenCLContext(clEnv);
2034 queue = AcquireOpenCLCommandQueue(clEnv);
2035
2036 /* Create and initialize OpenCL buffers. */
2037 {
2038 inputPixels = NULL;
2039 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2040 if (inputPixels == (const void *) NULL)
2041 {
2042 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
2043 goto cleanup;
2044 }
2045
2046 /* If the host pointer is aligned to the size of CLPixelPacket,
2047 then use the host buffer directly from the GPU; otherwise,
2048 create a buffer on the GPU and copy the data over */
2049 if (ALIGNED(inputPixels,CLPixelPacket))
2050 {
2051 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2052 }
2053 else
2054 {
2055 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2056 }
2057 /* create a CL buffer from image pixel buffer */
2058 length = inputImage->columns * inputImage->rows;
2059 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2060 if (clStatus != CL_SUCCESS)
2061 {
2062 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2063 goto cleanup;
2064 }
2065 }
2066
2067 /* create output */
2068 {
2069 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
2070 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00002071 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00002072 {
2073 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2074 goto cleanup;
2075 }
2076 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2077 if (filteredPixels == (void *) NULL)
2078 {
2079 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2080 goto cleanup;
2081 }
2082
2083 if (ALIGNED(filteredPixels,CLPixelPacket))
2084 {
2085 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2086 hostPtr = filteredPixels;
2087 }
2088 else
2089 {
2090 mem_flags = CL_MEM_WRITE_ONLY;
2091 hostPtr = NULL;
2092 }
2093
2094 /* create a CL buffer from image pixel buffer */
2095 length = inputImage->columns * inputImage->rows;
2096 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2097 if (clStatus != CL_SUCCESS)
2098 {
2099 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2100 goto cleanup;
2101 }
2102 }
2103
2104 /* create the blur kernel */
2105 {
2106 (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
2107 kernel=AcquireKernelInfo(geometry);
2108 if (kernel == (KernelInfo *) NULL)
2109 {
2110 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
2111 goto cleanup;
2112 }
2113
2114 imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
2115 if (clStatus != CL_SUCCESS)
2116 {
2117 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2118 goto cleanup;
2119 }
2120
2121
2122 kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
2123 if (clStatus != CL_SUCCESS)
2124 {
2125 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
2126 goto cleanup;
2127 }
2128 for (i = 0; i < kernel->width; i++)
2129 {
2130 kernelBufferPtr[i] = (float) kernel->values[i];
2131 }
2132 clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
2133 if (clStatus != CL_SUCCESS)
2134 {
2135 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
2136 goto cleanup;
2137 }
2138 }
2139
2140 {
2141 unsigned int offsetRows;
2142 unsigned int sec;
2143
2144 /* create temp buffer */
2145 {
2146 length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
2147 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
2148 if (clStatus != CL_SUCCESS)
2149 {
2150 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2151 goto cleanup;
2152 }
2153 }
2154
2155 /* get the opencl kernel */
2156 {
2157 blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
2158 if (blurRowKernel == NULL)
2159 {
2160 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2161 goto cleanup;
2162 };
2163
2164 unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumnSection");
2165 if (unsharpMaskBlurColumnKernel == NULL)
2166 {
2167 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2168 goto cleanup;
2169 };
2170 }
2171
2172 for (sec = 0; sec < 2; sec++)
2173 {
2174 {
2175 chunkSize = 256;
2176
2177 imageColumns = inputImage->columns;
2178 if (sec == 0)
2179 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
2180 else
2181 imageRows = (inputImage->rows - inputImage->rows / 2) + (kernel->width-1) / 2;
2182
2183 offsetRows = sec * inputImage->rows / 2;
2184
2185 kernelWidth = kernel->width;
2186
2187 /* set the kernel arguments */
2188 i = 0;
2189 clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2190 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2191 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
2192 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2193 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2194 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2195 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2196 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
2197 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2198 clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
2199 if (clStatus != CL_SUCCESS)
2200 {
2201 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2202 goto cleanup;
2203 }
2204 }
2205 /* launch the kernel */
2206 {
2207 size_t gsize[2];
2208 size_t wsize[2];
2209
2210 gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
2211 gsize[1] = imageRows;
2212 wsize[0] = chunkSize;
2213 wsize[1] = 1;
2214
2215 clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2216 if (clStatus != CL_SUCCESS)
2217 {
2218 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2219 goto cleanup;
2220 }
2221 clFlush(queue);
2222 }
2223
2224
2225 {
2226 chunkSize = 256;
2227
2228 imageColumns = inputImage->columns;
2229 if (sec == 0)
2230 imageRows = inputImage->rows / 2 + (kernel->width-1) / 2;
2231 else
2232 imageRows = (inputImage->rows - inputImage->rows / 2);
2233
2234 offsetRows = sec * inputImage->rows / 2;
2235
2236 kernelWidth = kernel->width;
2237
2238 fGain = (float)gain;
2239 fThreshold = (float)threshold;
2240
2241 i = 0;
2242 clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
2243 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2244 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2245 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2246 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2247 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
2248 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
2249 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
2250 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2251 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2252 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
2253 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
2254 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2255 clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
2256
2257 if (clStatus != CL_SUCCESS)
2258 {
2259 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2260 goto cleanup;
2261 }
2262 }
2263
2264 /* launch the kernel */
2265 {
2266 size_t gsize[2];
2267 size_t wsize[2];
2268
2269 gsize[0] = imageColumns;
2270 gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
2271 wsize[0] = 1;
2272 wsize[1] = chunkSize;
2273
2274 clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2275 if (clStatus != CL_SUCCESS)
2276 {
2277 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2278 goto cleanup;
2279 }
2280 clFlush(queue);
2281 }
2282 }
2283 }
2284
2285 /* get result */
2286 if (ALIGNED(filteredPixels,CLPixelPacket))
2287 {
2288 length = inputImage->columns * inputImage->rows;
2289 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2290 }
2291 else
2292 {
2293 length = inputImage->columns * inputImage->rows;
2294 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2295 }
2296 if (clStatus != CL_SUCCESS)
2297 {
2298 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
2299 goto cleanup;
2300 }
2301
2302 outputReady = MagickTrue;
2303
2304cleanup:
2305 if (kernel != NULL) kernel=DestroyKernelInfo(kernel);
2306 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2307 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2308 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2309 if (imageKernelBuffer!=NULL) clReleaseMemObject(imageKernelBuffer);
2310 if (blurRowKernel!=NULL) RelinquishOpenCLKernel(clEnv, blurRowKernel);
2311 if (unsharpMaskBlurColumnKernel!=NULL) RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
2312 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2313 if (outputReady == MagickFalse)
2314 {
2315 if (filteredImage != NULL)
2316 {
2317 DestroyImage(filteredImage);
2318 filteredImage = NULL;
2319 }
2320 }
2321 return filteredImage;
2322}
2323
2324
2325/*
2326%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2327% %
2328% %
2329% %
2330% U n s h a r p M a s k I m a g e w i t h O p e n C L %
2331% %
2332% %
2333% %
2334%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2335%
2336% UnsharpMaskImage() sharpens one or more image channels. We convolve the
2337% image with a Gaussian operator of the given radius and standard deviation
2338% (sigma). For reasonable results, radius should be larger than sigma. Use a
2339% radius of 0 and UnsharpMaskImage() selects a suitable radius for you.
2340%
2341% The format of the UnsharpMaskImage method is:
2342%
2343% Image *UnsharpMaskImage(const Image *image,const double radius,
2344% const double sigma,const double amount,const double threshold,
2345% ExceptionInfo *exception)
2346% Image *UnsharpMaskImageChannel(const Image *image,
2347% const ChannelType channel,const double radius,const double sigma,
2348% const double gain,const double threshold,ExceptionInfo *exception)
2349%
2350% A description of each parameter follows:
2351%
2352% o image: the image.
2353%
2354% o channel: the channel type.
2355%
2356% o radius: the radius of the Gaussian, in pixels, not counting the center
2357% pixel.
2358%
2359% o sigma: the standard deviation of the Gaussian, in pixels.
2360%
2361% o gain: the percentage of the difference between the original and the
2362% blur image that is added back into the original.
2363%
2364% o threshold: the threshold in pixels needed to apply the diffence gain.
2365%
2366% o exception: return any errors or warnings in this structure.
2367%
2368*/
2369
2370
2371MagickExport
2372Image* AccelerateUnsharpMaskImage(const Image *image, const ChannelType channel,const double radius,const double sigma,
2373 const double gain,const double threshold,ExceptionInfo *exception)
2374{
2375 MagickBooleanType status;
2376 Image* filteredImage;
2377
2378
2379 assert(image != NULL);
2380 assert(exception != NULL);
2381
2382 status = checkOpenCLEnvironment(exception);
2383 if (status == MagickFalse)
2384 return NULL;
2385
dirk5dcb7622013-12-01 10:43:43 +00002386 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00002387 if (status == MagickFalse)
2388 return NULL;
2389
2390 if (splitImage(image) && (image->rows / 2 > radius))
2391 filteredImage = ComputeUnsharpMaskImageSection(image,channel,radius,sigma,gain,threshold,exception);
2392 else
2393 filteredImage = ComputeUnsharpMaskImage(image,channel,radius,sigma,gain,threshold,exception);
2394 OpenCLLogException(__FUNCTION__,__LINE__,exception);
2395 return filteredImage;
2396
2397}
2398
cristyf034abb2013-11-24 14:16:14 +00002399static MagickBooleanType resizeHorizontalFilter(cl_mem inputImage
2400 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2401 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2402 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float xFactor
2403 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2404{
2405 MagickBooleanType status = MagickFalse;
2406
2407 float scale, support;
2408 unsigned int i;
2409 cl_kernel horizontalKernel = NULL;
2410 cl_int clStatus;
2411 size_t global_work_size[2];
2412 size_t local_work_size[2];
2413 int resizeFilterType, resizeWindowType;
2414 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2415 size_t totalLocalMemorySize;
2416 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2417 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2418 size_t deviceLocalMemorySize;
2419 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2420
2421 const unsigned int workgroupSize = 256;
2422 unsigned int pixelPerWorkgroup;
2423 unsigned int chunkSize;
2424
2425 /*
2426 Apply filter to resize vertically from image to resize image.
2427 */
cristye85d0f72013-11-27 02:25:43 +00002428 scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
cristyf034abb2013-11-24 14:16:14 +00002429 support=scale*GetResizeFilterSupport(resizeFilter);
2430 if (support < 0.5)
2431 {
2432 /*
2433 Support too small even for nearest neighbour: Reduce to point
2434 sampling.
2435 */
2436 support=(MagickRealType) 0.5;
2437 scale=1.0;
2438 }
2439 scale=PerceptibleReciprocal(scale);
2440
2441 if (resizedColumns < workgroupSize)
2442 {
2443 chunkSize = 32;
2444 pixelPerWorkgroup = 32;
2445 }
2446 else
2447 {
2448 chunkSize = workgroupSize;
2449 pixelPerWorkgroup = workgroupSize;
2450 }
2451
2452 /* get the local memory size supported by the device */
2453 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2454
dirk5dcb7622013-12-01 10:43:43 +00002455DisableMSCWarning(4127)
cristyf034abb2013-11-24 14:16:14 +00002456 while(1)
dirk5dcb7622013-12-01 10:43:43 +00002457RestoreMSCWarning
cristyf034abb2013-11-24 14:16:14 +00002458 {
2459 /* calculate the local memory size needed per workgroup */
2460 cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
2461 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
2462 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2463 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2464 totalLocalMemorySize = imageCacheLocalMemorySize;
2465
2466 /* local size for the pixel accumulator */
2467 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2468 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2469
2470 /* local memory size for the weight accumulator */
2471 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2472 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2473
2474 /* local memory size for the gamma accumulator */
2475 if (matte == 0)
2476 gammaAccumulatorLocalMemorySize = sizeof(float);
2477 else
2478 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2479 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2480
2481 if (totalLocalMemorySize <= deviceLocalMemorySize)
2482 break;
2483 else
2484 {
2485 pixelPerWorkgroup = pixelPerWorkgroup/2;
2486 chunkSize = chunkSize/2;
2487 if (pixelPerWorkgroup == 0
2488 || chunkSize == 0)
2489 {
2490 /* quit, fallback to CPU */
2491 goto cleanup;
2492 }
2493 }
2494 }
2495
2496 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2497 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2498
2499
2500 if (resizeFilterType == SincFastWeightingFunction
2501 && resizeWindowType == SincFastWeightingFunction)
2502 {
2503 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilterSinc");
2504 }
2505 else
2506 {
2507 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilter");
2508 }
2509 if (horizontalKernel == NULL)
2510 {
2511 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2512 goto cleanup;
2513 }
2514
2515 i = 0;
2516 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2517 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2518 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2519 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2520 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
2521 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2522
2523 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2524 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2525
2526 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2527 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2528 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2529
2530 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2531 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2532
2533 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2534 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2535
2536 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2537 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2538
2539 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2540 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2541
2542
2543 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2544 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2545 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2546 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2547
2548
2549 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2550 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2551 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2552
2553 if (clStatus != CL_SUCCESS)
2554 {
2555 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2556 goto cleanup;
2557 }
2558
2559 global_work_size[0] = (resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2560 global_work_size[1] = resizedRows;
2561
2562 local_work_size[0] = workgroupSize;
2563 local_work_size[1] = 1;
2564 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2565 if (clStatus != CL_SUCCESS)
2566 {
2567 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2568 goto cleanup;
2569 }
2570 clFlush(queue);
2571 status = MagickTrue;
2572
2573
2574cleanup:
2575 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2576
2577 return status;
2578}
2579
2580
2581static MagickBooleanType resizeVerticalFilter(cl_mem inputImage
2582 , const unsigned int inputImageColumns, const unsigned int inputImageRows, const unsigned int matte
2583 , cl_mem resizedImage, const unsigned int resizedColumns, const unsigned int resizedRows
2584 , const ResizeFilter* resizeFilter, cl_mem resizeFilterCubicCoefficients, const float yFactor
2585 , MagickCLEnv clEnv, cl_command_queue queue, ExceptionInfo *exception)
2586{
2587 MagickBooleanType status = MagickFalse;
2588
2589 float scale, support;
2590 unsigned int i;
2591 cl_kernel horizontalKernel = NULL;
2592 cl_int clStatus;
2593 size_t global_work_size[2];
2594 size_t local_work_size[2];
2595 int resizeFilterType, resizeWindowType;
2596 float resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur;
2597 size_t totalLocalMemorySize;
2598 size_t imageCacheLocalMemorySize, pixelAccumulatorLocalMemorySize
2599 , weightAccumulatorLocalMemorySize, gammaAccumulatorLocalMemorySize;
2600 size_t deviceLocalMemorySize;
2601 int cacheRangeStart, cacheRangeEnd, numCachedPixels;
2602
2603 const unsigned int workgroupSize = 256;
2604 unsigned int pixelPerWorkgroup;
2605 unsigned int chunkSize;
2606
2607 /*
2608 Apply filter to resize vertically from image to resize image.
2609 */
cristye85d0f72013-11-27 02:25:43 +00002610 scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
cristyf034abb2013-11-24 14:16:14 +00002611 support=scale*GetResizeFilterSupport(resizeFilter);
2612 if (support < 0.5)
2613 {
2614 /*
2615 Support too small even for nearest neighbour: Reduce to point
2616 sampling.
2617 */
2618 support=(MagickRealType) 0.5;
2619 scale=1.0;
2620 }
2621 scale=PerceptibleReciprocal(scale);
2622
2623 if (resizedRows < workgroupSize)
2624 {
2625 chunkSize = 32;
2626 pixelPerWorkgroup = 32;
2627 }
2628 else
2629 {
2630 chunkSize = workgroupSize;
2631 pixelPerWorkgroup = workgroupSize;
2632 }
2633
2634 /* get the local memory size supported by the device */
2635 deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2636
dirk5dcb7622013-12-01 10:43:43 +00002637DisableMSCWarning(4127)
cristyf034abb2013-11-24 14:16:14 +00002638 while(1)
dirk5dcb7622013-12-01 10:43:43 +00002639RestoreMSCWarning
cristyf034abb2013-11-24 14:16:14 +00002640 {
2641 /* calculate the local memory size needed per workgroup */
2642 cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
2643 cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
2644 numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
2645 imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
2646 totalLocalMemorySize = imageCacheLocalMemorySize;
2647
2648 /* local size for the pixel accumulator */
2649 pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
2650 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
2651
2652 /* local memory size for the weight accumulator */
2653 weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2654 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
2655
2656 /* local memory size for the gamma accumulator */
2657 if (matte == 0)
2658 gammaAccumulatorLocalMemorySize = sizeof(float);
2659 else
2660 gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
2661 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
2662
2663 if (totalLocalMemorySize <= deviceLocalMemorySize)
2664 break;
2665 else
2666 {
2667 pixelPerWorkgroup = pixelPerWorkgroup/2;
2668 chunkSize = chunkSize/2;
2669 if (pixelPerWorkgroup == 0
2670 || chunkSize == 0)
2671 {
2672 /* quit, fallback to CPU */
2673 goto cleanup;
2674 }
2675 }
2676 }
2677
2678 resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
2679 resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
2680
2681 if (resizeFilterType == SincFastWeightingFunction
2682 && resizeWindowType == SincFastWeightingFunction)
2683 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilterSinc");
2684 else
2685 horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilter");
2686
2687 if (horizontalKernel == NULL)
2688 {
2689 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
2690 goto cleanup;
2691 }
2692
2693 i = 0;
2694 clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
2695 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
2696 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
2697 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
2698 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor);
2699 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
2700
2701 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
2702 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
2703
2704 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
2705 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
2706 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
2707
2708 resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
2709 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
2710
2711 resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
2712 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
2713
2714 resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
2715 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
2716
2717 resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
2718 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
2719
2720
2721 clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
2722 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
2723 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
2724 clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
2725
2726
2727 clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
2728 clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
2729 clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
2730
2731 if (clStatus != CL_SUCCESS)
2732 {
2733 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
2734 goto cleanup;
2735 }
2736
2737 global_work_size[0] = resizedColumns;
2738 global_work_size[1] = (resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
2739
2740 local_work_size[0] = 1;
2741 local_work_size[1] = workgroupSize;
2742 clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
2743 if (clStatus != CL_SUCCESS)
2744 {
2745 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
2746 goto cleanup;
2747 }
2748 clFlush(queue);
2749 status = MagickTrue;
2750
2751
2752cleanup:
2753 if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
2754
2755 return status;
2756}
2757
2758
2759
2760static Image* ComputeResizeImage(const Image* inputImage, const size_t resizedColumns, const size_t resizedRows
2761 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
2762{
2763
2764 MagickBooleanType outputReady = MagickFalse;
2765 Image* filteredImage = NULL;
2766 MagickCLEnv clEnv = NULL;
2767
2768 cl_int clStatus;
2769 MagickBooleanType status;
2770 const void *inputPixels;
2771 void* filteredPixels;
2772 void* hostPtr;
2773 const MagickRealType* resizeFilterCoefficient;
2774 float* mappedCoefficientBuffer;
2775 float xFactor, yFactor;
2776 MagickSizeType length;
2777
2778 cl_mem_flags mem_flags;
2779 cl_context context = NULL;
2780 cl_mem inputImageBuffer = NULL;
2781 cl_mem tempImageBuffer = NULL;
2782 cl_mem filteredImageBuffer = NULL;
2783 cl_mem cubicCoefficientsBuffer = NULL;
2784 cl_command_queue queue = NULL;
2785
2786 unsigned int i;
2787
2788 clEnv = GetDefaultOpenCLEnv();
2789 context = GetOpenCLContext(clEnv);
2790
2791 /* Create and initialize OpenCL buffers. */
2792 inputPixels = NULL;
2793 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
2794 if (inputPixels == (const void *) NULL)
2795 {
2796 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
2797 goto cleanup;
2798 }
2799
2800 /* If the host pointer is aligned to the size of CLPixelPacket,
2801 then use the host buffer directly from the GPU; otherwise,
2802 create a buffer on the GPU and copy the data over */
2803 if (ALIGNED(inputPixels,CLPixelPacket))
2804 {
2805 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2806 }
2807 else
2808 {
2809 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2810 }
2811 /* create a CL buffer from image pixel buffer */
2812 length = inputImage->columns * inputImage->rows;
2813 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2814 if (clStatus != CL_SUCCESS)
2815 {
2816 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2817 goto cleanup;
2818 }
2819
2820 cubicCoefficientsBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
2821 if (clStatus != CL_SUCCESS)
2822 {
2823 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2824 goto cleanup;
2825 }
2826 queue = AcquireOpenCLCommandQueue(clEnv);
2827 mappedCoefficientBuffer = (float*)clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float)
2828 , 0, NULL, NULL, &clStatus);
2829 if (clStatus != CL_SUCCESS)
2830 {
2831 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
2832 goto cleanup;
2833 }
2834 resizeFilterCoefficient = GetResizeFilterCoefficient(resizeFilter);
2835 for (i = 0; i < 7; i++)
2836 {
2837 mappedCoefficientBuffer[i] = (float) resizeFilterCoefficient[i];
2838 }
2839 clStatus = clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL);
2840 if (clStatus != CL_SUCCESS)
2841 {
2842 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
2843 goto cleanup;
2844 }
2845
2846 filteredImage = CloneImage(inputImage,resizedColumns,resizedRows,MagickTrue,exception);
2847 if (filteredImage == NULL)
2848 goto cleanup;
2849
dirke19d0cc2013-12-01 10:07:42 +00002850 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00002851 {
2852 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2853 goto cleanup;
2854 }
2855 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
2856 if (filteredPixels == (void *) NULL)
2857 {
2858 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2859 goto cleanup;
2860 }
2861
2862 if (ALIGNED(filteredPixels,CLPixelPacket))
2863 {
2864 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2865 hostPtr = filteredPixels;
2866 }
2867 else
2868 {
2869 mem_flags = CL_MEM_WRITE_ONLY;
2870 hostPtr = NULL;
2871 }
2872
2873 /* create a CL buffer from image pixel buffer */
2874 length = filteredImage->columns * filteredImage->rows;
2875 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2876 if (clStatus != CL_SUCCESS)
2877 {
2878 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2879 goto cleanup;
2880 }
2881
2882 xFactor=(float) resizedColumns/(float) inputImage->columns;
2883 yFactor=(float) resizedRows/(float) inputImage->rows;
2884 if (xFactor > yFactor)
2885 {
2886
2887 length = resizedColumns*inputImage->rows;
2888 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2889 if (clStatus != CL_SUCCESS)
2890 {
2891 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2892 goto cleanup;
2893 }
2894
dirke19d0cc2013-12-01 10:07:42 +00002895 status = resizeHorizontalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002896 , tempImageBuffer, resizedColumns, inputImage->rows
2897 , resizeFilter, cubicCoefficientsBuffer
2898 , xFactor, clEnv, queue, exception);
2899 if (status != MagickTrue)
2900 goto cleanup;
2901
dirke19d0cc2013-12-01 10:07:42 +00002902 status = resizeVerticalFilter(tempImageBuffer, resizedColumns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002903 , filteredImageBuffer, resizedColumns, resizedRows
2904 , resizeFilter, cubicCoefficientsBuffer
2905 , yFactor, clEnv, queue, exception);
2906 if (status != MagickTrue)
2907 goto cleanup;
2908 }
2909 else
2910 {
2911 length = inputImage->columns*resizedRows;
2912 tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
2913 if (clStatus != CL_SUCCESS)
2914 {
2915 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
2916 goto cleanup;
2917 }
2918
dirke19d0cc2013-12-01 10:07:42 +00002919 status = resizeVerticalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002920 , tempImageBuffer, inputImage->columns, resizedRows
2921 , resizeFilter, cubicCoefficientsBuffer
2922 , yFactor, clEnv, queue, exception);
2923 if (status != MagickTrue)
2924 goto cleanup;
2925
dirke19d0cc2013-12-01 10:07:42 +00002926 status = resizeHorizontalFilter(tempImageBuffer, inputImage->columns, resizedRows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
cristyf034abb2013-11-24 14:16:14 +00002927 , filteredImageBuffer, resizedColumns, resizedRows
2928 , resizeFilter, cubicCoefficientsBuffer
2929 , xFactor, clEnv, queue, exception);
2930 if (status != MagickTrue)
2931 goto cleanup;
2932 }
2933 length = resizedColumns*resizedRows;
2934 if (ALIGNED(filteredPixels,CLPixelPacket))
2935 {
2936 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2937 }
2938 else
2939 {
2940 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2941 }
2942 if (clStatus != CL_SUCCESS)
2943 {
2944 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
2945 goto cleanup;
2946 }
2947 outputReady = MagickTrue;
2948
2949cleanup:
2950 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
2951 if (tempImageBuffer!=NULL) clReleaseMemObject(tempImageBuffer);
2952 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
2953 if (cubicCoefficientsBuffer!=NULL) clReleaseMemObject(cubicCoefficientsBuffer);
2954 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
2955 if (outputReady == MagickFalse)
2956 {
2957 if (filteredImage != NULL)
2958 {
2959 DestroyImage(filteredImage);
2960 filteredImage = NULL;
2961 }
2962 }
2963
2964 return filteredImage;
2965}
2966
2967const ResizeWeightingFunctionType supportedResizeWeighting[] =
2968{
2969 BoxWeightingFunction
2970 ,TriangleWeightingFunction
2971 ,HanningWeightingFunction
2972 ,HammingWeightingFunction
2973 ,BlackmanWeightingFunction
2974 ,CubicBCWeightingFunction
2975 ,SincWeightingFunction
2976 ,SincFastWeightingFunction
2977 ,LastWeightingFunction
2978};
2979
2980static MagickBooleanType gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)
2981{
2982 MagickBooleanType supported = MagickFalse;
2983 unsigned int i;
2984 for (i = 0; ;i++)
2985 {
2986 if (supportedResizeWeighting[i] == LastWeightingFunction)
2987 break;
2988 if (supportedResizeWeighting[i] == f)
2989 {
2990 supported = MagickTrue;
2991 break;
2992 }
2993 }
2994 return supported;
2995}
2996
2997
2998/*
2999%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3000% %
3001% %
3002% %
3003% A c c e l e r a t e R e s i z e I m a g e %
3004% %
3005% %
3006% %
3007%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3008%
3009% AccelerateResizeImage() is an OpenCL implementation of ResizeImage()
3010%
3011% AccelerateResizeImage() scales an image to the desired dimensions, using the given
3012% filter (see AcquireFilterInfo()).
3013%
3014% If an undefined filter is given the filter defaults to Mitchell for a
3015% colormapped image, a image with a matte channel, or if the image is
3016% enlarged. Otherwise the filter defaults to a Lanczos.
3017%
3018% AccelerateResizeImage() was inspired by Paul Heckbert's "zoom" program.
3019%
3020% The format of the AccelerateResizeImage method is:
3021%
3022% Image *ResizeImage(Image *image,const size_t columns,
3023% const size_t rows, const ResizeFilter* filter,
cristy3f6d1482010-01-20 21:01:21 +00003024% ExceptionInfo *exception)
3025%
3026% A description of each parameter follows:
3027%
3028% o image: the image.
3029%
cristyf034abb2013-11-24 14:16:14 +00003030% o columns: the number of columns in the scaled image.
cristy3f6d1482010-01-20 21:01:21 +00003031%
cristyf034abb2013-11-24 14:16:14 +00003032% o rows: the number of rows in the scaled image.
3033%
3034% o filter: Image filter to use.
cristy3f6d1482010-01-20 21:01:21 +00003035%
3036% o exception: return any errors or warnings in this structure.
3037%
3038*/
cristyd43a46b2010-01-21 02:13:41 +00003039
cristyf034abb2013-11-24 14:16:14 +00003040MagickExport
3041Image* AccelerateResizeImage(const Image* image, const size_t resizedColumns, const size_t resizedRows
3042 , const ResizeFilter* resizeFilter, ExceptionInfo *exception)
cristyd43a46b2010-01-21 02:13:41 +00003043{
cristyf034abb2013-11-24 14:16:14 +00003044 MagickBooleanType status;
3045 Image* filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003046
cristyf034abb2013-11-24 14:16:14 +00003047 assert(image != NULL);
3048 assert(resizeFilter != NULL);
cristyd43a46b2010-01-21 02:13:41 +00003049
cristyf034abb2013-11-24 14:16:14 +00003050 status = checkOpenCLEnvironment(exception);
3051 if (status == MagickFalse)
3052 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003053
dirk5dcb7622013-12-01 10:43:43 +00003054 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00003055 if (status == MagickFalse)
3056 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003057
cristyf034abb2013-11-24 14:16:14 +00003058 if (gpuSupportedResizeWeighting(GetResizeFilterWeightingType(resizeFilter)) == MagickFalse
3059 || gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(resizeFilter)) == MagickFalse)
3060 return NULL;
cristyd43a46b2010-01-21 02:13:41 +00003061
cristyf034abb2013-11-24 14:16:14 +00003062 filteredImage = ComputeResizeImage(image,resizedColumns,resizedRows,resizeFilter,exception);
3063 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3064 return filteredImage;
cristyd43a46b2010-01-21 02:13:41 +00003065
cristyd43a46b2010-01-21 02:13:41 +00003066}
3067
cristyd43a46b2010-01-21 02:13:41 +00003068
cristyf034abb2013-11-24 14:16:14 +00003069static MagickBooleanType ComputeContrastImage(Image *inputImage, const MagickBooleanType sharpen, ExceptionInfo *exception)
3070{
3071 MagickBooleanType outputReady = MagickFalse;
3072 MagickCLEnv clEnv = NULL;
3073
3074 cl_int clStatus;
3075 size_t global_work_size[2];
3076
3077 void *inputPixels = NULL;
3078 MagickSizeType length;
3079 unsigned int uSharpen;
3080 unsigned int i;
3081
3082 cl_mem_flags mem_flags;
3083 cl_context context = NULL;
3084 cl_mem inputImageBuffer = NULL;
3085 cl_kernel filterKernel = NULL;
3086 cl_command_queue queue = NULL;
3087
3088 clEnv = GetDefaultOpenCLEnv();
3089 context = GetOpenCLContext(clEnv);
3090
3091 /* Create and initialize OpenCL buffers. */
3092 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3093 if (inputPixels == (void *) NULL)
3094 {
3095 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3096 goto cleanup;
3097 }
3098
3099 /* If the host pointer is aligned to the size of CLPixelPacket,
3100 then use the host buffer directly from the GPU; otherwise,
3101 create a buffer on the GPU and copy the data over */
3102 if (ALIGNED(inputPixels,CLPixelPacket))
3103 {
3104 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3105 }
3106 else
3107 {
3108 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3109 }
3110 /* create a CL buffer from image pixel buffer */
3111 length = inputImage->columns * inputImage->rows;
3112 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3113 if (clStatus != CL_SUCCESS)
3114 {
3115 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3116 goto cleanup;
3117 }
3118
3119 filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
3120 if (filterKernel == NULL)
3121 {
3122 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3123 goto cleanup;
3124 }
3125
3126 i = 0;
3127 clStatus=clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3128
3129 uSharpen = (sharpen == MagickFalse)?0:1;
3130 clStatus|=clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
3131 if (clStatus != CL_SUCCESS)
3132 {
3133 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3134 goto cleanup;
3135 }
3136
3137 global_work_size[0] = inputImage->columns;
3138 global_work_size[1] = inputImage->rows;
3139 /* launch the kernel */
3140 queue = AcquireOpenCLCommandQueue(clEnv);
3141 clStatus = clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3142 if (clStatus != CL_SUCCESS)
3143 {
3144 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3145 goto cleanup;
3146 }
3147 clFlush(queue);
3148
3149 if (ALIGNED(inputPixels,CLPixelPacket))
3150 {
3151 length = inputImage->columns * inputImage->rows;
3152 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3153 }
3154 else
3155 {
3156 length = inputImage->columns * inputImage->rows;
3157 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3158 }
3159 if (clStatus != CL_SUCCESS)
3160 {
3161 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3162 goto cleanup;
3163 }
3164 outputReady = MagickTrue;
3165
3166cleanup:
3167
3168 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
3169 if (filterKernel!=NULL) RelinquishOpenCLKernel(clEnv, filterKernel);
3170 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
3171 return outputReady;
3172}
3173
3174/*
3175%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3176% %
3177% %
3178% %
3179% C o n t r a s t I m a g e w i t h O p e n C L %
3180% %
3181% %
3182% %
3183%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3184%
3185% ContrastImage() enhances the intensity differences between the lighter and
3186% darker elements of the image. Set sharpen to a MagickTrue to increase the
3187% image contrast otherwise the contrast is reduced.
3188%
3189% The format of the ContrastImage method is:
3190%
3191% MagickBooleanType ContrastImage(Image *image,
3192% const MagickBooleanType sharpen)
3193%
3194% A description of each parameter follows:
3195%
3196% o image: the image.
3197%
3198% o sharpen: Increase or decrease image contrast.
3199%
3200*/
3201
3202MagickExport
3203MagickBooleanType AccelerateContrastImage(Image* image, const MagickBooleanType sharpen, ExceptionInfo* exception)
3204{
3205 MagickBooleanType status;
3206
3207 assert(image != NULL);
3208 assert(exception != NULL);
3209
3210 status = checkOpenCLEnvironment(exception);
3211 if (status == MagickFalse)
3212 return MagickFalse;
3213
dirk5dcb7622013-12-01 10:43:43 +00003214 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00003215 if (status == MagickFalse)
3216 return MagickFalse;
3217
3218 status = ComputeContrastImage(image,sharpen,exception);
3219 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3220 return status;
3221}
3222
3223
3224
3225MagickBooleanType ComputeModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3226{
3227 register ssize_t
cristyd43a46b2010-01-21 02:13:41 +00003228 i;
3229
cristyf034abb2013-11-24 14:16:14 +00003230 cl_float
3231 bright,
3232 hue,
3233 saturation;
3234
3235 cl_int color;
3236
3237 MagickBooleanType outputReady;
3238
3239 MagickCLEnv clEnv;
3240
3241 void *inputPixels;
3242
3243 MagickSizeType length;
3244
3245 cl_context context;
3246 cl_command_queue queue;
3247 cl_kernel modulateKernel;
3248
3249 cl_mem inputImageBuffer;
3250 cl_mem_flags mem_flags;
3251
3252 cl_int clStatus;
3253
3254 Image * inputImage = image;
3255
3256 inputImageBuffer = NULL;
3257 modulateKernel = NULL;
3258
3259 assert(inputImage != (Image *) NULL);
3260 assert(inputImage->signature == MagickSignature);
3261 if (inputImage->debug != MagickFalse)
3262 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
cristyd43a46b2010-01-21 02:13:41 +00003263
3264 /*
cristyf034abb2013-11-24 14:16:14 +00003265 * initialize opencl env
3266 */
3267 clEnv = GetDefaultOpenCLEnv();
3268 context = GetOpenCLContext(clEnv);
3269 queue = AcquireOpenCLCommandQueue(clEnv);
cristyd43a46b2010-01-21 02:13:41 +00003270
cristyf034abb2013-11-24 14:16:14 +00003271 outputReady = MagickFalse;
cristyd43a46b2010-01-21 02:13:41 +00003272
cristyf034abb2013-11-24 14:16:14 +00003273 /* Create and initialize OpenCL buffers.
3274 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3275 assume this will get a writable image
3276 */
3277 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3278 if (inputPixels == (void *) NULL)
cristyd43a46b2010-01-21 02:13:41 +00003279 {
cristyf034abb2013-11-24 14:16:14 +00003280 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3281 goto cleanup;
cristyd43a46b2010-01-21 02:13:41 +00003282 }
cristyf034abb2013-11-24 14:16:14 +00003283
3284 /* If the host pointer is aligned to the size of CLPixelPacket,
3285 then use the host buffer directly from the GPU; otherwise,
3286 create a buffer on the GPU and copy the data over
3287 */
3288 if (ALIGNED(inputPixels,CLPixelPacket))
3289 {
3290 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3291 }
3292 else
3293 {
3294 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3295 }
3296 /* create a CL buffer from image pixel buffer */
3297 length = inputImage->columns * inputImage->rows;
3298 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3299 if (clStatus != CL_SUCCESS)
3300 {
3301 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3302 goto cleanup;
3303 }
3304
3305 modulateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Modulate");
3306 if (modulateKernel == NULL)
3307 {
3308 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3309 goto cleanup;
3310 }
3311
3312 bright=percent_brightness;
3313 hue=percent_hue;
3314 saturation=percent_saturation;
3315 color=colorspace;
3316
3317 i = 0;
3318 clStatus=clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3319 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3320 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3321 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3322 clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3323 if (clStatus != CL_SUCCESS)
3324 {
3325 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3326 printf("no kernel\n");
3327 goto cleanup;
3328 }
3329
3330 {
3331 size_t global_work_size[2];
3332 global_work_size[0] = inputImage->columns;
3333 global_work_size[1] = inputImage->rows;
3334 /* launch the kernel */
3335 clStatus = clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3336 if (clStatus != CL_SUCCESS)
3337 {
3338 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3339 goto cleanup;
3340 }
3341 clFlush(queue);
3342 }
3343
3344 if (ALIGNED(inputPixels,CLPixelPacket))
3345 {
3346 length = inputImage->columns * inputImage->rows;
3347 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3348 }
3349 else
3350 {
3351 length = inputImage->columns * inputImage->rows;
3352 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3353 }
3354 if (clStatus != CL_SUCCESS)
3355 {
3356 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3357 goto cleanup;
3358 }
3359
3360 outputReady = MagickTrue;
3361
3362cleanup:
3363
3364 if (inputPixels) {
3365 //ReleasePixelCachePixels();
3366 inputPixels = NULL;
3367 }
3368
3369 if (inputImageBuffer!=NULL)
3370 clReleaseMemObject(inputImageBuffer);
3371 if (modulateKernel!=NULL)
3372 RelinquishOpenCLKernel(clEnv, modulateKernel);
3373 if (queue != NULL)
3374 RelinquishOpenCLCommandQueue(clEnv, queue);
3375
3376 return outputReady;
3377
cristy3f6d1482010-01-20 21:01:21 +00003378}
cristyf034abb2013-11-24 14:16:14 +00003379
3380/*
3381%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3382% %
3383% %
3384% %
3385% M o d u l a t e I m a g e w i t h O p e n C L %
3386% %
3387% %
3388% %
3389%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3390%
3391% ModulateImage() lets you control the brightness, saturation, and hue
3392% of an image. Modulate represents the brightness, saturation, and hue
3393% as one parameter (e.g. 90,150,100). If the image colorspace is HSL, the
3394% modulation is lightness, saturation, and hue. For HWB, use blackness,
3395% whiteness, and hue. And for HCL, use chrome, luma, and hue.
3396%
3397% The format of the ModulateImage method is:
3398%
3399% MagickBooleanType ModulateImage(Image *image,const char *modulate)
3400%
3401% A description of each parameter follows:
3402%
3403% o image: the image.
3404%
3405% o percent_*: Define the percent change in brightness, saturation, and
3406% hue.
3407%
3408*/
3409
3410MagickExport
3411MagickBooleanType AccelerateModulateImage(Image* image, double percent_brightness, double percent_hue, double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
3412{
3413 MagickBooleanType status;
3414
3415 assert(image != NULL);
3416 assert(exception != NULL);
3417
3418 status = checkOpenCLEnvironment(exception);
3419 if (status == MagickFalse)
3420 return MagickFalse;
3421
dirk5dcb7622013-12-01 10:43:43 +00003422 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00003423 if (status == MagickFalse)
3424 return MagickFalse;
3425
3426 if ((colorspace != HSLColorspace && colorspace != UndefinedColorspace))
3427 return MagickFalse;
3428
3429
3430 status = ComputeModulateImage(image,percent_brightness, percent_hue, percent_saturation, colorspace, exception);
3431 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3432 return status;
3433}
3434
3435
3436MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const ChannelType channel, ExceptionInfo * _exception)
3437{
3438#define EqualizeImageTag "Equalize/Image"
3439
3440 ExceptionInfo
3441 *exception=_exception;
3442
3443 FloatPixelPacket
3444 white,
3445 black,
3446 intensity,
3447 *map;
3448
3449 cl_uint4
3450 *histogram;
3451
3452 PixelPacket
3453 *equalize_map;
3454
3455 register ssize_t
3456 i;
3457
3458 Image * image = inputImage;
3459
3460 MagickBooleanType outputReady;
3461 MagickCLEnv clEnv;
3462
3463 cl_int clStatus;
3464 size_t global_work_size[2];
3465
3466 void *inputPixels;
3467 cl_mem_flags mem_flags;
3468
3469 cl_context context;
3470 cl_mem inputImageBuffer;
3471 cl_mem histogramBuffer;
3472 cl_mem equalizeMapBuffer;
3473 cl_kernel histogramKernel;
3474 cl_kernel equalizeKernel;
3475 cl_command_queue queue;
3476 cl_int colorspace;
3477
3478 void* hostPtr;
3479
3480 MagickSizeType length;
3481
3482 inputPixels = NULL;
3483 inputImageBuffer = NULL;
3484 histogramBuffer = NULL;
3485 histogramKernel = NULL;
3486 equalizeKernel = NULL;
3487 context = NULL;
3488 queue = NULL;
3489 outputReady = MagickFalse;
3490
3491 assert(inputImage != (Image *) NULL);
3492 assert(inputImage->signature == MagickSignature);
3493 if (inputImage->debug != MagickFalse)
3494 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
3495
3496 /*
3497 Allocate and initialize histogram arrays.
3498 */
3499 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
3500 if (histogram == (cl_uint4 *) NULL)
3501 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3502
3503 /* reset histogram */
3504 (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
3505
3506 /*
3507 * initialize opencl env
3508 */
3509 clEnv = GetDefaultOpenCLEnv();
3510 context = GetOpenCLContext(clEnv);
3511 queue = AcquireOpenCLCommandQueue(clEnv);
3512
3513 /* Create and initialize OpenCL buffers. */
3514 /* inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); */
3515 /* assume this will get a writable image */
3516 inputPixels = GetPixelCachePixels(inputImage, &length, exception);
3517
3518 if (inputPixels == (void *) NULL)
3519 {
3520 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3521 goto cleanup;
3522 }
3523 /* If the host pointer is aligned to the size of CLPixelPacket,
3524 then use the host buffer directly from the GPU; otherwise,
3525 create a buffer on the GPU and copy the data over */
3526 if (ALIGNED(inputPixels,CLPixelPacket))
3527 {
3528 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3529 }
3530 else
3531 {
3532 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3533 }
3534 /* create a CL buffer from image pixel buffer */
3535 length = inputImage->columns * inputImage->rows;
3536 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3537 if (clStatus != CL_SUCCESS)
3538 {
3539 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3540 goto cleanup;
3541 }
3542
3543 /* If the host pointer is aligned to the size of cl_uint,
3544 then use the host buffer directly from the GPU; otherwise,
3545 create a buffer on the GPU and copy the data over */
3546 if (ALIGNED(histogram,cl_uint4))
3547 {
3548 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3549 hostPtr = histogram;
3550 }
3551 else
3552 {
3553 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3554 hostPtr = histogram;
3555 }
3556 /* create a CL buffer for histogram */
3557 length = (MaxMap+1);
3558 histogramBuffer = clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
3559 if (clStatus != CL_SUCCESS)
3560 {
3561 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3562 goto cleanup;
3563 }
3564
3565 switch (inputImage->colorspace)
3566 {
3567 case RGBColorspace:
3568 colorspace = 1;
3569 break;
3570 case sRGBColorspace:
3571 colorspace = 0;
3572 break;
3573 default:
3574 {
3575 /* something is wrong, as we checked in checkAccelerateCondition */
3576 }
3577 }
3578
3579 /* get the OpenCL kernel */
3580 histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
3581 if (histogramKernel == NULL)
3582 {
3583 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3584 goto cleanup;
3585 }
3586
3587 /* set the kernel arguments */
3588 i = 0;
3589 clStatus=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3590 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
3591 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace);
3592 clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
3593 if (clStatus != CL_SUCCESS)
3594 {
3595 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3596 goto cleanup;
3597 }
3598
3599 /* launch the kernel */
3600 global_work_size[0] = inputImage->columns;
3601 global_work_size[1] = inputImage->rows;
3602
3603 clStatus = clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3604
3605 if (clStatus != CL_SUCCESS)
3606 {
3607 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3608 goto cleanup;
3609 }
3610 clFlush(queue);
3611
3612 /* read from the kenel output */
3613 if (ALIGNED(histogram,cl_uint4))
3614 {
3615 length = (MaxMap+1);
3616 clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
3617 }
3618 else
3619 {
3620 length = (MaxMap+1);
3621 clStatus = clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
3622 }
3623 if (clStatus != CL_SUCCESS)
3624 {
3625 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3626 goto cleanup;
3627 }
3628
3629 /* unmap, don't block gpu to use this buffer again. */
3630 if (ALIGNED(histogram,cl_uint4))
3631 {
3632 clStatus = clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
3633 if (clStatus != CL_SUCCESS)
3634 {
3635 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
3636 goto cleanup;
3637 }
3638 }
3639
3640 if (getenv("TEST")) {
3641 unsigned int i;
3642 for (i=0; i<(MaxMap+1UL); i++)
3643 {
3644 printf("histogram %d: red %d\n", i, histogram[i].s[2]);
3645 printf("histogram %d: green %d\n", i, histogram[i].s[1]);
3646 printf("histogram %d: blue %d\n", i, histogram[i].s[0]);
3647 printf("histogram %d: opacity %d\n", i, histogram[i].s[3]);
3648 }
3649 }
3650
3651 /* cpu stuff */
3652 equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
3653 if (equalize_map == (PixelPacket *) NULL)
3654 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3655
3656 map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
3657 if (map == (FloatPixelPacket *) NULL)
3658 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3659
3660 /*
3661 Integrate the histogram to get the equalization map.
3662 */
3663 (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
3664 for (i=0; i <= (ssize_t) MaxMap; i++)
3665 {
3666 if ((channel & SyncChannels) != 0)
3667 {
3668 intensity.red+=histogram[i].s[2];
3669 map[i]=intensity;
3670 continue;
3671 }
3672 if ((channel & RedChannel) != 0)
3673 intensity.red+=histogram[i].s[2];
3674 if ((channel & GreenChannel) != 0)
3675 intensity.green+=histogram[i].s[1];
3676 if ((channel & BlueChannel) != 0)
3677 intensity.blue+=histogram[i].s[0];
3678 if ((channel & OpacityChannel) != 0)
dirke19d0cc2013-12-01 10:07:42 +00003679 intensity.alpha+=histogram[i].s[3];
cristyf034abb2013-11-24 14:16:14 +00003680 if (((channel & IndexChannel) != 0) &&
3681 (image->colorspace == CMYKColorspace))
3682 {
3683 printf("something here\n");
3684 /*intensity.index+=histogram[i].index; */
3685 }
3686 map[i]=intensity;
3687 }
3688 black=map[0];
3689 white=map[(int) MaxMap];
3690 (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
3691 for (i=0; i <= (ssize_t) MaxMap; i++)
3692 {
3693 if ((channel & SyncChannels) != 0)
3694 {
3695 if (white.red != black.red)
3696 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3697 (map[i].red-black.red))/(white.red-black.red)));
3698 continue;
3699 }
3700 if (((channel & RedChannel) != 0) && (white.red != black.red))
3701 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3702 (map[i].red-black.red))/(white.red-black.red)));
3703 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3704 equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3705 (map[i].green-black.green))/(white.green-black.green)));
3706 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3707 equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3708 (map[i].blue-black.blue))/(white.blue-black.blue)));
dirke19d0cc2013-12-01 10:07:42 +00003709 if (((channel & OpacityChannel) != 0) && (white.alpha != black.alpha))
3710 equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3711 (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
cristyf034abb2013-11-24 14:16:14 +00003712 /*
3713 if ((((channel & IndexChannel) != 0) &&
3714 (image->colorspace == CMYKColorspace)) &&
3715 (white.index != black.index))
3716 equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3717 (map[i].index-black.index))/(white.index-black.index)));
3718 */
3719 }
3720
3721 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
3722 map=(FloatPixelPacket *) RelinquishMagickMemory(map);
3723
3724 if (image->storage_class == PseudoClass)
3725 {
3726 /*
3727 Equalize colormap.
3728 */
3729 for (i=0; i < (ssize_t) image->colors; i++)
3730 {
3731 if ((channel & SyncChannels) != 0)
3732 {
3733 if (white.red != black.red)
3734 {
3735 image->colormap[i].red=equalize_map[
3736 ScaleQuantumToMap(image->colormap[i].red)].red;
3737 image->colormap[i].green=equalize_map[
3738 ScaleQuantumToMap(image->colormap[i].green)].red;
3739 image->colormap[i].blue=equalize_map[
3740 ScaleQuantumToMap(image->colormap[i].blue)].red;
dirke19d0cc2013-12-01 10:07:42 +00003741 image->colormap[i].alpha=equalize_map[
3742 ScaleQuantumToMap(image->colormap[i].alpha)].red;
cristyf034abb2013-11-24 14:16:14 +00003743 }
3744 continue;
3745 }
3746 if (((channel & RedChannel) != 0) && (white.red != black.red))
3747 image->colormap[i].red=equalize_map[
3748 ScaleQuantumToMap(image->colormap[i].red)].red;
3749 if (((channel & GreenChannel) != 0) && (white.green != black.green))
3750 image->colormap[i].green=equalize_map[
3751 ScaleQuantumToMap(image->colormap[i].green)].green;
3752 if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3753 image->colormap[i].blue=equalize_map[
3754 ScaleQuantumToMap(image->colormap[i].blue)].blue;
3755 if (((channel & OpacityChannel) != 0) &&
dirke19d0cc2013-12-01 10:07:42 +00003756 (white.alpha != black.alpha))
3757 image->colormap[i].alpha=equalize_map[
3758 ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
cristyf034abb2013-11-24 14:16:14 +00003759 }
3760 }
3761
3762 /*
3763 Equalize image.
3764 */
3765
3766 /* GPU can work on this again, image and equalize map as input
3767 image: uchar4 (CLPixelPacket)
3768 equalize_map: uchar4 (PixelPacket)
3769 black, white: float4 (FloatPixelPacket) */
3770
3771 if (inputImageBuffer!=NULL)
3772 clReleaseMemObject(inputImageBuffer);
3773
3774 /* If the host pointer is aligned to the size of CLPixelPacket,
3775 then use the host buffer directly from the GPU; otherwise,
3776 create a buffer on the GPU and copy the data over */
3777 if (ALIGNED(inputPixels,CLPixelPacket))
3778 {
3779 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3780 }
3781 else
3782 {
3783 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3784 }
3785 /* create a CL buffer from image pixel buffer */
3786 length = inputImage->columns * inputImage->rows;
3787 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3788 if (clStatus != CL_SUCCESS)
3789 {
3790 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3791 goto cleanup;
3792 }
3793
3794 /* Create and initialize OpenCL buffers. */
3795 if (ALIGNED(equalize_map, PixelPacket))
3796 {
3797 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3798 hostPtr = equalize_map;
3799 }
3800 else
3801 {
3802 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3803 hostPtr = equalize_map;
3804 }
3805 /* create a CL buffer for eqaulize_map */
3806 length = (MaxMap+1);
3807 equalizeMapBuffer = clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
3808 if (clStatus != CL_SUCCESS)
3809 {
3810 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
3811 goto cleanup;
3812 }
3813
3814 /* get the OpenCL kernel */
3815 equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize");
3816 if (equalizeKernel == NULL)
3817 {
3818 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "AcquireOpenCLKernel failed.", "'%s'", ".");
3819 goto cleanup;
3820 }
3821
3822 /* set the kernel arguments */
3823 i = 0;
3824 clStatus=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
3825 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
3826 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
3827 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
3828 clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
3829 if (clStatus != CL_SUCCESS)
3830 {
3831 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
3832 goto cleanup;
3833 }
3834
3835 /* launch the kernel */
3836 global_work_size[0] = inputImage->columns;
3837 global_work_size[1] = inputImage->rows;
3838
3839 clStatus = clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3840
3841 if (clStatus != CL_SUCCESS)
3842 {
3843 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
3844 goto cleanup;
3845 }
3846 clFlush(queue);
3847
3848 /* read the data back */
3849 if (ALIGNED(inputPixels,CLPixelPacket))
3850 {
3851 length = inputImage->columns * inputImage->rows;
3852 clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3853 }
3854 else
3855 {
3856 length = inputImage->columns * inputImage->rows;
3857 clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3858 }
3859 if (clStatus != CL_SUCCESS)
3860 {
3861 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
3862 goto cleanup;
3863 }
3864
3865 outputReady = MagickTrue;
3866
3867 equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
3868
3869cleanup:
3870
3871 if (inputPixels) {
3872 /*ReleasePixelCachePixels();*/
3873 inputPixels = NULL;
3874 }
3875
3876 if (inputImageBuffer!=NULL)
3877 clReleaseMemObject(inputImageBuffer);
3878 if (histogramBuffer!=NULL)
3879 clReleaseMemObject(histogramBuffer);
3880 if (histogramKernel!=NULL)
3881 RelinquishOpenCLKernel(clEnv, histogramKernel);
3882 if (queue != NULL)
3883 RelinquishOpenCLCommandQueue(clEnv, queue);
3884
3885 return outputReady;
3886}
3887
3888/*
3889%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3890% %
3891% %
3892% %
3893% E q u a l i z e I m a g e w i t h O p e n C L %
3894% %
3895% %
3896% %
3897%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3898%
3899% EqualizeImage() applies a histogram equalization to the image.
3900%
3901% The format of the EqualizeImage method is:
3902%
3903% MagickBooleanType EqualizeImage(Image *image)
3904% MagickBooleanType EqualizeImageChannel(Image *image,
3905% const ChannelType channel)
3906%
3907% A description of each parameter follows:
3908%
3909% o image: the image.
3910%
3911% o channel: the channel.
3912%
3913*/
3914
3915
3916MagickExport
3917MagickBooleanType AccelerateEqualizeImage(Image* image, const ChannelType channel, ExceptionInfo* exception)
3918{
3919 MagickBooleanType status;
3920
3921 assert(image != NULL);
3922 assert(exception != NULL);
3923
3924 status = checkOpenCLEnvironment(exception);
3925 if (status == MagickFalse)
3926 return MagickFalse;
3927
dirk5dcb7622013-12-01 10:43:43 +00003928 status = checkAccelerateCondition(image, channel);
cristyf034abb2013-11-24 14:16:14 +00003929 if (status == MagickFalse)
3930 return MagickFalse;
3931
3932 /* ensure this is the only pass get in for now. */
3933 if ((channel & SyncChannels) == 0)
3934 return MagickFalse;
3935
3936 if (image->colorspace != sRGBColorspace)
3937 return MagickFalse;
3938
3939 status = ComputeEqualizeImage(image,channel,exception);
3940 OpenCLLogException(__FUNCTION__,__LINE__,exception);
3941 return status;
3942}
3943
3944
3945static Image* ComputeDespeckleImage(const Image* inputImage, ExceptionInfo* exception)
3946{
3947
3948 MagickBooleanType outputReady = MagickFalse;
3949 MagickCLEnv clEnv = NULL;
3950
3951 cl_int clStatus;
3952 size_t global_work_size[2];
3953
3954 const void *inputPixels = NULL;
3955 Image* filteredImage = NULL;
3956 void *filteredPixels = NULL;
3957 void *hostPtr;
3958 MagickSizeType length;
3959
3960 cl_mem_flags mem_flags;
3961 cl_context context = NULL;
3962 cl_mem inputImageBuffer = NULL;
3963 cl_mem tempImageBuffer[2];
3964 cl_mem filteredImageBuffer = NULL;
3965 cl_command_queue queue = NULL;
3966 cl_kernel hullPass1 = NULL;
3967 cl_kernel hullPass2 = NULL;
3968
3969 unsigned int imageWidth, imageHeight;
3970 int matte;
3971 int k;
3972
3973 static const int
3974 X[4] = {0, 1, 1,-1},
3975 Y[4] = {1, 0, 1, 1};
3976
3977 tempImageBuffer[0] = tempImageBuffer[1] = NULL;
3978 clEnv = GetDefaultOpenCLEnv();
3979 context = GetOpenCLContext(clEnv);
3980 queue = AcquireOpenCLCommandQueue(clEnv);
3981
3982 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
3983 if (inputPixels == (void *) NULL)
3984 {
3985 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
3986 goto cleanup;
3987 }
3988
3989 if (ALIGNED(inputPixels,CLPixelPacket))
3990 {
3991 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3992 }
3993 else
3994 {
3995 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3996 }
3997 /* create a CL buffer from image pixel buffer */
3998 length = inputImage->columns * inputImage->rows;
3999 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4000 if (clStatus != CL_SUCCESS)
4001 {
4002 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4003 goto cleanup;
4004 }
4005
4006 mem_flags = CL_MEM_READ_WRITE;
4007 length = inputImage->columns * inputImage->rows;
4008 for (k = 0; k < 2; k++)
4009 {
4010 tempImageBuffer[k] = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
4011 if (clStatus != CL_SUCCESS)
4012 {
4013 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4014 goto cleanup;
4015 }
4016 }
4017
4018 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4019 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00004020 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristyf034abb2013-11-24 14:16:14 +00004021 {
4022 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
4023 goto cleanup;
4024 }
4025 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4026 if (filteredPixels == (void *) NULL)
4027 {
4028 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
4029 goto cleanup;
4030 }
4031
4032 if (ALIGNED(filteredPixels,CLPixelPacket))
4033 {
4034 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4035 hostPtr = filteredPixels;
4036 }
4037 else
4038 {
4039 mem_flags = CL_MEM_WRITE_ONLY;
4040 hostPtr = NULL;
4041 }
4042 /* create a CL buffer from image pixel buffer */
4043 length = inputImage->columns * inputImage->rows;
4044 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4045 if (clStatus != CL_SUCCESS)
4046 {
4047 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4048 goto cleanup;
4049 }
4050
4051 hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
4052 hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
4053
4054 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&inputImageBuffer);
4055 clStatus |=clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4056 imageWidth = inputImage->columns;
4057 clStatus |=clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
4058 imageHeight = inputImage->rows;
4059 clStatus |=clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
dirke19d0cc2013-12-01 10:07:42 +00004060 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +00004061 clStatus |=clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
4062 if (clStatus != CL_SUCCESS)
4063 {
4064 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4065 goto cleanup;
4066 }
4067
4068 clStatus = clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
4069 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
4070 imageWidth = inputImage->columns;
4071 clStatus |=clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
4072 imageHeight = inputImage->rows;
4073 clStatus |=clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
dirke19d0cc2013-12-01 10:07:42 +00004074 matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
cristyf034abb2013-11-24 14:16:14 +00004075 clStatus |=clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
4076 if (clStatus != CL_SUCCESS)
4077 {
4078 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4079 goto cleanup;
4080 }
4081
4082
4083 global_work_size[0] = inputImage->columns;
4084 global_work_size[1] = inputImage->rows;
4085
4086
4087 for (k = 0; k < 4; k++)
4088 {
4089 cl_int2 offset;
4090 int polarity;
4091
4092
4093 offset.s[0] = X[k];
4094 offset.s[1] = Y[k];
4095 polarity = 1;
4096 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4097 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4098 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4099 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4100 if (clStatus != CL_SUCCESS)
4101 {
4102 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4103 goto cleanup;
4104 }
4105 /* launch the kernel */
4106 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4107 if (clStatus != CL_SUCCESS)
4108 {
4109 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4110 goto cleanup;
4111 }
4112 /* launch the kernel */
4113 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4114 if (clStatus != CL_SUCCESS)
4115 {
4116 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4117 goto cleanup;
4118 }
4119
4120
4121 if (k == 0)
4122 clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
4123 offset.s[0] = -X[k];
4124 offset.s[1] = -Y[k];
4125 polarity = 1;
4126 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4127 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4128 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4129 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4130 if (clStatus != CL_SUCCESS)
4131 {
4132 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4133 goto cleanup;
4134 }
4135 /* launch the kernel */
4136 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4137 if (clStatus != CL_SUCCESS)
4138 {
4139 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4140 goto cleanup;
4141 }
4142 /* launch the kernel */
4143 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4144 if (clStatus != CL_SUCCESS)
4145 {
4146 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4147 goto cleanup;
4148 }
4149
4150 offset.s[0] = -X[k];
4151 offset.s[1] = -Y[k];
4152 polarity = -1;
4153 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4154 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4155 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4156 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4157 if (clStatus != CL_SUCCESS)
4158 {
4159 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4160 goto cleanup;
4161 }
4162 /* launch the kernel */
4163 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4164 if (clStatus != CL_SUCCESS)
4165 {
4166 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4167 goto cleanup;
4168 }
4169 /* launch the kernel */
4170 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4171 if (clStatus != CL_SUCCESS)
4172 {
4173 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4174 goto cleanup;
4175 }
4176
4177 offset.s[0] = X[k];
4178 offset.s[1] = Y[k];
4179 polarity = -1;
4180 clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
4181 clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
4182 clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
4183 clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
4184
4185 if (k == 3)
4186 clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
4187
4188 if (clStatus != CL_SUCCESS)
4189 {
4190 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clSetKernelArg failed.", "'%s'", ".");
4191 goto cleanup;
4192 }
4193 /* launch the kernel */
4194 clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4195 if (clStatus != CL_SUCCESS)
4196 {
4197 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4198 goto cleanup;
4199 }
4200 /* launch the kernel */
4201 clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4202 if (clStatus != CL_SUCCESS)
4203 {
4204 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
4205 goto cleanup;
4206 }
4207 }
4208
4209 if (ALIGNED(filteredPixels,CLPixelPacket))
4210 {
4211 length = inputImage->columns * inputImage->rows;
4212 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4213 }
4214 else
4215 {
4216 length = inputImage->columns * inputImage->rows;
4217 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4218 }
4219 if (clStatus != CL_SUCCESS)
4220 {
4221 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
4222 goto cleanup;
4223 }
4224
4225 outputReady = MagickTrue;
4226
4227cleanup:
4228 if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4229 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4230 for (k = 0; k < 2; k++)
4231 {
4232 if (tempImageBuffer[k]!=NULL) clReleaseMemObject(tempImageBuffer[k]);
4233 }
4234 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4235 if (hullPass1!=NULL) RelinquishOpenCLKernel(clEnv, hullPass1);
4236 if (hullPass2!=NULL) RelinquishOpenCLKernel(clEnv, hullPass2);
4237 if (outputReady == MagickFalse)
4238 {
4239 if (filteredImage != NULL)
4240 {
4241 DestroyImage(filteredImage);
4242 filteredImage = NULL;
4243 }
4244 }
4245 return filteredImage;
4246}
4247
4248/*
4249%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4250% %
4251% %
4252% %
4253% D e s p e c k l e I m a g e w i t h O p e n C L %
4254% %
4255% %
4256% %
4257%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4258%
4259% DespeckleImage() reduces the speckle noise in an image while perserving the
4260% edges of the original image. A speckle removing filter uses a complementary
4261% hulling technique (raising pixels that are darker than their surrounding
4262% neighbors, then complementarily lowering pixels that are brighter than their
4263% surrounding neighbors) to reduce the speckle index of that image (reference
4264% Crimmins speckle removal).
4265%
4266% The format of the DespeckleImage method is:
4267%
4268% Image *DespeckleImage(const Image *image,ExceptionInfo *exception)
4269%
4270% A description of each parameter follows:
4271%
4272% o image: the image.
4273%
4274% o exception: return any errors or warnings in this structure.
4275%
4276*/
4277
4278MagickExport
4279Image* AccelerateDespeckleImage(const Image* image, ExceptionInfo* exception)
4280{
4281 MagickBooleanType status;
4282 Image* newImage = NULL;
4283
4284 assert(image != NULL);
4285 assert(exception != NULL);
4286
4287 status = checkOpenCLEnvironment(exception);
4288 if (status == MagickFalse)
4289 return NULL;
4290
dirk5dcb7622013-12-01 10:43:43 +00004291 status = checkAccelerateCondition(image, AllChannels);
cristyf034abb2013-11-24 14:16:14 +00004292 if (status == MagickFalse)
4293 return NULL;
4294
4295 newImage = ComputeDespeckleImage(image,exception);
4296 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4297 return newImage;
4298}
4299
cristye85d0f72013-11-27 02:25:43 +00004300static Image* ComputeAddNoiseImage(const Image* inputImage,
4301 const ChannelType channel, const NoiseType noise_type,
4302 ExceptionInfo *exception)
4303{
4304 MagickBooleanType outputReady = MagickFalse;
4305 MagickCLEnv clEnv = NULL;
4306
4307 cl_int clStatus;
4308 size_t global_work_size[2];
4309
4310 const void *inputPixels = NULL;
4311 Image* filteredImage = NULL;
4312 void *filteredPixels = NULL;
4313 void *hostPtr;
4314 unsigned int inputColumns, inputRows;
4315 float attenuate;
4316 float *randomNumberBufferPtr = NULL;
4317 MagickSizeType length;
4318 unsigned int numRandomNumberPerPixel;
4319 unsigned int numRowsPerKernelLaunch;
4320 unsigned int numRandomNumberPerBuffer;
4321 unsigned int r;
4322 unsigned int k;
4323 int i;
4324
4325 RandomInfo **restrict random_info;
4326 const char *option;
4327#if defined(MAGICKCORE_OPENMP_SUPPORT)
4328 unsigned long key;
4329#endif
4330
4331 cl_mem_flags mem_flags;
4332 cl_context context = NULL;
4333 cl_mem inputImageBuffer = NULL;
4334 cl_mem randomNumberBuffer = NULL;
4335 cl_mem filteredImageBuffer = NULL;
4336 cl_command_queue queue = NULL;
4337 cl_kernel addNoiseKernel = NULL;
4338
4339
4340 clEnv = GetDefaultOpenCLEnv();
4341 context = GetOpenCLContext(clEnv);
4342 queue = AcquireOpenCLCommandQueue(clEnv);
4343
4344 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
4345 if (inputPixels == (void *) NULL)
4346 {
4347 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
4348 goto cleanup;
4349 }
4350
4351 if (ALIGNED(inputPixels,CLPixelPacket))
4352 {
4353 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4354 }
4355 else
4356 {
4357 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4358 }
4359 /* create a CL buffer from image pixel buffer */
4360 length = inputImage->columns * inputImage->rows;
4361 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4362 if (clStatus != CL_SUCCESS)
4363 {
4364 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4365 goto cleanup;
4366 }
4367
4368
4369 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4370 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00004371 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristye85d0f72013-11-27 02:25:43 +00004372 {
4373 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
4374 goto cleanup;
4375 }
4376 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4377 if (filteredPixels == (void *) NULL)
4378 {
4379 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
4380 goto cleanup;
4381 }
4382
4383 if (ALIGNED(filteredPixels,CLPixelPacket))
4384 {
4385 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4386 hostPtr = filteredPixels;
4387 }
4388 else
4389 {
4390 mem_flags = CL_MEM_WRITE_ONLY;
4391 hostPtr = NULL;
4392 }
4393 /* create a CL buffer from image pixel buffer */
4394 length = inputImage->columns * inputImage->rows;
4395 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4396 if (clStatus != CL_SUCCESS)
4397 {
4398 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4399 goto cleanup;
4400 }
4401
4402 /* find out how many random numbers needed by pixel */
4403 numRandomNumberPerPixel = 0;
4404 {
4405 unsigned int numRandPerChannel = 0;
4406 switch (noise_type)
4407 {
4408 case UniformNoise:
4409 case ImpulseNoise:
4410 case LaplacianNoise:
4411 case RandomNoise:
4412 default:
4413 numRandPerChannel = 1;
4414 break;
4415 case GaussianNoise:
4416 case MultiplicativeGaussianNoise:
4417 case PoissonNoise:
4418 numRandPerChannel = 2;
4419 break;
4420 };
4421
4422 if ((channel & RedChannel) != 0)
4423 numRandomNumberPerPixel+=numRandPerChannel;
4424 if ((channel & GreenChannel) != 0)
4425 numRandomNumberPerPixel+=numRandPerChannel;
4426 if ((channel & BlueChannel) != 0)
4427 numRandomNumberPerPixel+=numRandPerChannel;
4428 if ((channel & OpacityChannel) != 0)
4429 numRandomNumberPerPixel+=numRandPerChannel;
4430 }
4431
4432 numRowsPerKernelLaunch = 512;
4433 /* create a buffer for random numbers */
4434 numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
4435 randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, numRandomNumberPerBuffer*sizeof(float)
4436 , NULL, &clStatus);
4437
4438
4439 /* set up the random number generators */
4440 attenuate=1.0;
4441 option=GetImageArtifact(inputImage,"attenuate");
4442 if (option != (char *) NULL)
4443 attenuate=StringToDouble(option,(char **) NULL);
4444 random_info=AcquireRandomInfoThreadSet();
4445#if defined(MAGICKCORE_OPENMP_SUPPORT)
4446 key=GetRandomSecretKey(random_info[0]);
4447#endif
4448
4449 addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
4450
4451 k = 0;
4452 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
4453 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4454 inputColumns = inputImage->columns;
4455 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
4456 inputRows = inputImage->rows;
4457 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
4458 clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
4459 clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
4460 attenuate=1.0f;
4461 option=GetImageArtifact(inputImage,"attenuate");
4462 if (option != (char *) NULL)
4463 attenuate=(float)StringToDouble(option,(char **) NULL);
4464 clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
4465 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4466 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
4467
4468 global_work_size[0] = inputColumns;
4469 for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch)
4470 {
4471 /* Generate random numbers in the buffer */
4472 randomNumberBufferPtr = (float*)clEnqueueMapBuffer(queue, randomNumberBuffer, CL_TRUE, CL_MAP_WRITE, 0
4473 , numRandomNumberPerBuffer*sizeof(float), 0, NULL, NULL, &clStatus);
4474 if (clStatus != CL_SUCCESS)
4475 {
4476 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
4477 goto cleanup;
4478 }
4479
4480#if defined(MAGICKCORE_OPENMP_SUPPORT)
4481 #pragma omp parallel for schedule(static,4) \
4482 num_threads((key == ~0UL) == 0 ? 1 : (size_t) GetMagickResourceLimit(ThreadResource))
4483#endif
4484 for (i = 0; i < numRandomNumberPerBuffer; i++)
4485 {
4486 const int id = GetOpenMPThreadId();
4487 randomNumberBufferPtr[i] = (float)GetPseudoRandomValue(random_info[id]);
4488 }
4489
4490 clStatus = clEnqueueUnmapMemObject(queue, randomNumberBuffer, randomNumberBufferPtr, 0, NULL, NULL);
4491 if (clStatus != CL_SUCCESS)
4492 {
4493 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
4494 goto cleanup;
4495 }
4496
4497 /* set the row offset */
4498 clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
4499 global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
4500 clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
4501 }
4502
4503 if (ALIGNED(filteredPixels,CLPixelPacket))
4504 {
4505 length = inputImage->columns * inputImage->rows;
4506 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4507 }
4508 else
4509 {
4510 length = inputImage->columns * inputImage->rows;
4511 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4512 }
4513 if (clStatus != CL_SUCCESS)
4514 {
4515 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
4516 goto cleanup;
4517 }
4518
4519
4520 outputReady = MagickTrue;
4521cleanup:
4522 if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4523 if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel);
4524 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4525 if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer);
4526 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4527 if (outputReady == MagickFalse
4528 && filteredImage != NULL)
4529 {
4530 DestroyImage(filteredImage);
4531 filteredImage = NULL;
4532 }
4533 return filteredImage;
4534}
4535
4536
4537static Image* ComputeAddNoiseImageOptRandomNum(const Image* inputImage,
4538 const ChannelType channel, const NoiseType noise_type,
4539 ExceptionInfo *exception)
4540{
4541 MagickBooleanType outputReady = MagickFalse;
4542 MagickCLEnv clEnv = NULL;
4543
4544 cl_int clStatus;
4545 size_t global_work_size[2];
4546 size_t random_work_size;
4547
4548 const void *inputPixels = NULL;
4549 Image* filteredImage = NULL;
4550 void *filteredPixels = NULL;
4551 void *hostPtr;
4552 unsigned int inputColumns, inputRows;
4553 float attenuate;
4554 MagickSizeType length;
4555 unsigned int numRandomNumberPerPixel;
4556 unsigned int numRowsPerKernelLaunch;
4557 unsigned int numRandomNumberPerBuffer;
4558 unsigned int numRandomNumberGenerators;
4559 unsigned int initRandom;
4560 float fNormalize;
4561 unsigned int r;
4562 unsigned int k;
4563 int i;
4564 const char *option;
4565
4566 cl_mem_flags mem_flags;
4567 cl_context context = NULL;
4568 cl_mem inputImageBuffer = NULL;
4569 cl_mem randomNumberBuffer = NULL;
4570 cl_mem filteredImageBuffer = NULL;
4571 cl_mem randomNumberSeedsBuffer = NULL;
4572 cl_command_queue queue = NULL;
4573 cl_kernel addNoiseKernel = NULL;
4574 cl_kernel randomNumberGeneratorKernel = NULL;
4575
4576
4577 clEnv = GetDefaultOpenCLEnv();
4578 context = GetOpenCLContext(clEnv);
4579 queue = AcquireOpenCLCommandQueue(clEnv);
4580
4581 inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
4582 if (inputPixels == (void *) NULL)
4583 {
4584 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
4585 goto cleanup;
4586 }
4587
4588 if (ALIGNED(inputPixels,CLPixelPacket))
4589 {
4590 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4591 }
4592 else
4593 {
4594 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4595 }
4596 /* create a CL buffer from image pixel buffer */
4597 length = inputImage->columns * inputImage->rows;
4598 inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4599 if (clStatus != CL_SUCCESS)
4600 {
4601 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4602 goto cleanup;
4603 }
4604
4605
4606 filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
4607 assert(filteredImage != NULL);
dirke19d0cc2013-12-01 10:07:42 +00004608 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
cristye85d0f72013-11-27 02:25:43 +00004609 {
4610 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
4611 goto cleanup;
4612 }
4613 filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
4614 if (filteredPixels == (void *) NULL)
4615 {
4616 (void) ThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
4617 goto cleanup;
4618 }
4619
4620 if (ALIGNED(filteredPixels,CLPixelPacket))
4621 {
4622 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4623 hostPtr = filteredPixels;
4624 }
4625 else
4626 {
4627 mem_flags = CL_MEM_WRITE_ONLY;
4628 hostPtr = NULL;
4629 }
4630 /* create a CL buffer from image pixel buffer */
4631 length = inputImage->columns * inputImage->rows;
4632 filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4633 if (clStatus != CL_SUCCESS)
4634 {
4635 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4636 goto cleanup;
4637 }
4638
4639 /* find out how many random numbers needed by pixel */
4640 numRandomNumberPerPixel = 0;
4641 {
4642 unsigned int numRandPerChannel = 0;
4643 switch (noise_type)
4644 {
4645 case UniformNoise:
4646 case ImpulseNoise:
4647 case LaplacianNoise:
4648 case RandomNoise:
4649 default:
4650 numRandPerChannel = 1;
4651 break;
4652 case GaussianNoise:
4653 case MultiplicativeGaussianNoise:
4654 case PoissonNoise:
4655 numRandPerChannel = 2;
4656 break;
4657 };
4658
4659 if ((channel & RedChannel) != 0)
4660 numRandomNumberPerPixel+=numRandPerChannel;
4661 if ((channel & GreenChannel) != 0)
4662 numRandomNumberPerPixel+=numRandPerChannel;
4663 if ((channel & BlueChannel) != 0)
4664 numRandomNumberPerPixel+=numRandPerChannel;
4665 if ((channel & OpacityChannel) != 0)
4666 numRandomNumberPerPixel+=numRandPerChannel;
4667 }
4668
4669 numRowsPerKernelLaunch = 512;
4670
4671 /* create a buffer for random numbers */
4672 numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
4673 randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, numRandomNumberPerBuffer*sizeof(float)
4674 , NULL, &clStatus);
4675
4676 {
4677 /* setup the random number generators */
4678 unsigned long* seeds;
4679 numRandomNumberGenerators = 512;
4680 randomNumberSeedsBuffer = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR|CL_MEM_READ_WRITE
4681 , numRandomNumberGenerators * 4 * sizeof(unsigned long), NULL, &clStatus);
4682 if (clStatus != CL_SUCCESS)
4683 {
4684 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
4685 goto cleanup;
4686 }
4687 seeds = (unsigned long*) clEnqueueMapBuffer(queue, randomNumberSeedsBuffer, CL_TRUE, CL_MAP_WRITE, 0
4688 , numRandomNumberGenerators*4*sizeof(unsigned long), 0, NULL, NULL, &clStatus);
4689 if (clStatus != CL_SUCCESS)
4690 {
4691 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
4692 goto cleanup;
4693 }
4694
4695 for (i = 0; i < numRandomNumberGenerators; i++) {
4696 RandomInfo* randomInfo = AcquireRandomInfo();
4697 const unsigned long* s = GetRandomInfoSeed(randomInfo);
4698
4699 if (i == 0)
4700 fNormalize = GetRandomInfoNormalize(randomInfo);
4701
4702 seeds[i*4] = s[0];
4703 randomInfo = DestroyRandomInfo(randomInfo);
4704 }
4705
4706 clStatus = clEnqueueUnmapMemObject(queue, randomNumberSeedsBuffer, seeds, 0, NULL, NULL);
4707 if (clStatus != CL_SUCCESS)
4708 {
4709 (void) ThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
4710 goto cleanup;
4711 }
4712
4713 randomNumberGeneratorKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE
4714 ,"randomNumberGeneratorKernel");
4715
4716 k = 0;
4717 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberSeedsBuffer);
4718 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(float),(void *)&fNormalize);
4719 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4720 initRandom = 1;
4721 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&initRandom);
4722 clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerBuffer);
4723
4724 random_work_size = numRandomNumberGenerators;
4725 }
4726
4727 addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
4728 k = 0;
4729 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
4730 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4731 inputColumns = inputImage->columns;
4732 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
4733 inputRows = inputImage->rows;
4734 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
4735 clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
4736 clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
4737 attenuate=1.0f;
4738 option=GetImageArtifact(inputImage,"attenuate");
4739 if (option != (char *) NULL)
4740 attenuate=(float)StringToDouble(option,(char **) NULL);
4741 clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
4742 clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
4743 clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
4744
4745 global_work_size[0] = inputColumns;
4746 for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch)
4747 {
4748 size_t generator_local_size = 64;
4749 /* Generate random numbers in the buffer */
4750 clEnqueueNDRangeKernel(queue,randomNumberGeneratorKernel,1,NULL
4751 ,&random_work_size,&generator_local_size,0,NULL,NULL);
4752 if (initRandom != 0)
4753 {
4754 /* make sure we only do init once */
4755 initRandom = 0;
4756 clSetKernelArg(randomNumberGeneratorKernel,3,sizeof(unsigned int),(void *)&initRandom);
4757 }
4758
4759 /* set the row offset */
4760 clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
4761 global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
4762 clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
4763 }
4764
4765 if (ALIGNED(filteredPixels,CLPixelPacket))
4766 {
4767 length = inputImage->columns * inputImage->rows;
4768 clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4769 }
4770 else
4771 {
4772 length = inputImage->columns * inputImage->rows;
4773 clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4774 }
4775 if (clStatus != CL_SUCCESS)
4776 {
4777 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Reading output image from CL buffer failed.", "'%s'", ".");
4778 goto cleanup;
4779 }
4780
4781
4782 outputReady = MagickTrue;
4783cleanup:
4784 if (queue!=NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
4785 if (addNoiseKernel!=NULL) RelinquishOpenCLKernel(clEnv, addNoiseKernel);
4786 if (randomNumberGeneratorKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomNumberGeneratorKernel);
4787 if (inputImageBuffer!=NULL) clReleaseMemObject(inputImageBuffer);
4788 if (randomNumberBuffer!=NULL) clReleaseMemObject(randomNumberBuffer);
4789 if (filteredImageBuffer!=NULL) clReleaseMemObject(filteredImageBuffer);
4790 if (randomNumberSeedsBuffer!=NULL) clReleaseMemObject(randomNumberSeedsBuffer);
4791 if (outputReady == MagickFalse
4792 && filteredImage != NULL)
4793 {
4794 DestroyImage(filteredImage);
4795 filteredImage = NULL;
4796 }
4797 return filteredImage;
4798}
4799
4800
4801
4802MagickExport
4803Image* AccelerateAddNoiseImage(const Image *image, const ChannelType channel,
4804 const NoiseType noise_type,ExceptionInfo *exception)
4805{
4806 MagickBooleanType status;
4807 Image* filteredImage = NULL;
4808
4809 assert(image != NULL);
4810 assert(exception != NULL);
4811
4812 status = checkOpenCLEnvironment(exception);
4813 if (status == MagickFalse)
4814 return NULL;
4815
dirk5dcb7622013-12-01 10:43:43 +00004816 status = checkAccelerateCondition(image, channel);
cristye85d0f72013-11-27 02:25:43 +00004817 if (status == MagickFalse)
4818 return NULL;
4819
dirk5dcb7622013-12-01 10:43:43 +00004820DisableMSCWarning(4127)
cristye85d0f72013-11-27 02:25:43 +00004821 if (sizeof(unsigned long) == 4)
dirk5dcb7622013-12-01 10:43:43 +00004822RestoreMSCWarning
cristye85d0f72013-11-27 02:25:43 +00004823 filteredImage = ComputeAddNoiseImageOptRandomNum(image,channel,noise_type,exception);
4824 else
4825 filteredImage = ComputeAddNoiseImage(image,channel,noise_type,exception);
4826
4827 OpenCLLogException(__FUNCTION__,__LINE__,exception);
4828 return filteredImage;
4829}
4830
4831
cristyf034abb2013-11-24 14:16:14 +00004832#else /* MAGICKCORE_OPENCL_SUPPORT */
4833
4834MagickExport Image *AccelerateConvolveImageChannel(
4835 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4836 const KernelInfo *magick_unused(kernel),
4837 ExceptionInfo *magick_unused(exception))
4838{
4839 magick_unreferenced(image);
4840 magick_unreferenced(channel);
4841 magick_unreferenced(kernel);
4842 magick_unreferenced(exception);
4843
4844 return NULL;
4845}
4846
4847MagickExport MagickBooleanType AccelerateFunctionImage(
4848 Image *magick_unused(image),const ChannelType magick_unused(channel),
4849 const MagickFunction magick_unused(function),
4850 const size_t magick_unused(number_parameters),
4851 const double *magick_unused(parameters),
4852 ExceptionInfo *magick_unused(exception))
4853{
4854 magick_unreferenced(image);
4855 magick_unreferenced(channel);
4856 magick_unreferenced(function);
4857 magick_unreferenced(number_parameters);
4858 magick_unreferenced(parameters);
4859 magick_unreferenced(exception);
4860
4861 return MagickFalse;
4862}
4863
4864MagickExport Image *AccelerateBlurImage(const Image *magick_unused(image),
4865 const ChannelType magick_unused(channel),const double magick_unused(radius),
4866 const double magick_unused(sigma),ExceptionInfo *magick_unused(exception))
4867{
4868 magick_unreferenced(image);
4869 magick_unreferenced(channel);
4870 magick_unreferenced(radius);
4871 magick_unreferenced(sigma);
4872 magick_unreferenced(exception);
4873
4874 return NULL;
4875}
4876
4877MagickExport Image *AccelerateRadialBlurImage(
4878 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4879 const double magick_unused(angle),ExceptionInfo *magick_unused(exception))
4880{
4881 magick_unreferenced(image);
4882 magick_unreferenced(channel);
4883 magick_unreferenced(angle);
4884 magick_unreferenced(exception);
4885
4886 return NULL;
4887}
4888
4889
4890MagickExport Image *AccelerateUnsharpMaskImage(
4891 const Image *magick_unused(image),const ChannelType magick_unused(channel),
4892 const double magick_unused(radius),const double magick_unused(sigma),
4893 const double magick_unused(gain),const double magick_unused(threshold),
4894 ExceptionInfo *magick_unused(exception))
4895{
4896 magick_unreferenced(image);
4897 magick_unreferenced(channel);
4898 magick_unreferenced(radius);
4899 magick_unreferenced(sigma);
4900 magick_unreferenced(gain);
4901 magick_unreferenced(threshold);
4902 magick_unreferenced(exception);
4903
4904 return NULL;
4905}
4906
4907
4908MagickExport MagickBooleanType AccelerateContrastImage(
4909 Image* magick_unused(image),const MagickBooleanType magick_unused(sharpen),
4910 ExceptionInfo* magick_unused(exception))
4911{
4912 magick_unreferenced(image);
4913 magick_unreferenced(sharpen);
4914 magick_unreferenced(exception);
4915
4916 return MagickFalse;
4917}
4918
4919MagickExport MagickBooleanType AccelerateEqualizeImage(
4920 Image* magick_unused(image), const ChannelType magick_unused(channel),
4921 ExceptionInfo* magick_unused(exception))
4922{
4923 magick_unreferenced(image);
4924 magick_unreferenced(channel);
4925 magick_unreferenced(exception);
4926
4927 return MagickFalse;
4928}
4929
4930MagickExport Image *AccelerateDespeckleImage(const Image* magick_unused(image),
4931 ExceptionInfo* magick_unused(exception))
4932{
4933 magick_unreferenced(image);
4934 magick_unreferenced(exception);
4935
4936 return NULL;
4937}
4938
4939MagickExport Image *AccelerateResizeImage(const Image* magick_unused(image),
4940 const size_t magick_unused(resizedColumns),
4941 const size_t magick_unused(resizedRows),
4942 const ResizeFilter* magick_unused(resizeFilter),
4943 ExceptionInfo *magick_unused(exception))
4944{
4945 magick_unreferenced(image);
4946 magick_unreferenced(resizedColumns);
4947 magick_unreferenced(resizedRows);
4948 magick_unreferenced(resizeFilter);
4949 magick_unreferenced(exception);
4950
4951 return NULL;
4952}
4953
4954
4955MagickExport
4956MagickBooleanType AccelerateModulateImage(
4957 Image* image, double percent_brightness, double percent_hue,
4958 double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
4959{
4960 magick_unreferenced(image);
4961 magick_unreferenced(percent_brightness);
4962 magick_unreferenced(percent_hue);
4963 magick_unreferenced(percent_saturation);
4964 magick_unreferenced(colorspace);
4965 magick_unreferenced(exception);
4966 return(MagickFalse);
4967}
4968
cristye85d0f72013-11-27 02:25:43 +00004969MagickExport Image *AccelerateAddNoiseImage(const Image *image,
4970 const ChannelType channel, const NoiseType noise_type,ExceptionInfo *exception)
4971{
4972 magick_unreferenced(image);
4973 magick_unreferenced(channel);
4974 magick_unreferenced(noise_type);
4975 magick_unreferenced(exception);
4976 return NULL;
4977}
cristyf034abb2013-11-24 14:16:14 +00004978
4979#endif /* MAGICKCORE_OPENCL_SUPPORT */
4980
4981MagickExport MagickBooleanType AccelerateConvolveImage(
4982 const Image *magick_unused(image),const KernelInfo *magick_unused(kernel),
4983 Image *magick_unused(convolve_image),ExceptionInfo *magick_unused(exception))
4984{
4985 magick_unreferenced(image);
4986 magick_unreferenced(kernel);
4987 magick_unreferenced(convolve_image);
4988 magick_unreferenced(exception);
4989
4990 /* legacy, do not use */
4991 return(MagickFalse);
4992}
4993