blob: fda8472822eb74d0a417884890dd36a5d1ae6567 [file] [log] [blame]
cristye1101892013-08-18 00:53:18 +00001/*
cristyfe676ee2013-11-18 13:03:38 +00002 Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization
cristye1101892013-08-18 00:53:18 +00003 dedicated to making software imaging solutions freely available.
cristyf034abb2013-11-24 14:16:14 +00004
cristye1101892013-08-18 00:53:18 +00005 You may not use this file except in compliance with the License.
6 obtain a copy of the License at
cristyf034abb2013-11-24 14:16:14 +00007
cristye1101892013-08-18 00:53:18 +00008 http://www.imagemagick.org/script/license.php
cristyf034abb2013-11-24 14:16:14 +00009
cristye1101892013-08-18 00:53:18 +000010 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
cristyf034abb2013-11-24 14:16:14 +000016 MagickCore private methods for accelerated functions.
cristye1101892013-08-18 00:53:18 +000017*/
cristyf034abb2013-11-24 14:16:14 +000018
cristye1101892013-08-18 00:53:18 +000019#ifndef _MAGICKCORE_ACCELERATE_PRIVATE_H
20#define _MAGICKCORE_ACCELERATE_PRIVATE_H
21
22#if defined(__cplusplus) || defined(c_plusplus)
23extern "C" {
24#endif
25
cristyf034abb2013-11-24 14:16:14 +000026
27#if defined(MAGICKCORE_OPENCL_SUPPORT)
28
29#define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
30#define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
31#define OPENCL_ELSE() "\n #""else " " \n"
32#define OPENCL_ENDIF() "\n #""endif " " \n"
33#define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
34#define STRINGIFY(...) #__VA_ARGS__ "\n"
35
36typedef struct _FloatPixelPacket
37{
cristya22457d2013-12-07 14:03:06 +000038#ifdef MAGICK_PIXEL_RGBA
cristyf034abb2013-11-24 14:16:14 +000039 MagickRealType
40 red,
41 green,
42 blue,
cristya22457d2013-12-07 14:03:06 +000043 opacity;
44#endif
45#ifdef MAGICK_PIXEL_BGRA
46 MagickRealType
47 blue,
48 green,
49 red,
50 opacity;
51#endif
cristyf034abb2013-11-24 14:16:14 +000052} FloatPixelPacket;
53
54const char* accelerateKernels =
55 STRINGIFY(
56 typedef enum
57 {
58 UndefinedChannel,
59 RedChannel = 0x0001,
60 GrayChannel = 0x0001,
61 CyanChannel = 0x0001,
62 GreenChannel = 0x0002,
63 MagentaChannel = 0x0002,
64 BlueChannel = 0x0004,
65 YellowChannel = 0x0004,
66 AlphaChannel = 0x0008,
67 OpacityChannel = 0x0008,
68 MatteChannel = 0x0008, /* deprecated */
69 BlackChannel = 0x0020,
70 IndexChannel = 0x0020,
71 CompositeChannels = 0x002F,
72 AllChannels = 0x7ffffff,
73 /*
74 Special purpose channel types.
75 */
76 TrueAlphaChannel = 0x0040, /* extract actual alpha channel from opacity */
77 RGBChannels = 0x0080, /* set alpha from grayscale mask in RGB */
78 GrayChannels = 0x0080,
79 SyncChannels = 0x0100, /* channels should be modified equally */
80 DefaultChannels = ((AllChannels | SyncChannels) &~ OpacityChannel)
81 } ChannelType;
82 )
83
84 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
85
86 STRINGIFY(
87 inline CLQuantum ScaleCharToQuantum(const unsigned char value)
88 {
89 return((CLQuantum) value);
90 }
91 )
92
93 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
94
95 STRINGIFY(
96 inline CLQuantum ScaleCharToQuantum(const unsigned char value)
97 {
98 return((CLQuantum) (257.0f*value));
99 }
100 )
101
102 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
103
104 STRINGIFY(
105 inline CLQuantum ScaleCharToQuantum(const unsigned char value)
106 {
107 return((Quantum) (16843009.0*value));
108 }
109 )
110
111 OPENCL_ENDIF()
112
113
114 STRINGIFY(
115 inline int ClampToCanvas(const int offset,const int range)
116 {
117 return clamp(offset, (int)0, range-1);
118 }
119 )
120
121 STRINGIFY(
122 inline int ClampToCanvasWithHalo(const int offset,const int range, const int edge, const int section)
123 {
124 return clamp(offset, section?(int)(0-edge):(int)0, section?(range-1):(range-1+edge));
125 }
126 )
127
128 STRINGIFY(
129 inline CLQuantum ClampToQuantum(const float value)
130 {
131 return (CLQuantum) (clamp(value, 0.0f, (float) QuantumRange) + 0.5f);
132 }
133 )
134
135 STRINGIFY(
136 inline uint ScaleQuantumToMap(CLQuantum value)
137 {
138 if (value >= (CLQuantum) MaxMap)
139 return ((uint)MaxMap);
140 else
141 return ((uint)value);
142 }
143 )
144
145 STRINGIFY(
146 inline float PerceptibleReciprocal(const float x)
147 {
148 float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0;
149 return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon));
150 }
151 )
152
153 OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
154
155 STRINGIFY(
156
157 inline CLQuantum getBlue(CLPixelType p) { return p.x; }
158 inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
159 inline float getBlueF4(float4 p) { return p.x; }
160 inline void setBlueF4(float4* p, float value) { (*p).x = value; }
161
162 inline CLQuantum getGreen(CLPixelType p) { return p.y; }
163 inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
164 inline float getGreenF4(float4 p) { return p.y; }
165 inline void setGreenF4(float4* p, float value) { (*p).y = value; }
166
167 inline CLQuantum getRed(CLPixelType p) { return p.z; }
168 inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
169 inline float getRedF4(float4 p) { return p.z; }
170 inline void setRedF4(float4* p, float value) { (*p).z = value; }
171
172 inline CLQuantum getOpacity(CLPixelType p) { return p.w; }
173 inline void setOpacity(CLPixelType* p, CLQuantum value) { (*p).w = value; }
174 inline float getOpacityF4(float4 p) { return p.w; }
175 inline void setOpacityF4(float4* p, float value) { (*p).w = value; }
176
177 inline float GetPixelIntensity(int colorspace, CLPixelType p)
178 {
179 // this is for default intensity and sRGB (not RGB) color space
180 float red = getRed(p);
181 float green = getGreen(p);
182 float blue = getBlue(p);
183
184 if (colorspace == 0)
185 return 0.212656*red+0.715158*green+0.072186*blue;
186 else
187 {
188 // need encode gamma
189 }
190 return 0.0;
191 }
192 )
193
194 STRINGIFY(
195 __kernel
cristya22457d2013-12-07 14:03:06 +0000196 void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
cristyf034abb2013-11-24 14:16:14 +0000197 const unsigned int imageWidth, const unsigned int imageHeight,
198 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
199 const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
200
201 int2 blockID;
202 blockID.x = get_group_id(0);
203 blockID.y = get_group_id(1);
204
205 // image area processed by this workgroup
206 int2 imageAreaOrg;
207 imageAreaOrg.x = blockID.x * get_local_size(0);
208 imageAreaOrg.y = blockID.y * get_local_size(1);
209
210 int2 midFilterDimen;
211 midFilterDimen.x = (filterWidth-1)/2;
212 midFilterDimen.y = (filterHeight-1)/2;
213
214 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
215
216 // dimension of the local cache
217 int2 cachedAreaDimen;
218 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
219 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
220
221 // cache the pixels accessed by this workgroup in local memory
222 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
223 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
224 int groupSize = get_local_size(0) * get_local_size(1);
225 for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
226
227 int2 cachedAreaIndex;
228 cachedAreaIndex.x = i % cachedAreaDimen.x;
229 cachedAreaIndex.y = i / cachedAreaDimen.x;
230
231 int2 imagePixelIndex;
232 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
233
234 // only support EdgeVirtualPixelMethod through ClampToCanvas
235 // TODO: implement other virtual pixel method
236 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
237 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
238
239 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
240 }
241
242 // cache the filter
243 for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
244 filterCache[i] = filter[i];
245 }
246 barrier(CLK_LOCAL_MEM_FENCE);
247
248
249 int2 imageIndex;
250 imageIndex.x = imageAreaOrg.x + get_local_id(0);
251 imageIndex.y = imageAreaOrg.y + get_local_id(1);
252
253 // if out-of-range, stops here and quit
254 if (imageIndex.x >= imageWidth
255 || imageIndex.y >= imageHeight) {
256 return;
257 }
258
259 int filterIndex = 0;
260 float4 sum = (float4)0.0f;
261 float gamma = 0.0f;
262 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
263 int cacheIndexY = get_local_id(1);
264 for (int j = 0; j < filterHeight; j++) {
265 int cacheIndexX = get_local_id(0);
266 for (int i = 0; i < filterWidth; i++) {
267 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
268 float f = filterCache[filterIndex];
269
270 sum.x += f * p.x;
271 sum.y += f * p.y;
272 sum.z += f * p.z;
273 sum.w += f * p.w;
274
275 gamma += f;
276 filterIndex++;
277 cacheIndexX++;
278 }
279 cacheIndexY++;
280 }
281 }
282 else {
283 int cacheIndexY = get_local_id(1);
284 for (int j = 0; j < filterHeight; j++) {
285 int cacheIndexX = get_local_id(0);
286 for (int i = 0; i < filterWidth; i++) {
287
288 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
289 float alpha = QuantumScale*(QuantumRange-p.w);
290 float f = filterCache[filterIndex];
291 float g = alpha * f;
292
293 sum.x += g*p.x;
294 sum.y += g*p.y;
295 sum.z += g*p.z;
296 sum.w += f*p.w;
297
298 gamma += g;
299 filterIndex++;
300 cacheIndexX++;
301 }
302 cacheIndexY++;
303 }
304 gamma = PerceptibleReciprocal(gamma);
305 sum.xyz = gamma*sum.xyz;
306 }
307 CLPixelType outputPixel;
308 outputPixel.x = ClampToQuantum(sum.x);
309 outputPixel.y = ClampToQuantum(sum.y);
310 outputPixel.z = ClampToQuantum(sum.z);
311 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
312
313 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
314 }
315 )
316
317 STRINGIFY(
cristya22457d2013-12-07 14:03:06 +0000318 __kernel
319 void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
320 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
321 const uint matte, const ChannelType channel) {
322
323 int2 imageIndex;
324 imageIndex.x = get_global_id(0);
325 imageIndex.y = get_global_id(1);
326
327 unsigned int imageWidth = get_global_size(0);
328 unsigned int imageHeight = get_global_size(1);
329
330 if (imageIndex.x >= imageWidth
331 || imageIndex.y >= imageHeight)
332 return;
333
334 int2 midFilterDimen;
335 midFilterDimen.x = (filterWidth-1)/2;
336 midFilterDimen.y = (filterHeight-1)/2;
337
338 int filterIndex = 0;
339 float4 sum = (float4)0.0f;
340 float gamma = 0.0f;
341 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
342 for (int j = 0; j < filterHeight; j++) {
343 int2 inputPixelIndex;
344 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
345 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
346 for (int i = 0; i < filterWidth; i++) {
347 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
348 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
349
350 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
351 float f = filter[filterIndex];
352
353 sum.x += f * p.x;
354 sum.y += f * p.y;
355 sum.z += f * p.z;
356 sum.w += f * p.w;
357
358 gamma += f;
359
360 filterIndex++;
361 }
362 }
363 }
364 else {
365
366 for (int j = 0; j < filterHeight; j++) {
367 int2 inputPixelIndex;
368 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
369 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
370 for (int i = 0; i < filterWidth; i++) {
371 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
372 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
373
374 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
375 float alpha = QuantumScale*(QuantumRange-p.w);
376 float f = filter[filterIndex];
377 float g = alpha * f;
378
379 sum.x += g*p.x;
380 sum.y += g*p.y;
381 sum.z += g*p.z;
382 sum.w += f*p.w;
383
384 gamma += g;
385
386
387 filterIndex++;
388 }
389 }
390 gamma = PerceptibleReciprocal(gamma);
391 sum.xyz = gamma*sum.xyz;
392 }
393
394 CLPixelType outputPixel;
395 outputPixel.x = ClampToQuantum(sum.x);
396 outputPixel.y = ClampToQuantum(sum.y);
397 outputPixel.z = ClampToQuantum(sum.z);
398 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
399
400 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
401 }
402 )
403
404 STRINGIFY(
cristyf034abb2013-11-24 14:16:14 +0000405 typedef enum
406 {
407 UndefinedFunction,
408 PolynomialFunction,
409 SinusoidFunction,
410 ArcsinFunction,
411 ArctanFunction
412 } MagickFunction;
413 )
414
415 STRINGIFY(
416
417 /*
418 apply FunctionImageChannel(braightness-contrast)
419 */
420 CLPixelType ApplyFunction(CLPixelType pixel,const MagickFunction function,
421 const unsigned int number_parameters,
422 __constant float *parameters)
423 {
424 float4 result = (float4) 0.0f;
425 switch (function)
426 {
427 case PolynomialFunction:
428 {
429 for (unsigned int i=0; i < number_parameters; i++)
430 result = result*QuantumScale*convert_float4(pixel) + parameters[i];
431 result *= QuantumRange;
432 break;
433 }
434 case SinusoidFunction:
435 {
436 float freq,phase,ampl,bias;
437 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
438 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
439 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
440 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
441 result = QuantumRange*(ampl*sin(2.0f*MagickPI*
442 (freq*QuantumScale*convert_float4(pixel) + phase/360.0f)) + bias);
443 break;
444 }
445 case ArcsinFunction:
446 {
447 float width,range,center,bias;
448 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
449 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
450 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
451 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
452 result = 2.0f/width*(QuantumScale*convert_float4(pixel) - center);
453 result = range/MagickPI*asin(result)+bias;
454 result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
455 result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
456 result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
457 result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
458 result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x;
459 result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x;
460 result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
461 result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
462
463 result *= QuantumRange;
464 break;
465 }
466 case ArctanFunction:
467 {
468 float slope,range,center,bias;
469 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
470 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
471 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
472 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
473 result = MagickPI*slope*(QuantumScale*convert_float4(pixel)-center);
474 result = QuantumRange*(range/MagickPI*atan(result) + bias);
475 break;
476 }
477 case UndefinedFunction:
478 break;
479 }
480 return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
481 ClampToQuantum(result.z), ClampToQuantum(result.w));
482 }
483 )
484
485 STRINGIFY(
486 /*
487 Improve brightness / contrast of the image
488 channel : define which channel is improved
489 function : the function called to enchance the brightness contrast
490 number_parameters : numbers of parameters
491 parameters : the parameter
492 */
493 __kernel void FunctionImage(__global CLPixelType *im,
494 const ChannelType channel, const MagickFunction function,
495 const unsigned int number_parameters, __constant float *parameters)
496 {
497 const int x = get_global_id(0);
498 const int y = get_global_id(1);
499 const int columns = get_global_size(0);
500 const int c = x + y * columns;
501 im[c] = ApplyFunction(im[c], function, number_parameters, parameters);
502 }
503 )
504
505 STRINGIFY(
506 /*
507 */
508 __kernel void Equalize(__global CLPixelType * restrict im,
509 const ChannelType channel,
510 __global CLPixelType * restrict equalize_map,
511 const float4 white, const float4 black)
512 {
513 const int x = get_global_id(0);
514 const int y = get_global_id(1);
515 const int columns = get_global_size(0);
516 const int c = x + y * columns;
517
518 uint ePos;
519 CLPixelType oValue, eValue;
520 CLQuantum red, green, blue, opacity;
521
522 //read from global
523 oValue=im[c];
524
525 if ((channel & SyncChannels) != 0)
526 {
527 if (getRedF4(white) != getRedF4(black))
528 {
529 ePos = ScaleQuantumToMap(getRed(oValue));
530 eValue = equalize_map[ePos];
531 red = getRed(eValue);
532 ePos = ScaleQuantumToMap(getGreen(oValue));
533 eValue = equalize_map[ePos];
534 green = getRed(eValue);
535 ePos = ScaleQuantumToMap(getBlue(oValue));
536 eValue = equalize_map[ePos];
537 blue = getRed(eValue);
538 ePos = ScaleQuantumToMap(getOpacity(oValue));
539 eValue = equalize_map[ePos];
540 opacity = getRed(eValue);
541
542 //write back
543 im[c]=(CLPixelType)(blue, green, red, opacity);
544 }
545
546 }
547
548 // for equalizing, we always need all channels?
549 // otherwise something more
550
551 }
552 )
553
554 STRINGIFY(
555 /*
556 */
557 __kernel void Histogram(__global CLPixelType * restrict im,
558 const ChannelType channel, const int colorspace,
559 __global uint4 * restrict histogram)
560 {
561 const int x = get_global_id(0);
562 const int y = get_global_id(1);
563 const int columns = get_global_size(0);
564 const int c = x + y * columns;
565 if ((channel & SyncChannels) != 0)
566 {
567 float intensity = GetPixelIntensity(colorspace,im[c]);
568 uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
569 atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position
570 }
571 else
572 {
573 // for equalizing, we always need all channels?
574 // otherwise something more
575 }
576 }
577 )
578
579 STRINGIFY(
580 /*
581 Reduce image noise and reduce detail levels by row
582 im: input pixels filtered_in filtered_im: output pixels
583 filter : convolve kernel width: convolve kernel size
584 channel : define which channel is blured
585 is_RGBA_BGRA : define the input is RGBA or BGRA
586 */
587 __kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
588 const ChannelType channel, __constant float *filter,
589 const unsigned int width,
590 const unsigned int imageColumns, const unsigned int imageRows,
591 __local CLPixelType *temp)
592 {
593 const int x = get_global_id(0);
594 const int y = get_global_id(1);
595
cristyf034abb2013-11-24 14:16:14 +0000596 const int columns = imageColumns;
cristyf034abb2013-11-24 14:16:14 +0000597
598 const unsigned int radius = (width-1)/2;
599 const int wsize = get_local_size(0);
600 const unsigned int loadSize = wsize+width;
601
602 //load chunk only for now
603 //event_t e = async_work_group_copy(temp+radius, im+x+y*columns, wsize, 0);
604 //wait_group_events(1,&e);
605
606 //parallel load and clamp
607 /*
608 int count = 0;
609 for (int i=0; i < loadSize; i=i+wsize)
610 {
611 int currentX = x + wsize*(count++);
612
613 int localId = get_local_id(0);
614
615 if ((localId+i) > loadSize)
616 break;
617
618 temp[localId+i] = im[y*columns+ClampToCanvas(currentX-radius, columns)];
619
620 if (y==0 && get_group_id(0) == 0)
621 {
622 printf("(%d %d) temp %d load %d currentX %d\n", x, y, localId+i, ClampToCanvas(currentX-radius, columns), currentX);
623 }
624 }
625 */
626
627 //group coordinate
628 const int groupX=get_local_size(0)*get_group_id(0);
629 const int groupY=get_local_size(1)*get_group_id(1);
630
631 //parallel load and clamp
632 for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
633 {
634 //int cx = ClampToCanvas(groupX+i, columns);
635 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
636
637 if (0 && y==0 && get_group_id(1) == 0)
638 {
639 printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
640 }
641 }
642
643 // barrier
644 barrier(CLK_LOCAL_MEM_FENCE);
645
646 // only do the work if this is not a patched item
647 if (get_global_id(0) < columns)
648 {
649 // compute
650 float4 result = (float4) 0;
651
652 int i = 0;
653
654 \n #ifndef UFACTOR \n
655 \n #define UFACTOR 8 \n
656 \n #endif \n
657
658 for ( ; i+UFACTOR < width; )
659 {
660 \n #pragma unroll UFACTOR\n
661 for (int j=0; j < UFACTOR; j++, i++)
662 {
663 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
664 }
665 }
666
667 for ( ; i < width; i++)
668 {
669 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
670 }
671
672 result.x = ClampToQuantum(result.x);
673 result.y = ClampToQuantum(result.y);
674 result.z = ClampToQuantum(result.z);
675 result.w = ClampToQuantum(result.w);
676
677 // write back to global
678 filtered_im[y*columns+x] = result;
679 }
680 }
681 )
682
683 STRINGIFY(
684 /*
685 Reduce image noise and reduce detail levels by row
686 im: input pixels filtered_in filtered_im: output pixels
687 filter : convolve kernel width: convolve kernel size
688 channel : define which channel is blured
689 is_RGBA_BGRA : define the input is RGBA or BGRA
690 */
691 __kernel void BlurRowSection(__global CLPixelType *im, __global float4 *filtered_im,
692 const ChannelType channel, __constant float *filter,
693 const unsigned int width,
694 const unsigned int imageColumns, const unsigned int imageRows,
695 __local CLPixelType *temp,
696 const unsigned int offsetRows, const unsigned int section)
697 {
698 const int x = get_global_id(0);
699 const int y = get_global_id(1);
700
cristyf034abb2013-11-24 14:16:14 +0000701 const int columns = imageColumns;
cristyf034abb2013-11-24 14:16:14 +0000702
703 const unsigned int radius = (width-1)/2;
704 const int wsize = get_local_size(0);
705 const unsigned int loadSize = wsize+width;
706
707 //group coordinate
708 const int groupX=get_local_size(0)*get_group_id(0);
709 const int groupY=get_local_size(1)*get_group_id(1);
710
711 //offset the input data, assuming section is 0, 1
712 im += imageColumns * (offsetRows - radius * section);
713
714 //parallel load and clamp
715 for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
716 {
717 //int cx = ClampToCanvas(groupX+i, columns);
718 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
719
720 if (0 && y==0 && get_group_id(1) == 0)
721 {
722 printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
723 }
724 }
725
726 // barrier
727 barrier(CLK_LOCAL_MEM_FENCE);
728
729 // only do the work if this is not a patched item
730 if (get_global_id(0) < columns)
731 {
732 // compute
733 float4 result = (float4) 0;
734
735 int i = 0;
736
737 \n #ifndef UFACTOR \n
738 \n #define UFACTOR 8 \n
739 \n #endif \n
740
741 for ( ; i+UFACTOR < width; )
742 {
743 \n #pragma unroll UFACTOR\n
744 for (int j=0; j < UFACTOR; j++, i++)
745 {
746 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
747 }
748 }
749
750 for ( ; i < width; i++)
751 {
752 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
753 }
754
755 result.x = ClampToQuantum(result.x);
756 result.y = ClampToQuantum(result.y);
757 result.z = ClampToQuantum(result.z);
758 result.w = ClampToQuantum(result.w);
759
760 // write back to global
761 filtered_im[y*columns+x] = result;
762 }
763
764 }
765 )
766
767 STRINGIFY(
768 /*
769 Reduce image noise and reduce detail levels by line
770 im: input pixels filtered_in filtered_im: output pixels
771 filter : convolve kernel width: convolve kernel size
772 channel : define which channel is blured\
773 is_RGBA_BGRA : define the input is RGBA or BGRA
774 */
775 __kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
776 const ChannelType channel, __constant float *filter,
777 const unsigned int width,
778 const unsigned int imageColumns, const unsigned int imageRows,
779 __local float4 *temp)
780 {
781 const int x = get_global_id(0);
782 const int y = get_global_id(1);
783
784 //const int columns = get_global_size(0);
785 //const int rows = get_global_size(1);
786 const int columns = imageColumns;
787 const int rows = imageRows;
788
789 unsigned int radius = (width-1)/2;
790 const int wsize = get_local_size(1);
791 const unsigned int loadSize = wsize+width;
792
793 //group coordinate
794 const int groupX=get_local_size(0)*get_group_id(0);
795 const int groupY=get_local_size(1)*get_group_id(1);
796 //notice that get_local_size(0) is 1, so
797 //groupX=get_group_id(0);
798
799 //parallel load and clamp
800 for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
801 {
802 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
803 }
804
805 // barrier
806 barrier(CLK_LOCAL_MEM_FENCE);
807
808 // only do the work if this is not a patched item
809 if (get_global_id(1) < rows)
810 {
811 // compute
812 float4 result = (float4) 0;
813
814 int i = 0;
815
816 \n #ifndef UFACTOR \n
817 \n #define UFACTOR 8 \n
818 \n #endif \n
819
820 for ( ; i+UFACTOR < width; )
821 {
822 \n #pragma unroll UFACTOR \n
823 for (int j=0; j < UFACTOR; j++, i++)
824 {
825 result+=filter[i]*temp[i+get_local_id(1)];
826 }
827 }
828
829 for ( ; i < width; i++)
830 {
831 result+=filter[i]*temp[i+get_local_id(1)];
832 }
833
834 result.x = ClampToQuantum(result.x);
835 result.y = ClampToQuantum(result.y);
836 result.z = ClampToQuantum(result.z);
837 result.w = ClampToQuantum(result.w);
838
839 // write back to global
840 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
841 }
842
843 }
844 )
845
846
847 STRINGIFY(
848 /*
849 Reduce image noise and reduce detail levels by line
850 im: input pixels filtered_in filtered_im: output pixels
851 filter : convolve kernel width: convolve kernel size
852 channel : define which channel is blured\
853 is_RGBA_BGRA : define the input is RGBA or BGRA
854 */
855 __kernel void BlurColumnSection(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
856 const ChannelType channel, __constant float *filter,
857 const unsigned int width,
858 const unsigned int imageColumns, const unsigned int imageRows,
859 __local float4 *temp,
860 const unsigned int offsetRows, const unsigned int section)
861 {
862 const int x = get_global_id(0);
863 const int y = get_global_id(1);
864
865 //const int columns = get_global_size(0);
866 //const int rows = get_global_size(1);
867 const int columns = imageColumns;
868 const int rows = imageRows;
869
870 unsigned int radius = (width-1)/2;
871 const int wsize = get_local_size(1);
872 const unsigned int loadSize = wsize+width;
873
874 //group coordinate
875 const int groupX=get_local_size(0)*get_group_id(0);
876 const int groupY=get_local_size(1)*get_group_id(1);
877 //notice that get_local_size(0) is 1, so
878 //groupX=get_group_id(0);
879
880 // offset the input data
881 blurRowData += imageColumns * radius * section;
882
883 //parallel load and clamp
884 for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
885 {
886 int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX;
887 temp[i] = *(blurRowData+pos);
888 }
889
890 // barrier
891 barrier(CLK_LOCAL_MEM_FENCE);
892
893 // only do the work if this is not a patched item
894 if (get_global_id(1) < rows)
895 {
896 // compute
897 float4 result = (float4) 0;
898
899 int i = 0;
900
901 \n #ifndef UFACTOR \n
902 \n #define UFACTOR 8 \n
903 \n #endif \n
904
905 for ( ; i+UFACTOR < width; )
906 {
907 \n #pragma unroll UFACTOR \n
908 for (int j=0; j < UFACTOR; j++, i++)
909 {
910 result+=filter[i]*temp[i+get_local_id(1)];
911 }
912 }
913 for ( ; i < width; i++)
914 {
915 result+=filter[i]*temp[i+get_local_id(1)];
916 }
917
918 result.x = ClampToQuantum(result.x);
919 result.y = ClampToQuantum(result.y);
920 result.z = ClampToQuantum(result.z);
921 result.w = ClampToQuantum(result.w);
922
923 // offset the output data
924 filtered_im += imageColumns * offsetRows;
925
926 // write back to global
927 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
928 }
929
930 }
931 )
932
933
934 STRINGIFY(
935 __kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage,
936 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
937 const unsigned int imageColumns, const unsigned int imageRows,
938 __local float4* cachedData, __local float* cachedFilter,
939 const ChannelType channel, const __global float *filter, const unsigned int width,
940 const float gain, const float threshold)
941 {
942 const unsigned int radius = (width-1)/2;
943
944 // cache the pixel shared by the workgroup
945 const int groupX = get_group_id(0);
946 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
947 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
948
949 if (groupStartY >= 0
950 && groupStopY < imageRows) {
951 event_t e = async_work_group_strided_copy(cachedData
952 ,blurRowData+groupStartY*imageColumns+groupX
953 ,groupStopY-groupStartY,imageColumns,0);
954 wait_group_events(1,&e);
955 }
956 else {
957 for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
958 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
959 }
960 barrier(CLK_LOCAL_MEM_FENCE);
961 }
962 // cache the filter as well
963 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
964 wait_group_events(1,&e);
965
966 // only do the work if this is not a patched item
967 //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
968 const int cy = get_global_id(1);
969
970 if (cy < imageRows) {
971 float4 blurredPixel = (float4) 0.0f;
972
973 int i = 0;
974
975 \n #ifndef UFACTOR \n
976 \n #define UFACTOR 8 \n
977 \n #endif \n
978
979 for ( ; i+UFACTOR < width; )
980 {
981 \n #pragma unroll UFACTOR \n
982 for (int j=0; j < UFACTOR; j++, i++)
983 {
984 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
985 }
986 }
987
988 for ( ; i < width; i++)
989 {
990 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
991 }
992
993 blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
994 ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
995
996 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
997 float4 outputPixel = inputImagePixel - blurredPixel;
998
999 float quantumThreshold = QuantumRange*threshold;
1000
1001 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
1002 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
1003
1004 //write back
1005 filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
1006 ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
1007
1008 }
1009 }
1010
1011 __kernel void UnsharpMaskBlurColumnSection(const __global CLPixelType* inputImage,
1012 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1013 const unsigned int imageColumns, const unsigned int imageRows,
1014 __local float4* cachedData, __local float* cachedFilter,
1015 const ChannelType channel, const __global float *filter, const unsigned int width,
1016 const float gain, const float threshold,
1017 const unsigned int offsetRows, const unsigned int section)
1018 {
1019 const unsigned int radius = (width-1)/2;
1020
1021 // cache the pixel shared by the workgroup
1022 const int groupX = get_group_id(0);
1023 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
1024 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
1025
1026 // offset the input data
1027 blurRowData += imageColumns * radius * section;
1028
1029 if (groupStartY >= 0
1030 && groupStopY < imageRows) {
1031 event_t e = async_work_group_strided_copy(cachedData
1032 ,blurRowData+groupStartY*imageColumns+groupX
1033 ,groupStopY-groupStartY,imageColumns,0);
1034 wait_group_events(1,&e);
1035 }
1036 else {
1037 for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
1038 int pos = ClampToCanvasWithHalo(groupStartY+i,imageRows, radius, section)*imageColumns+ groupX;
1039 cachedData[i] = *(blurRowData + pos);
1040 }
1041 barrier(CLK_LOCAL_MEM_FENCE);
1042 }
1043 // cache the filter as well
1044 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
1045 wait_group_events(1,&e);
1046
1047 // only do the work if this is not a patched item
1048 //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
1049 const int cy = get_global_id(1);
1050
1051 if (cy < imageRows) {
1052 float4 blurredPixel = (float4) 0.0f;
1053
1054 int i = 0;
1055
1056 \n #ifndef UFACTOR \n
1057 \n #define UFACTOR 8 \n
1058 \n #endif \n
1059
1060 for ( ; i+UFACTOR < width; )
1061 {
1062 \n #pragma unroll UFACTOR \n
1063 for (int j=0; j < UFACTOR; j++, i++)
1064 {
1065 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
1066 }
1067 }
1068
1069 for ( ; i < width; i++)
1070 {
1071 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
1072 }
1073
1074 blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
1075 ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
1076
1077 // offset the output data
1078 inputImage += imageColumns * offsetRows;
1079 filtered_im += imageColumns * offsetRows;
1080
1081 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
1082 float4 outputPixel = inputImagePixel - blurredPixel;
1083
1084 float quantumThreshold = QuantumRange*threshold;
1085
1086 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
1087 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
1088
1089 //write back
1090 filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
1091 ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
1092
1093 }
1094
1095 }
1096 )
1097
1098
1099
1100 STRINGIFY(
1101
1102 __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1103 , const unsigned int imageWidth, const unsigned int imageHeight
1104 , const int2 offset, const int polarity, const int matte) {
1105
1106 int x = get_global_id(0);
1107 int y = get_global_id(1);
1108
1109 CLPixelType v = inputImage[y*imageWidth+x];
1110
1111 int2 neighbor;
1112 neighbor.y = y + offset.y;
1113 neighbor.x = x + offset.x;
1114
1115 int2 clampedNeighbor;
1116 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1117 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1118
1119 CLPixelType r = (clampedNeighbor.x == neighbor.x
1120 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1121 :(CLPixelType)0;
1122
1123 int sv[4];
1124 sv[0] = (int)v.x;
1125 sv[1] = (int)v.y;
1126 sv[2] = (int)v.z;
1127 sv[3] = (int)v.w;
1128
1129 int sr[4];
1130 sr[0] = (int)r.x;
1131 sr[1] = (int)r.y;
1132 sr[2] = (int)r.z;
1133 sr[3] = (int)r.w;
1134
1135 if (polarity > 0) {
1136 \n #pragma unroll 4\n
1137 for (unsigned int i = 0; i < 4; i++) {
1138 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1139 }
1140 }
1141 else {
1142 \n #pragma unroll 4\n
1143 for (unsigned int i = 0; i < 4; i++) {
1144 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1145 }
1146
1147 }
1148
1149 v.x = (CLQuantum)sv[0];
1150 v.y = (CLQuantum)sv[1];
1151 v.z = (CLQuantum)sv[2];
1152
1153 if (matte!=0)
1154 v.w = (CLQuantum)sv[3];
1155
1156 outputImage[y*imageWidth+x] = v;
1157
1158 }
1159
1160
1161 )
1162
1163
1164
1165 STRINGIFY(
1166
1167 __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1168 , const unsigned int imageWidth, const unsigned int imageHeight
1169 , const int2 offset, const int polarity, const int matte) {
1170
1171 int x = get_global_id(0);
1172 int y = get_global_id(1);
1173
1174 CLPixelType v = inputImage[y*imageWidth+x];
1175
1176 int2 neighbor, clampedNeighbor;
1177
1178 neighbor.y = y + offset.y;
1179 neighbor.x = x + offset.x;
1180 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1181 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1182
1183 CLPixelType r = (clampedNeighbor.x == neighbor.x
1184 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1185 :(CLPixelType)0;
1186
1187
1188 neighbor.y = y - offset.y;
1189 neighbor.x = x - offset.x;
1190 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1191 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1192
1193 CLPixelType s = (clampedNeighbor.x == neighbor.x
1194 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1195 :(CLPixelType)0;
1196
1197
1198 int sv[4];
1199 sv[0] = (int)v.x;
1200 sv[1] = (int)v.y;
1201 sv[2] = (int)v.z;
1202 sv[3] = (int)v.w;
1203
1204 int sr[4];
1205 sr[0] = (int)r.x;
1206 sr[1] = (int)r.y;
1207 sr[2] = (int)r.z;
1208 sr[3] = (int)r.w;
1209
1210 int ss[4];
1211 ss[0] = (int)s.x;
1212 ss[1] = (int)s.y;
1213 ss[2] = (int)s.z;
1214 ss[3] = (int)s.w;
1215
1216 if (polarity > 0) {
1217 \n #pragma unroll 4\n
1218 for (unsigned int i = 0; i < 4; i++) {
1219 //sv[i] = (ss[i] >= (sv[i]+ScaleCharToQuantum(2)) && sr[i] > sv[i] ) ? (sv[i]+ScaleCharToQuantum(1)):sv[i];
1220 //
1221 //sv[i] =(!( (int)(ss[i] >= (sv[i]+ScaleCharToQuantum(2))) && (int) (sr[i] > sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1222 //sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) || (int) ( sr[i] <= sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1223 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1224 }
1225 }
1226 else {
1227 \n #pragma unroll 4\n
1228 for (unsigned int i = 0; i < 4; i++) {
1229 //sv[i] = (ss[i] <= (sv[i]-ScaleCharToQuantum(2)) && sr[i] < sv[i] ) ? (sv[i]-ScaleCharToQuantum(1)):sv[i];
1230 //
1231 //sv[i] = ( (int)(ss[i] <= (sv[i]-ScaleCharToQuantum(2)) ) + (int)( sr[i] < sv[i] ) ==0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1232 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1233 }
1234 }
1235
1236 v.x = (CLQuantum)sv[0];
1237 v.y = (CLQuantum)sv[1];
1238 v.z = (CLQuantum)sv[2];
1239
1240 if (matte!=0)
1241 v.w = (CLQuantum)sv[3];
1242
1243 outputImage[y*imageWidth+x] = v;
1244
1245 }
1246
1247
1248 )
1249
1250
1251 STRINGIFY(
1252 __kernel void RadialBlur(const __global CLPixelType *im, __global CLPixelType *filtered_im,
1253 const float4 bias,
1254 const unsigned int channel, const unsigned int matte,
1255 const float2 blurCenter,
1256 __constant float *cos_theta, __constant float *sin_theta,
1257 const unsigned int cossin_theta_size)
1258 {
1259 const int x = get_global_id(0);
1260 const int y = get_global_id(1);
1261 const int columns = get_global_size(0);
1262 const int rows = get_global_size(1);
1263 unsigned int step = 1;
1264 float center_x = (float) x - blurCenter.x;
1265 float center_y = (float) y - blurCenter.y;
1266 float radius = hypot(center_x, center_y);
1267
1268 //float blur_radius = hypot((float) columns/2.0f, (float) rows/2.0f);
1269 float blur_radius = hypot(blurCenter.x, blurCenter.y);
1270
1271 if (radius > MagickEpsilon)
1272 {
1273 step = (unsigned int) (blur_radius / radius);
1274 if (step == 0)
1275 step = 1;
1276 if (step >= cossin_theta_size)
1277 step = cossin_theta_size-1;
1278 }
1279
1280 float4 result;
1281 result.x = (float)bias.x;
1282 result.y = (float)bias.y;
1283 result.z = (float)bias.z;
1284 result.w = (float)bias.w;
1285 float normalize = 0.0f;
1286
1287 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1288 for (unsigned int i=0; i<cossin_theta_size; i+=step)
1289 {
1290 result += convert_float4(im[
1291 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
1292 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
1293 normalize += 1.0f;
1294 }
1295 normalize = PerceptibleReciprocal(normalize);
1296 result = result * normalize;
1297 }
1298 else {
1299 float gamma = 0.0f;
1300 for (unsigned int i=0; i<cossin_theta_size; i+=step)
1301 {
1302 float4 p = convert_float4(im[
1303 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
1304 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
1305
1306 float alpha = (float)(QuantumScale*(QuantumRange-p.w));
1307 result.x += alpha * p.x;
1308 result.y += alpha * p.y;
1309 result.z += alpha * p.z;
1310 result.w += p.w;
1311 gamma+=alpha;
1312 normalize += 1.0f;
1313 }
1314 gamma = PerceptibleReciprocal(gamma);
1315 normalize = PerceptibleReciprocal(normalize);
1316 result.x = gamma*result.x;
1317 result.y = gamma*result.y;
1318 result.z = gamma*result.z;
1319 result.w = normalize*result.w;
1320 }
1321 filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
1322 ClampToQuantum(result.z), ClampToQuantum(result.w));
1323 }
1324 )
1325
1326 STRINGIFY(
1327 typedef enum
1328 {
1329 UndefinedColorspace,
1330 RGBColorspace, /* Linear RGB colorspace */
1331 GRAYColorspace, /* greyscale (linear) image (faked 1 channel) */
1332 TransparentColorspace,
1333 OHTAColorspace,
1334 LabColorspace,
1335 XYZColorspace,
1336 YCbCrColorspace,
1337 YCCColorspace,
1338 YIQColorspace,
1339 YPbPrColorspace,
1340 YUVColorspace,
1341 CMYKColorspace, /* negared linear RGB with black separated */
1342 sRGBColorspace, /* Default: non-lienar sRGB colorspace */
1343 HSBColorspace,
1344 HSLColorspace,
1345 HWBColorspace,
1346 Rec601LumaColorspace,
1347 Rec601YCbCrColorspace,
1348 Rec709LumaColorspace,
1349 Rec709YCbCrColorspace,
1350 LogColorspace,
1351 CMYColorspace, /* negated linear RGB colorspace */
1352 LuvColorspace,
1353 HCLColorspace,
1354 LCHColorspace, /* alias for LCHuv */
1355 LMSColorspace,
1356 LCHabColorspace, /* Cylindrical (Polar) Lab */
1357 LCHuvColorspace, /* Cylindrical (Polar) Luv */
1358 scRGBColorspace,
1359 HSIColorspace,
1360 HSVColorspace, /* alias for HSB */
1361 HCLpColorspace,
1362 YDbDrColorspace
1363 } ColorspaceType;
1364 )
1365
1366
1367 STRINGIFY(
1368
1369 inline float3 ConvertRGBToHSB(CLPixelType pixel) {
1370 float3 HueSaturationBrightness;
1371 HueSaturationBrightness.x = 0.0f; // Hue
1372 HueSaturationBrightness.y = 0.0f; // Saturation
1373 HueSaturationBrightness.z = 0.0f; // Brightness
1374
1375 float r=(float) getRed(pixel);
1376 float g=(float) getGreen(pixel);
1377 float b=(float) getBlue(pixel);
1378
1379 float tmin=min(min(r,g),b);
1380 float tmax=max(max(r,g),b);
1381
1382 if (tmax!=0.0f) {
1383 float delta=tmax-tmin;
1384 HueSaturationBrightness.y=delta/tmax;
1385 HueSaturationBrightness.z=QuantumScale*tmax;
1386
1387 if (delta != 0.0f) {
1388 HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
1389 HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
1390 HueSaturationBrightness.x/=6.0f;
1391 HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
1392 }
1393 }
1394 return HueSaturationBrightness;
1395 }
1396
1397 inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness) {
1398
1399 float hue = HueSaturationBrightness.x;
1400 float brightness = HueSaturationBrightness.z;
1401 float saturation = HueSaturationBrightness.y;
1402
1403 CLPixelType rgb;
1404
1405 if (saturation == 0.0f) {
1406 setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
1407 setGreen(&rgb,getRed(rgb));
1408 setBlue(&rgb,getRed(rgb));
1409 }
1410 else {
1411
1412 float h=6.0f*(hue-floor(hue));
1413 float f=h-floor(h);
1414 float p=brightness*(1.0f-saturation);
1415 float q=brightness*(1.0f-saturation*f);
1416 float t=brightness*(1.0f-(saturation*(1.0f-f)));
1417
1418 float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
1419 float clamped_t = ClampToQuantum(QuantumRange*t);
1420 float clamped_p = ClampToQuantum(QuantumRange*p);
1421 float clamped_q = ClampToQuantum(QuantumRange*q);
1422 int ih = (int)h;
1423 setRed(&rgb, (ih == 1)?clamped_q:
1424 (ih == 2 || ih == 3)?clamped_p:
1425 (ih == 4)?clamped_t:
1426 clampedBrightness);
1427
1428 setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
1429 (ih == 3)?clamped_q:
1430 (ih == 4 || ih == 5)?clamped_p:
1431 clamped_t);
1432
1433 setBlue(&rgb, (ih == 2)?clamped_t:
1434 (ih == 3 || ih == 4)?clampedBrightness:
1435 (ih == 5)?clamped_q:
1436 clamped_p);
1437 }
1438 return rgb;
1439 }
1440
1441 __kernel void Contrast(__global CLPixelType *im, const unsigned int sharpen)
1442 {
1443
1444 const int sign = sharpen!=0?1:-1;
1445 const int x = get_global_id(0);
1446 const int y = get_global_id(1);
1447 const int columns = get_global_size(0);
1448 const int c = x + y * columns;
1449
1450 CLPixelType pixel = im[c];
1451 float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);
1452 float brightness = HueSaturationBrightness.z;
1453 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1454 brightness = clamp(brightness,0.0f,1.0f);
1455 HueSaturationBrightness.z = brightness;
1456
1457 CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
1458 filteredPixel.w = pixel.w;
1459 im[c] = filteredPixel;
1460 }
1461
1462
1463 )
1464
1465 STRINGIFY(
1466
1467 inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
1468 float *hue, float *saturation, float *lightness)
1469 {
1470 float
1471 c,
1472 tmax,
1473 tmin;
1474
1475 /*
1476 Convert RGB to HSL colorspace.
1477 */
1478 tmax=max(QuantumScale*red,max(QuantumScale*green, QuantumScale*blue));
1479 tmin=min(QuantumScale*red,min(QuantumScale*green, QuantumScale*blue));
1480
1481 c=tmax-tmin;
1482
1483 *lightness=(tmax+tmin)/2.0;
1484 if (c <= 0.0)
1485 {
1486 *hue=0.0;
1487 *saturation=0.0;
1488 return;
1489 }
1490
1491 if (tmax == (QuantumScale*red))
1492 {
1493 *hue=(QuantumScale*green-QuantumScale*blue)/c;
1494 if ((QuantumScale*green) < (QuantumScale*blue))
1495 *hue+=6.0;
1496 }
1497 else
1498 if (tmax == (QuantumScale*green))
1499 *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
1500 else
1501 *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
1502
1503 *hue*=60.0/360.0;
1504 if (*lightness <= 0.5)
1505 *saturation=c/(2.0*(*lightness));
1506 else
1507 *saturation=c/(2.0-2.0*(*lightness));
1508 }
1509
1510 inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
1511 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
1512 {
1513 float
1514 b,
1515 c,
1516 g,
1517 h,
1518 tmin,
1519 r,
1520 x;
1521
1522 /*
1523 Convert HSL to RGB colorspace.
1524 */
1525 h=hue*360.0;
1526 if (lightness <= 0.5)
1527 c=2.0*lightness*saturation;
1528 else
1529 c=(2.0-2.0*lightness)*saturation;
1530 tmin=lightness-0.5*c;
1531 h-=360.0*floor(h/360.0);
1532 h/=60.0;
1533 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
1534 switch ((int) floor(h))
1535 {
1536 case 0:
1537 {
1538 r=tmin+c;
1539 g=tmin+x;
1540 b=tmin;
1541 break;
1542 }
1543 case 1:
1544 {
1545 r=tmin+x;
1546 g=tmin+c;
1547 b=tmin;
1548 break;
1549 }
1550 case 2:
1551 {
1552 r=tmin;
1553 g=tmin+c;
1554 b=tmin+x;
1555 break;
1556 }
1557 case 3:
1558 {
1559 r=tmin;
1560 g=tmin+x;
1561 b=tmin+c;
1562 break;
1563 }
1564 case 4:
1565 {
1566 r=tmin+x;
1567 g=tmin;
1568 b=tmin+c;
1569 break;
1570 }
1571 case 5:
1572 {
1573 r=tmin+c;
1574 g=tmin;
1575 b=tmin+x;
1576 break;
1577 }
1578 default:
1579 {
1580 r=0.0;
1581 g=0.0;
1582 b=0.0;
1583 }
1584 }
1585 *red=ClampToQuantum(QuantumRange*r);
1586 *green=ClampToQuantum(QuantumRange*g);
1587 *blue=ClampToQuantum(QuantumRange*b);
1588 }
1589
1590 inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
1591 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
1592 {
1593 float
1594 hue,
1595 lightness,
1596 saturation;
1597
1598 /*
1599 Increase or decrease color lightness, saturation, or hue.
1600 */
1601 ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
1602 hue+=0.5*(0.01*percent_hue-1.0);
1603 while (hue < 0.0)
1604 hue+=1.0;
1605 while (hue >= 1.0)
1606 hue-=1.0;
1607 saturation*=0.01*percent_saturation;
1608 lightness*=0.01*percent_lightness;
1609 ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
1610 }
1611
1612 __kernel void Modulate(__global CLPixelType *im,
1613 const float percent_brightness,
1614 const float percent_hue,
1615 const float percent_saturation,
1616 const int colorspace)
1617 {
1618
1619 const int x = get_global_id(0);
1620 const int y = get_global_id(1);
1621 const int columns = get_global_size(0);
1622 const int c = x + y * columns;
1623
1624 CLPixelType pixel = im[c];
1625
1626 CLQuantum
1627 blue,
1628 green,
1629 red;
1630
1631 red=getRed(pixel);
1632 green=getGreen(pixel);
1633 blue=getBlue(pixel);
1634
1635 switch (colorspace)
1636 {
1637 case HSLColorspace:
1638 default:
1639 {
1640 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
1641 &red, &green, &blue);
1642 }
1643
1644 }
1645
1646 CLPixelType filteredPixel;
1647
1648 setRed(&filteredPixel, red);
1649 setGreen(&filteredPixel, green);
1650 setBlue(&filteredPixel, blue);
1651 filteredPixel.w = pixel.w;
1652
1653 im[c] = filteredPixel;
1654 }
1655 )
1656
1657 STRINGIFY(
1658 // Based on Box from resize.c
1659 float BoxResizeFilter(const float x)
1660 {
1661 return 1.0f;
1662 }
1663 )
1664
1665 STRINGIFY(
1666 // Based on CubicBC from resize.c
1667 float CubicBC(const float x,const __global float* resizeFilterCoefficients)
1668 {
1669 /*
1670 Cubic Filters using B,C determined values:
1671 Mitchell-Netravali B = 1/3 C = 1/3 "Balanced" cubic spline filter
1672 Catmull-Rom B = 0 C = 1/2 Interpolatory and exact on linears
1673 Spline B = 1 C = 0 B-Spline Gaussian approximation
1674 Hermite B = 0 C = 0 B-Spline interpolator
1675
1676 See paper by Mitchell and Netravali, Reconstruction Filters in Computer
1677 Graphics Computer Graphics, Volume 22, Number 4, August 1988
1678 http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/
1679 Mitchell.pdf.
1680
1681 Coefficents are determined from B,C values:
1682 P0 = ( 6 - 2*B )/6 = coeff[0]
1683 P1 = 0
1684 P2 = (-18 +12*B + 6*C )/6 = coeff[1]
1685 P3 = ( 12 - 9*B - 6*C )/6 = coeff[2]
1686 Q0 = ( 8*B +24*C )/6 = coeff[3]
1687 Q1 = ( -12*B -48*C )/6 = coeff[4]
1688 Q2 = ( 6*B +30*C )/6 = coeff[5]
1689 Q3 = ( - 1*B - 6*C )/6 = coeff[6]
1690
1691 which are used to define the filter:
1692
1693 P0 + P1*x + P2*x^2 + P3*x^3 0 <= x < 1
1694 Q0 + Q1*x + Q2*x^2 + Q3*x^3 1 <= x < 2
1695
1696 which ensures function is continuous in value and derivative (slope).
1697 */
1698 if (x < 1.0)
1699 return(resizeFilterCoefficients[0]+x*(x*
1700 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
1701 if (x < 2.0)
1702 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
1703 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
1704 return(0.0);
1705 }
1706 )
1707
1708 STRINGIFY(
1709 float Sinc(const float x)
1710 {
1711 if (x != 0.0f)
1712 {
1713 const float alpha=(float) (MagickPI*x);
1714 return sinpi(x)/alpha;
1715 }
1716 return(1.0f);
1717 }
1718 )
1719
1720 STRINGIFY(
1721 float Triangle(const float x)
1722 {
1723 /*
1724 1st order (linear) B-Spline, bilinear interpolation, Tent 1D filter, or
1725 a Bartlett 2D Cone filter. Also used as a Bartlett Windowing function
1726 for Sinc().
1727 */
1728 return ((x<1.0f)?(1.0f-x):0.0f);
1729 }
1730 )
1731
1732
1733 STRINGIFY(
1734 float Hanning(const float x)
1735 {
1736 /*
1737 Cosine window function:
1738 0.5+0.5*cos(pi*x).
1739 */
1740 const float cosine=cos((MagickPI*x));
1741 return(0.5f+0.5f*cosine);
1742 }
1743 )
1744
1745 STRINGIFY(
1746 float Hamming(const float x)
1747 {
1748 /*
1749 Offset cosine window function:
1750 .54 + .46 cos(pi x).
1751 */
1752 const float cosine=cos((MagickPI*x));
1753 return(0.54f+0.46f*cosine);
1754 }
1755 )
1756
1757 STRINGIFY(
1758 float Blackman(const float x)
1759 {
1760 /*
1761 Blackman: 2nd order cosine windowing function:
1762 0.42 + 0.5 cos(pi x) + 0.08 cos(2pi x)
1763
1764 Refactored by Chantal Racette and Nicolas Robidoux to one trig call and
1765 five flops.
1766 */
1767 const float cosine=cos((MagickPI*x));
1768 return(0.34f+cosine*(0.5f+cosine*0.16f));
1769 }
1770 )
1771
1772
1773 STRINGIFY(
1774 typedef enum {
1775 BoxWeightingFunction = 0,
1776 TriangleWeightingFunction,
1777 CubicBCWeightingFunction,
1778 HanningWeightingFunction,
1779 HammingWeightingFunction,
1780 BlackmanWeightingFunction,
1781 GaussianWeightingFunction,
1782 QuadraticWeightingFunction,
1783 JincWeightingFunction,
1784 SincWeightingFunction,
1785 SincFastWeightingFunction,
1786 KaiserWeightingFunction,
1787 WelshWeightingFunction,
1788 BohmanWeightingFunction,
1789 LagrangeWeightingFunction,
1790 CosineWeightingFunction,
1791 } ResizeWeightingFunctionType;
1792 )
1793
1794 STRINGIFY(
1795 inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
1796 {
1797 switch (filterType)
1798 {
1799 /* Call Sinc even for SincFast to get better precision on GPU
1800 and to avoid thread divergence. Sinc is pretty fast on GPU anyway...*/
1801 case SincWeightingFunction:
1802 case SincFastWeightingFunction:
1803 return Sinc(x);
1804 case CubicBCWeightingFunction:
1805 return CubicBC(x,filterCoefficients);
1806 case BoxWeightingFunction:
1807 return BoxResizeFilter(x);
1808 case TriangleWeightingFunction:
1809 return Triangle(x);
1810 case HanningWeightingFunction:
1811 return Hanning(x);
1812 case HammingWeightingFunction:
1813 return Hamming(x);
1814 case BlackmanWeightingFunction:
1815 return Blackman(x);
1816
1817 default:
1818 return 0.0f;
1819 }
1820 }
1821 )
1822
1823
1824 STRINGIFY(
1825 inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
1826 , const ResizeWeightingFunctionType resizeWindowType
1827 , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
1828 {
1829 float scale;
1830 float xBlur = fabs(x/resizeFilterBlur);
1831 if (resizeWindowSupport < MagickEpsilon
1832 || resizeWindowType == BoxWeightingFunction)
1833 {
1834 scale = 1.0f;
1835 }
1836 else
1837 {
1838 scale = resizeFilterScale;
1839 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
1840 }
1841 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
1842 return weight;
1843 }
1844
1845 )
1846
1847 ;
1848 const char* accelerateKernels2 =
1849
1850 STRINGIFY(
1851
1852 inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
1853 return (numWorkItems/pixelPerWorkgroup);
1854 }
1855
1856 // returns the index of the pixel for the current workitem to compute.
1857 // returns -1 if this workitem doesn't need to participate in any computation
1858 inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
1859 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
1860 int pixelIndex = itemID/numWorkItemsPerPixel;
1861 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
1862 return pixelIndex;
1863 }
1864
1865 )
1866
1867 STRINGIFY(
cristye85d0f72013-11-27 02:25:43 +00001868 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
cristyf034abb2013-11-24 14:16:14 +00001869 void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
1870 , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
1871 , const int resizeFilterType, const int resizeWindowType
1872 , const __global float* resizeFilterCubicCoefficients
1873 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
1874 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
1875 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
1876
1877
1878 // calculate the range of resized image pixels computed by this workgroup
1879 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
1880 const unsigned int stopX = min(startX + pixelPerWorkgroup,filteredColumns);
1881 const unsigned int actualNumPixelToCompute = stopX - startX;
1882
1883 // calculate the range of input image pixels to cache
1884 float scale = max(1.0/xFactor+MagickEpsilon ,1.0f);
1885 const float support = max(scale*resizeFilterSupport,0.5f);
1886 scale = PerceptibleReciprocal(scale);
1887
1888 const int cacheRangeStartX = max((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
1889 const int cacheRangeEndX = min((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
1890
1891 // cache the input pixels into local memory
1892 const unsigned int y = get_global_id(1);
1893 event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
1894 wait_group_events(1,&e);
1895
1896 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
1897 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
1898 {
1899
1900 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
1901 const unsigned int chunkStopX = min(chunkStartX + pixelChunkSize, stopX);
1902 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
1903
1904 // determine which resized pixel computed by this workitem
1905 const unsigned int itemID = get_local_id(0);
1906 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
1907
1908 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
1909
1910 float4 filteredPixel = (float4)0.0f;
1911 float density = 0.0f;
1912 float gamma = 0.0f;
1913 // -1 means this workitem doesn't participate in the computation
1914 if (pixelIndex != -1) {
1915
1916 // x coordinated of the resized pixel computed by this workitem
1917 const int x = chunkStartX + pixelIndex;
1918
1919 // calculate how many steps required for this pixel
1920 const float bisect = (x+0.5)/xFactor+MagickEpsilon;
1921 const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
1922 const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputColumns);
1923 const unsigned int n = stop - start;
1924
1925 // calculate how many steps this workitem will contribute
1926 unsigned int numStepsPerWorkItem = n / numItems;
1927 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
1928
1929 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
1930 if (startStep < n) {
1931 const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
1932
1933 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
1934 if (matte == 0) {
1935
1936 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
1937 float4 cp = convert_float4(inputImageCache[cacheIndex]);
1938
1939 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
1940 , (ResizeWeightingFunctionType)resizeWindowType
1941 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
1942
1943 filteredPixel += ((float4)weight)*cp;
1944 density+=weight;
1945 }
1946
1947
1948 }
1949 else {
1950 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
1951 CLPixelType p = inputImageCache[cacheIndex];
1952
1953 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
1954 , (ResizeWeightingFunctionType)resizeWindowType
1955 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
1956
1957 float alpha = weight * QuantumScale * GetPixelAlpha(p);
1958 float4 cp = convert_float4(p);
1959
1960 filteredPixel.x += alpha * cp.x;
1961 filteredPixel.y += alpha * cp.y;
1962 filteredPixel.z += alpha * cp.z;
1963 filteredPixel.w += weight * cp.w;
1964
1965 density+=weight;
1966 gamma+=alpha;
1967 }
1968 }
1969 }
1970 }
1971
1972 // initialize the accumulators to zero
1973 if (itemID < actualNumPixelInThisChunk) {
1974 outputPixelCache[itemID] = (float4)0.0f;
1975 densityCache[itemID] = 0.0f;
1976 if (matte != 0)
1977 gammaCache[itemID] = 0.0f;
1978 }
1979 barrier(CLK_LOCAL_MEM_FENCE);
1980
1981 // accumulatte the filtered pixel value and the density
1982 for (unsigned int i = 0; i < numItems; i++) {
1983 if (pixelIndex != -1) {
1984 if (itemID%numItems == i) {
1985 outputPixelCache[pixelIndex]+=filteredPixel;
1986 densityCache[pixelIndex]+=density;
1987 if (matte!=0) {
1988 gammaCache[pixelIndex]+=gamma;
1989 }
1990 }
1991 }
1992 barrier(CLK_LOCAL_MEM_FENCE);
1993 }
1994
1995 if (itemID < actualNumPixelInThisChunk) {
1996 if (matte==0) {
1997 float density = densityCache[itemID];
1998 float4 filteredPixel = outputPixelCache[itemID];
1999 if (density!= 0.0f && density != 1.0)
2000 {
2001 density = PerceptibleReciprocal(density);
2002 filteredPixel *= (float4)density;
2003 }
2004 filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
2005 , ClampToQuantum(filteredPixel.y)
2006 , ClampToQuantum(filteredPixel.z)
2007 , ClampToQuantum(filteredPixel.w));
2008 }
2009 else {
2010 float density = densityCache[itemID];
2011 float gamma = gammaCache[itemID];
2012 float4 filteredPixel = outputPixelCache[itemID];
2013
2014 if (density!= 0.0f && density != 1.0) {
2015 density = PerceptibleReciprocal(density);
2016 filteredPixel *= (float4)density;
2017 gamma *= density;
2018 }
2019 gamma = PerceptibleReciprocal(gamma);
2020
2021 CLPixelType fp;
2022 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
2023 , ClampToQuantum(gamma*filteredPixel.y)
2024 , ClampToQuantum(gamma*filteredPixel.z)
2025 , ClampToQuantum(filteredPixel.w));
2026
2027 filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
2028
2029 }
2030 }
2031
2032 } // end of chunking loop
2033 }
2034 )
2035
2036
2037
2038 STRINGIFY(
2039 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2040 void ResizeHorizontalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2041 , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2042 , const int resizeFilterType, const int resizeWindowType
2043 , const __global float* resizeFilterCubicCoefficients
2044 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2045 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2046 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2047
2048 ResizeHorizontalFilter(inputImage,inputColumns,inputRows,matte
2049 ,xFactor, filteredImage, filteredColumns, filteredRows
2050 ,SincWeightingFunction, SincWeightingFunction
2051 ,resizeFilterCubicCoefficients
2052 ,resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur
2053 ,inputImageCache, numCachedPixels, pixelPerWorkgroup, pixelChunkSize
2054 ,outputPixelCache, densityCache, gammaCache);
2055
2056 }
2057 )
2058
2059
2060 STRINGIFY(
cristye85d0f72013-11-27 02:25:43 +00002061 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
cristyf034abb2013-11-24 14:16:14 +00002062 void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2063 , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2064 , const int resizeFilterType, const int resizeWindowType
2065 , const __global float* resizeFilterCubicCoefficients
2066 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2067 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2068 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2069
2070
2071 // calculate the range of resized image pixels computed by this workgroup
2072 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2073 const unsigned int stopY = min(startY + pixelPerWorkgroup,filteredRows);
2074 const unsigned int actualNumPixelToCompute = stopY - startY;
2075
2076 // calculate the range of input image pixels to cache
2077 float scale = max(1.0/yFactor+MagickEpsilon ,1.0f);
2078 const float support = max(scale*resizeFilterSupport,0.5f);
2079 scale = PerceptibleReciprocal(scale);
2080
2081 const int cacheRangeStartY = max((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
2082 const int cacheRangeEndY = min((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
2083
2084 // cache the input pixels into local memory
2085 const unsigned int x = get_global_id(0);
2086 event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
2087 wait_group_events(1,&e);
2088
2089 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2090 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2091 {
2092
2093 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2094 const unsigned int chunkStopY = min(chunkStartY + pixelChunkSize, stopY);
2095 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
2096
2097 // determine which resized pixel computed by this workitem
2098 const unsigned int itemID = get_local_id(1);
2099 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2100
2101 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2102
2103 float4 filteredPixel = (float4)0.0f;
2104 float density = 0.0f;
2105 float gamma = 0.0f;
2106 // -1 means this workitem doesn't participate in the computation
2107 if (pixelIndex != -1) {
2108
2109 // x coordinated of the resized pixel computed by this workitem
2110 const int y = chunkStartY + pixelIndex;
2111
2112 // calculate how many steps required for this pixel
2113 const float bisect = (y+0.5)/yFactor+MagickEpsilon;
2114 const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
2115 const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputRows);
2116 const unsigned int n = stop - start;
2117
2118 // calculate how many steps this workitem will contribute
2119 unsigned int numStepsPerWorkItem = n / numItems;
2120 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2121
2122 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2123 if (startStep < n) {
2124 const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
2125
2126 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
2127 if (matte == 0) {
2128
2129 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2130 float4 cp = convert_float4(inputImageCache[cacheIndex]);
2131
2132 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2133 , (ResizeWeightingFunctionType)resizeWindowType
2134 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2135
2136 filteredPixel += ((float4)weight)*cp;
2137 density+=weight;
2138 }
2139
2140
2141 }
2142 else {
2143 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2144 CLPixelType p = inputImageCache[cacheIndex];
2145
2146 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2147 , (ResizeWeightingFunctionType)resizeWindowType
2148 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2149
2150 float alpha = weight * QuantumScale * GetPixelAlpha(p);
2151 float4 cp = convert_float4(p);
2152
2153 filteredPixel.x += alpha * cp.x;
2154 filteredPixel.y += alpha * cp.y;
2155 filteredPixel.z += alpha * cp.z;
2156 filteredPixel.w += weight * cp.w;
2157
2158 density+=weight;
2159 gamma+=alpha;
2160 }
2161 }
2162 }
2163 }
2164
2165 // initialize the accumulators to zero
2166 if (itemID < actualNumPixelInThisChunk) {
2167 outputPixelCache[itemID] = (float4)0.0f;
2168 densityCache[itemID] = 0.0f;
2169 if (matte != 0)
2170 gammaCache[itemID] = 0.0f;
2171 }
2172 barrier(CLK_LOCAL_MEM_FENCE);
2173
2174 // accumulatte the filtered pixel value and the density
2175 for (unsigned int i = 0; i < numItems; i++) {
2176 if (pixelIndex != -1) {
2177 if (itemID%numItems == i) {
2178 outputPixelCache[pixelIndex]+=filteredPixel;
2179 densityCache[pixelIndex]+=density;
2180 if (matte!=0) {
2181 gammaCache[pixelIndex]+=gamma;
2182 }
2183 }
2184 }
2185 barrier(CLK_LOCAL_MEM_FENCE);
2186 }
2187
2188 if (itemID < actualNumPixelInThisChunk) {
2189 if (matte==0) {
2190 float density = densityCache[itemID];
2191 float4 filteredPixel = outputPixelCache[itemID];
2192 if (density!= 0.0f && density != 1.0)
2193 {
2194 density = PerceptibleReciprocal(density);
2195 filteredPixel *= (float4)density;
2196 }
2197 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
2198 , ClampToQuantum(filteredPixel.y)
2199 , ClampToQuantum(filteredPixel.z)
2200 , ClampToQuantum(filteredPixel.w));
2201 }
2202 else {
2203 float density = densityCache[itemID];
2204 float gamma = gammaCache[itemID];
2205 float4 filteredPixel = outputPixelCache[itemID];
2206
2207 if (density!= 0.0f && density != 1.0) {
2208 density = PerceptibleReciprocal(density);
2209 filteredPixel *= (float4)density;
2210 gamma *= density;
2211 }
2212 gamma = PerceptibleReciprocal(gamma);
2213
2214 CLPixelType fp;
2215 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
2216 , ClampToQuantum(gamma*filteredPixel.y)
2217 , ClampToQuantum(gamma*filteredPixel.z)
2218 , ClampToQuantum(filteredPixel.w));
2219
2220 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
2221
2222 }
2223 }
2224
2225 } // end of chunking loop
2226 }
2227 )
2228
2229
2230
2231 STRINGIFY(
cristye85d0f72013-11-27 02:25:43 +00002232 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
cristyf034abb2013-11-24 14:16:14 +00002233 void ResizeVerticalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2234 , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2235 , const int resizeFilterType, const int resizeWindowType
2236 , const __global float* resizeFilterCubicCoefficients
2237 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2238 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2239 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2240 ResizeVerticalFilter(inputImage,inputColumns,inputRows,matte
2241 ,yFactor,filteredImage,filteredColumns,filteredRows
2242 ,SincWeightingFunction, SincWeightingFunction
2243 ,resizeFilterCubicCoefficients
2244 ,resizeFilterScale,resizeFilterSupport,resizeFilterWindowSupport,resizeFilterBlur
2245 ,inputImageCache,numCachedPixels,pixelPerWorkgroup,pixelChunkSize
2246 ,outputPixelCache,densityCache,gammaCache);
2247 }
2248 )
cristye85d0f72013-11-27 02:25:43 +00002249
2250 STRINGIFY(
2251
2252
2253 __kernel void randomNumberGeneratorKernel(__global uint* seeds, const float normalizeRand
2254 , __global float* randomNumbers, const uint init
2255 ,const uint numRandomNumbers) {
2256
2257 unsigned int id = get_global_id(0);
2258 unsigned int seed[4];
2259
2260 if (init!=0) {
2261 seed[0] = seeds[id*4];
2262 seed[1] = 0x50a7f451;
2263 seed[2] = 0x5365417e;
2264 seed[3] = 0xc3a4171a;
2265 }
2266 else {
2267 seed[0] = seeds[id*4];
2268 seed[1] = seeds[id*4+1];
2269 seed[2] = seeds[id*4+2];
2270 seed[3] = seeds[id*4+3];
2271 }
2272
2273 unsigned int numRandomNumbersPerItem = (numRandomNumbers+get_global_size(0)-1)/get_global_size(0);
2274 for (unsigned int i = 0; i < numRandomNumbersPerItem; i++) {
2275 do
2276 {
2277 unsigned int alpha=(unsigned int) (seed[1] ^ (seed[1] << 11));
2278 seed[1]=seed[2];
2279 seed[2]=seed[3];
2280 seed[3]=seed[0];
2281 seed[0]=(seed[0] ^ (seed[0] >> 19)) ^ (alpha ^ (alpha >> 8));
2282 } while (seed[0] == ~0UL);
2283 unsigned int pos = (get_group_id(0)*get_local_size(0)*numRandomNumbersPerItem)
2284 + get_local_size(0) * i + get_local_id(0);
2285
2286 if (pos >= numRandomNumbers)
2287 break;
2288 randomNumbers[pos] = normalizeRand*seed[0];
2289 }
2290
2291 /* save the seeds for the time*/
2292 seeds[id*4] = seed[0];
2293 seeds[id*4+1] = seed[1];
2294 seeds[id*4+2] = seed[2];
2295 seeds[id*4+3] = seed[3];
2296 }
2297
2298 )
2299
2300
2301 STRINGIFY(
2302
2303 typedef enum
2304 {
2305 UndefinedNoise,
2306 UniformNoise,
2307 GaussianNoise,
2308 MultiplicativeGaussianNoise,
2309 ImpulseNoise,
2310 LaplacianNoise,
2311 PoissonNoise,
2312 RandomNoise
2313 } NoiseType;
2314
2315 typedef struct {
2316 const global float* rns;
2317 } RandomNumbers;
2318
2319
2320 float GetPseudoRandomValue(RandomNumbers* r) {
2321 float v = *r->rns;
2322 r->rns++;
2323 return v;
2324 }
2325 )
2326
2327 OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
2328 OPENCL_DEFINE(SigmaGaussian,(attenuate*0.015625f))
2329 OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
2330 OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
2331 OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
2332 OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
2333 OPENCL_DEFINE(SigmaRandom, (attenuate))
2334 OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
2335
2336 STRINGIFY(
2337 float GenerateDifferentialNoise(RandomNumbers* r, CLQuantum pixel, NoiseType noise_type, float attenuate) {
2338
2339 float
2340 alpha,
2341 beta,
2342 noise,
2343 sigma;
2344
2345 noise = 0.0f;
2346 alpha=GetPseudoRandomValue(r);
2347 switch(noise_type) {
2348 case UniformNoise:
2349 default:
2350 {
2351 noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
2352 break;
2353 }
2354 case GaussianNoise:
2355 {
2356 float
2357 gamma,
2358 tau;
2359
2360 if (alpha == 0.0f)
2361 alpha=1.0f;
2362 beta=GetPseudoRandomValue(r);
2363 gamma=sqrt(-2.0f*log(alpha));
2364 sigma=gamma*cospi((2.0f*beta));
2365 tau=gamma*sinpi((2.0f*beta));
2366 noise=(float)(pixel+sqrt((float) pixel)*SigmaGaussian*sigma+
2367 QuantumRange*TauGaussian*tau);
2368 break;
2369 }
2370
2371
2372 case ImpulseNoise:
2373 {
2374 if (alpha < (SigmaImpulse/2.0f))
2375 noise=0.0f;
2376 else
2377 if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
2378 noise=(float)QuantumRange;
2379 else
2380 noise=(float)pixel;
2381 break;
2382 }
2383 case LaplacianNoise:
2384 {
2385 if (alpha <= 0.5f)
2386 {
2387 if (alpha <= MagickEpsilon)
2388 noise=(float) (pixel-QuantumRange);
2389 else
2390 noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
2391 0.5f);
2392 break;
2393 }
2394 beta=1.0f-alpha;
2395 if (beta <= (0.5f*MagickEpsilon))
2396 noise=(float) (pixel+QuantumRange);
2397 else
2398 noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
2399 break;
2400 }
2401 case MultiplicativeGaussianNoise:
2402 {
2403 sigma=1.0f;
2404 if (alpha > MagickEpsilon)
2405 sigma=sqrt(-2.0f*log(alpha));
2406 beta=GetPseudoRandomValue(r);
2407 noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
2408 cospi((float) (2.0f*beta))/2.0f);
2409 break;
2410 }
2411 case PoissonNoise:
2412 {
2413 float
2414 poisson;
2415 unsigned int i;
2416 poisson=exp(-SigmaPoisson*QuantumScale*pixel);
2417 for (i=0; alpha > poisson; i++)
2418 {
2419 beta=GetPseudoRandomValue(r);
2420 alpha*=beta;
2421 }
2422 noise=(float) (QuantumRange*i/SigmaPoisson);
2423 break;
2424 }
2425 case RandomNoise:
2426 {
2427 noise=(float) (QuantumRange*SigmaRandom*alpha);
2428 break;
2429 }
2430
2431 };
2432 return noise;
2433 }
2434
2435 __kernel
2436 void AddNoiseImage(const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
2437 ,const unsigned int inputColumns, const unsigned int inputRows
2438 ,const ChannelType channel
2439 ,const NoiseType noise_type, const float attenuate
2440 ,const __global float* randomNumbers, const unsigned int numRandomNumbersPerPixel
2441 ,const unsigned int rowOffset) {
2442
2443 unsigned int x = get_global_id(0);
2444 unsigned int y = get_global_id(1) + rowOffset;
2445 RandomNumbers r;
2446 r.rns = randomNumbers + (get_global_id(1) * inputColumns + get_global_id(0))*numRandomNumbersPerPixel;
2447
2448 CLPixelType p = inputImage[y*inputColumns+x];
2449 CLPixelType q = filteredImage[y*inputColumns+x];
2450
2451 if ((channel&RedChannel)!=0) {
2452 setRed(&q,ClampToQuantum(GenerateDifferentialNoise(&r,getRed(p),noise_type,attenuate)));
2453 }
2454
2455 if ((channel&GreenChannel)!=0) {
2456 setGreen(&q,ClampToQuantum(GenerateDifferentialNoise(&r,getGreen(p),noise_type,attenuate)));
2457 }
2458
2459 if ((channel&BlueChannel)!=0) {
2460 setBlue(&q,ClampToQuantum(GenerateDifferentialNoise(&r,getBlue(p),noise_type,attenuate)));
2461 }
2462
2463 if ((channel & OpacityChannel) != 0) {
2464 setOpacity(&q,ClampToQuantum(GenerateDifferentialNoise(&r,getOpacity(p),noise_type,attenuate)));
2465 }
2466
2467 filteredImage[y*inputColumns+x] = q;
2468 }
2469
2470 )
cristyf034abb2013-11-24 14:16:14 +00002471 ;
2472
2473
cristye85d0f72013-11-27 02:25:43 +00002474
2475
cristyf034abb2013-11-24 14:16:14 +00002476#endif // MAGICKCORE_OPENCL_SUPPORT
2477
cristye1101892013-08-18 00:53:18 +00002478#if defined(__cplusplus) || defined(c_plusplus)
2479}
2480#endif
2481
cristyf034abb2013-11-24 14:16:14 +00002482#endif // _MAGICKCORE_ACCELERATE_PRIVATE_H