cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 1 | /* |
| 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 3 | % % |
| 4 | % % |
| 5 | % % |
| 6 | % AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE % |
| 7 | % A A C C E L E R R A A T E % |
| 8 | % AAAAA C C EEE L EEE RRRR AAAAA T EEE % |
| 9 | % A A C C E L E R R A A T E % |
| 10 | % A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE % |
| 11 | % % |
| 12 | % % |
| 13 | % MagickCore Acceleration Methods % |
| 14 | % % |
| 15 | % Software Design % |
cristy | 0d127ab | 2010-05-14 23:29:46 +0000 | [diff] [blame] | 16 | % John Cristy % |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 17 | % January 2010 % |
| 18 | % % |
| 19 | % % |
cristy | 7e41fe8 | 2010-12-04 23:12:08 +0000 | [diff] [blame] | 20 | % Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization % |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 21 | % dedicated to making software imaging solutions freely available. % |
| 22 | % % |
| 23 | % You may not use this file except in compliance with the License. You may % |
| 24 | % obtain a copy of the License at % |
| 25 | % % |
| 26 | % http://www.imagemagick.org/script/license.php % |
| 27 | % % |
| 28 | % Unless required by applicable law or agreed to in writing, software % |
| 29 | % distributed under the License is distributed on an "AS IS" BASIS, % |
| 30 | % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. % |
| 31 | % See the License for the specific language governing permissions and % |
| 32 | % limitations under the License. % |
| 33 | % % |
| 34 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 35 | % |
cristy | 0d127ab | 2010-05-14 23:29:46 +0000 | [diff] [blame] | 36 | % Morphology is the the application of various kernals, of any size and even |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 37 | % shape, to a image in various ways (typically binary, but not always). |
| 38 | % |
| 39 | % Convolution (weighted sum or average) is just one specific type of |
| 40 | % accelerate. Just one that is very common for image bluring and sharpening |
| 41 | % effects. Not only 2D Gaussian blurring, but also 2-pass 1D Blurring. |
| 42 | % |
| 43 | % This module provides not only a general accelerate function, and the ability |
| 44 | % to apply more advanced or iterative morphologies, but also functions for the |
| 45 | % generation of many different types of kernel arrays from user supplied |
| 46 | % arguments. Prehaps even the generation of a kernel from a small image. |
| 47 | */ |
| 48 | |
| 49 | /* |
| 50 | Include declarations. |
| 51 | */ |
cristy | 4c08aed | 2011-07-01 19:47:50 +0000 | [diff] [blame] | 52 | #include "MagickCore/studio.h" |
| 53 | #include "MagickCore/accelerate.h" |
| 54 | #include "MagickCore/artifact.h" |
cristy | 35f3349 | 2011-07-07 16:54:49 +0000 | [diff] [blame] | 55 | #include "MagickCore/cache.h" |
cristy | 4c08aed | 2011-07-01 19:47:50 +0000 | [diff] [blame] | 56 | #include "MagickCore/cache-view.h" |
| 57 | #include "MagickCore/color-private.h" |
| 58 | #include "MagickCore/enhance.h" |
| 59 | #include "MagickCore/exception.h" |
| 60 | #include "MagickCore/exception-private.h" |
| 61 | #include "MagickCore/gem.h" |
| 62 | #include "MagickCore/hashmap.h" |
| 63 | #include "MagickCore/image.h" |
| 64 | #include "MagickCore/image-private.h" |
| 65 | #include "MagickCore/list.h" |
| 66 | #include "MagickCore/memory_.h" |
| 67 | #include "MagickCore/monitor-private.h" |
| 68 | #include "MagickCore/accelerate.h" |
| 69 | #include "MagickCore/option.h" |
| 70 | #include "MagickCore/pixel-accessor.h" |
| 71 | #include "MagickCore/prepress.h" |
| 72 | #include "MagickCore/quantize.h" |
| 73 | #include "MagickCore/registry.h" |
| 74 | #include "MagickCore/semaphore.h" |
| 75 | #include "MagickCore/splay-tree.h" |
| 76 | #include "MagickCore/statistic.h" |
| 77 | #include "MagickCore/string_.h" |
| 78 | #include "MagickCore/string-private.h" |
| 79 | #include "MagickCore/token.h" |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 80 | |
| 81 | /* |
| 82 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 83 | % % |
| 84 | % % |
| 85 | % % |
| 86 | % A c c e l e r a t e C o n v o l v e I m a g e % |
| 87 | % % |
| 88 | % % |
| 89 | % % |
| 90 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 91 | % |
| 92 | % AccelerateConvolveImage() applies a custom convolution kernel to the image. |
| 93 | % It is accelerated by taking advantage of speed-ups offered by executing in |
| 94 | % concert across heterogeneous platforms consisting of CPUs, GPUs, and other |
| 95 | % processors. |
| 96 | % |
| 97 | % The format of the AccelerateConvolveImage method is: |
| 98 | % |
| 99 | % Image *AccelerateConvolveImage(const Image *image, |
cristy | 2be1538 | 2010-01-21 02:38:03 +0000 | [diff] [blame] | 100 | % const KernelInfo *kernel,Image *convolve_image, |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 101 | % ExceptionInfo *exception) |
| 102 | % |
| 103 | % A description of each parameter follows: |
| 104 | % |
| 105 | % o image: the image. |
| 106 | % |
| 107 | % o kernel: the convolution kernel. |
| 108 | % |
| 109 | % o convole_image: the convoleed image. |
| 110 | % |
| 111 | % o exception: return any errors or warnings in this structure. |
| 112 | % |
| 113 | */ |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 114 | |
| 115 | #if defined(MAGICKCORE_OPENCL_SUPPORT) |
| 116 | |
| 117 | #if defined(MAGICKCORE_HDRI_SUPPORT) |
| 118 | #define CLOptions "-DMAGICKCORE_HDRI_SUPPORT=1 -DCLQuantum=float " \ |
| 119 | "-DCLPixelType=float4 -DQuantumRange=%g -DMagickEpsilon=%g" |
| 120 | #define CLPixelPacket cl_float4 |
| 121 | #else |
| 122 | #if (MAGICKCORE_QUANTUM_DEPTH == 8) |
| 123 | #define CLOptions "-DCLQuantum=uchar -DCLPixelType=uchar4 " \ |
| 124 | "-DQuantumRange=%g -DMagickEpsilon=%g" |
| 125 | #define CLPixelPacket cl_uchar4 |
| 126 | #elif (MAGICKCORE_QUANTUM_DEPTH == 16) |
| 127 | #define CLOptions "-DCLQuantum=ushort -DCLPixelType=ushort4 " \ |
| 128 | "-DQuantumRange=%g -DMagickEpsilon=%g" |
| 129 | #define CLPixelPacket cl_ushort4 |
| 130 | #elif (MAGICKCORE_QUANTUM_DEPTH == 32) |
| 131 | #define CLOptions "-DCLQuantum=uint -DCLPixelType=uint4 " \ |
| 132 | "-DQuantumRange=%g -DMagickEpsilon=%g" |
| 133 | #define CLPixelPacket cl_uint4 |
| 134 | #elif (MAGICKCORE_QUANTUM_DEPTH == 32) |
cristy | bb50337 | 2010-05-27 20:51:26 +0000 | [diff] [blame] | 135 | #define CLOptions "-DCLQuantum=ussize_t -DCLPixelType=ussize_t4 " \ |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 136 | "-DQuantumRange=%g -DMagickEpsilon=%g" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 137 | #define CLPixelPacket cl_ulong4 |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 138 | #endif |
| 139 | #endif |
| 140 | |
| 141 | typedef struct _ConvolveInfo |
| 142 | { |
| 143 | cl_context |
| 144 | context; |
| 145 | |
| 146 | cl_device_id |
| 147 | *devices; |
| 148 | |
| 149 | cl_command_queue |
| 150 | command_queue; |
| 151 | |
| 152 | cl_kernel |
| 153 | kernel; |
| 154 | |
| 155 | cl_program |
| 156 | program; |
| 157 | |
| 158 | cl_mem |
| 159 | pixels, |
| 160 | convolve_pixels; |
| 161 | |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 162 | cl_ulong |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 163 | width, |
| 164 | height; |
| 165 | |
| 166 | cl_bool |
| 167 | matte; |
| 168 | |
| 169 | cl_mem |
| 170 | filter; |
| 171 | } ConvolveInfo; |
| 172 | |
| 173 | static char |
| 174 | *ConvolveKernel = |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 175 | "static inline long ClampToCanvas(const long offset,const unsigned long range)\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 176 | "{\n" |
| 177 | " if (offset < 0L)\n" |
| 178 | " return(0L);\n" |
| 179 | " if (offset >= range)\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 180 | " return((long) (range-1L));\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 181 | " return(offset);\n" |
| 182 | "}\n" |
| 183 | "\n" |
| 184 | "static inline CLQuantum ClampToQuantum(const double value)\n" |
| 185 | "{\n" |
| 186 | "#if defined(MAGICKCORE_HDRI_SUPPORT)\n" |
| 187 | " return((CLQuantum) value)\n" |
| 188 | "#else\n" |
| 189 | " if (value < 0.0)\n" |
| 190 | " return((CLQuantum) 0);\n" |
| 191 | " if (value >= (double) QuantumRange)\n" |
| 192 | " return((CLQuantum) QuantumRange);\n" |
| 193 | " return((CLQuantum) (value+0.5));\n" |
| 194 | "#endif\n" |
| 195 | "}\n" |
| 196 | "\n" |
| 197 | "__kernel void Convolve(const __global CLPixelType *input,\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 198 | " __constant double *filter,const unsigned long width,const unsigned long height,\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 199 | " const bool matte,__global CLPixelType *output)\n" |
| 200 | "{\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 201 | " const unsigned long columns = get_global_size(0);\n" |
| 202 | " const unsigned long rows = get_global_size(1);\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 203 | "\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 204 | " const long x = get_global_id(0);\n" |
| 205 | " const long y = get_global_id(1);\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 206 | "\n" |
| 207 | " const double scale = (1.0/QuantumRange);\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 208 | " const long mid_width = (width-1)/2;\n" |
| 209 | " const long mid_height = (height-1)/2;\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 210 | " double4 sum = { 0.0, 0.0, 0.0, 0.0 };\n" |
| 211 | " double gamma = 0.0;\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 212 | " register unsigned long i = 0;\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 213 | "\n" |
| 214 | " int method = 0;\n" |
| 215 | " if (matte != false)\n" |
| 216 | " method=1;\n" |
| 217 | " if ((x >= width) && (x < (columns-width-1)) &&\n" |
| 218 | " (y >= height) && (y < (rows-height-1)))\n" |
| 219 | " {\n" |
| 220 | " method=2;\n" |
| 221 | " if (matte != false)\n" |
| 222 | " method=3;\n" |
| 223 | " }\n" |
| 224 | " switch (method)\n" |
| 225 | " {\n" |
| 226 | " case 0:\n" |
| 227 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 228 | " for (long v=(-mid_height); v <= mid_height; v++)\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 229 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 230 | " for (long u=(-mid_width); u <= mid_width; u++)\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 231 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 232 | " const long index=ClampToCanvas(y+v,rows)*columns+\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 233 | " ClampToCanvas(x+u,columns);\n" |
| 234 | " sum.x+=filter[i]*input[index].x;\n" |
| 235 | " sum.y+=filter[i]*input[index].y;\n" |
| 236 | " sum.z+=filter[i]*input[index].z;\n" |
| 237 | " gamma+=filter[i];\n" |
| 238 | " i++;\n" |
| 239 | " }\n" |
| 240 | " }\n" |
| 241 | " break;\n" |
| 242 | " }\n" |
| 243 | " case 1:\n" |
| 244 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 245 | " for (long v=(-mid_height); v <= mid_height; v++)\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 246 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 247 | " for (long u=(-mid_width); u <= mid_width; u++)\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 248 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 249 | " const unsigned long index=ClampToCanvas(y+v,rows)*columns+\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 250 | " ClampToCanvas(x+u,columns);\n" |
cristy | 4c08aed | 2011-07-01 19:47:50 +0000 | [diff] [blame] | 251 | " const double alpha=scale*input[index].w;\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 252 | " sum.x+=alpha*filter[i]*input[index].x;\n" |
| 253 | " sum.y+=alpha*filter[i]*input[index].y;\n" |
| 254 | " sum.z+=alpha*filter[i]*input[index].z;\n" |
| 255 | " sum.w+=filter[i]*input[index].w;\n" |
| 256 | " gamma+=alpha*filter[i];\n" |
| 257 | " i++;\n" |
| 258 | " }\n" |
| 259 | " }\n" |
| 260 | " break;\n" |
| 261 | " }\n" |
| 262 | " case 2:\n" |
| 263 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 264 | " for (long v=(-mid_height); v <= mid_height; v++)\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 265 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 266 | " for (long u=(-mid_width); u <= mid_width; u++)\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 267 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 268 | " const unsigned long index=(y+v)*columns+(x+u);\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 269 | " sum.x+=filter[i]*input[index].x;\n" |
| 270 | " sum.y+=filter[i]*input[index].y;\n" |
| 271 | " sum.z+=filter[i]*input[index].z;\n" |
| 272 | " gamma+=filter[i];\n" |
| 273 | " i++;\n" |
| 274 | " }\n" |
| 275 | " }\n" |
| 276 | " break;\n" |
| 277 | " }\n" |
| 278 | " case 3:\n" |
| 279 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 280 | " for (long v=(-mid_height); v <= mid_height; v++)\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 281 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 282 | " for (long u=(-mid_width); u <= mid_width; u++)\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 283 | " {\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 284 | " const unsigned long index=(y+v)*columns+(x+u);\n" |
cristy | 4c08aed | 2011-07-01 19:47:50 +0000 | [diff] [blame] | 285 | " const double alpha=scale*input[index].w;\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 286 | " sum.x+=alpha*filter[i]*input[index].x;\n" |
| 287 | " sum.y+=alpha*filter[i]*input[index].y;\n" |
| 288 | " sum.z+=alpha*filter[i]*input[index].z;\n" |
| 289 | " sum.w+=filter[i]*input[index].w;\n" |
| 290 | " gamma+=alpha*filter[i];\n" |
| 291 | " i++;\n" |
| 292 | " }\n" |
| 293 | " }\n" |
| 294 | " break;\n" |
| 295 | " }\n" |
| 296 | " }\n" |
| 297 | " gamma=1.0/(fabs(gamma) <= MagickEpsilon ? 1.0 : gamma);\n" |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 298 | " const unsigned long index = y*columns+x;\n" |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 299 | " output[index].x=ClampToQuantum(gamma*sum.x);\n" |
| 300 | " output[index].y=ClampToQuantum(gamma*sum.y);\n" |
| 301 | " output[index].z=ClampToQuantum(gamma*sum.z);\n" |
| 302 | " if (matte == false)\n" |
| 303 | " output[index].w=input[index].w;\n" |
| 304 | " else\n" |
| 305 | " output[index].w=ClampToQuantum(sum.w);\n" |
| 306 | "}\n"; |
| 307 | |
| 308 | static void ConvolveNotify(const char *message,const void *data,size_t length, |
| 309 | void *user_context) |
| 310 | { |
| 311 | ExceptionInfo |
| 312 | *exception; |
| 313 | |
| 314 | (void) data; |
| 315 | (void) length; |
| 316 | exception=(ExceptionInfo *) user_context; |
cristy | 32cca40 | 2010-01-23 04:02:23 +0000 | [diff] [blame] | 317 | (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning, |
| 318 | "DelegateFailed","`%s'",message); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 319 | } |
| 320 | |
| 321 | static MagickBooleanType BindConvolveParameters(ConvolveInfo *convolve_info, |
| 322 | const Image *image,const void *pixels,double *filter, |
cristy | bb50337 | 2010-05-27 20:51:26 +0000 | [diff] [blame] | 323 | const size_t width,const size_t height,void *convolve_pixels) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 324 | { |
| 325 | cl_int |
| 326 | status; |
| 327 | |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 328 | register cl_uint |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 329 | i; |
| 330 | |
| 331 | size_t |
| 332 | length; |
| 333 | |
| 334 | /* |
| 335 | Allocate OpenCL buffers. |
| 336 | */ |
| 337 | length=image->columns*image->rows; |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 338 | convolve_info->pixels=clCreateBuffer(convolve_info->context,(cl_mem_flags) |
| 339 | (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(CLPixelPacket), |
| 340 | (void *) pixels,&status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 341 | if ((convolve_info->pixels == (cl_mem) NULL) || (status != CL_SUCCESS)) |
| 342 | return(MagickFalse); |
| 343 | length=width*height; |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 344 | convolve_info->filter=clCreateBuffer(convolve_info->context,(cl_mem_flags) |
| 345 | (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(cl_double),filter, |
| 346 | &status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 347 | if ((convolve_info->filter == (cl_mem) NULL) || (status != CL_SUCCESS)) |
| 348 | return(MagickFalse); |
| 349 | length=image->columns*image->rows; |
| 350 | convolve_info->convolve_pixels=clCreateBuffer(convolve_info->context, |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 351 | (cl_mem_flags) (CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR),length* |
| 352 | sizeof(CLPixelPacket),convolve_pixels,&status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 353 | if ((convolve_info->convolve_pixels == (cl_mem) NULL) || |
| 354 | (status != CL_SUCCESS)) |
| 355 | return(MagickFalse); |
| 356 | /* |
| 357 | Bind OpenCL buffers. |
| 358 | */ |
| 359 | i=0; |
| 360 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *) |
| 361 | &convolve_info->pixels); |
| 362 | if (status != CL_SUCCESS) |
| 363 | return(MagickFalse); |
| 364 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *) |
| 365 | &convolve_info->filter); |
| 366 | if (status != CL_SUCCESS) |
| 367 | return(MagickFalse); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 368 | convolve_info->width=(cl_ulong) width; |
| 369 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 370 | &convolve_info->width); |
| 371 | if (status != CL_SUCCESS) |
| 372 | return(MagickFalse); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 373 | convolve_info->height=(cl_ulong) height; |
| 374 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 375 | &convolve_info->height); |
| 376 | if (status != CL_SUCCESS) |
| 377 | return(MagickFalse); |
| 378 | convolve_info->matte=(cl_bool) image->matte; |
| 379 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_bool),(void *) |
| 380 | &convolve_info->matte); |
| 381 | if (status != CL_SUCCESS) |
| 382 | return(MagickFalse); |
| 383 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *) |
| 384 | &convolve_info->convolve_pixels); |
| 385 | if (status != CL_SUCCESS) |
| 386 | return(MagickFalse); |
| 387 | status=clFinish(convolve_info->command_queue); |
| 388 | if (status != CL_SUCCESS) |
| 389 | return(MagickFalse); |
| 390 | return(MagickTrue); |
| 391 | } |
| 392 | |
| 393 | static void DestroyConvolveBuffers(ConvolveInfo *convolve_info) |
| 394 | { |
| 395 | cl_int |
| 396 | status; |
| 397 | |
| 398 | if (convolve_info->convolve_pixels != (cl_mem) NULL) |
| 399 | status=clReleaseMemObject(convolve_info->convolve_pixels); |
| 400 | if (convolve_info->pixels != (cl_mem) NULL) |
| 401 | status=clReleaseMemObject(convolve_info->pixels); |
| 402 | if (convolve_info->filter != (cl_mem) NULL) |
| 403 | status=clReleaseMemObject(convolve_info->filter); |
| 404 | } |
| 405 | |
| 406 | static ConvolveInfo *DestroyConvolveInfo(ConvolveInfo *convolve_info) |
| 407 | { |
| 408 | cl_int |
| 409 | status; |
| 410 | |
| 411 | if (convolve_info->kernel != (cl_kernel) NULL) |
| 412 | status=clReleaseKernel(convolve_info->kernel); |
| 413 | if (convolve_info->program != (cl_program) NULL) |
| 414 | status=clReleaseProgram(convolve_info->program); |
| 415 | if (convolve_info->command_queue != (cl_command_queue) NULL) |
| 416 | status=clReleaseCommandQueue(convolve_info->command_queue); |
| 417 | if (convolve_info->context != (cl_context) NULL) |
| 418 | status=clReleaseContext(convolve_info->context); |
| 419 | convolve_info=(ConvolveInfo *) RelinquishMagickMemory(convolve_info); |
| 420 | return(convolve_info); |
| 421 | } |
| 422 | |
| 423 | static MagickBooleanType EnqueueConvolveKernel(ConvolveInfo *convolve_info, |
| 424 | const Image *image,const void *pixels,double *filter, |
cristy | bb50337 | 2010-05-27 20:51:26 +0000 | [diff] [blame] | 425 | const size_t width,const size_t height,void *convolve_pixels) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 426 | { |
| 427 | cl_int |
| 428 | status; |
| 429 | |
| 430 | size_t |
| 431 | global_work_size[2], |
| 432 | length; |
| 433 | |
| 434 | length=image->columns*image->rows; |
| 435 | status=clEnqueueWriteBuffer(convolve_info->command_queue, |
| 436 | convolve_info->pixels,CL_TRUE,0,length*sizeof(CLPixelPacket),pixels,0,NULL, |
| 437 | NULL); |
| 438 | length=width*height; |
| 439 | status=clEnqueueWriteBuffer(convolve_info->command_queue, |
| 440 | convolve_info->filter,CL_TRUE,0,length*sizeof(cl_double),filter,0,NULL, |
| 441 | NULL); |
| 442 | if (status != CL_SUCCESS) |
| 443 | return(MagickFalse); |
| 444 | global_work_size[0]=image->columns; |
| 445 | global_work_size[1]=image->rows; |
| 446 | status=clEnqueueNDRangeKernel(convolve_info->command_queue, |
| 447 | convolve_info->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL); |
| 448 | if (status != CL_SUCCESS) |
| 449 | return(MagickFalse); |
| 450 | length=image->columns*image->rows; |
| 451 | status=clEnqueueReadBuffer(convolve_info->command_queue, |
| 452 | convolve_info->convolve_pixels,CL_TRUE,0,length*sizeof(CLPixelPacket), |
| 453 | convolve_pixels,0,NULL,NULL); |
| 454 | if (status != CL_SUCCESS) |
| 455 | return(MagickFalse); |
| 456 | status=clFinish(convolve_info->command_queue); |
| 457 | if (status != CL_SUCCESS) |
| 458 | return(MagickFalse); |
| 459 | return(MagickTrue); |
| 460 | } |
| 461 | |
| 462 | static ConvolveInfo *GetConvolveInfo(const Image *image,const char *name, |
| 463 | const char *source,ExceptionInfo *exception) |
| 464 | { |
| 465 | char |
| 466 | options[MaxTextExtent]; |
| 467 | |
| 468 | cl_int |
| 469 | status; |
| 470 | |
| 471 | ConvolveInfo |
| 472 | *convolve_info; |
| 473 | |
| 474 | size_t |
| 475 | length, |
| 476 | lengths[] = { strlen(source) }; |
| 477 | |
| 478 | /* |
| 479 | Create OpenCL info. |
| 480 | */ |
cristy | 73bd4a5 | 2010-10-05 11:24:23 +0000 | [diff] [blame] | 481 | convolve_info=(ConvolveInfo *) AcquireMagickMemory(sizeof(*convolve_info)); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 482 | if (convolve_info == (ConvolveInfo *) NULL) |
| 483 | { |
| 484 | (void) ThrowMagickException(exception,GetMagickModule(), |
| 485 | ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename); |
| 486 | return((ConvolveInfo *) NULL); |
| 487 | } |
| 488 | (void) ResetMagickMemory(convolve_info,0,sizeof(*convolve_info)); |
| 489 | /* |
| 490 | Create OpenCL context. |
| 491 | */ |
cristy | 32cca40 | 2010-01-23 04:02:23 +0000 | [diff] [blame] | 492 | convolve_info->context=clCreateContextFromType((cl_context_properties *) |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 493 | NULL,(cl_device_type) CL_DEVICE_TYPE_GPU,ConvolveNotify,exception,&status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 494 | if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS)) |
| 495 | convolve_info->context=clCreateContextFromType((cl_context_properties *) |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 496 | NULL,(cl_device_type) CL_DEVICE_TYPE_CPU,ConvolveNotify,exception, |
| 497 | &status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 498 | if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS)) |
| 499 | convolve_info->context=clCreateContextFromType((cl_context_properties *) |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 500 | NULL,(cl_device_type) CL_DEVICE_TYPE_DEFAULT,ConvolveNotify,exception, |
| 501 | &status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 502 | if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS)) |
| 503 | { |
cristy | 32cca40 | 2010-01-23 04:02:23 +0000 | [diff] [blame] | 504 | (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning, |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 505 | "failed to create OpenCL context","`%s' (%d)",image->filename,status); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 506 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 507 | return((ConvolveInfo *) NULL); |
| 508 | } |
| 509 | /* |
| 510 | Detect OpenCL devices. |
| 511 | */ |
| 512 | status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,0,NULL, |
| 513 | &length); |
| 514 | if ((status != CL_SUCCESS) || (length == 0)) |
| 515 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 516 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 517 | return((ConvolveInfo *) NULL); |
| 518 | } |
| 519 | convolve_info->devices=(cl_device_id *) AcquireMagickMemory(length); |
| 520 | if (convolve_info->devices == (cl_device_id *) NULL) |
| 521 | { |
| 522 | (void) ThrowMagickException(exception,GetMagickModule(), |
| 523 | ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 524 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 525 | return((ConvolveInfo *) NULL); |
| 526 | } |
| 527 | status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,length, |
| 528 | convolve_info->devices,NULL); |
| 529 | if (status != CL_SUCCESS) |
| 530 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 531 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 532 | return((ConvolveInfo *) NULL); |
| 533 | } |
| 534 | /* |
| 535 | Create OpenCL command queue. |
| 536 | */ |
| 537 | convolve_info->command_queue=clCreateCommandQueue(convolve_info->context, |
| 538 | convolve_info->devices[0],0,&status); |
| 539 | if ((convolve_info->command_queue == (cl_command_queue) NULL) || |
| 540 | (status != CL_SUCCESS)) |
| 541 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 542 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 543 | return((ConvolveInfo *) NULL); |
| 544 | } |
| 545 | /* |
| 546 | Build OpenCL program. |
| 547 | */ |
| 548 | convolve_info->program=clCreateProgramWithSource(convolve_info->context,1, |
| 549 | &source,lengths,&status); |
| 550 | if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS)) |
| 551 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 552 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 553 | return((ConvolveInfo *) NULL); |
| 554 | } |
cristy | b51dff5 | 2011-05-19 16:55:47 +0000 | [diff] [blame] | 555 | (void) FormatLocaleString(options,MaxTextExtent,CLOptions,(double) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 556 | QuantumRange,MagickEpsilon); |
cristy | 00243d1 | 2010-01-21 02:45:27 +0000 | [diff] [blame] | 557 | status=clBuildProgram(convolve_info->program,1,convolve_info->devices,options, |
| 558 | NULL,NULL); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 559 | if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS)) |
| 560 | { |
| 561 | char |
| 562 | *log; |
| 563 | |
| 564 | status=clGetProgramBuildInfo(convolve_info->program, |
| 565 | convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,0,NULL,&length); |
| 566 | log=(char *) AcquireMagickMemory(length); |
| 567 | if (log == (char *) NULL) |
| 568 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 569 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 570 | return((ConvolveInfo *) NULL); |
| 571 | } |
| 572 | status=clGetProgramBuildInfo(convolve_info->program, |
| 573 | convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,length,log,&length); |
cristy | 32cca40 | 2010-01-23 04:02:23 +0000 | [diff] [blame] | 574 | (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning, |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 575 | "failed to build OpenCL program","`%s' (%s)",image->filename,log); |
| 576 | log=DestroyString(log); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 577 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 578 | return((ConvolveInfo *) NULL); |
| 579 | } |
| 580 | /* |
| 581 | Get a kernel object. |
| 582 | */ |
| 583 | convolve_info->kernel=clCreateKernel(convolve_info->program,name,&status); |
| 584 | if ((convolve_info->kernel == (cl_kernel) NULL) || (status != CL_SUCCESS)) |
| 585 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 586 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 587 | return((ConvolveInfo *) NULL); |
| 588 | } |
| 589 | return(convolve_info); |
| 590 | } |
| 591 | |
| 592 | #endif |
| 593 | |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 594 | MagickExport MagickBooleanType AccelerateConvolveImage(const Image *image, |
cristy | 2be1538 | 2010-01-21 02:38:03 +0000 | [diff] [blame] | 595 | const KernelInfo *kernel,Image *convolve_image,ExceptionInfo *exception) |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 596 | { |
| 597 | assert(image != (Image *) NULL); |
| 598 | assert(image->signature == MagickSignature); |
| 599 | if (image->debug != MagickFalse) |
| 600 | (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename); |
cristy | 2be1538 | 2010-01-21 02:38:03 +0000 | [diff] [blame] | 601 | assert(kernel != (KernelInfo *) NULL); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 602 | assert(kernel->signature == MagickSignature); |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 603 | assert(convolve_image != (Image *) NULL); |
| 604 | assert(convolve_image->signature == MagickSignature); |
| 605 | assert(exception != (ExceptionInfo *) NULL); |
| 606 | assert(exception->signature == MagickSignature); |
cristy | 394651a | 2010-01-23 21:05:55 +0000 | [diff] [blame] | 607 | if ((image->storage_class != DirectClass) || |
| 608 | (image->colorspace == CMYKColorspace)) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 609 | if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) && |
| 610 | (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod)) |
| 611 | return(MagickFalse); |
| 612 | #if !defined(MAGICKCORE_OPENCL_SUPPORT) |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 613 | return(MagickFalse); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 614 | #else |
| 615 | { |
| 616 | const void |
| 617 | *pixels; |
| 618 | |
| 619 | ConvolveInfo |
| 620 | *convolve_info; |
| 621 | |
| 622 | MagickBooleanType |
| 623 | status; |
| 624 | |
| 625 | MagickSizeType |
| 626 | length; |
| 627 | |
| 628 | void |
| 629 | *convolve_pixels; |
| 630 | |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 631 | convolve_info=GetConvolveInfo(image,"Convolve",ConvolveKernel,exception); |
| 632 | if (convolve_info == (ConvolveInfo *) NULL) |
| 633 | return(MagickFalse); |
| 634 | pixels=AcquirePixelCachePixels(image,&length,exception); |
| 635 | if (pixels == (const void *) NULL) |
| 636 | { |
| 637 | (void) ThrowMagickException(exception,GetMagickModule(),CacheError, |
| 638 | "UnableToReadPixelCache","`%s'",image->filename); |
| 639 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 640 | return(MagickFalse); |
| 641 | } |
| 642 | convolve_pixels=GetPixelCachePixels(convolve_image,&length,exception); |
| 643 | if (convolve_pixels == (void *) NULL) |
| 644 | { |
| 645 | (void) ThrowMagickException(exception,GetMagickModule(),CacheError, |
| 646 | "UnableToReadPixelCache","`%s'",image->filename); |
| 647 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 648 | return(MagickFalse); |
| 649 | } |
| 650 | status=BindConvolveParameters(convolve_info,image,pixels,kernel->values, |
| 651 | kernel->width,kernel->height,convolve_pixels); |
| 652 | if (status == MagickFalse) |
| 653 | { |
| 654 | DestroyConvolveBuffers(convolve_info); |
| 655 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 656 | return(MagickFalse); |
| 657 | } |
| 658 | status=EnqueueConvolveKernel(convolve_info,image,pixels,kernel->values, |
| 659 | kernel->width,kernel->height,convolve_pixels); |
| 660 | if (status == MagickFalse) |
| 661 | { |
| 662 | DestroyConvolveBuffers(convolve_info); |
| 663 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 664 | return(MagickFalse); |
| 665 | } |
| 666 | DestroyConvolveBuffers(convolve_info); |
| 667 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 668 | return(MagickTrue); |
| 669 | } |
| 670 | #endif |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 671 | } |