cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 1 | /* |
| 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 3 | % % |
| 4 | % % |
| 5 | % % |
| 6 | % AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE % |
| 7 | % A A C C E L E R R A A T E % |
| 8 | % AAAAA C C EEE L EEE RRRR AAAAA T EEE % |
| 9 | % A A C C E L E R R A A T E % |
| 10 | % A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE % |
| 11 | % % |
| 12 | % % |
| 13 | % MagickCore Acceleration Methods % |
| 14 | % % |
| 15 | % Software Design % |
cristy | 0d127ab | 2010-05-14 23:29:46 +0000 | [diff] [blame] | 16 | % John Cristy % |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 17 | % January 2010 % |
| 18 | % % |
| 19 | % % |
cristy | 7e41fe8 | 2010-12-04 23:12:08 +0000 | [diff] [blame] | 20 | % Copyright 1999-2011 ImageMagick Studio LLC, a non-profit organization % |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 21 | % dedicated to making software imaging solutions freely available. % |
| 22 | % % |
| 23 | % You may not use this file except in compliance with the License. You may % |
| 24 | % obtain a copy of the License at % |
| 25 | % % |
| 26 | % http://www.imagemagick.org/script/license.php % |
| 27 | % % |
| 28 | % Unless required by applicable law or agreed to in writing, software % |
| 29 | % distributed under the License is distributed on an "AS IS" BASIS, % |
| 30 | % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. % |
| 31 | % See the License for the specific language governing permissions and % |
| 32 | % limitations under the License. % |
| 33 | % % |
| 34 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 35 | % |
cristy | 0d127ab | 2010-05-14 23:29:46 +0000 | [diff] [blame] | 36 | % Morphology is the application of various kernels, of any size and even |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 37 | % shape, to an image in various ways (typically binary, but not always). |
| 38 | % |
| 39 | % Convolution (weighted sum or average) is just one specific type of |
| 40 | % accelerate. Just one that is very common for image blurring and sharpening |
| 41 | % effects. Not only 2D Gaussian blurring, but also 2-pass 1D Blurring. |
| 42 | % |
| 43 | % This module provides not only a general accelerate function, and the ability |
| 44 | % to apply more advanced or iterative morphologies, but also functions for the |
| 45 | % generation of many different types of kernel arrays from user supplied |
| 46 | % arguments. Perhaps even the generation of a kernel from a small image. |
| 47 | */ |
| 48 | |
| 49 | /* |
| 50 | Include declarations. |
| 51 | */ |
cristy | 4c08aed | 2011-07-01 19:47:50 +0000 | [diff] [blame] | 52 | #include "MagickCore/studio.h" |
| 53 | #include "MagickCore/accelerate.h" |
| 54 | #include "MagickCore/artifact.h" |
cristy | 35f3349 | 2011-07-07 16:54:49 +0000 | [diff] [blame] | 55 | #include "MagickCore/cache.h" |
cristy | d1dd6e4 | 2011-09-04 01:46:08 +0000 | [diff] [blame^] | 56 | #include "MagickCore/cache-private.h" |
cristy | 4c08aed | 2011-07-01 19:47:50 +0000 | [diff] [blame] | 57 | #include "MagickCore/cache-view.h" |
| 58 | #include "MagickCore/color-private.h" |
| 59 | #include "MagickCore/enhance.h" |
| 60 | #include "MagickCore/exception.h" |
| 61 | #include "MagickCore/exception-private.h" |
| 62 | #include "MagickCore/gem.h" |
| 63 | #include "MagickCore/hashmap.h" |
| 64 | #include "MagickCore/image.h" |
| 65 | #include "MagickCore/image-private.h" |
| 66 | #include "MagickCore/list.h" |
| 67 | #include "MagickCore/memory_.h" |
| 68 | #include "MagickCore/monitor-private.h" |
| 69 | #include "MagickCore/accelerate.h" |
| 70 | #include "MagickCore/option.h" |
| 71 | #include "MagickCore/pixel-accessor.h" |
| 72 | #include "MagickCore/prepress.h" |
| 73 | #include "MagickCore/quantize.h" |
| 74 | #include "MagickCore/registry.h" |
| 75 | #include "MagickCore/semaphore.h" |
| 76 | #include "MagickCore/splay-tree.h" |
| 77 | #include "MagickCore/statistic.h" |
| 78 | #include "MagickCore/string_.h" |
| 79 | #include "MagickCore/string-private.h" |
| 80 | #include "MagickCore/token.h" |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 81 | |
| 82 | /* |
| 83 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 84 | % % |
| 85 | % % |
| 86 | % % |
| 87 | % A c c e l e r a t e C o n v o l v e I m a g e % |
| 88 | % % |
| 89 | % % |
| 90 | % % |
| 91 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 92 | % |
| 93 | % AccelerateConvolveImage() applies a custom convolution kernel to the image. |
| 94 | % It is accelerated by taking advantage of speed-ups offered by executing in |
| 95 | % concert across heterogeneous platforms consisting of CPUs, GPUs, and other |
| 96 | % processors. |
| 97 | % |
| 98 | % The format of the AccelerateConvolveImage method is: |
| 99 | % |
| 100 | % MagickBooleanType AccelerateConvolveImage(const Image *image, |
cristy | 2be1538 | 2010-01-21 02:38:03 +0000 | [diff] [blame] | 101 | % const KernelInfo *kernel,Image *convolve_image, |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 102 | % ExceptionInfo *exception) |
| 103 | % |
| 104 | % A description of each parameter follows: |
| 105 | % |
| 106 | % o image: the image. |
| 107 | % |
| 108 | % o kernel: the convolution kernel. |
| 109 | % |
| 110 | % o convolve_image: the convolved image. |
| 111 | % |
| 112 | % o exception: return any errors or warnings in this structure. |
| 113 | % |
| 114 | */ |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 115 | |
| 116 | #if defined(MAGICKCORE_OPENCL_SUPPORT) |
| 117 | |
/*
  OpenCL build configuration, selected by quantum depth.

  CLOptions is a printf-style template for the program build options; its two
  %g fields receive QuantumRange and MagickEpsilon.  CLPixelPacket is the
  host-side vector type whose size matches the CLPixelType the kernel is
  compiled with.  The 64-bit variant uses the OpenCL C types ulong/ulong4,
  the device-side counterparts of cl_ulong4.
*/
#if defined(MAGICKCORE_HDRI_SUPPORT)
#define CLOptions "-DMAGICKCORE_HDRI_SUPPORT=1 -DCLQuantum=float " \
  "-DCLPixelType=float4 -DQuantumRange=%g -DMagickEpsilon=%g"
#define CLPixelPacket  cl_float4
#else
#if (MAGICKCORE_QUANTUM_DEPTH == 8)
#define CLOptions "-DCLQuantum=uchar -DCLPixelType=uchar4 " \
  "-DQuantumRange=%g -DMagickEpsilon=%g"
#define CLPixelPacket  cl_uchar4
#elif (MAGICKCORE_QUANTUM_DEPTH == 16)
#define CLOptions "-DCLQuantum=ushort -DCLPixelType=ushort4 " \
  "-DQuantumRange=%g -DMagickEpsilon=%g"
#define CLPixelPacket  cl_ushort4
#elif (MAGICKCORE_QUANTUM_DEPTH == 32)
#define CLOptions "-DCLQuantum=uint -DCLPixelType=uint4 " \
  "-DQuantumRange=%g -DMagickEpsilon=%g"
#define CLPixelPacket  cl_uint4
#elif (MAGICKCORE_QUANTUM_DEPTH == 64)
#define CLOptions "-DCLQuantum=ulong -DCLPixelType=ulong4 " \
  "-DQuantumRange=%g -DMagickEpsilon=%g"
#define CLPixelPacket  cl_ulong4
#endif
#endif
| 141 | |
| 142 | typedef struct _ConvolveInfo |
| 143 | { |
| 144 | cl_context |
| 145 | context; |
| 146 | |
| 147 | cl_device_id |
| 148 | *devices; |
| 149 | |
| 150 | cl_command_queue |
| 151 | command_queue; |
| 152 | |
| 153 | cl_kernel |
| 154 | kernel; |
| 155 | |
| 156 | cl_program |
| 157 | program; |
| 158 | |
| 159 | cl_mem |
| 160 | pixels, |
| 161 | convolve_pixels; |
| 162 | |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 163 | cl_ulong |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 164 | width, |
| 165 | height; |
| 166 | |
| 167 | cl_bool |
| 168 | matte; |
| 169 | |
| 170 | cl_mem |
| 171 | filter; |
| 172 | } ConvolveInfo; |
| 173 | |
/*
  OpenCL C source for the "Convolve" kernel.  It is compiled at run time with
  the CLOptions build flags, which supply CLQuantum, CLPixelType, QuantumRange
  and MagickEpsilon.

  The kernel picks one of four loops per work-item: methods 0/1 clamp
  neighbour coordinates to the canvas (edge pixels), methods 2/3 index
  directly (interior pixels); methods 1/3 additionally weight the colour
  channels by alpha when matte is set.
*/
static char
  *ConvolveKernel =
    "static inline long ClampToCanvas(const long offset,const unsigned long range)\n"
    "{\n"
    " if (offset < 0L)\n"
    " return(0L);\n"
    " if (offset >= range)\n"
    " return((long) (range-1L));\n"
    " return(offset);\n"
    "}\n"
    "\n"
    "static inline CLQuantum ClampToQuantum(const double value)\n"
    "{\n"
    "#if defined(MAGICKCORE_HDRI_SUPPORT)\n"
    " return((CLQuantum) value);\n"
    "#else\n"
    " if (value < 0.0)\n"
    " return((CLQuantum) 0);\n"
    " if (value >= (double) QuantumRange)\n"
    " return((CLQuantum) QuantumRange);\n"
    " return((CLQuantum) (value+0.5));\n"
    "#endif\n"
    "}\n"
    "\n"
    "__kernel void Convolve(const __global CLPixelType *input,\n"
    " __constant double *filter,const unsigned long width,const unsigned long height,\n"
    " const bool matte,__global CLPixelType *output)\n"
    "{\n"
    " const unsigned long columns = get_global_size(0);\n"
    " const unsigned long rows = get_global_size(1);\n"
    "\n"
    " const long x = get_global_id(0);\n"
    " const long y = get_global_id(1);\n"
    "\n"
    " const double scale = (1.0/QuantumRange);\n"
    " const long mid_width = (width-1)/2;\n"
    " const long mid_height = (height-1)/2;\n"
    " double4 sum = { 0.0, 0.0, 0.0, 0.0 };\n"
    " double gamma = 0.0;\n"
    " register unsigned long i = 0;\n"
    "\n"
    " int method = 0;\n"
    " if (matte != false)\n"
    " method=1;\n"
    " if ((x >= width) && (x < (columns-width-1)) &&\n"
    " (y >= height) && (y < (rows-height-1)))\n"
    " {\n"
    " method=2;\n"
    " if (matte != false)\n"
    " method=3;\n"
    " }\n"
    " switch (method)\n"
    " {\n"
    " case 0:\n"
    " {\n"
    " for (long v=(-mid_height); v <= mid_height; v++)\n"
    " {\n"
    " for (long u=(-mid_width); u <= mid_width; u++)\n"
    " {\n"
    " const long index=ClampToCanvas(y+v,rows)*columns+\n"
    " ClampToCanvas(x+u,columns);\n"
    " sum.x+=filter[i]*input[index].x;\n"
    " sum.y+=filter[i]*input[index].y;\n"
    " sum.z+=filter[i]*input[index].z;\n"
    " gamma+=filter[i];\n"
    " i++;\n"
    " }\n"
    " }\n"
    " break;\n"
    " }\n"
    " case 1:\n"
    " {\n"
    " for (long v=(-mid_height); v <= mid_height; v++)\n"
    " {\n"
    " for (long u=(-mid_width); u <= mid_width; u++)\n"
    " {\n"
    " const unsigned long index=ClampToCanvas(y+v,rows)*columns+\n"
    " ClampToCanvas(x+u,columns);\n"
    " const double alpha=scale*input[index].w;\n"
    " sum.x+=alpha*filter[i]*input[index].x;\n"
    " sum.y+=alpha*filter[i]*input[index].y;\n"
    " sum.z+=alpha*filter[i]*input[index].z;\n"
    " sum.w+=filter[i]*input[index].w;\n"
    " gamma+=alpha*filter[i];\n"
    " i++;\n"
    " }\n"
    " }\n"
    " break;\n"
    " }\n"
    " case 2:\n"
    " {\n"
    " for (long v=(-mid_height); v <= mid_height; v++)\n"
    " {\n"
    " for (long u=(-mid_width); u <= mid_width; u++)\n"
    " {\n"
    " const unsigned long index=(y+v)*columns+(x+u);\n"
    " sum.x+=filter[i]*input[index].x;\n"
    " sum.y+=filter[i]*input[index].y;\n"
    " sum.z+=filter[i]*input[index].z;\n"
    " gamma+=filter[i];\n"
    " i++;\n"
    " }\n"
    " }\n"
    " break;\n"
    " }\n"
    " case 3:\n"
    " {\n"
    " for (long v=(-mid_height); v <= mid_height; v++)\n"
    " {\n"
    " for (long u=(-mid_width); u <= mid_width; u++)\n"
    " {\n"
    " const unsigned long index=(y+v)*columns+(x+u);\n"
    " const double alpha=scale*input[index].w;\n"
    " sum.x+=alpha*filter[i]*input[index].x;\n"
    " sum.y+=alpha*filter[i]*input[index].y;\n"
    " sum.z+=alpha*filter[i]*input[index].z;\n"
    " sum.w+=filter[i]*input[index].w;\n"
    " gamma+=alpha*filter[i];\n"
    " i++;\n"
    " }\n"
    " }\n"
    " break;\n"
    " }\n"
    " }\n"
    " gamma=1.0/(fabs(gamma) <= MagickEpsilon ? 1.0 : gamma);\n"
    " const unsigned long index = y*columns+x;\n"
    " output[index].x=ClampToQuantum(gamma*sum.x);\n"
    " output[index].y=ClampToQuantum(gamma*sum.y);\n"
    " output[index].z=ClampToQuantum(gamma*sum.z);\n"
    " if (matte == false)\n"
    " output[index].w=input[index].w;\n"
    " else\n"
    " output[index].w=ClampToQuantum(sum.w);\n"
    "}\n";
| 308 | |
| 309 | static void ConvolveNotify(const char *message,const void *data,size_t length, |
| 310 | void *user_context) |
| 311 | { |
| 312 | ExceptionInfo |
| 313 | *exception; |
| 314 | |
| 315 | (void) data; |
| 316 | (void) length; |
| 317 | exception=(ExceptionInfo *) user_context; |
cristy | 32cca40 | 2010-01-23 04:02:23 +0000 | [diff] [blame] | 318 | (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning, |
| 319 | "DelegateFailed","`%s'",message); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 320 | } |
| 321 | |
| 322 | static MagickBooleanType BindConvolveParameters(ConvolveInfo *convolve_info, |
| 323 | const Image *image,const void *pixels,double *filter, |
cristy | bb50337 | 2010-05-27 20:51:26 +0000 | [diff] [blame] | 324 | const size_t width,const size_t height,void *convolve_pixels) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 325 | { |
| 326 | cl_int |
| 327 | status; |
| 328 | |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 329 | register cl_uint |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 330 | i; |
| 331 | |
| 332 | size_t |
| 333 | length; |
| 334 | |
| 335 | /* |
| 336 | Allocate OpenCL buffers. |
| 337 | */ |
| 338 | length=image->columns*image->rows; |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 339 | convolve_info->pixels=clCreateBuffer(convolve_info->context,(cl_mem_flags) |
| 340 | (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(CLPixelPacket), |
| 341 | (void *) pixels,&status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 342 | if ((convolve_info->pixels == (cl_mem) NULL) || (status != CL_SUCCESS)) |
| 343 | return(MagickFalse); |
| 344 | length=width*height; |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 345 | convolve_info->filter=clCreateBuffer(convolve_info->context,(cl_mem_flags) |
| 346 | (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),length*sizeof(cl_double),filter, |
| 347 | &status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 348 | if ((convolve_info->filter == (cl_mem) NULL) || (status != CL_SUCCESS)) |
| 349 | return(MagickFalse); |
| 350 | length=image->columns*image->rows; |
| 351 | convolve_info->convolve_pixels=clCreateBuffer(convolve_info->context, |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 352 | (cl_mem_flags) (CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR),length* |
| 353 | sizeof(CLPixelPacket),convolve_pixels,&status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 354 | if ((convolve_info->convolve_pixels == (cl_mem) NULL) || |
| 355 | (status != CL_SUCCESS)) |
| 356 | return(MagickFalse); |
| 357 | /* |
| 358 | Bind OpenCL buffers. |
| 359 | */ |
| 360 | i=0; |
| 361 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *) |
| 362 | &convolve_info->pixels); |
| 363 | if (status != CL_SUCCESS) |
| 364 | return(MagickFalse); |
| 365 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *) |
| 366 | &convolve_info->filter); |
| 367 | if (status != CL_SUCCESS) |
| 368 | return(MagickFalse); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 369 | convolve_info->width=(cl_ulong) width; |
| 370 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 371 | &convolve_info->width); |
| 372 | if (status != CL_SUCCESS) |
| 373 | return(MagickFalse); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 374 | convolve_info->height=(cl_ulong) height; |
| 375 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_ulong),(void *) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 376 | &convolve_info->height); |
| 377 | if (status != CL_SUCCESS) |
| 378 | return(MagickFalse); |
| 379 | convolve_info->matte=(cl_bool) image->matte; |
| 380 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_bool),(void *) |
| 381 | &convolve_info->matte); |
| 382 | if (status != CL_SUCCESS) |
| 383 | return(MagickFalse); |
| 384 | status=clSetKernelArg(convolve_info->kernel,i++,sizeof(cl_mem),(void *) |
| 385 | &convolve_info->convolve_pixels); |
| 386 | if (status != CL_SUCCESS) |
| 387 | return(MagickFalse); |
| 388 | status=clFinish(convolve_info->command_queue); |
| 389 | if (status != CL_SUCCESS) |
| 390 | return(MagickFalse); |
| 391 | return(MagickTrue); |
| 392 | } |
| 393 | |
| 394 | static void DestroyConvolveBuffers(ConvolveInfo *convolve_info) |
| 395 | { |
| 396 | cl_int |
| 397 | status; |
| 398 | |
| 399 | if (convolve_info->convolve_pixels != (cl_mem) NULL) |
| 400 | status=clReleaseMemObject(convolve_info->convolve_pixels); |
| 401 | if (convolve_info->pixels != (cl_mem) NULL) |
| 402 | status=clReleaseMemObject(convolve_info->pixels); |
| 403 | if (convolve_info->filter != (cl_mem) NULL) |
| 404 | status=clReleaseMemObject(convolve_info->filter); |
| 405 | } |
| 406 | |
| 407 | static ConvolveInfo *DestroyConvolveInfo(ConvolveInfo *convolve_info) |
| 408 | { |
| 409 | cl_int |
| 410 | status; |
| 411 | |
| 412 | if (convolve_info->kernel != (cl_kernel) NULL) |
| 413 | status=clReleaseKernel(convolve_info->kernel); |
| 414 | if (convolve_info->program != (cl_program) NULL) |
| 415 | status=clReleaseProgram(convolve_info->program); |
| 416 | if (convolve_info->command_queue != (cl_command_queue) NULL) |
| 417 | status=clReleaseCommandQueue(convolve_info->command_queue); |
| 418 | if (convolve_info->context != (cl_context) NULL) |
| 419 | status=clReleaseContext(convolve_info->context); |
| 420 | convolve_info=(ConvolveInfo *) RelinquishMagickMemory(convolve_info); |
| 421 | return(convolve_info); |
| 422 | } |
| 423 | |
| 424 | static MagickBooleanType EnqueueConvolveKernel(ConvolveInfo *convolve_info, |
| 425 | const Image *image,const void *pixels,double *filter, |
cristy | bb50337 | 2010-05-27 20:51:26 +0000 | [diff] [blame] | 426 | const size_t width,const size_t height,void *convolve_pixels) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 427 | { |
| 428 | cl_int |
| 429 | status; |
| 430 | |
| 431 | size_t |
| 432 | global_work_size[2], |
| 433 | length; |
| 434 | |
| 435 | length=image->columns*image->rows; |
| 436 | status=clEnqueueWriteBuffer(convolve_info->command_queue, |
| 437 | convolve_info->pixels,CL_TRUE,0,length*sizeof(CLPixelPacket),pixels,0,NULL, |
| 438 | NULL); |
| 439 | length=width*height; |
| 440 | status=clEnqueueWriteBuffer(convolve_info->command_queue, |
| 441 | convolve_info->filter,CL_TRUE,0,length*sizeof(cl_double),filter,0,NULL, |
| 442 | NULL); |
| 443 | if (status != CL_SUCCESS) |
| 444 | return(MagickFalse); |
| 445 | global_work_size[0]=image->columns; |
| 446 | global_work_size[1]=image->rows; |
| 447 | status=clEnqueueNDRangeKernel(convolve_info->command_queue, |
| 448 | convolve_info->kernel,2,NULL,global_work_size,NULL,0,NULL,NULL); |
| 449 | if (status != CL_SUCCESS) |
| 450 | return(MagickFalse); |
| 451 | length=image->columns*image->rows; |
| 452 | status=clEnqueueReadBuffer(convolve_info->command_queue, |
| 453 | convolve_info->convolve_pixels,CL_TRUE,0,length*sizeof(CLPixelPacket), |
| 454 | convolve_pixels,0,NULL,NULL); |
| 455 | if (status != CL_SUCCESS) |
| 456 | return(MagickFalse); |
| 457 | status=clFinish(convolve_info->command_queue); |
| 458 | if (status != CL_SUCCESS) |
| 459 | return(MagickFalse); |
| 460 | return(MagickTrue); |
| 461 | } |
| 462 | |
| 463 | static ConvolveInfo *GetConvolveInfo(const Image *image,const char *name, |
| 464 | const char *source,ExceptionInfo *exception) |
| 465 | { |
| 466 | char |
| 467 | options[MaxTextExtent]; |
| 468 | |
| 469 | cl_int |
| 470 | status; |
| 471 | |
| 472 | ConvolveInfo |
| 473 | *convolve_info; |
| 474 | |
| 475 | size_t |
| 476 | length, |
| 477 | lengths[] = { strlen(source) }; |
| 478 | |
| 479 | /* |
| 480 | Create OpenCL info. |
| 481 | */ |
cristy | 73bd4a5 | 2010-10-05 11:24:23 +0000 | [diff] [blame] | 482 | convolve_info=(ConvolveInfo *) AcquireMagickMemory(sizeof(*convolve_info)); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 483 | if (convolve_info == (ConvolveInfo *) NULL) |
| 484 | { |
| 485 | (void) ThrowMagickException(exception,GetMagickModule(), |
| 486 | ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename); |
| 487 | return((ConvolveInfo *) NULL); |
| 488 | } |
| 489 | (void) ResetMagickMemory(convolve_info,0,sizeof(*convolve_info)); |
| 490 | /* |
| 491 | Create OpenCL context. |
| 492 | */ |
cristy | 32cca40 | 2010-01-23 04:02:23 +0000 | [diff] [blame] | 493 | convolve_info->context=clCreateContextFromType((cl_context_properties *) |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 494 | NULL,(cl_device_type) CL_DEVICE_TYPE_GPU,ConvolveNotify,exception,&status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 495 | if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS)) |
| 496 | convolve_info->context=clCreateContextFromType((cl_context_properties *) |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 497 | NULL,(cl_device_type) CL_DEVICE_TYPE_CPU,ConvolveNotify,exception, |
| 498 | &status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 499 | if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS)) |
| 500 | convolve_info->context=clCreateContextFromType((cl_context_properties *) |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 501 | NULL,(cl_device_type) CL_DEVICE_TYPE_DEFAULT,ConvolveNotify,exception, |
| 502 | &status); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 503 | if ((convolve_info->context == (cl_context) NULL) || (status != CL_SUCCESS)) |
| 504 | { |
cristy | 32cca40 | 2010-01-23 04:02:23 +0000 | [diff] [blame] | 505 | (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning, |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 506 | "failed to create OpenCL context","`%s' (%d)",image->filename,status); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 507 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 508 | return((ConvolveInfo *) NULL); |
| 509 | } |
| 510 | /* |
| 511 | Detect OpenCL devices. |
| 512 | */ |
| 513 | status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,0,NULL, |
| 514 | &length); |
| 515 | if ((status != CL_SUCCESS) || (length == 0)) |
| 516 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 517 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 518 | return((ConvolveInfo *) NULL); |
| 519 | } |
| 520 | convolve_info->devices=(cl_device_id *) AcquireMagickMemory(length); |
| 521 | if (convolve_info->devices == (cl_device_id *) NULL) |
| 522 | { |
| 523 | (void) ThrowMagickException(exception,GetMagickModule(), |
| 524 | ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 525 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 526 | return((ConvolveInfo *) NULL); |
| 527 | } |
| 528 | status=clGetContextInfo(convolve_info->context,CL_CONTEXT_DEVICES,length, |
| 529 | convolve_info->devices,NULL); |
| 530 | if (status != CL_SUCCESS) |
| 531 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 532 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 533 | return((ConvolveInfo *) NULL); |
| 534 | } |
| 535 | /* |
| 536 | Create OpenCL command queue. |
| 537 | */ |
| 538 | convolve_info->command_queue=clCreateCommandQueue(convolve_info->context, |
| 539 | convolve_info->devices[0],0,&status); |
| 540 | if ((convolve_info->command_queue == (cl_command_queue) NULL) || |
| 541 | (status != CL_SUCCESS)) |
| 542 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 543 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 544 | return((ConvolveInfo *) NULL); |
| 545 | } |
| 546 | /* |
| 547 | Build OpenCL program. |
| 548 | */ |
| 549 | convolve_info->program=clCreateProgramWithSource(convolve_info->context,1, |
| 550 | &source,lengths,&status); |
| 551 | if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS)) |
| 552 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 553 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 554 | return((ConvolveInfo *) NULL); |
| 555 | } |
cristy | b51dff5 | 2011-05-19 16:55:47 +0000 | [diff] [blame] | 556 | (void) FormatLocaleString(options,MaxTextExtent,CLOptions,(double) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 557 | QuantumRange,MagickEpsilon); |
cristy | 00243d1 | 2010-01-21 02:45:27 +0000 | [diff] [blame] | 558 | status=clBuildProgram(convolve_info->program,1,convolve_info->devices,options, |
| 559 | NULL,NULL); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 560 | if ((convolve_info->program == (cl_program) NULL) || (status != CL_SUCCESS)) |
| 561 | { |
| 562 | char |
| 563 | *log; |
| 564 | |
| 565 | status=clGetProgramBuildInfo(convolve_info->program, |
| 566 | convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,0,NULL,&length); |
| 567 | log=(char *) AcquireMagickMemory(length); |
| 568 | if (log == (char *) NULL) |
| 569 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 570 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 571 | return((ConvolveInfo *) NULL); |
| 572 | } |
| 573 | status=clGetProgramBuildInfo(convolve_info->program, |
| 574 | convolve_info->devices[0],CL_PROGRAM_BUILD_LOG,length,log,&length); |
cristy | 32cca40 | 2010-01-23 04:02:23 +0000 | [diff] [blame] | 575 | (void) ThrowMagickException(exception,GetMagickModule(),DelegateWarning, |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 576 | "failed to build OpenCL program","`%s' (%s)",image->filename,log); |
| 577 | log=DestroyString(log); |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 578 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 579 | return((ConvolveInfo *) NULL); |
| 580 | } |
| 581 | /* |
| 582 | Get a kernel object. |
| 583 | */ |
| 584 | convolve_info->kernel=clCreateKernel(convolve_info->program,name,&status); |
| 585 | if ((convolve_info->kernel == (cl_kernel) NULL) || (status != CL_SUCCESS)) |
| 586 | { |
cristy | 5f95947 | 2010-05-27 22:19:46 +0000 | [diff] [blame] | 587 | convolve_info=DestroyConvolveInfo(convolve_info); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 588 | return((ConvolveInfo *) NULL); |
| 589 | } |
| 590 | return(convolve_info); |
| 591 | } |
| 592 | |
| 593 | #endif |
| 594 | |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 595 | MagickExport MagickBooleanType AccelerateConvolveImage(const Image *image, |
cristy | 2be1538 | 2010-01-21 02:38:03 +0000 | [diff] [blame] | 596 | const KernelInfo *kernel,Image *convolve_image,ExceptionInfo *exception) |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 597 | { |
| 598 | assert(image != (Image *) NULL); |
| 599 | assert(image->signature == MagickSignature); |
| 600 | if (image->debug != MagickFalse) |
| 601 | (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename); |
cristy | 2be1538 | 2010-01-21 02:38:03 +0000 | [diff] [blame] | 602 | assert(kernel != (KernelInfo *) NULL); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 603 | assert(kernel->signature == MagickSignature); |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 604 | assert(convolve_image != (Image *) NULL); |
| 605 | assert(convolve_image->signature == MagickSignature); |
| 606 | assert(exception != (ExceptionInfo *) NULL); |
| 607 | assert(exception->signature == MagickSignature); |
cristy | 394651a | 2010-01-23 21:05:55 +0000 | [diff] [blame] | 608 | if ((image->storage_class != DirectClass) || |
| 609 | (image->colorspace == CMYKColorspace)) |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 610 | if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) && |
| 611 | (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod)) |
| 612 | return(MagickFalse); |
| 613 | #if !defined(MAGICKCORE_OPENCL_SUPPORT) |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 614 | return(MagickFalse); |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 615 | #else |
| 616 | { |
| 617 | const void |
| 618 | *pixels; |
| 619 | |
| 620 | ConvolveInfo |
| 621 | *convolve_info; |
| 622 | |
| 623 | MagickBooleanType |
| 624 | status; |
| 625 | |
| 626 | MagickSizeType |
| 627 | length; |
| 628 | |
| 629 | void |
| 630 | *convolve_pixels; |
| 631 | |
cristy | d43a46b | 2010-01-21 02:13:41 +0000 | [diff] [blame] | 632 | convolve_info=GetConvolveInfo(image,"Convolve",ConvolveKernel,exception); |
| 633 | if (convolve_info == (ConvolveInfo *) NULL) |
| 634 | return(MagickFalse); |
| 635 | pixels=AcquirePixelCachePixels(image,&length,exception); |
| 636 | if (pixels == (const void *) NULL) |
| 637 | { |
| 638 | (void) ThrowMagickException(exception,GetMagickModule(),CacheError, |
| 639 | "UnableToReadPixelCache","`%s'",image->filename); |
| 640 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 641 | return(MagickFalse); |
| 642 | } |
| 643 | convolve_pixels=GetPixelCachePixels(convolve_image,&length,exception); |
| 644 | if (convolve_pixels == (void *) NULL) |
| 645 | { |
| 646 | (void) ThrowMagickException(exception,GetMagickModule(),CacheError, |
| 647 | "UnableToReadPixelCache","`%s'",image->filename); |
| 648 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 649 | return(MagickFalse); |
| 650 | } |
| 651 | status=BindConvolveParameters(convolve_info,image,pixels,kernel->values, |
| 652 | kernel->width,kernel->height,convolve_pixels); |
| 653 | if (status == MagickFalse) |
| 654 | { |
| 655 | DestroyConvolveBuffers(convolve_info); |
| 656 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 657 | return(MagickFalse); |
| 658 | } |
| 659 | status=EnqueueConvolveKernel(convolve_info,image,pixels,kernel->values, |
| 660 | kernel->width,kernel->height,convolve_pixels); |
| 661 | if (status == MagickFalse) |
| 662 | { |
| 663 | DestroyConvolveBuffers(convolve_info); |
| 664 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 665 | return(MagickFalse); |
| 666 | } |
| 667 | DestroyConvolveBuffers(convolve_info); |
| 668 | convolve_info=DestroyConvolveInfo(convolve_info); |
| 669 | return(MagickTrue); |
| 670 | } |
| 671 | #endif |
cristy | 3f6d148 | 2010-01-20 21:01:21 +0000 | [diff] [blame] | 672 | } |