diff --git a/MagickCore/accelerate-private.h b/MagickCore/accelerate-private.h
index fda8472..f882701 100644
--- a/MagickCore/accelerate-private.h
+++ b/MagickCore/accelerate-private.h
@@ -153,6 +153,69 @@
   OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
 
   STRINGIFY(
+  typedef enum
+  {
+    UndefinedPixelIntensityMethod = 0,
+    AveragePixelIntensityMethod,
+    BrightnessPixelIntensityMethod,
+    LightnessPixelIntensityMethod,
+    Rec601LumaPixelIntensityMethod,
+    Rec601LuminancePixelIntensityMethod,
+    Rec709LumaPixelIntensityMethod,
+    Rec709LuminancePixelIntensityMethod,
+    RMSPixelIntensityMethod,
+    MSPixelIntensityMethod
+  } PixelIntensityMethod;
+  )
+
+  STRINGIFY(
+  typedef enum
+  {
+    UndefinedColorspace,
+    RGBColorspace,            /* Linear RGB colorspace */
+    GRAYColorspace,           /* greyscale (linear) image (faked 1 channel) */
+    TransparentColorspace,
+    OHTAColorspace,
+    LabColorspace,
+    XYZColorspace,
+    YCbCrColorspace,
+    YCCColorspace,
+    YIQColorspace,
+    YPbPrColorspace,
+    YUVColorspace,
+    CMYKColorspace,           /* negated linear RGB with black separated */
+    sRGBColorspace,           /* Default: non-linear sRGB colorspace */
+    HSBColorspace,
+    HSLColorspace,
+    HWBColorspace,
+    Rec601LumaColorspace,
+    Rec601YCbCrColorspace,
+    Rec709LumaColorspace,
+    Rec709YCbCrColorspace,
+    LogColorspace,
+    CMYColorspace,            /* negated linear RGB colorspace */
+    LuvColorspace,
+    HCLColorspace,
+    LCHColorspace,            /* alias for LCHuv */
+    LMSColorspace,
+    LCHabColorspace,          /* Cylindrical (Polar) Lab */
+    LCHuvColorspace,          /* Cylindrical (Polar) Luv */
+    scRGBColorspace,
+    HSIColorspace,
+    HSVColorspace,            /* alias for HSB */
+    HCLpColorspace,
+    YDbDrColorspace
+  } ColorspaceType;
+  )
+
+  STRINGIFY(
+  inline float RoundToUnity(const float value)
+   {
+     return clamp(value,0.0f,1.0f);
+   }
+  )
+
+  STRINGIFY(
 
   inline CLQuantum getBlue(CLPixelType p)		    { return p.x; }
   inline void setBlue(CLPixelType* p, CLQuantum value)	    { (*p).x = value; }
@@ -174,20 +237,106 @@
   inline float getOpacityF4(float4 p)			    { return p.w; }
   inline void setOpacityF4(float4* p, float value)          { (*p).w = value; }
 
-  inline float GetPixelIntensity(int colorspace, CLPixelType p)
+  inline void setGray(CLPixelType* p, CLQuantum value)	    { (*p).z = value; (*p).y = value; (*p).x = value; }
+
+  inline float GetPixelIntensity(const int method, const int colorspace, CLPixelType p)
   {
-    // this is for default intensity and sRGB (not RGB) color space
     float red = getRed(p);
     float green = getGreen(p);
     float blue = getBlue(p);
 
-    if (colorspace == 0)
-      return 0.212656*red+0.715158*green+0.072186*blue;
-    else
+    float intensity;
+
+    if (colorspace == GRAYColorspace)
+      return red;
+
+    switch (method)
     {
-      // need encode gamma
+      case AveragePixelIntensityMethod:
+        {
+          intensity=(red+green+blue)/3.0;
+          break;
+        }
+      case BrightnessPixelIntensityMethod:
+        {
+          intensity=max(max(red,green),blue);
+          break;
+        }
+      case LightnessPixelIntensityMethod:
+        {
+          intensity=(min(min(red,green),blue)+
+              max(max(red,green),blue))/2.0;
+          break;
+        }
+      case MSPixelIntensityMethod:
+        {
+          intensity=(float) (((float) red*red+green*green+blue*blue)/
+              (3.0*QuantumRange));
+          break;
+        }
+      case Rec601LumaPixelIntensityMethod:
+        {
+          /*
+          if (image->colorspace == RGBColorspace)
+          {
+            red=EncodePixelGamma(red);
+            green=EncodePixelGamma(green);
+            blue=EncodePixelGamma(blue);
+          }
+          */
+          intensity=0.298839*red+0.586811*green+0.114350*blue;
+          break;
+        }
+      case Rec601LuminancePixelIntensityMethod:
+        {
+          /*
+          if (image->colorspace == sRGBColorspace)
+          {
+            red=DecodePixelGamma(red);
+            green=DecodePixelGamma(green);
+            blue=DecodePixelGamma(blue);
+          }
+          */
+          intensity=0.298839*red+0.586811*green+0.114350*blue;
+          break;
+        }
+      case Rec709LumaPixelIntensityMethod:
+      default:
+        {
+          /*
+          if (image->colorspace == RGBColorspace)
+          {
+            red=EncodePixelGamma(red);
+            green=EncodePixelGamma(green);
+            blue=EncodePixelGamma(blue);
+          }
+          */
+          intensity=0.212656*red+0.715158*green+0.072186*blue;
+          break;
+        }
+      case Rec709LuminancePixelIntensityMethod:
+        {
+          /*
+          if (image->colorspace == sRGBColorspace)
+          {
+            red=DecodePixelGamma(red);
+            green=DecodePixelGamma(green);
+            blue=DecodePixelGamma(blue);
+          }
+          */
+          intensity=0.212656*red+0.715158*green+0.072186*blue;
+          break;
+        }
+      case RMSPixelIntensityMethod:
+        {
+          intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/
+              sqrt(3.0));
+          break;
+        }
     }
-    return 0.0;
+
+    return intensity; 
+ 
   }
   )
 
@@ -317,6 +466,7 @@
   STRINGIFY(
     __kernel 
     void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
+                  const uint imageWidth, const uint imageHeight,
                   __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
                   const uint matte, const ChannelType channel) {
 
@@ -324,9 +474,10 @@
       imageIndex.x = get_global_id(0);
       imageIndex.y = get_global_id(1);
 
+      /*
       unsigned int imageWidth = get_global_size(0);
       unsigned int imageHeight = get_global_size(1);
-
+      */
       if (imageIndex.x >= imageWidth
           || imageIndex.y >= imageHeight)
           return;
@@ -427,8 +578,8 @@
         case PolynomialFunction:
           {
             for (unsigned int i=0; i < number_parameters; i++)
-              result = result*QuantumScale*convert_float4(pixel) + parameters[i];
-            result *= QuantumRange;
+              result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
+            result *= (float4)QuantumRange;
             break;
           }
         case SinusoidFunction:
@@ -438,8 +589,14 @@
             phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
             ampl  = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
             bias  = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
-            result = QuantumRange*(ampl*sin(2.0f*MagickPI*
-              (freq*QuantumScale*convert_float4(pixel) + phase/360.0f)) + bias);
+            result.x = QuantumRange*(ampl*sin(2.0f*MagickPI*
+              (freq*QuantumScale*(float)pixel.x + phase/360.0f)) + bias);
+            result.y = QuantumRange*(ampl*sin(2.0f*MagickPI*
+              (freq*QuantumScale*(float)pixel.y + phase/360.0f)) + bias);
+            result.z = QuantumRange*(ampl*sin(2.0f*MagickPI*
+              (freq*QuantumScale*(float)pixel.z + phase/360.0f)) + bias);
+            result.w = QuantumRange*(ampl*sin(2.0f*MagickPI*
+              (freq*QuantumScale*(float)pixel.w + phase/360.0f)) + bias);
             break;
           }
         case ArcsinFunction:
@@ -449,18 +606,29 @@
             center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
             range  = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
             bias   = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
-            result = 2.0f/width*(QuantumScale*convert_float4(pixel) - center);
-            result = range/MagickPI*asin(result)+bias;
+
+            result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center);
+            result.x = range/MagickPI*asin(result.x)+bias;
             result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
             result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
+
+            result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center);
+            result.y = range/MagickPI*asin(result.y)+bias;
             result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
             result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
+
+            result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center);
+            result.z = range/MagickPI*asin(result.z)+bias;
             result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x;
             result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x;
+
+
+            result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center);
+            result.w = range/MagickPI*asin(result.w)+bias;
             result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
             result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
-      
-            result *= QuantumRange;
+
+            result *= (float4)QuantumRange;
             break;
           }
         case ArctanFunction:
@@ -470,8 +638,8 @@
             center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
             range  = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
             bias   = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
-            result = MagickPI*slope*(QuantumScale*convert_float4(pixel)-center);
-            result = QuantumRange*(range/MagickPI*atan(result) + bias);
+            result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
+            result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
             break;
           }
         case UndefinedFunction:
@@ -505,6 +673,73 @@
     STRINGIFY(
     /*
     */
+    // Remap every selected channel of each pixel through stretch_map; a channel
+    // that is not selected, or whose white and black points match, is kept as is.
+    __kernel void Stretch(__global CLPixelType * restrict im,
+      const ChannelType channel,
+      __global CLPixelType * restrict stretch_map,
+      const float4 white, const float4 black)
+      {
+        const int x = get_global_id(0);
+        const int y = get_global_id(1);
+        const int columns = get_global_size(0);
+        const int c = x + y * columns;
+        uint ePos;
+        CLPixelType eValue;
+        // Seed the outputs from the source pixel so skipped channels stay unchanged.
+        const CLPixelType oValue=im[c];
+        CLQuantum red=getRed(oValue), green=getGreen(oValue),
+          blue=getBlue(oValue), opacity=getOpacity(oValue);
+
+        if ((channel & RedChannel) != 0)
+        {
+          if (getRedF4(white) != getRedF4(black))
+          {
+            ePos = ScaleQuantumToMap(getRed(oValue));
+            eValue = stretch_map[ePos];
+            red = getRed(eValue);
+          }
+        }
+
+        if ((channel & GreenChannel) != 0)
+        {
+          if (getGreenF4(white) != getGreenF4(black))
+          {
+            ePos = ScaleQuantumToMap(getGreen(oValue));
+            eValue = stretch_map[ePos];
+            green = getGreen(eValue);
+          }
+        }
+
+        if ((channel & BlueChannel) != 0)
+        {
+          if (getBlueF4(white) != getBlueF4(black))
+          {
+            ePos = ScaleQuantumToMap(getBlue(oValue));
+            eValue = stretch_map[ePos];
+            blue = getBlue(eValue);
+          }
+        }
+
+        if ((channel & OpacityChannel) != 0)
+        {
+          if (getOpacityF4(white) != getOpacityF4(black))
+          {
+            ePos = ScaleQuantumToMap(getOpacity(oValue));
+            eValue = stretch_map[ePos];
+            opacity = getOpacity(eValue);
+          }
+        }
+
+        //write back
+        im[c]=(CLPixelType)(blue, green, red, opacity);
+      }
+    )
+
+
+    STRINGIFY(
+    /*
+    */
     __kernel void Equalize(__global CLPixelType * restrict im,
       const ChannelType channel,  
       __global CLPixelType * restrict equalize_map,
@@ -555,7 +790,9 @@
     /*
     */
     __kernel void Histogram(__global CLPixelType * restrict im,
-      const ChannelType channel, const int colorspace,
+      const ChannelType channel, 
+      const int method,
+      const int colorspace,
       __global uint4 * restrict histogram)
       {
         const int x = get_global_id(0);  
@@ -564,7 +801,7 @@
         const int c = x + y * columns;
         if ((channel & SyncChannels) != 0)
         {
-          float intensity = GetPixelIntensity(colorspace,im[c]);
+          float intensity = GetPixelIntensity(method, colorspace,im[c]);
           uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
           atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position
         }
@@ -1324,47 +1561,6 @@
   )
  
   STRINGIFY(
-  typedef enum
-  {
-    UndefinedColorspace,
-    RGBColorspace,            /* Linear RGB colorspace */
-    GRAYColorspace,           /* greyscale (linear) image (faked 1 channel) */
-    TransparentColorspace,
-    OHTAColorspace,
-    LabColorspace,
-    XYZColorspace,
-    YCbCrColorspace,
-    YCCColorspace,
-    YIQColorspace,
-    YPbPrColorspace,
-    YUVColorspace,
-    CMYKColorspace,           /* negared linear RGB with black separated */
-    sRGBColorspace,           /* Default: non-lienar sRGB colorspace */
-    HSBColorspace,
-    HSLColorspace,
-    HWBColorspace,
-    Rec601LumaColorspace,
-    Rec601YCbCrColorspace,
-    Rec709LumaColorspace,
-    Rec709YCbCrColorspace,
-    LogColorspace,
-    CMYColorspace,            /* negated linear RGB colorspace */
-    LuvColorspace,
-    HCLColorspace,
-    LCHColorspace,            /* alias for LCHuv */
-    LMSColorspace,
-    LCHabColorspace,          /* Cylindrical (Polar) Lab */
-    LCHuvColorspace,          /* Cylindrical (Polar) Luv */
-    scRGBColorspace,
-    HSIColorspace,
-    HSVColorspace,            /* alias for HSB */
-    HCLpColorspace,
-    YDbDrColorspace
-  } ColorspaceType;
-  )
-
-
-  STRINGIFY(
 
   inline float3 ConvertRGBToHSB(CLPixelType pixel) {
     float3 HueSaturationBrightness;
@@ -1385,8 +1581,8 @@
       HueSaturationBrightness.z=QuantumScale*tmax;
 
       if (delta != 0.0f) {
-	HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
-	HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
+  HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
+  HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
         HueSaturationBrightness.x/=6.0f;
         HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
       }
@@ -1421,18 +1617,18 @@
       float clamped_q = ClampToQuantum(QuantumRange*q);     
       int ih = (int)h;
       setRed(&rgb, (ih == 1)?clamped_q:
-	      (ih == 2 || ih == 3)?clamped_p:
-	      (ih == 4)?clamped_t:
+        (ih == 2 || ih == 3)?clamped_p:
+        (ih == 4)?clamped_t:
                  clampedBrightness);
  
       setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
-	      (ih == 3)?clamped_q:
-	      (ih == 4 || ih == 5)?clamped_p:
+        (ih == 3)?clamped_q:
+        (ih == 4 || ih == 5)?clamped_p:
                  clamped_t);
 
       setBlue(&rgb, (ih == 2)?clamped_t:
-	      (ih == 3 || ih == 4)?clampedBrightness:
-	      (ih == 5)?clamped_q:
+        (ih == 3 || ih == 4)?clampedBrightness:
+        (ih == 5)?clamped_q:
                  clamped_p);
     }
     return rgb;
@@ -1655,6 +1851,162 @@
   )
 
   STRINGIFY(
+  // Invert the selected color channels of every pixel in place; channels that
+  // are not selected, and the opacity channel, are written back unchanged.
+  __kernel void Negate(__global CLPixelType *im,
+    const ChannelType channel)
+  {
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
+    const int columns = get_global_size(0);
+    const int c = x + y * columns;
+
+    CLPixelType pixel = im[c];
+
+    CLQuantum
+        blue,
+        green,
+        red;
+
+    red=getRed(pixel);
+    green=getGreen(pixel);
+    blue=getBlue(pixel);
+
+    // Start from the source pixel: an unselected channel must keep its value
+    // instead of exposing whatever an uninitialized private variable holds.
+    CLPixelType filteredPixel = pixel;
+    if ((channel & RedChannel) !=0)
+      setRed(&filteredPixel, QuantumRange-red);
+    if ((channel & GreenChannel) !=0)
+      setGreen(&filteredPixel, QuantumRange-green);
+    if ((channel & BlueChannel) !=0)
+      setBlue(&filteredPixel, QuantumRange-blue);
+
+    im[c] = filteredPixel;
+  }
+  )
+
+  STRINGIFY(
+  __kernel void Grayscale(__global CLPixelType *im, 
+    const int method, const int colorspace)
+  {
+
+    const int x = get_global_id(0);  
+    const int y = get_global_id(1);
+    const int columns = get_global_size(0);
+    const int c = x + y * columns;
+
+    CLPixelType pixel = im[c];
+
+    float
+        blue,
+        green,
+        intensity,
+        red;
+
+    red=(float)getRed(pixel);
+    green=(float)getGreen(pixel);
+    blue=(float)getBlue(pixel);
+
+    intensity=0.0;
+
+    CLPixelType filteredPixel;
+ 
+    switch (method)
+    {
+      case AveragePixelIntensityMethod:
+        {
+          intensity=(red+green+blue)/3.0;
+          break;
+        }
+      case BrightnessPixelIntensityMethod:
+        {
+          intensity=max(max(red,green),blue);
+          break;
+        }
+      case LightnessPixelIntensityMethod:
+        {
+          intensity=(min(min(red,green),blue)+
+              max(max(red,green),blue))/2.0;
+          break;
+        }
+      case MSPixelIntensityMethod:
+        {
+          intensity=(float) (((float) red*red+green*green+
+                blue*blue)/(3.0*QuantumRange));
+          break;
+        }
+      case Rec601LumaPixelIntensityMethod:
+        {
+          /*
+          if (colorspace == RGBColorspace)
+          {
+            red=EncodePixelGamma(red);
+            green=EncodePixelGamma(green);
+            blue=EncodePixelGamma(blue);
+          }
+          */
+          intensity=0.298839*red+0.586811*green+0.114350*blue;
+          break;
+        }
+      case Rec601LuminancePixelIntensityMethod:
+        {
+          /*
+          if (image->colorspace == sRGBColorspace)
+          {
+            red=DecodePixelGamma(red);
+            green=DecodePixelGamma(green);
+            blue=DecodePixelGamma(blue);
+          }
+          */
+          intensity=0.298839*red+0.586811*green+0.114350*blue;
+          break;
+        }
+      case Rec709LumaPixelIntensityMethod:
+      default:
+        {
+          /*
+          if (image->colorspace == RGBColorspace)
+          {
+            red=EncodePixelGamma(red);
+            green=EncodePixelGamma(green);
+            blue=EncodePixelGamma(blue);
+          }
+          */
+          intensity=0.212656*red+0.715158*green+0.072186*blue;
+          break;
+        }
+      case Rec709LuminancePixelIntensityMethod:
+        {
+          /*
+          if (image->colorspace == sRGBColorspace)
+          {
+            red=DecodePixelGamma(red);
+            green=DecodePixelGamma(green);
+            blue=DecodePixelGamma(blue);
+          }
+          */
+          intensity=0.212656*red+0.715158*green+0.072186*blue;
+          break;
+        }
+      case RMSPixelIntensityMethod:
+        {
+          intensity=(float) (sqrt((float) red*red+green*green+
+                blue*blue)/sqrt(3.0));
+          break;
+        }
+
+    }
+
+    setGray(&filteredPixel, ClampToQuantum(intensity));
+
+    filteredPixel.w = pixel.w;
+
+    im[c] = filteredPixel;
+  }
+  )
+
+  STRINGIFY(
   // Based on Box from resize.c
   float BoxResizeFilter(const float x)
   {
@@ -1881,7 +2233,7 @@
     const unsigned int actualNumPixelToCompute = stopX - startX;
 
     // calculate the range of input image pixels to cache
-    float scale = max(1.0/xFactor+MagickEpsilon ,1.0f);
+    float scale = max(1.0f/xFactor+MagickEpsilon ,1.0f);
     const float support = max(scale*resizeFilterSupport,0.5f);
     scale = PerceptibleReciprocal(scale);
 
@@ -2074,7 +2426,7 @@
     const unsigned int actualNumPixelToCompute = stopY - startY;
 
     // calculate the range of input image pixels to cache
-    float scale = max(1.0/yFactor+MagickEpsilon ,1.0f);
+    float scale = max(1.0f/yFactor+MagickEpsilon ,1.0f);
     const float support = max(scale*resizeFilterSupport,0.5f);
     scale = PerceptibleReciprocal(scale);
 
@@ -2249,6 +2601,18 @@
 
   STRINGIFY(
 
+  // Advance the shift/xor state held in *seed and return normalizeRand * s.x.
+  inline float GetPseudoRandomValue(uint4* seed, const float normalizeRand) {
+    uint4 s = *seed;
+    do {
+      unsigned int alpha = (unsigned int) (s.y ^ (s.y << 11));
+      s.y=s.z; s.z=s.w; s.w=s.x;
+      s.x = (s.x ^ (s.x >> 19)) ^ (alpha ^ (alpha >> 8));
+    // s.x is a 32-bit uint: reject all-ones with ~0U (64-bit ~0UL never matches).
+    } while (s.x == ~0U);
+    *seed = s;
+    return (normalizeRand*s.x);
+  }
 
   __kernel void randomNumberGeneratorKernel(__global uint* seeds, const float normalizeRand
                                            , __global float* randomNumbers, const uint init
@@ -2317,7 +2681,7 @@
   } RandomNumbers;
 
 
-  float GetPseudoRandomValue(RandomNumbers* r) {
+  float ReadPseudoRandomValue(RandomNumbers* r) {
     float v = *r->rns;
     r->rns++;
     return v;
@@ -2343,7 +2707,7 @@
       sigma;
 
     noise = 0.0f;
-    alpha=GetPseudoRandomValue(r);
+    alpha=ReadPseudoRandomValue(r);
     switch(noise_type) {
     case UniformNoise:
     default:
@@ -2359,7 +2723,7 @@
 
         if (alpha == 0.0f)
           alpha=1.0f;
-        beta=GetPseudoRandomValue(r);
+        beta=ReadPseudoRandomValue(r);
         gamma=sqrt(-2.0f*log(alpha));
         sigma=gamma*cospi((2.0f*beta));
         tau=gamma*sinpi((2.0f*beta));
@@ -2403,7 +2767,7 @@
       sigma=1.0f;
       if (alpha > MagickEpsilon)
         sigma=sqrt(-2.0f*log(alpha));
-      beta=GetPseudoRandomValue(r);
+      beta=ReadPseudoRandomValue(r);
       noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
         cospi((float) (2.0f*beta))/2.0f);
       break;
@@ -2416,7 +2780,7 @@
       poisson=exp(-SigmaPoisson*QuantumScale*pixel);
       for (i=0; alpha > poisson; i++)
       {
-        beta=GetPseudoRandomValue(r);
+        beta=ReadPseudoRandomValue(r);
         alpha*=beta;
       }
       noise=(float) (QuantumRange*i/SigmaPoisson);
@@ -2468,11 +2832,382 @@
   }
 
   )
+
+  STRINGIFY(
+  __kernel 
+  void RandomImage(__global CLPixelType* inputImage,
+                   const uint imageColumns, const uint imageRows,
+                   __global uint* seeds,
+                   const float randNormNumerator,
+                   const uint randNormDenominator) {
+
+    unsigned int numGenerators = get_global_size(0);
+    unsigned numRandPixelsPerWorkItem = ((imageColumns*imageRows) + (numGenerators-1))
+                                        / numGenerators;
+
+    uint4 s;
+    s.x = seeds[get_global_id(0)*4];
+    s.y = seeds[get_global_id(0)*4+1];
+    s.z = seeds[get_global_id(0)*4+2];
+    s.w = seeds[get_global_id(0)*4+3];
+
+    unsigned int offset = get_group_id(0) * get_local_size(0) * numRandPixelsPerWorkItem;
+    for (unsigned int n = 0; n < numRandPixelsPerWorkItem; n++)
+    {
+      int i = offset + n*get_local_size(0) + get_local_id(0);
+      if (i >= imageColumns*imageRows)
+        break;
+
+      float rand = GetPseudoRandomValue(&s,randNormNumerator/randNormDenominator);
+      CLQuantum v = ClampToQuantum(QuantumRange*rand);
+
+      CLPixelType p;
+      setRed(&p,v);
+      setGreen(&p,v);
+      setBlue(&p,v);
+      setOpacity(&p,0);
+
+      inputImage[i] = p;
+    }
+
+    seeds[get_global_id(0)*4]   = s.x;
+    seeds[get_global_id(0)*4+1] = s.y;
+    seeds[get_global_id(0)*4+2] = s.z;
+    seeds[get_global_id(0)*4+3] = s.w;
+  }
+  )
+
+  STRINGIFY(
+    __kernel 
+    void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
+                    const unsigned int imageWidth, const unsigned int imageHeight,
+                    const __global float *filter, const unsigned int width, const __global int2* offset,
+                    const float4 bias,
+                    const ChannelType channel, const unsigned int matte) {
+
+      int2 currentPixel;
+      currentPixel.x = get_global_id(0);
+      currentPixel.y = get_global_id(1);
+
+      if (currentPixel.x >= imageWidth
+          || currentPixel.y >= imageHeight)
+          return;
+
+      float4 pixel;
+      pixel.x = (float)bias.x;
+      pixel.y = (float)bias.y;
+      pixel.z = (float)bias.z;
+      pixel.w = (float)bias.w;
+
+      if (((channel & OpacityChannel) == 0) || (matte == 0)) {
+        
+        for (int i = 0; i < width; i++) {
+          // only support EdgeVirtualPixelMethod through ClampToCanvas
+          // TODO: implement other virtual pixel method
+          int2 samplePixel = currentPixel + offset[i];
+          samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
+          samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
+          CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
+
+          pixel.x += (filter[i] * (float)samplePixelValue.x);
+          pixel.y += (filter[i] * (float)samplePixelValue.y);
+          pixel.z += (filter[i] * (float)samplePixelValue.z);
+          pixel.w += (filter[i] * (float)samplePixelValue.w);
+        }
+
+        CLPixelType outputPixel;
+        outputPixel.x = ClampToQuantum(pixel.x);
+        outputPixel.y = ClampToQuantum(pixel.y);
+        outputPixel.z = ClampToQuantum(pixel.z);
+        outputPixel.w = ClampToQuantum(pixel.w);
+        output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
+      }
+      else {
+
+        float gamma = 0.0f;
+        for (int i = 0; i < width; i++) {
+          // only support EdgeVirtualPixelMethod through ClampToCanvas
+          // TODO: implement other virtual pixel method
+          int2 samplePixel = currentPixel + offset[i];
+          samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
+          samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
+
+          CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
+
+          float alpha = QuantumScale*(QuantumRange-samplePixelValue.w);
+          float k = filter[i];
+          pixel.x = pixel.x + k * alpha * samplePixelValue.x;
+          pixel.y = pixel.y + k * alpha * samplePixelValue.y;
+          pixel.z = pixel.z + k * alpha * samplePixelValue.z;
+
+          pixel.w += k * alpha * samplePixelValue.w;
+
+          gamma+=k*alpha;
+        }
+        gamma = PerceptibleReciprocal(gamma);
+        pixel.xyz = gamma*pixel.xyz;
+
+        CLPixelType outputPixel;
+        outputPixel.x = ClampToQuantum(pixel.x);
+        outputPixel.y = ClampToQuantum(pixel.y);
+        outputPixel.z = ClampToQuantum(pixel.z);
+        outputPixel.w = ClampToQuantum(pixel.w);
+        output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
+      }
+    }
+  )
+
+  STRINGIFY(
+    typedef enum
+    {
+      UndefinedCompositeOp,
+      NoCompositeOp,
+      ModulusAddCompositeOp,
+      AtopCompositeOp,
+      BlendCompositeOp,
+      BumpmapCompositeOp,
+      ChangeMaskCompositeOp,
+      ClearCompositeOp,
+      ColorBurnCompositeOp,
+      ColorDodgeCompositeOp,
+      ColorizeCompositeOp,
+      CopyBlackCompositeOp,
+      CopyBlueCompositeOp,
+      CopyCompositeOp,
+      CopyCyanCompositeOp,
+      CopyGreenCompositeOp,
+      CopyMagentaCompositeOp,
+      CopyOpacityCompositeOp,
+      CopyRedCompositeOp,
+      CopyYellowCompositeOp,
+      DarkenCompositeOp,
+      DstAtopCompositeOp,
+      DstCompositeOp,
+      DstInCompositeOp,
+      DstOutCompositeOp,
+      DstOverCompositeOp,
+      DifferenceCompositeOp,
+      DisplaceCompositeOp,
+      DissolveCompositeOp,
+      ExclusionCompositeOp,
+      HardLightCompositeOp,
+      HueCompositeOp,
+      InCompositeOp,
+      LightenCompositeOp,
+      LinearLightCompositeOp,
+      LuminizeCompositeOp,
+      MinusDstCompositeOp,
+      ModulateCompositeOp,
+      MultiplyCompositeOp,
+      OutCompositeOp,
+      OverCompositeOp,
+      OverlayCompositeOp,
+      PlusCompositeOp,
+      ReplaceCompositeOp,
+      SaturateCompositeOp,
+      ScreenCompositeOp,
+      SoftLightCompositeOp,
+      SrcAtopCompositeOp,
+      SrcCompositeOp,
+      SrcInCompositeOp,
+      SrcOutCompositeOp,
+      SrcOverCompositeOp,
+      ModulusSubtractCompositeOp,
+      ThresholdCompositeOp,
+      XorCompositeOp,
+      /* These are new operators, added after the above was last sorted.
+       * The list should be re-sorted only when a new library version is
+       * created.
+       */
+      DivideDstCompositeOp,
+      DistortCompositeOp,
+      BlurCompositeOp,
+      PegtopLightCompositeOp,
+      VividLightCompositeOp,
+      PinLightCompositeOp,
+      LinearDodgeCompositeOp,
+      LinearBurnCompositeOp,
+      MathematicsCompositeOp,
+      DivideSrcCompositeOp,
+      MinusSrcCompositeOp,
+      DarkenIntensityCompositeOp,
+      LightenIntensityCompositeOp
+    } CompositeOperator;
+  )
+
+  STRINGIFY(
+    inline float ColorDodge(const float Sca,
+      const float Sa,const float Dca,const float Da)
+    {
+      /*
+        Oct 2004 SVG specification.
+      */
+      if ((Sca*Da+Dca*Sa) >= Sa*Da)
+        return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
+      return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
+
+
+      /*
+        New specification, March 2009 SVG specification.  This specification was
+        also wrong for non-overlap cases.
+      */
+      /*
+      if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon))
+        return(Sca*(1.0-Da));
+      if (fabs(Sca-Sa) < MagickEpsilon)
+        return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
+      return(Sa*MagickMin(Da,Dca*Sa/(Sa-Sca)));
+      */
+
+      /*
+        Working from first principles using the original formula:
+
+           f(Sc,Dc) = Dc/(1-Sc)
+
+        This works correctly! Looks like the 2004 model was right but just
+        required an extra condition for correct handling.
+      */
+
+      /*
+      if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon))
+        return(Sca*(1.0-Da)+Dca*(1.0-Sa));
+      if (fabs(Sca-Sa) < MagickEpsilon)
+        return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
+      return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
+      */
+    }
+
+    inline void CompositeColorDodge(const float4 *p,
+      const float4 *q,float4 *composite) {
+
+      float 
+      Da,
+      gamma,
+      Sa;
+
+      Sa=1.0f-QuantumScale*getOpacityF4(*p);  /* simplify and speed up equations */
+      Da=1.0f-QuantumScale*getOpacityF4(*q);
+      gamma=RoundToUnity(Sa+Da-Sa*Da); /* over blend, as per SVG doc */
+      setOpacityF4(composite, QuantumRange*(1.0-gamma));
+      gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
+      setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
+        getRedF4(*q)*Da,Da));
+      setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
+        getGreenF4(*q)*Da,Da));
+      setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
+        getBlueF4(*q)*Da,Da));
+    }
+  )
+
+  STRINGIFY(
+    inline void MagickPixelCompositePlus(const float4 *p,
+      const float alpha,const float4 *q,
+      const float beta,float4 *composite)
+    {
+      float 
+        gamma;
+
+      float
+        Da,
+        Sa;
+      /*
+        Add two pixels with the given opacities.
+      */
+      Sa=1.0-QuantumScale*alpha;
+      Da=1.0-QuantumScale*beta;
+      gamma=RoundToUnity(Sa+Da);  /* 'Plus' blending -- not 'Over' blending */
+      setOpacityF4(composite,(float) QuantumRange*(1.0-gamma));
+      gamma=PerceptibleReciprocal(gamma);
+      setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
+      setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
+      setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
+    }
+  )
+
+  STRINGIFY(
+    inline void MagickPixelCompositeBlend(const float4 *p,
+      const float alpha,const float4 *q,
+      const float beta,float4 *composite)
+    {
+      MagickPixelCompositePlus(p,(float) (QuantumRange-alpha*
+      (QuantumRange-getOpacityF4(*p))),q,(float) (QuantumRange-beta*
+      (QuantumRange-getOpacityF4(*q))),composite);
+    }
+  )
+  
+  STRINGIFY(
+    // Blend compositeImage into image in place, one work-item per pixel.  Only the
+    // ColorDodge and Blend operators are handled; others keep the destination color.
+    __kernel
+    void Composite(__global CLPixelType *image,
+                   const unsigned int imageWidth,
+                   const unsigned int imageHeight,
+                   const __global CLPixelType *compositeImage,
+                   const unsigned int compositeWidth,
+                   const unsigned int compositeHeight,
+                   const unsigned int compose,
+                   const ChannelType channel,
+                   const unsigned int matte,
+                   const float destination_dissolve,
+                   const float source_dissolve) {
+      uint2 index;
+      index.x = get_global_id(0);
+      index.y = get_global_id(1);
+
+      // Leave the pixel alone when it lies outside either image, so that a
+      // composite of a different size is never read out of bounds.
+      if (index.x >= imageWidth || index.y >= imageHeight
+        || index.x >= compositeWidth || index.y >= compositeHeight) {
+          return;
+      }
+      const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
+      float4 destination;
+      setRedF4(&destination,getRed(inputPixel));
+      setGreenF4(&destination,getGreen(inputPixel));
+      setBlueF4(&destination,getBlue(inputPixel));
+
+      const CLPixelType compositePixel
+        = compositeImage[index.y*compositeWidth+index.x];
+      float4 source;
+      setRedF4(&source,getRed(compositePixel));
+      setGreenF4(&source,getGreen(compositePixel));
+      setBlueF4(&source,getBlue(compositePixel));
+
+      if (matte != 0) {
+        setOpacityF4(&destination,getOpacity(inputPixel));
+        setOpacityF4(&source,getOpacity(compositePixel));
+      }
+      else {
+        setOpacityF4(&destination,0.0f);
+        setOpacityF4(&source,0.0f);
+      }
+
+      float4 composite=destination;
+      CompositeOperator op = (CompositeOperator)compose;
+      switch (op) {
+      case ColorDodgeCompositeOp:
+        CompositeColorDodge(&source,&destination,&composite);
+        break;
+      case BlendCompositeOp:
+        MagickPixelCompositeBlend(&source,source_dissolve,&destination,
+            destination_dissolve,&composite);
+        break;
+      default:
+        // unsupported operators
+        break;
+      };
+
+      CLPixelType outputPixel;
+      setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
+      setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
+      setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
+      setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
+      image[index.y*imageWidth+index.x] = outputPixel;
+    }
+  )   
+    
   ;
 
-
-
-
 #endif // MAGICKCORE_OPENCL_SUPPORT
 
 #if defined(__cplusplus) || defined(c_plusplus)
diff --git a/MagickCore/accelerate.c b/MagickCore/accelerate.c
index 34806e2..ce6e4f8 100644
--- a/MagickCore/accelerate.c
+++ b/MagickCore/accelerate.c
@@ -89,6 +89,15 @@
 #define ALIGNED(pointer,type) ((((long)(pointer)) & (sizeof(type)-1)) == 0)
 /*#define ALIGNED(pointer,type) (0) */
 
+/* round orgGlobalSize up to the next multiple of localGroupSize without an
+   overflowing intermediate sum; a zero localGroupSize returns it unchanged */
+inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
+  const unsigned int orgGlobalSize,const unsigned int localGroupSize)
+{
+  const unsigned int rem = localGroupSize ? orgGlobalSize%localGroupSize : 0;
+  return rem ? orgGlobalSize+(localGroupSize-rem) : orgGlobalSize;
+}
+
 static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception)
 {
   MagickBooleanType flag;
@@ -122,7 +131,8 @@
 {
   /* check if the image's colorspace is supported */
   if (image->colorspace != RGBColorspace
-    && image->colorspace != sRGBColorspace)
+    && image->colorspace != sRGBColorspace
+    && image->colorspace != GRAYColorspace)
     return MagickFalse;
   
   /* check if the channel is supported */
@@ -142,6 +152,23 @@
   return MagickTrue;
 }
 
+/* Returns MagickTrue only when channel shares a bit with SyncChannels, the
+   image intensity method is neither Rec601 nor Rec709 luminance, and the
+   image colorspace is sRGB; otherwise returns MagickFalse. */
+static MagickBooleanType checkHistogramCondition(Image *image, const ChannelType channel)
+{
+  MagickBooleanType supported = MagickFalse;
+
+  /* for now, only requests that include SyncChannels are accepted */
+  if (((channel & SyncChannels) != 0) &&
+      (image->intensity != Rec601LuminancePixelIntensityMethod) &&
+      (image->intensity != Rec709LuminancePixelIntensityMethod) &&
+      (image->colorspace == sRGBColorspace))
+    supported = MagickTrue;
+
+  return supported;
+}
+
 
 static Image* ComputeConvolveImage(const Image* inputImage, const ChannelType channel, const KernelInfo *kernel, ExceptionInfo *exception)
 {
@@ -149,8 +176,8 @@
   MagickCLEnv clEnv;
 
   cl_int clStatus;
-  size_t global_work_size[2];
-  size_t localGroupSize[2];
+  size_t global_work_size[3];
+  size_t localGroupSize[3];
   size_t localMemoryRequirement;
   Image* filteredImage;
   MagickSizeType length;
@@ -161,13 +188,14 @@
   unsigned kernelSize;
   unsigned int i;
   void *hostPtr;
-  unsigned int matte, filterWidth, filterHeight, imageWidth, imageHeight;
+  unsigned int matte,
+    filterWidth, filterHeight, 
+    imageWidth, imageHeight;
 
   cl_context context;
   cl_kernel clkernel;
   cl_mem inputImageBuffer, filteredImageBuffer, convolutionKernel;
   cl_ulong deviceLocalMemorySize;
-  cl_device_id device;
 
   cl_command_queue queue;
 
@@ -178,7 +206,6 @@
   convolutionKernel = NULL;
   clkernel = NULL;
   queue = NULL;
-  device = NULL;
 
   filteredImage = NULL;
   outputReady = MagickFalse;
@@ -209,16 +236,16 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
   filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
   assert(filteredImage != NULL);
-  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+  if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
     goto cleanup;
@@ -242,41 +269,43 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
   kernelSize = kernel->width * kernel->height;
-  convolutionKernel = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
+  convolutionKernel = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
   queue = AcquireOpenCLCommandQueue(clEnv);
 
-  kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
+  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
           , 0, NULL, NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
     goto cleanup;
   }
   for (i = 0; i < kernelSize; i++)
   {
     kernelBufferPtr[i] = (float) kernel->values[i];
   }
-  clStatus = clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
- if (clStatus != CL_SUCCESS)
+  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
+  if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
     goto cleanup;
   }
-  clFlush(queue);
+  clEnv->library->clFlush(queue);
+
+  deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
 
   /* Compute the local memory requirement for a 16x16 workgroup.
      If it's larger than 16k, reduce the workgroup size to 8x8 */
@@ -284,19 +313,14 @@
   localGroupSize[1] = 16;
   localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
     + kernel->width*kernel->height*sizeof(float);
-  if (localMemoryRequirement > 16384)
+
+  if (localMemoryRequirement > deviceLocalMemorySize)
   {
-
-
     localGroupSize[0] = 8;
     localGroupSize[1] = 8;
-
     localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
       + kernel->width*kernel->height*sizeof(float);
   }
-
-  GetMagickOpenCLEnvParam(clEnv, MAGICK_OPENCL_ENV_PARAM_DEVICE, sizeof(cl_device_id), &device, exception);
-  clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &deviceLocalMemorySize, NULL);
   if (localMemoryRequirement <= deviceLocalMemorySize) 
   {
     /* get the OpenCL kernel */
@@ -309,25 +333,25 @@
 
     /* set the kernel arguments */
     i = 0;
-    clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
     imageWidth = inputImage->columns;
     imageHeight = inputImage->rows;
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
     filterWidth = kernel->width;
     filterHeight = kernel->height;
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
-    matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
-    clStatus|=clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
-    clStatus|=clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
+    matte = (inputImage->matte==MagickTrue)?1:0;
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
       goto cleanup;
     }
 
@@ -336,10 +360,10 @@
     global_work_size[1] = ((inputImage->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
 
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }
   }
@@ -355,44 +379,49 @@
 
     /* set the kernel arguments */
     i = 0;
-    clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
+    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+    imageWidth = inputImage->columns;
+    imageHeight = inputImage->rows;
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
     filterWidth = kernel->width;
     filterHeight = kernel->height;
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
-    matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
-    clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
+    matte = (inputImage->matte==MagickTrue)?1:0;
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
+    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
       goto cleanup;
     }
 
-    global_work_size[0] = inputImage->columns;
-    global_work_size[1] = inputImage->rows;
-
-    /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    localGroupSize[0] = 8;
+    localGroupSize[1] = 8;
+    global_work_size[0] = (inputImage->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
+    global_work_size[1] = (inputImage->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
+    
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }
   }
-  clFlush(queue);
+  clEnv->library->clFlush(queue);
 
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -407,13 +436,13 @@
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
   if (inputImageBuffer != NULL)
-    clReleaseMemObject(inputImageBuffer);
+    clEnv->library->clReleaseMemObject(inputImageBuffer);
 
   if (filteredImageBuffer != NULL)
-    clReleaseMemObject(filteredImageBuffer);
+    clEnv->library->clReleaseMemObject(filteredImageBuffer);
 
   if (convolutionKernel != NULL)
-    clReleaseMemObject(convolutionKernel);
+    clEnv->library->clReleaseMemObject(convolutionKernel);
 
   if (clkernel != NULL)
     RelinquishOpenCLKernel(clEnv, clkernel);
@@ -539,40 +568,40 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = image->columns * image->rows;
-  imageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus);
+  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
-  parametersBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus);
+  parametersBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
   queue = AcquireOpenCLCommandQueue(clEnv);
 
-  parametersBufferPtr = (float*)clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float)
+  parametersBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float)
                 , 0, NULL, NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
     goto cleanup;
   }
   for (i = 0; i < number_parameters; i++)
   {
     parametersBufferPtr[i] = (float)parameters[i];
   }
-  clStatus = clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL);
+  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
     goto cleanup;
   }
-  clFlush(queue);
+  clEnv->library->clFlush(queue);
 
   clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "FunctionImage");
   if (clkernel == NULL)
@@ -583,38 +612,38 @@
 
   /* set the kernel arguments */
   i = 0;
-  clStatus =clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
-  clStatus|=clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
-  clStatus|=clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
-  clStatus|=clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
-  clStatus|=clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
+  clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
+  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
+  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
+  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     goto cleanup;
   }
 
   globalWorkSize[0] = image->columns;
   globalWorkSize[1] = image->rows;
   /* launch the kernel */
-  clStatus = clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
     goto cleanup;
   }
-  clFlush(queue);
+  clEnv->library->clFlush(queue);
 
 
   if (ALIGNED(pixels,CLPixelPacket)) 
   {
     length = image->columns * image->rows;
-    clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = image->columns * image->rows;
-    clStatus = clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -628,8 +657,8 @@
   
   if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel);
   if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
-  if (imageBuffer != NULL) clReleaseMemObject(imageBuffer);
-  if (parametersBuffer != NULL) clReleaseMemObject(parametersBuffer);
+  if (imageBuffer != NULL) clEnv->library->clReleaseMemObject(imageBuffer);
+  if (parametersBuffer != NULL) clEnv->library->clReleaseMemObject(parametersBuffer);
 
   return status;
 }
@@ -749,10 +778,10 @@
     }
     /* create a CL buffer from image pixel buffer */
     length = inputImage->columns * inputImage->rows;
-    inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+    inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
   }
@@ -761,7 +790,7 @@
   {
     filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
     assert(filteredImage != NULL);
-    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+    if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
     {
       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
       goto cleanup;
@@ -785,10 +814,10 @@
     }
     /* create a CL buffer from image pixel buffer */
     length = inputImage->columns * inputImage->rows;
-    filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
   }
@@ -803,16 +832,16 @@
       goto cleanup;
     }
 
-    imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
+    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
-    kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
+    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
       goto cleanup;
     }
 
@@ -821,10 +850,10 @@
       kernelBufferPtr[i] = (float) kernel->values[i];
     }
 
-    clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
       goto cleanup;
     }
   }
@@ -834,10 +863,10 @@
     /* create temp buffer */
     {
       length = inputImage->columns * inputImage->rows;
-      tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
+      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
       if (clStatus != CL_SUCCESS)
       {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
         goto cleanup;
       }
     }
@@ -869,18 +898,18 @@
 
         /* set the kernel arguments */
         i = 0;
-        clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+        clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
         kernelWidth = kernel->width;
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
         if (clStatus != CL_SUCCESS)
         {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
           goto cleanup;
         }
       }
@@ -895,13 +924,13 @@
         wsize[0] = chunkSize;
         wsize[1] = 1;
 
-        clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
+        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
         if (clStatus != CL_SUCCESS)
         {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
           goto cleanup;
         }
-        clFlush(queue);
+        clEnv->library->clFlush(queue);
       }
     }
 
@@ -915,18 +944,18 @@
 
         /* set the kernel arguments */
         i = 0;
-        clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-        clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-        clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
-        clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+        clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
+        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
         kernelWidth = kernel->width;
-        clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-        clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-        clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-        clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
+        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
         if (clStatus != CL_SUCCESS)
         {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
           goto cleanup;
         }
       }
@@ -941,13 +970,13 @@
         wsize[0] = 1;
         wsize[1] = chunkSize;
 
-        clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
+        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
         if (clStatus != CL_SUCCESS)
         {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
           goto cleanup;
         }
-        clFlush(queue);
+        clEnv->library->clFlush(queue);
       }
     }
 
@@ -957,12 +986,12 @@
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -975,10 +1004,10 @@
 cleanup:
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
-  if (inputImageBuffer!=NULL)     clReleaseMemObject(inputImageBuffer);
-  if (tempImageBuffer!=NULL)      clReleaseMemObject(tempImageBuffer);
-  if (filteredImageBuffer!=NULL)  clReleaseMemObject(filteredImageBuffer);
-  if (imageKernelBuffer!=NULL)    clReleaseMemObject(imageKernelBuffer);
+  if (inputImageBuffer!=NULL)     clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (tempImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(tempImageBuffer);
+  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
   if (blurRowKernel!=NULL)        RelinquishOpenCLKernel(clEnv, blurRowKernel);
   if (blurColumnKernel!=NULL)     RelinquishOpenCLKernel(clEnv, blurColumnKernel);
   if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
@@ -1060,10 +1089,10 @@
     }
     /* create a CL buffer from image pixel buffer */
     length = inputImage->columns * inputImage->rows;
-    inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+    inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
   }
@@ -1072,7 +1101,7 @@
   {
     filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
     assert(filteredImage != NULL);
-    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+    if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
     {
       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
       goto cleanup;
@@ -1096,10 +1125,10 @@
     }
     /* create a CL buffer from image pixel buffer */
     length = inputImage->columns * inputImage->rows;
-    filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
   }
@@ -1114,16 +1143,16 @@
       goto cleanup;
     }
 
-    imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
+    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
-    kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
+    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
       goto cleanup;
     }
 
@@ -1132,10 +1161,10 @@
       kernelBufferPtr[i] = (float) kernel->values[i];
     }
 
-    clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
       goto cleanup;
     }
   }
@@ -1147,10 +1176,10 @@
     /* create temp buffer */
     {
       length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
-      tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
+      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
       if (clStatus != CL_SUCCESS)
       {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
         goto cleanup;
       }
     }
@@ -1191,19 +1220,19 @@
 
           /* set the kernel arguments */
           i = 0;
-          clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-          clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-          clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
-          clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-          clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-          clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-          clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-          clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
-          clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
-          clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
+          clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
+          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
+          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
+          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
           if (clStatus != CL_SUCCESS)
           {
-            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
             goto cleanup;
           }
         }
@@ -1218,13 +1247,13 @@
           wsize[0] = chunkSize;
           wsize[1] = 1;
 
-          clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
+          clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
           if (clStatus != CL_SUCCESS)
           {
-            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
             goto cleanup;
           }
-          clFlush(queue);
+          clEnv->library->clFlush(queue);
         }
       }
 
@@ -1245,19 +1274,19 @@
 
           /* set the kernel arguments */
           i = 0;
-          clStatus=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-          clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-          clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
-          clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-          clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-          clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-          clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-          clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
-          clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
-          clStatus|=clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
+          clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
+          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
+          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
+          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
           if (clStatus != CL_SUCCESS)
           {
-            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
             goto cleanup;
           }
         }
@@ -1272,13 +1301,13 @@
           wsize[0] = 1;
           wsize[1] = chunkSize;
 
-          clStatus = clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
+          clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
           if (clStatus != CL_SUCCESS)
           {
-            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
             goto cleanup;
           }
-          clFlush(queue);
+          clEnv->library->clFlush(queue);
         }
       }
     }
@@ -1289,12 +1318,12 @@
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -1307,10 +1336,10 @@
 cleanup:
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
-  if (inputImageBuffer!=NULL)     clReleaseMemObject(inputImageBuffer);
-  if (tempImageBuffer!=NULL)      clReleaseMemObject(tempImageBuffer);
-  if (filteredImageBuffer!=NULL)  clReleaseMemObject(filteredImageBuffer);
-  if (imageKernelBuffer!=NULL)    clReleaseMemObject(imageKernelBuffer);
+  if (inputImageBuffer!=NULL)     clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (tempImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(tempImageBuffer);
+  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
   if (blurRowKernel!=NULL)        RelinquishOpenCLKernel(clEnv, blurRowKernel);
   if (blurColumnKernel!=NULL)     RelinquishOpenCLKernel(clEnv, blurColumnKernel);
   if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
@@ -1413,7 +1442,7 @@
   float* cosThetaPtr;
   MagickSizeType length;
   unsigned int matte;
-  PixelInfo bias;
+  MagickPixelPacket bias;
   cl_float4 biasPixel;
   cl_float2 blurCenter;
   float blurRadius;
@@ -1460,17 +1489,17 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
 
   filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
   assert(filteredImage != NULL);
-  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+  if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
     goto cleanup;
@@ -1494,10 +1523,10 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
@@ -1507,29 +1536,29 @@
   cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
 
   /* create a buffer for sin_theta and cos_theta */
-  sinThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
+  sinThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
-  cosThetaBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
+  cosThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
 
   queue = AcquireOpenCLCommandQueue(clEnv);
-  sinThetaPtr = (float*) clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
+  sinThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
     goto cleanup;
   }
 
-  cosThetaPtr = (float*) clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
+  cosThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
@@ -1544,11 +1573,11 @@
     sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
   }
  
-  clStatus = clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
-  clStatus |= clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
+  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
+  clStatus |= clEnv->library->clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
     goto cleanup;
   }
 
@@ -1563,28 +1592,28 @@
   
   /* set the kernel arguments */
   i = 0;
-  clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-  clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+  clStatus=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
 
-  GetPixelInfo(inputImage,&bias);
+  GetMagickPixelPacket(inputImage,&bias);
   biasPixel.s[0] = bias.red;
   biasPixel.s[1] = bias.green;
   biasPixel.s[2] = bias.blue;
-  biasPixel.s[3] = bias.alpha;
-  clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel);
-  clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel);
+  biasPixel.s[3] = bias.opacity;
+  clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float4), &biasPixel);
+  clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(ChannelType), &channel);
 
-  matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
-  clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte);
+  matte = (inputImage->matte != MagickFalse)?1:0;
+  clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &matte);
 
-  clStatus=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter);
+  clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_float2), &blurCenter);
 
-  clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
-  clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
-  clStatus|=clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
+  clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(radialBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     goto cleanup;
   }
 
@@ -1592,23 +1621,23 @@
   global_work_size[0] = inputImage->columns;
   global_work_size[1] = inputImage->rows;
   /* launch the kernel */
-  clStatus = clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, radialBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
     goto cleanup;
   }
-  clFlush(queue);
+  clEnv->library->clFlush(queue);
 
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -1620,10 +1649,10 @@
 cleanup:
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
-  if (filteredImageBuffer!=NULL)  clReleaseMemObject(filteredImageBuffer);
-  if (inputImageBuffer!=NULL)     clReleaseMemObject(inputImageBuffer);
-  if (sinThetaBuffer!=NULL)       clReleaseMemObject(sinThetaBuffer);
-  if (cosThetaBuffer!=NULL)       clReleaseMemObject(cosThetaBuffer);
+  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (inputImageBuffer!=NULL)     clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (sinThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(sinThetaBuffer);
+  if (cosThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(cosThetaBuffer);
   if (radialBlurKernel!=NULL)     RelinquishOpenCLKernel(clEnv, radialBlurKernel);
   if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
   if (outputReady == MagickFalse)
@@ -1757,10 +1786,10 @@
     }
     /* create a CL buffer from image pixel buffer */
     length = inputImage->columns * inputImage->rows;
-    inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+    inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
   }
@@ -1769,7 +1798,7 @@
   {
     filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
     assert(filteredImage != NULL);
-    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+    if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
     {
       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
       goto cleanup;
@@ -1794,10 +1823,10 @@
 
     /* create a CL buffer from image pixel buffer */
     length = inputImage->columns * inputImage->rows;
-    filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
   }
@@ -1812,28 +1841,28 @@
       goto cleanup;
     }
 
-    imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
+    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
 
 
-    kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
+    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
       goto cleanup;
     }
     for (i = 0; i < kernel->width; i++)
     {
       kernelBufferPtr[i] = (float) kernel->values[i];
     }
-    clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
       goto cleanup;
     }
   }
@@ -1842,10 +1871,10 @@
     /* create temp buffer */
     {
       length = inputImage->columns * inputImage->rows;
-      tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
+      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
       if (clStatus != CL_SUCCESS)
       {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
         goto cleanup;
       }
     }
@@ -1877,17 +1906,17 @@
 
       /* set the kernel arguments */
       i = 0;
-      clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-      clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-      clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
-      clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-      clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-      clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-      clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-      clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
+      clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
       if (clStatus != CL_SUCCESS)
       {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
         goto cleanup;
       }
     }
@@ -1902,13 +1931,13 @@
       wsize[0] = chunkSize;
       wsize[1] = 1;
 
-      clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
+      clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
       if (clStatus != CL_SUCCESS)
       {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
         goto cleanup;
       }
-      clFlush(queue);
+      clEnv->library->clFlush(queue);
     }
 
 
@@ -1921,22 +1950,22 @@
       fThreshold = (float)threshold;
 
       i = 0;
-      clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
-      clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
+      clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
+      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
 
       if (clStatus != CL_SUCCESS)
       {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
         goto cleanup;
       }
     }
@@ -1951,13 +1980,13 @@
       wsize[0] = 1;
       wsize[1] = chunkSize;
 
-      clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
+      clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
       if (clStatus != CL_SUCCESS)
       {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
         goto cleanup;
       }
-      clFlush(queue);
+      clEnv->library->clFlush(queue);
     }
 
   }
@@ -1966,12 +1995,12 @@
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -1985,10 +2014,10 @@
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
   if (kernel != NULL)			      kernel=DestroyKernelInfo(kernel);
-  if (inputImageBuffer!=NULL)		      clReleaseMemObject(inputImageBuffer);
-  if (filteredImageBuffer!=NULL)              clReleaseMemObject(filteredImageBuffer);
-  if (tempImageBuffer!=NULL)                  clReleaseMemObject(tempImageBuffer);
-  if (imageKernelBuffer!=NULL)                clReleaseMemObject(imageKernelBuffer);
+  if (inputImageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
+  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
   if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
   if (unsharpMaskBlurColumnKernel!=NULL)      RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
   if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
@@ -2066,10 +2095,10 @@
     }
     /* create a CL buffer from image pixel buffer */
     length = inputImage->columns * inputImage->rows;
-    inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+    inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
   }
@@ -2078,7 +2107,7 @@
   {
     filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
     assert(filteredImage != NULL);
-    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
     {
       (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
       goto cleanup;
@@ -2103,10 +2132,10 @@
 
     /* create a CL buffer from image pixel buffer */
     length = inputImage->columns * inputImage->rows;
-    filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
   }
@@ -2121,28 +2150,28 @@
       goto cleanup;
     }
 
-    imageKernelBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
+    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
 
 
-    kernelBufferPtr = (float*)clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
+    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
       goto cleanup;
     }
     for (i = 0; i < kernel->width; i++)
     {
       kernelBufferPtr[i] = (float) kernel->values[i];
     }
-    clStatus = clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
       goto cleanup;
     }
   }
@@ -2154,10 +2183,10 @@
     /* create temp buffer */
     {
       length = inputImage->columns * (inputImage->rows / 2 + 1 + (kernel->width-1) / 2);
-      tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
+      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
       if (clStatus != CL_SUCCESS)
       {
-        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
         goto cleanup;
       }
     }
@@ -2196,19 +2225,19 @@
 
         /* set the kernel arguments */
         i = 0;
-        clStatus=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
-        clStatus|=clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
+        clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
+        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
         if (clStatus != CL_SUCCESS)
         {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
           goto cleanup;
         }
       }
@@ -2222,13 +2251,13 @@
         wsize[0] = chunkSize;
         wsize[1] = 1;
 
-        clStatus = clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
+        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
         if (clStatus != CL_SUCCESS)
         {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
           goto cleanup;
         }
-        clFlush(queue);
+        clEnv->library->clFlush(queue);
       }
 
 
@@ -2249,24 +2278,24 @@
         fThreshold = (float)threshold;
 
         i = 0;
-        clStatus=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
-        clStatus|=clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
+        clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
+        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
 
         if (clStatus != CL_SUCCESS)
         {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
           goto cleanup;
         }
       }
@@ -2281,13 +2310,13 @@
         wsize[0] = 1;
         wsize[1] = chunkSize;
 
-        clStatus = clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
+        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
         if (clStatus != CL_SUCCESS)
         {
-          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
           goto cleanup;
         }
-        clFlush(queue);
+        clEnv->library->clFlush(queue);
       }
     }
   }
@@ -2296,12 +2325,12 @@
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -2315,10 +2344,10 @@
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
   if (kernel != NULL)			      kernel=DestroyKernelInfo(kernel);
-  if (inputImageBuffer!=NULL)		      clReleaseMemObject(inputImageBuffer);
-  if (filteredImageBuffer!=NULL)              clReleaseMemObject(filteredImageBuffer);
-  if (tempImageBuffer!=NULL)                  clReleaseMemObject(tempImageBuffer);
-  if (imageKernelBuffer!=NULL)                clReleaseMemObject(imageKernelBuffer);
+  if (inputImageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
+  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
   if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
   if (unsharpMaskBlurColumnKernel!=NULL)      RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
   if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
@@ -2524,46 +2553,46 @@
   }
 
   i = 0;
-  clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
+  clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
 
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
 
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
 
   resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
 
   resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
 
   resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
 
   resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
 
 
-  clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
   
 
-  clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
 
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     goto cleanup;
   }
 
@@ -2572,13 +2601,13 @@
 
   local_work_size[0] = workgroupSize;
   local_work_size[1] = 1;
-  clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
     goto cleanup;
   }
-  clFlush(queue);
+  clEnv->library->clFlush(queue);
   status = MagickTrue;
 
 
@@ -2704,46 +2733,46 @@
   }
 
   i = 0;
-  clStatus = clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
+  clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&inputImage);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageColumns);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&inputImageRows);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
 
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
 
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
 
   resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
 
   resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
 
   resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
 
   resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
 
 
-  clStatus |= clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
   
 
-  clStatus |= clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
-  clStatus |= clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
+  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
 
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     goto cleanup;
   }
 
@@ -2752,13 +2781,13 @@
 
   local_work_size[0] = 1;
   local_work_size[1] = workgroupSize;
-  clStatus = clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
     goto cleanup;
   }
-  clFlush(queue);
+  clEnv->library->clFlush(queue);
   status = MagickTrue;
 
 
@@ -2825,25 +2854,25 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
-  cubicCoefficientsBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
+  cubicCoefficientsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
   queue = AcquireOpenCLCommandQueue(clEnv);
-  mappedCoefficientBuffer = (float*)clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float)
+  mappedCoefficientBuffer = (float*)clEnv->library->clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float)
           , 0, NULL, NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
     goto cleanup;
   }
   resizeFilterCoefficient = GetResizeFilterCoefficient(resizeFilter);
@@ -2851,10 +2880,10 @@
   {
     mappedCoefficientBuffer[i] = (float) resizeFilterCoefficient[i];
   }
-  clStatus = clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL);
+  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
     goto cleanup;
   }
 
@@ -2862,7 +2891,7 @@
   if (filteredImage == NULL)
     goto cleanup;
 
-  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+  if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
     goto cleanup;
@@ -2887,10 +2916,10 @@
 
   /* create a CL buffer from image pixel buffer */
   length = filteredImage->columns * filteredImage->rows;
-  filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
@@ -2900,21 +2929,21 @@
   {
 
     length = resizedColumns*inputImage->rows;
-    tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
+    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
     
-    status = resizeHorizontalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
+    status = resizeHorizontalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->matte != MagickFalse)?1:0
           , tempImageBuffer, resizedColumns, inputImage->rows
           , resizeFilter, cubicCoefficientsBuffer
           , xFactor, clEnv, queue, exception);
     if (status != MagickTrue)
       goto cleanup;
     
-    status = resizeVerticalFilter(tempImageBuffer, resizedColumns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
+    status = resizeVerticalFilter(tempImageBuffer, resizedColumns, inputImage->rows, (inputImage->matte != MagickFalse)?1:0
        , filteredImageBuffer, resizedColumns, resizedRows
        , resizeFilter, cubicCoefficientsBuffer
        , yFactor, clEnv, queue, exception);
@@ -2924,21 +2953,21 @@
   else
   {
     length = inputImage->columns*resizedRows;
-    tempImageBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
+    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
 
-    status = resizeVerticalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
+    status = resizeVerticalFilter(inputImageBuffer, inputImage->columns, inputImage->rows, (inputImage->matte != MagickFalse)?1:0
        , tempImageBuffer, inputImage->columns, resizedRows
        , resizeFilter, cubicCoefficientsBuffer
        , yFactor, clEnv, queue, exception);
     if (status != MagickTrue)
       goto cleanup;
 
-    status = resizeHorizontalFilter(tempImageBuffer, inputImage->columns, resizedRows, (inputImage->alpha_trait == BlendPixelTrait)?1:0
+    status = resizeHorizontalFilter(tempImageBuffer, inputImage->columns, resizedRows, (inputImage->matte != MagickFalse)?1:0
        , filteredImageBuffer, resizedColumns, resizedRows
        , resizeFilter, cubicCoefficientsBuffer
        , xFactor, clEnv, queue, exception);
@@ -2948,11 +2977,11 @@
   length = resizedColumns*resizedRows;
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -2964,10 +2993,10 @@
 cleanup:
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
-  if (inputImageBuffer!=NULL)		  clReleaseMemObject(inputImageBuffer);
-  if (tempImageBuffer!=NULL)		  clReleaseMemObject(tempImageBuffer);
-  if (filteredImageBuffer!=NULL)	  clReleaseMemObject(filteredImageBuffer);
-  if (cubicCoefficientsBuffer!=NULL)      clReleaseMemObject(cubicCoefficientsBuffer);
+  if (inputImageBuffer!=NULL)		  clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (tempImageBuffer!=NULL)		  clEnv->library->clReleaseMemObject(tempImageBuffer);
+  if (filteredImageBuffer!=NULL)	  clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (cubicCoefficientsBuffer!=NULL)      clEnv->library->clReleaseMemObject(cubicCoefficientsBuffer);
   if (queue != NULL)  	                  RelinquishOpenCLCommandQueue(clEnv, queue);
   if (outputReady == MagickFalse)
   {
@@ -3125,10 +3154,10 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
   
@@ -3140,13 +3169,13 @@
   }
 
   i = 0;
-  clStatus=clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
 
   uSharpen = (sharpen == MagickFalse)?0:1;
-  clStatus|=clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
+  clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     goto cleanup;
   }
 
@@ -3154,23 +3183,23 @@
   global_work_size[1] = inputImage->rows;
   /* launch the kernel */
   queue = AcquireOpenCLCommandQueue(clEnv);
-  clStatus = clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
     goto cleanup;
   }
-  clFlush(queue);
+  clEnv->library->clFlush(queue);
 
   if (ALIGNED(inputPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -3182,7 +3211,7 @@
 cleanup:
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
-  if (inputImageBuffer!=NULL)		      clReleaseMemObject(inputImageBuffer);
+  if (inputImageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(inputImageBuffer);
   if (filterKernel!=NULL)                     RelinquishOpenCLKernel(clEnv, filterKernel);
   if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
   return outputReady;
@@ -3269,6 +3298,7 @@
 
   Image * inputImage = image;
 
+  inputPixels = NULL;
   inputImageBuffer = NULL;
   modulateKernel = NULL; 
 
@@ -3311,10 +3341,10 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
@@ -3331,14 +3361,14 @@
   color=colorspace;
 
   i = 0;
-  clStatus=clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-  clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
-  clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
-  clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
-  clStatus|=clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
+  clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
+  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
+  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
+  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     printf("no kernel\n");
     goto cleanup;
   }
@@ -3348,24 +3378,24 @@
     global_work_size[0] = inputImage->columns;
     global_work_size[1] = inputImage->rows;
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }
-    clFlush(queue);
+    clEnv->library->clFlush(queue);
   }
 
   if (ALIGNED(inputPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -3384,7 +3414,7 @@
   }
 
   if (inputImageBuffer!=NULL)		      
-    clReleaseMemObject(inputImageBuffer);
+    clEnv->library->clReleaseMemObject(inputImageBuffer);
   if (modulateKernel!=NULL)                     
     RelinquishOpenCLKernel(clEnv, modulateKernel);
   if (queue != NULL)                          
@@ -3448,6 +3478,472 @@
   return status;
 }
 
+/*
+  ComputeNegateImageChannel() runs the "Negate" OpenCL kernel over a buffer
+  created from the image's pixel cache and reads the result back into those
+  pixels.  The grayscale argument is accepted but not used.  Returns
+  MagickTrue once the result has been read back, or MagickFalse after any
+  failure (each one is reported through OpenCLThrowMagickException()).
+*/
+MagickBooleanType ComputeNegateImageChannel(Image* image, const ChannelType channel, const MagickBooleanType magick_unused(grayscale), ExceptionInfo* exception)
+{
+  register ssize_t
+    i;
+
+  MagickBooleanType outputReady;
+  MagickCLEnv clEnv;
+  void *inputPixels;
+  MagickSizeType length;
+
+  cl_context context;
+  cl_command_queue queue;
+  cl_kernel negateKernel;
+  cl_mem inputImageBuffer;
+  cl_mem_flags mem_flags;
+  cl_int clStatus;
+
+  Image * inputImage = image;
+
+  magick_unreferenced(grayscale);
+
+  inputPixels = NULL;
+  inputImageBuffer = NULL;
+  negateKernel = NULL;
+
+  assert(inputImage != (Image *) NULL);
+  assert(inputImage->signature == MagickSignature);
+  if (inputImage->debug != MagickFalse)
+    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
+
+  /*
+   * initialize opencl env
+   */
+  clEnv = GetDefaultOpenCLEnv();
+  context = GetOpenCLContext(clEnv);
+  queue = AcquireOpenCLCommandQueue(clEnv);
+
+  outputReady = MagickFalse;
+
+  /* Create and initialize OpenCL buffers.
+   inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
+   assume this will get a writable image
+   */
+  inputPixels = GetPixelCachePixels(inputImage, &length, exception);
+  if (inputPixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
+    goto cleanup;
+  }
+
+  /* If the host pointer is aligned to the size of CLPixelPacket,
+   then use the host buffer directly from the GPU; otherwise,
+   create a buffer on the GPU and copy the data over
+   */
+  if (ALIGNED(inputPixels,CLPixelPacket))
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
+  }
+  else
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  negateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Negate");
+  if (negateKernel == NULL)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* OR the status of every clSetKernelArg call together so that a failure
+     on any argument, not only the last one, is detected below */
+  i = 0;
+  clStatus=clEnv->library->clSetKernelArg(negateKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(negateKernel,i++,sizeof(ChannelType),(void *)&channel);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  {
+    size_t global_work_size[2];
+    global_work_size[0] = inputImage->columns;
+    global_work_size[1] = inputImage->rows;
+    /* launch the kernel */
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, negateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    if (clStatus != CL_SUCCESS)
+    {
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      goto cleanup;
+    }
+    clEnv->library->clFlush(queue);
+  }
+
+  if (ALIGNED(inputPixels,CLPixelPacket))
+  {
+    length = inputImage->columns * inputImage->rows;
+    clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+  }
+  else
+  {
+    length = inputImage->columns * inputImage->rows;
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+  }
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  outputReady = MagickTrue;
+
+cleanup:
+  OpenCLLogException(__FUNCTION__,__LINE__,exception);
+
+  /* inputPixels was obtained from GetPixelCachePixels() and is not released
+     here; only the OpenCL buffer, kernel and command queue are given back */
+  if (inputImageBuffer!=NULL)
+    clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (negateKernel!=NULL)
+    RelinquishOpenCLKernel(clEnv, negateKernel);
+  if (queue != NULL)
+    RelinquishOpenCLCommandQueue(clEnv, queue);
+
+  return outputReady;
+
+}
+
+
+/*
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%     N e g a t e I m a g e  w i t h  O p e n C L                             %
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%
+%  A description of each parameter follows:
+%
+%    o image: the image.
+%
+%    o channel: the channel.
+%
+%    o grayscale: If MagickTrue, only negate grayscale pixels within the image.
+%
+*/
+
+MagickExport
+MagickBooleanType AccelerateNegateImageChannel(Image* image, const ChannelType channel, const MagickBooleanType grayscale, ExceptionInfo* exception)
+{
+  assert(image != NULL);
+  assert(exception != NULL);
+
+  /*
+    Bail out with MagickFalse unless the OpenCL environment check passes.
+  */
+  if (checkOpenCLEnvironment(exception) == MagickFalse)
+    return MagickFalse;
+
+  /* the acceleration condition is tested against AllChannels, not channel */
+  if (checkAccelerateCondition(image, AllChannels) == MagickFalse)
+    return MagickFalse;
+
+  /* hand the work to the OpenCL implementation and pass its result through */
+  return ComputeNegateImageChannel(image,channel,grayscale,exception);
+
+}
+
+
+/*
+  ComputeGrayscaleImage() runs the "Grayscale" OpenCL kernel over a buffer
+  created from the image's pixel cache, passing it the intensity method and
+  the image colorspace, and reads the result back into those pixels.
+  Returns MagickTrue once the result has been read back, or MagickFalse after
+  any failure (each one is reported through OpenCLThrowMagickException()).
+*/
+MagickBooleanType ComputeGrayscaleImage(Image* image, const PixelIntensityMethod method, ExceptionInfo* exception)
+{
+  register ssize_t
+    i;
+
+  cl_int intensityMethod;
+  cl_int colorspace;
+
+  MagickBooleanType outputReady;
+  MagickCLEnv clEnv;
+  void *inputPixels;
+  MagickSizeType length;
+
+  cl_context context;
+  cl_command_queue queue;
+  cl_kernel grayscaleKernel;
+  cl_mem inputImageBuffer;
+  cl_mem_flags mem_flags;
+  cl_int clStatus;
+
+  Image * inputImage = image;
+
+  inputPixels = NULL;
+  inputImageBuffer = NULL;
+  grayscaleKernel = NULL;
+
+  assert(inputImage != (Image *) NULL);
+  assert(inputImage->signature == MagickSignature);
+  if (inputImage->debug != MagickFalse)
+    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
+
+  /*
+   * initialize opencl env
+   */
+  clEnv = GetDefaultOpenCLEnv();
+  context = GetOpenCLContext(clEnv);
+  queue = AcquireOpenCLCommandQueue(clEnv);
+
+  outputReady = MagickFalse;
+
+  /* Create and initialize OpenCL buffers.
+   inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
+   assume this will get a writable image
+   */
+  inputPixels = GetPixelCachePixels(inputImage, &length, exception);
+  if (inputPixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
+    goto cleanup;
+  }
+
+  /* If the host pointer is aligned to the size of CLPixelPacket,
+   then use the host buffer directly from the GPU; otherwise,
+   create a buffer on the GPU and copy the data over
+   */
+  if (ALIGNED(inputPixels,CLPixelPacket))
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
+  }
+  else
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  /* copy the enum-typed inputs into cl_int variables so the kernel
+     arguments below are set with sizeof(cl_int) */
+  intensityMethod = method;
+  colorspace = image->colorspace;
+
+  grayscaleKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Grayscale");
+  if (grayscaleKernel == NULL)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  i = 0;
+  clStatus=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&intensityMethod);
+  clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&colorspace);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  {
+    size_t global_work_size[2];
+    global_work_size[0] = inputImage->columns;
+    global_work_size[1] = inputImage->rows;
+    /* launch the kernel */
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, grayscaleKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    if (clStatus != CL_SUCCESS)
+    {
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      goto cleanup;
+    }
+    clEnv->library->clFlush(queue);
+  }
+
+  if (ALIGNED(inputPixels,CLPixelPacket))
+  {
+    length = inputImage->columns * inputImage->rows;
+    clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+  }
+  else
+  {
+    length = inputImage->columns * inputImage->rows;
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+  }
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  outputReady = MagickTrue;
+
+cleanup:
+  OpenCLLogException(__FUNCTION__,__LINE__,exception);
+
+  /* inputPixels was obtained from GetPixelCachePixels() and is not released
+     here; only the OpenCL buffer, kernel and command queue are given back */
+  if (inputImageBuffer!=NULL)
+    clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (grayscaleKernel!=NULL)
+    RelinquishOpenCLKernel(clEnv, grayscaleKernel);
+  if (queue != NULL)
+    RelinquishOpenCLCommandQueue(clEnv, queue);
+
+  return outputReady;
+
+}
+/*
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%     G r a y s c a l e I m a g e  w i t h  O p e n C L                       %
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%  AccelerateGrayscaleImage() converts the colors in the reference image to gray.
+%
+%  The format of the AccelerateGrayscaleImage method is:
+%
+%      MagickBooleanType AccelerateGrayscaleImage(Image *image,
+%        const PixelIntensityMethod method,ExceptionInfo *exception)
+%
+%  A description of each parameter follows:
+%
+%    o image: the image.
+%    o method: the pixel intensity method.
+%    o exception: return any errors or warnings in this structure.
+%
+*/
+
+MagickExport
+MagickBooleanType AccelerateGrayscaleImage(Image* image, const PixelIntensityMethod method, ExceptionInfo* exception)
+{
+  /* Returns MagickFalse as soon as one of the checks below declines the
+     request; otherwise the result of ComputeGrayscaleImage() is returned. */
+  assert(image != NULL);
+  assert(exception != NULL);
+
+  /* OpenCL environment check, then the generic acceleration check */
+  if (checkOpenCLEnvironment(exception) == MagickFalse)
+    return MagickFalse;
+  if (checkAccelerateCondition(image, AllChannels) == MagickFalse)
+    return MagickFalse;
+
+  /* the two Rec*Luminance intensity methods are declined here */
+  switch (method)
+  {
+    case Rec601LuminancePixelIntensityMethod:
+    case Rec709LuminancePixelIntensityMethod:
+      return MagickFalse;
+    default:
+      break;
+  }
+  /* only images whose colorspace is sRGBColorspace reach the compute step */
+  return (image->colorspace != sRGBColorspace) ? MagickFalse : ComputeGrayscaleImage(image,method,exception);
+}
+
+/*
+  LaunchHistogramKernel() acquires the "Histogram" OpenCL kernel, binds its
+  five arguments, enqueues it over a columns x rows global range and flushes
+  the queue.
+
+    o inputImageBuffer, histogramBuffer: bound as kernel arguments 0 and 4.
+    o inputImage: supplies intensity, colorspace and the global work size.
+    o channel: bound unchanged as kernel argument 1.
+    o returns: MagickTrue only if acquire, bind and enqueue all succeeded.
+*/
+static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
+  cl_command_queue queue,cl_mem inputImageBuffer,cl_mem histogramBuffer,
+  Image *inputImage,const ChannelType channel,ExceptionInfo * _exception)
+{
+  ExceptionInfo
+    *exception = _exception;
+
+  MagickBooleanType
+    launched = MagickFalse;
+
+  cl_kernel
+    kernel = NULL;
+
+  cl_int
+    intensityArg = inputImage->intensity,
+    colorspaceArg = inputImage->colorspace,
+    clStatus;
+
+  size_t
+    workSize[2];
+
+  /* Step 1: look the kernel up by name; a NULL handle is reported. */
+  kernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
+  if (kernel == NULL)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* Step 2: bind arguments 0..4.  All five calls are always issued and their
+     status codes OR-ed, so a single check covers the whole group. */
+  clStatus  = clEnv->library->clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &inputImageBuffer);
+  clStatus |= clEnv->library->clSetKernelArg(kernel, 1, sizeof(ChannelType), &channel);
+  clStatus |= clEnv->library->clSetKernelArg(kernel, 2, sizeof(cl_int), &intensityArg);
+  clStatus |= clEnv->library->clSetKernelArg(kernel, 3, sizeof(cl_int), &colorspaceArg);
+  clStatus |= clEnv->library->clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &histogramBuffer);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* Step 3: enqueue a 2-D launch whose global range is columns x rows; no
+     global offset, local work size or event wait list is supplied (all
+     NULL/0). */
+  workSize[0] = inputImage->columns;
+  workSize[1] = inputImage->rows;
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, kernel, 2, NULL, workSize, NULL, 0, NULL, NULL);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* the return value of clFlush is not inspected */
+  clEnv->library->clFlush(queue);
+  launched = MagickTrue;
+cleanup:
+  OpenCLLogException(__FUNCTION__,__LINE__,exception);
+
+  if (kernel != NULL)
+    RelinquishOpenCLKernel(clEnv, kernel);
+
+  return launched;
+}
+
 
 MagickExport MagickBooleanType ComputeEqualizeImage(Image *inputImage, const ChannelType channel, ExceptionInfo * _exception)
 {
@@ -3460,13 +3956,13 @@
     white,
     black,
     intensity,
-    *map;
+    *map=NULL;
 
   cl_uint4
-    *histogram;
+    *histogram=NULL;
 
   PixelPacket
-    *equalize_map;
+    *equalize_map=NULL;
 
   register ssize_t
     i;
@@ -3474,9 +3970,12 @@
   Image * image = inputImage;
 
   MagickBooleanType outputReady;
+
   MagickCLEnv clEnv;
 
   cl_int clStatus;
+  MagickBooleanType status;
+
   size_t global_work_size[2];
 
   void *inputPixels;
@@ -3489,7 +3988,6 @@
   cl_kernel histogramKernel; 
   cl_kernel equalizeKernel; 
   cl_command_queue queue;
-  cl_int colorspace;
 
   void* hostPtr;
 
@@ -3498,6 +3996,7 @@
   inputPixels = NULL;
   inputImageBuffer = NULL;
   histogramBuffer = NULL;
+  equalizeMapBuffer = NULL;
   histogramKernel = NULL; 
   equalizeKernel = NULL; 
   context = NULL;
@@ -3510,6 +4009,13 @@
     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",inputImage->filename);
 
   /*
+   * initialize opencl env
+   */
+  clEnv = GetDefaultOpenCLEnv();
+  context = GetOpenCLContext(clEnv);
+  queue = AcquireOpenCLCommandQueue(clEnv);
+
+  /*
     Allocate and initialize histogram arrays.
   */
   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
@@ -3519,13 +4025,6 @@
   /* reset histogram */
   (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
 
-  /*
-   * initialize opencl env
-   */
-  clEnv = GetDefaultOpenCLEnv();
-  context = GetOpenCLContext(clEnv);
-  queue = AcquireOpenCLCommandQueue(clEnv);
-
   /* Create and initialize OpenCL buffers. */
   /* inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); */
   /* assume this  will get a writable image */
@@ -3549,13 +4048,13 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
-  
+
   /* If the host pointer is aligned to the size of cl_uint, 
      then use the host buffer directly from the GPU; otherwise, 
      create a buffer on the GPU and copy the data over */
@@ -3571,70 +4070,27 @@
   }
   /* create a CL buffer for histogram  */
   length = (MaxMap+1); 
-  histogramBuffer = clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
+  histogramBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
-  switch (inputImage->colorspace)
-  {
-  case RGBColorspace:
-    colorspace = 1;
-    break;
-  case sRGBColorspace:
-    colorspace = 0;
-    break;
-  default:
-    {
-    /* something is wrong, as we checked in checkAccelerateCondition */
-    }
-  }
-
-  /* get the OpenCL kernel */
-  histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
-  if (histogramKernel == NULL)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
+  status = LaunchHistogramKernel(clEnv, queue, inputImageBuffer, histogramBuffer, image, channel, exception);
+  if (status == MagickFalse)
     goto cleanup;
-  }
-
-  /* set the kernel arguments */
-  i = 0;
-  clStatus=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-  clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
-  clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace);
-  clStatus|=clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
-    goto cleanup;
-  }
-
-  /* launch the kernel */
-  global_work_size[0] = inputImage->columns;
-  global_work_size[1] = inputImage->rows;
-
-  clStatus = clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
-
-  if (clStatus != CL_SUCCESS)
-  {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
-    goto cleanup;
-  }
-  clFlush(queue);
 
   /* read from the kenel output */
   if (ALIGNED(histogram,cl_uint4)) 
   {
     length = (MaxMap+1); 
-    clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = (MaxMap+1); 
-    clStatus = clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -3645,33 +4101,28 @@
   /* unmap, don't block gpu to use this buffer again.  */
   if (ALIGNED(histogram,cl_uint4))
   {
-    clStatus = clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
       goto cleanup;
     }
   }
 
-  if (getenv("TEST")) {
-    unsigned int i;
-    for (i=0; i<(MaxMap+1UL); i++) 
-    {
-      printf("histogram %d: red %d\n", i, histogram[i].s[2]);
-      printf("histogram %d: green %d\n", i, histogram[i].s[1]);
-      printf("histogram %d: blue %d\n", i, histogram[i].s[0]);
-      printf("histogram %d: alpha %d\n", i, histogram[i].s[3]);
-    }
-  }
-
-  /* cpu stuff */
+  /* recreate input buffer later, in case image updated */
+#ifdef RECREATEBUFFER 
+  if (inputImageBuffer!=NULL)		      
+    clEnv->library->clReleaseMemObject(inputImageBuffer);
+#endif
+ 
+  /* CPU stuff */
   equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
   if (equalize_map == (PixelPacket *) NULL)
-      ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
+    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
 
   map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
   if (map == (FloatPixelPacket *) NULL)
-      ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
+    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
 
   /*
     Integrate the histogram to get the equalization map.
@@ -3680,11 +4131,11 @@
   for (i=0; i <= (ssize_t) MaxMap; i++)
   {
     if ((channel & SyncChannels) != 0)
-      {
-        intensity.red+=histogram[i].s[2];
-        map[i]=intensity;
-        continue;
-      }
+    {
+      intensity.red+=histogram[i].s[2];
+      map[i]=intensity;
+      continue;
+    }
     if ((channel & RedChannel) != 0)
       intensity.red+=histogram[i].s[2];
     if ((channel & GreenChannel) != 0)
@@ -3692,13 +4143,14 @@
     if ((channel & BlueChannel) != 0)
       intensity.blue+=histogram[i].s[0];
     if ((channel & OpacityChannel) != 0)
-      intensity.alpha+=histogram[i].s[3];
+      intensity.opacity+=histogram[i].s[3];
+    /*
     if (((channel & IndexChannel) != 0) &&
         (image->colorspace == CMYKColorspace))
     {
-      printf("something here\n");
-      /*intensity.index+=histogram[i].index; */
+      intensity.index+=histogram[i].index; 
     }
+    */
     map[i]=intensity;
   }
   black=map[0];
@@ -3707,72 +4159,69 @@
   for (i=0; i <= (ssize_t) MaxMap; i++)
   {
     if ((channel & SyncChannels) != 0)
-      {
-        if (white.red != black.red)
-          equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
-            (map[i].red-black.red))/(white.red-black.red)));
-        continue;
-      }
+    {
+      if (white.red != black.red)
+        equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
+                (map[i].red-black.red))/(white.red-black.red)));
+      continue;
+    }
     if (((channel & RedChannel) != 0) && (white.red != black.red))
       equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
-        (map[i].red-black.red))/(white.red-black.red)));
+              (map[i].red-black.red))/(white.red-black.red)));
     if (((channel & GreenChannel) != 0) && (white.green != black.green))
       equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
-        (map[i].green-black.green))/(white.green-black.green)));
+              (map[i].green-black.green))/(white.green-black.green)));
     if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
       equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
-        (map[i].blue-black.blue))/(white.blue-black.blue)));
-    if (((channel & OpacityChannel) != 0) && (white.alpha != black.alpha))
-      equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
-        (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
+              (map[i].blue-black.blue))/(white.blue-black.blue)));
+    if (((channel & OpacityChannel) != 0) && (white.opacity != black.opacity))
+      equalize_map[i].opacity=ScaleMapToQuantum((MagickRealType) ((MaxMap*
+              (map[i].opacity-black.opacity))/(white.opacity-black.opacity)));
     /*
     if ((((channel & IndexChannel) != 0) &&
-        (image->colorspace == CMYKColorspace)) &&
+          (image->colorspace == CMYKColorspace)) &&
         (white.index != black.index))
       equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
-        (map[i].index-black.index))/(white.index-black.index)));
+              (map[i].index-black.index))/(white.index-black.index)));
     */
   }
 
-  histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
-  map=(FloatPixelPacket *) RelinquishMagickMemory(map);
-
   if (image->storage_class == PseudoClass)
   {
-      /*
-        Equalize colormap.
-      */
-      for (i=0; i < (ssize_t) image->colors; i++)
+    /*
+       Equalize colormap.
+       */
+    for (i=0; i < (ssize_t) image->colors; i++)
+    {
+      if ((channel & SyncChannels) != 0)
       {
-        if ((channel & SyncChannels) != 0)
-          {
-            if (white.red != black.red)
-              {
-                image->colormap[i].red=equalize_map[
-                  ScaleQuantumToMap(image->colormap[i].red)].red;
-                image->colormap[i].green=equalize_map[
-                  ScaleQuantumToMap(image->colormap[i].green)].red;
-                image->colormap[i].blue=equalize_map[
-                  ScaleQuantumToMap(image->colormap[i].blue)].red;
-                image->colormap[i].alpha=equalize_map[
-                  ScaleQuantumToMap(image->colormap[i].alpha)].red;
-              }
-            continue;
-          }
-        if (((channel & RedChannel) != 0) && (white.red != black.red))
+        if (white.red != black.red)
+        {
           image->colormap[i].red=equalize_map[
             ScaleQuantumToMap(image->colormap[i].red)].red;
-        if (((channel & GreenChannel) != 0) && (white.green != black.green))
           image->colormap[i].green=equalize_map[
-            ScaleQuantumToMap(image->colormap[i].green)].green;
-        if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
+            ScaleQuantumToMap(image->colormap[i].green)].red;
           image->colormap[i].blue=equalize_map[
-            ScaleQuantumToMap(image->colormap[i].blue)].blue;
-        if (((channel & OpacityChannel) != 0) &&
-            (white.alpha != black.alpha))
-          image->colormap[i].alpha=equalize_map[
-            ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
+            ScaleQuantumToMap(image->colormap[i].blue)].red;
+          image->colormap[i].opacity=equalize_map[
+            ScaleQuantumToMap(image->colormap[i].opacity)].red;
+        }
+        continue;
       }
+      if (((channel & RedChannel) != 0) && (white.red != black.red))
+        image->colormap[i].red=equalize_map[
+          ScaleQuantumToMap(image->colormap[i].red)].red;
+      if (((channel & GreenChannel) != 0) && (white.green != black.green))
+        image->colormap[i].green=equalize_map[
+          ScaleQuantumToMap(image->colormap[i].green)].green;
+      if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
+        image->colormap[i].blue=equalize_map[
+          ScaleQuantumToMap(image->colormap[i].blue)].blue;
+      if (((channel & OpacityChannel) != 0) &&
+          (white.opacity != black.opacity))
+        image->colormap[i].opacity=equalize_map[
+          ScaleQuantumToMap(image->colormap[i].opacity)].opacity;
+    }
   }
 
   /*
@@ -3784,9 +4233,7 @@
     equalize_map: uchar4 (PixelPacket)
     black, white: float4 (FloatPixelPacket) */
 
-  if (inputImageBuffer!=NULL)		      
-    clReleaseMemObject(inputImageBuffer);
- 
+#ifdef RECREATEBUFFER 
   /* If the host pointer is aligned to the size of CLPixelPacket, 
      then use the host buffer directly from the GPU; otherwise, 
      create a buffer on the GPU and copy the data over */
@@ -3800,12 +4247,13 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
+#endif
 
   /* Create and initialize OpenCL buffers. */
   if (ALIGNED(equalize_map, PixelPacket)) 
@@ -3820,10 +4268,10 @@
   }
   /* create a CL buffer for eqaulize_map  */
   length = (MaxMap+1); 
-  equalizeMapBuffer = clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
+  equalizeMapBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
@@ -3837,14 +4285,14 @@
 
   /* set the kernel arguments */
   i = 0;
-  clStatus=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
-  clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
-  clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
-  clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
-  clStatus|=clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
+  clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
+  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
+  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     goto cleanup;
   }
 
@@ -3852,25 +4300,25 @@
   global_work_size[0] = inputImage->columns;
   global_work_size[1] = inputImage->rows;
 
-  clStatus = clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
 
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
     goto cleanup;
   }
-  clFlush(queue);
+  clEnv->library->clFlush(queue);
 
   /* read the data back */
   if (ALIGNED(inputPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -3879,8 +4327,6 @@
   }
 
   outputReady = MagickTrue;
-  
-  equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
 
 cleanup:
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
@@ -3891,11 +4337,26 @@
   }
 
   if (inputImageBuffer!=NULL)		      
-    clReleaseMemObject(inputImageBuffer);
+    clEnv->library->clReleaseMemObject(inputImageBuffer);
+
+  if (map!=NULL)
+    map=(FloatPixelPacket *) RelinquishMagickMemory(map);
+
+  if (equalizeMapBuffer!=NULL)
+    clEnv->library->clReleaseMemObject(equalizeMapBuffer);
+  if (equalize_map!=NULL)
+    equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
+
   if (histogramBuffer!=NULL)		      
-    clReleaseMemObject(histogramBuffer);
+    clEnv->library->clReleaseMemObject(histogramBuffer);
+  if (histogram!=NULL)
+    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
+
   if (histogramKernel!=NULL)                     
     RelinquishOpenCLKernel(clEnv, histogramKernel);
+  if (equalizeKernel!=NULL)                     
+    RelinquishOpenCLKernel(clEnv, equalizeKernel);
+
   if (queue != NULL)                          
     RelinquishOpenCLCommandQueue(clEnv, queue);
 
@@ -3946,11 +4407,8 @@
   if (status == MagickFalse)
     return MagickFalse;
 
-  /* ensure this is the only pass get in for now. */
-  if ((channel & SyncChannels) == 0)
-    return MagickFalse;
-
-  if (image->colorspace != sRGBColorspace)
+  status = checkHistogramCondition(image, channel);
+  if (status == MagickFalse)
     return MagickFalse;
 
   status = ComputeEqualizeImage(image,channel,exception);
@@ -3958,6 +4416,636 @@
 }
 
 
+
+MagickExport MagickBooleanType ComputeContrastStretchImageChannel(Image *image,
+  const ChannelType channel,const double black_point,const double white_point, 
+  ExceptionInfo * _exception) 
+{
+#define MaxRange(color)  ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
+#define ContrastStretchImageTag  "ContrastStretch/Image"
+
+  ExceptionInfo
+    *exception=_exception;
+
+  double
+    intensity;
+
+  FloatPixelPacket
+    black,
+    white;
+
+  cl_uint4
+    *histogram=NULL;
+
+  PixelPacket
+    *stretch_map=NULL;
+
+  register ssize_t
+    i;
+
+  Image * inputImage;
+
+  MagickBooleanType outputReady;
+
+  MagickCLEnv clEnv;
+
+  cl_int clStatus;
+  MagickBooleanType status;
+
+  size_t global_work_size[2];
+
+  void *inputPixels;
+  cl_mem_flags mem_flags;
+
+  cl_context context;
+  cl_mem inputImageBuffer;
+  cl_mem histogramBuffer;
+  cl_mem stretchMapBuffer;
+  cl_kernel histogramKernel; 
+  cl_kernel stretchKernel; 
+  cl_command_queue queue;
+
+  void* hostPtr;
+
+  MagickSizeType length;
+
+  inputImage = image;
+  inputPixels = NULL;
+  inputImageBuffer = NULL;
+  histogramBuffer = NULL;
+  stretchMapBuffer = NULL;
+  histogramKernel = NULL; 
+  stretchKernel = NULL; 
+  context = NULL;
+  queue = NULL;
+  outputReady = MagickFalse;
+
+
+  assert(image != (Image *) NULL);
+  assert(image->signature == MagickSignature);
+  if (image->debug != MagickFalse)
+    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
+
+  //exception=(&image->exception);
+
+  /*
+   * initialize opencl env
+   */
+  clEnv = GetDefaultOpenCLEnv();
+  context = GetOpenCLContext(clEnv);
+  queue = AcquireOpenCLCommandQueue(clEnv);
+
+  /*
+    Allocate and initialize histogram arrays.
+  */
+  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
+
+  if ((histogram == (cl_uint4 *) NULL))
+    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
+ 
+  /* reset histogram */
+  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
+
+  /*
+  if (IsGrayImage(image,exception) != MagickFalse)
+    (void) SetImageColorspace(image,GRAYColorspace);
+  */
+
+  status=MagickTrue;
+
+
+  /*
+    Form histogram.
+  */
+  /* Create and initialize OpenCL buffers. */
+  /* inputPixels = AcquirePixelCachePixels(inputImage, &length, exception); */
+  /* assume this  will get a writable image */
+  inputPixels = GetPixelCachePixels(inputImage, &length, exception);
+
+  if (inputPixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
+    goto cleanup;
+  }
+  /* If the host pointer is aligned to the size of CLPixelPacket, 
+     then use the host buffer directly from the GPU; otherwise, 
+     create a buffer on the GPU and copy the data over */
+  if (ALIGNED(inputPixels,CLPixelPacket)) 
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
+  }
+  else 
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  /* If the host pointer is aligned to the size of cl_uint4, 
+     then use the host buffer directly from the GPU; otherwise, 
+     create a buffer on the GPU and copy the data over */
+  if (ALIGNED(histogram,cl_uint4)) 
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
+    hostPtr = histogram;
+  }
+  else 
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+    hostPtr = histogram;
+  }
+  /* create a CL buffer for histogram  */
+  length = (MaxMap+1); 
+  histogramBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  status = LaunchHistogramKernel(clEnv, queue, inputImageBuffer, histogramBuffer, image, channel, exception);
+  if (status == MagickFalse)
+    goto cleanup;
+
+  /* read from the kernel output */
+  if (ALIGNED(histogram,cl_uint4)) 
+  {
+    length = (MaxMap+1); 
+    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
+  }
+  else 
+  {
+    length = (MaxMap+1); 
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
+  }
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* unmap, don't block gpu to use this buffer again.  */
+  if (ALIGNED(histogram,cl_uint4))
+  {
+    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
+    if (clStatus != CL_SUCCESS)
+    {
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
+      goto cleanup;
+    }
+  }
+
+  /* recreate input buffer later, in case image updated */
+#ifdef RECREATEBUFFER 
+  if (inputImageBuffer!=NULL)		      
+    clEnv->library->clReleaseMemObject(inputImageBuffer);
+#endif
+
+  /* CPU stuff */
+  /*
+     Find the histogram boundaries by locating the black/white levels.
+  */
+  black.red=0.0;
+  white.red=MaxRange(QuantumRange);
+  if ((channel & RedChannel) != 0)
+  {
+    intensity=0.0;
+    for (i=0; i <= (ssize_t) MaxMap; i++)
+    {
+      intensity+=histogram[i].s[2];
+      if (intensity > black_point)
+        break;
+    }
+    black.red=(MagickRealType) i;
+    intensity=0.0;
+    for (i=(ssize_t) MaxMap; i != 0; i--)
+    {
+      intensity+=histogram[i].s[2];
+      if (intensity > ((double) image->columns*image->rows-white_point))
+        break;
+    }
+    white.red=(MagickRealType) i;
+  }
+  black.green=0.0;
+  white.green=MaxRange(QuantumRange);
+  if ((channel & GreenChannel) != 0)
+  {
+    intensity=0.0;
+    for (i=0; i <= (ssize_t) MaxMap; i++)
+    {
+      intensity+=histogram[i].s[2];
+      if (intensity > black_point)
+        break;
+    }
+    black.green=(MagickRealType) i;
+    intensity=0.0;
+    for (i=(ssize_t) MaxMap; i != 0; i--)
+    {
+      intensity+=histogram[i].s[2];
+      if (intensity > ((double) image->columns*image->rows-white_point))
+        break;
+    }
+    white.green=(MagickRealType) i;
+  }
+  black.blue=0.0;
+  white.blue=MaxRange(QuantumRange);
+  if ((channel & BlueChannel) != 0)
+  {
+    intensity=0.0;
+    for (i=0; i <= (ssize_t) MaxMap; i++)
+    {
+      intensity+=histogram[i].s[2];
+      if (intensity > black_point)
+        break;
+    }
+    black.blue=(MagickRealType) i;
+    intensity=0.0;
+    for (i=(ssize_t) MaxMap; i != 0; i--)
+    {
+      intensity+=histogram[i].s[2];
+      if (intensity > ((double) image->columns*image->rows-white_point))
+        break;
+    }
+    white.blue=(MagickRealType) i;
+  }
+  black.opacity=0.0;
+  white.opacity=MaxRange(QuantumRange);
+  if ((channel & OpacityChannel) != 0)
+  {
+    intensity=0.0;
+    for (i=0; i <= (ssize_t) MaxMap; i++)
+    {
+      intensity+=histogram[i].s[2];
+      if (intensity > black_point)
+        break;
+    }
+    black.opacity=(MagickRealType) i;
+    intensity=0.0;
+    for (i=(ssize_t) MaxMap; i != 0; i--)
+    {
+      intensity+=histogram[i].s[2];
+      if (intensity > ((double) image->columns*image->rows-white_point))
+        break;
+    }
+    white.opacity=(MagickRealType) i;
+  }
+  /*
+  black.index=0.0;
+  white.index=MaxRange(QuantumRange);
+  if (((channel & IndexChannel) != 0) && (image->colorspace == CMYKColorspace))
+  {
+    intensity=0.0;
+    for (i=0; i <= (ssize_t) MaxMap; i++)
+    {
+      intensity+=histogram[i].index;
+      if (intensity > black_point)
+        break;
+    }
+    black.index=(MagickRealType) i;
+    intensity=0.0;
+    for (i=(ssize_t) MaxMap; i != 0; i--)
+    {
+      intensity+=histogram[i].index;
+      if (intensity > ((double) image->columns*image->rows-white_point))
+        break;
+    }
+    white.index=(MagickRealType) i;
+  }
+  */
+
+
+  stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
+    sizeof(*stretch_map));
+
+  if ((stretch_map == (PixelPacket *) NULL))
+    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
+      image->filename);
+ 
+  /*
+    Stretch the histogram to create the stretched image mapping.
+  */
+  (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
+  for (i=0; i <= (ssize_t) MaxMap; i++)
+  {
+    if ((channel & RedChannel) != 0)
+    {
+      if (i < (ssize_t) black.red)
+        stretch_map[i].red=(Quantum) 0;
+      else
+        if (i > (ssize_t) white.red)
+          stretch_map[i].red=QuantumRange;
+        else
+          if (black.red != white.red)
+            stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
+                  (i-black.red)/(white.red-black.red)));
+    }
+    if ((channel & GreenChannel) != 0)
+    {
+      if (i < (ssize_t) black.green)
+        stretch_map[i].green=0;
+      else
+        if (i > (ssize_t) white.green)
+          stretch_map[i].green=QuantumRange;
+        else
+          if (black.green != white.green)
+            stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
+                  (i-black.green)/(white.green-black.green)));
+    }
+    if ((channel & BlueChannel) != 0)
+    {
+      if (i < (ssize_t) black.blue)
+        stretch_map[i].blue=0;
+      else
+        if (i > (ssize_t) white.blue)
+          stretch_map[i].blue= QuantumRange;
+        else
+          if (black.blue != white.blue)
+            stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
+                  (i-black.blue)/(white.blue-black.blue)));
+    }
+    if ((channel & OpacityChannel) != 0)
+    {
+      if (i < (ssize_t) black.opacity)
+        stretch_map[i].opacity=0;
+      else
+        if (i > (ssize_t) white.opacity)
+          stretch_map[i].opacity=QuantumRange;
+        else
+          if (black.opacity != white.opacity)
+            stretch_map[i].opacity=ScaleMapToQuantum((MagickRealType) (MaxMap*
+                  (i-black.opacity)/(white.opacity-black.opacity)));
+    }
+    /*
+    if (((channel & IndexChannel) != 0) &&
+        (image->colorspace == CMYKColorspace))
+    {
+      if (i < (ssize_t) black.index)
+        stretch_map[i].index=0;
+      else
+        if (i > (ssize_t) white.index)
+          stretch_map[i].index=QuantumRange;
+        else
+          if (black.index != white.index)
+            stretch_map[i].index=ScaleMapToQuantum((MagickRealType) (MaxMap*
+                  (i-black.index)/(white.index-black.index)));
+    }
+    */
+  }
+
+  /*
+    Stretch the image.
+  */
+  if (((channel & OpacityChannel) != 0) || (((channel & IndexChannel) != 0) &&
+      (image->colorspace == CMYKColorspace)))
+    image->storage_class=DirectClass;
+  if (image->storage_class == PseudoClass)
+  {
+    /*
+       Stretch colormap.
+       */
+    for (i=0; i < (ssize_t) image->colors; i++)
+    {
+      if ((channel & RedChannel) != 0)
+      {
+        if (black.red != white.red)
+          image->colormap[i].red=stretch_map[
+            ScaleQuantumToMap(image->colormap[i].red)].red;
+      }
+      if ((channel & GreenChannel) != 0)
+      {
+        if (black.green != white.green)
+          image->colormap[i].green=stretch_map[
+            ScaleQuantumToMap(image->colormap[i].green)].green;
+      }
+      if ((channel & BlueChannel) != 0)
+      {
+        if (black.blue != white.blue)
+          image->colormap[i].blue=stretch_map[
+            ScaleQuantumToMap(image->colormap[i].blue)].blue;
+      }
+      if ((channel & OpacityChannel) != 0)
+      {
+        if (black.opacity != white.opacity)
+          image->colormap[i].opacity=stretch_map[
+            ScaleQuantumToMap(image->colormap[i].opacity)].opacity;
+      }
+    }
+  }
+
+  /*
+    Stretch image.
+  */
+
+
+  /* GPU can work on this again, image and stretch map as input
+    image:        uchar4 (CLPixelPacket)
+    stretch_map:  uchar4 (PixelPacket)
+    black, white: float4 (FloatPixelPacket) */
+
+#ifdef RECREATEBUFFER 
+  /* If the host pointer is aligned to the size of CLPixelPacket, 
+     then use the host buffer directly from the GPU; otherwise, 
+     create a buffer on the GPU and copy the data over */
+  if (ALIGNED(inputPixels,CLPixelPacket)) 
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
+  }
+  else 
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+#endif
+
+  /* Create and initialize OpenCL buffers. */
+  if (ALIGNED(stretch_map, PixelPacket)) 
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
+    hostPtr = stretch_map;
+  }
+  else 
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+    hostPtr = stretch_map;
+  }
+  /* create a CL buffer for stretch_map  */
+  length = (MaxMap+1); 
+  stretchMapBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  /* get the OpenCL kernel */
+  stretchKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Stretch");
+  if (stretchKernel == NULL)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* set the kernel arguments */
+  i = 0;
+  clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&channel);
+  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&white);
+  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&black);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* launch the kernel */
+  global_work_size[0] = inputImage->columns;
+  global_work_size[1] = inputImage->rows;
+
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  clEnv->library->clFlush(queue);
+
+  /* read the data back */
+  if (ALIGNED(inputPixels,CLPixelPacket)) 
+  {
+    length = inputImage->columns * inputImage->rows;
+    clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+  }
+  else 
+  {
+    length = inputImage->columns * inputImage->rows;
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+  }
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  outputReady = MagickTrue;
+
+cleanup:
+  OpenCLLogException(__FUNCTION__,__LINE__,exception);
+
+  if (inputPixels) {
+    /*ReleasePixelCachePixels();*/
+    inputPixels = NULL;
+  }
+
+  if (inputImageBuffer!=NULL)		      
+    clEnv->library->clReleaseMemObject(inputImageBuffer);
+
+  if (stretchMapBuffer!=NULL)
+    clEnv->library->clReleaseMemObject(stretchMapBuffer);
+  if (stretch_map!=NULL)
+    stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
+
+
+  if (histogramBuffer!=NULL)
+    clEnv->library->clReleaseMemObject(histogramBuffer);
+  if (histogram!=NULL)
+    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
+
+
+  if (histogramKernel!=NULL)                     
+    RelinquishOpenCLKernel(clEnv, histogramKernel);
+  if (stretchKernel!=NULL)                     
+    RelinquishOpenCLKernel(clEnv, stretchKernel);
+
+  if (queue != NULL)                          
+    RelinquishOpenCLCommandQueue(clEnv, queue);
+
+  return outputReady;
+}
+
+
+/*
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%     C o n t r a s t S t r e t c h I m a g e  w i t h  O p e n C L           %
+%                                                                             %
+%                                                                             %
+%                                                                             %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+%  ContrastStretchImage() is a simple image enhancement technique that attempts
+%  to improve the contrast in an image by `stretching' the range of intensity
+%  values it contains to span a desired range of values. It differs from the
+%  more sophisticated histogram equalization in that it can only apply a
+%  linear scaling function to the image pixel values.  As a result the
+%  `enhancement' is less harsh.
+%
+%  The format of the AccelerateContrastStretchImageChannel method is:
+%
+%      MagickBooleanType AccelerateContrastStretchImageChannel(
+%        Image *image,const ChannelType channel,
+%        const double black_point,const double white_point,
+%        ExceptionInfo *exception)
+%
+%  A description of each parameter follows:
+%
+%    o image: the image.
+%
+%    o channel: the channel.
+%
+%    o black_point: the black point, expressed as a number of pixels in the
+%      range of 0 to number-of-pixels.
+%
+%    o white_point: the white point, expressed as a number of pixels in the
+%      range of 0 to number-of-pixels.
+%
+%    o exception: return any errors or warnings in this structure.
+%
+*/
+
+MagickExport MagickBooleanType AccelerateContrastStretchImageChannel(
+  Image *image,const ChannelType channel,const double black_point,
+  const double white_point,ExceptionInfo *exception)
+{
+  /*
+    Entry point for the OpenCL contrast stretch: run the three gate checks in
+    order and, only if none of them returns MagickFalse, call
+    ComputeContrastStretchImageChannel() and pass its status back.
+  */
+  assert(image != NULL);
+  assert(exception != NULL);
+
+  /* checked first; a MagickFalse here skips the image-specific checks */
+  if (checkOpenCLEnvironment(exception) == MagickFalse)
+    return MagickFalse;
+
+  /* short-circuit keeps the original order: accelerate, then histogram */
+  if ((checkAccelerateCondition(image,channel) == MagickFalse) ||
+      (checkHistogramCondition(image,channel) == MagickFalse))
+    return MagickFalse;
+
+  /* all gates passed: delegate and return the callee's status unchanged */
+  return ComputeContrastStretchImageChannel(image,channel,black_point,
+    white_point,exception);
+}
+
+
 static Image* ComputeDespeckleImage(const Image* inputImage, ExceptionInfo* exception)
 {
 
@@ -4012,10 +5100,10 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
@@ -4023,17 +5111,17 @@
   length = inputImage->columns * inputImage->rows;
   for (k = 0; k < 2; k++)
   {
-    tempImageBuffer[k] = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
+    tempImageBuffer[k] = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
   }
 
   filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
   assert(filteredImage != NULL);
-  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+  if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
     goto cleanup;
@@ -4057,41 +5145,41 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
   hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
   hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
 
-  clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&inputImageBuffer);
-  clStatus |=clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
+  clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
   imageWidth = inputImage->columns;
-  clStatus |=clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
   imageHeight = inputImage->rows;
-  clStatus |=clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
   matte = (inputImage->matte==MagickFalse)?0:1;
-  clStatus |=clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     goto cleanup;
   }
 
-  clStatus = clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
-  clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
+  clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
+  clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
   imageWidth = inputImage->columns;
-  clStatus |=clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
   imageHeight = inputImage->rows;
-  clStatus |=clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
-  matte = (inputImage->alpha_trait == BlendPixelTrait)?1:0;
-  clStatus |=clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
+  clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
+  matte = (inputImage->matte==MagickFalse)?0:1;
+  clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
     goto cleanup;
   }
 
@@ -4109,115 +5197,115 @@
     offset.s[0] = X[k];
     offset.s[1] = Y[k];
     polarity = 1;
-    clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
-    clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
-    clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
-    clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
+    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
+    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
+    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
+    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
       goto cleanup;
     }
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }  
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }  
 
 
     if (k == 0)
-      clStatus =clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
+      clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
     offset.s[0] = -X[k];
     offset.s[1] = -Y[k];
     polarity = 1;
-    clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
-    clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
-    clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
-    clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
+    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
+    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
+    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
+    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
       goto cleanup;
     }
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }  
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }  
 
     offset.s[0] = -X[k];
     offset.s[1] = -Y[k];
     polarity = -1;
-    clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
-    clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
-    clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
-    clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
+    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
+    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
+    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
+    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
       goto cleanup;
     }
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }  
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }  
 
     offset.s[0] = X[k];
     offset.s[1] = Y[k];
     polarity = -1;
-    clStatus = clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
-    clStatus|= clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
-    clStatus|=clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
-    clStatus|=clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
+    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
+    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
+    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
+    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
 
     if (k == 3)
-      clStatus |=clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
+      clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
 
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clSetKernelArg failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
       goto cleanup;
     }
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }  
     /* launch the kernel */
-    clStatus = clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueNDRangeKernel failed.", "'%s'", ".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
       goto cleanup;
     }  
   }
@@ -4225,12 +5313,12 @@
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -4244,12 +5332,12 @@
   OpenCLLogException(__FUNCTION__,__LINE__,exception);
 
   if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
-  if (inputImageBuffer!=NULL)		      clReleaseMemObject(inputImageBuffer);
+  if (inputImageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(inputImageBuffer);
   for (k = 0; k < 2; k++)
   {
-    if (tempImageBuffer[k]!=NULL)	      clReleaseMemObject(tempImageBuffer[k]);
+    if (tempImageBuffer[k]!=NULL)	      clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
   }
-  if (filteredImageBuffer!=NULL)	      clReleaseMemObject(filteredImageBuffer);
+  if (filteredImageBuffer!=NULL)	      clEnv->library->clReleaseMemObject(filteredImageBuffer);
   if (hullPass1!=NULL)			      RelinquishOpenCLKernel(clEnv, hullPass1);
   if (hullPass2!=NULL)			      RelinquishOpenCLKernel(clEnv, hullPass2);
   if (outputReady == MagickFalse)
@@ -4375,17 +5463,17 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
 
   filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
   assert(filteredImage != NULL);
-  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+  if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
     goto cleanup;
@@ -4409,10 +5497,10 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
@@ -4449,7 +5537,7 @@
   numRowsPerKernelLaunch = 512;
   /* create a buffer for random numbers */
   numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
-  randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, numRandomNumberPerBuffer*sizeof(float)
+  randomNumberBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, numRandomNumberPerBuffer*sizeof(float)
                                       , NULL, &clStatus);
 
 
@@ -4466,31 +5554,31 @@
   addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
 
   k = 0;
-  clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   inputColumns = inputImage->columns;
-  clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
   inputRows = inputImage->rows;
-  clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
   attenuate=1.0f;
   option=GetImageArtifact(inputImage,"attenuate");
   if (option != (char *) NULL)
     attenuate=(float)StringToDouble(option,(char **) NULL);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
 
   global_work_size[0] = inputColumns;
   for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch) 
   {
     /* Generate random numbers in the buffer */
-    randomNumberBufferPtr = (float*)clEnqueueMapBuffer(queue, randomNumberBuffer, CL_TRUE, CL_MAP_WRITE, 0
+    randomNumberBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, randomNumberBuffer, CL_TRUE, CL_MAP_WRITE, 0
       , numRandomNumberPerBuffer*sizeof(float), 0, NULL, NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
       goto cleanup;
     }
 
@@ -4504,28 +5592,28 @@
       randomNumberBufferPtr[i] = (float)GetPseudoRandomValue(random_info[id]);
     }
 
-    clStatus = clEnqueueUnmapMemObject(queue, randomNumberBuffer, randomNumberBufferPtr, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, randomNumberBuffer, randomNumberBufferPtr, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.",".");
       goto cleanup;
     }
 
     /* set the row offset */
-    clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
+    clEnv->library->clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
     global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
-    clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
+    clEnv->library->clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
   }
 
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -4540,9 +5628,9 @@
 
   if (queue!=NULL)                  RelinquishOpenCLCommandQueue(clEnv, queue);
   if (addNoiseKernel!=NULL)         RelinquishOpenCLKernel(clEnv, addNoiseKernel);
-  if (inputImageBuffer!=NULL)		    clReleaseMemObject(inputImageBuffer);
-  if (randomNumberBuffer!=NULL)     clReleaseMemObject(randomNumberBuffer);
-  if (filteredImageBuffer!=NULL)	  clReleaseMemObject(filteredImageBuffer);
+  if (inputImageBuffer!=NULL)		    clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (randomNumberBuffer!=NULL)     clEnv->library->clReleaseMemObject(randomNumberBuffer);
+  if (filteredImageBuffer!=NULL)	  clEnv->library->clReleaseMemObject(filteredImageBuffer);
   if (outputReady == MagickFalse
       && filteredImage != NULL) 
   {
@@ -4614,17 +5702,17 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  inputImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
 
   filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,MagickTrue,exception);
   assert(filteredImage != NULL);
-  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
+  if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
   {
     (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
     goto cleanup;
@@ -4648,10 +5736,10 @@
   }
   /* create a CL buffer from image pixel buffer */
   length = inputImage->columns * inputImage->rows;
-  filteredImageBuffer = clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
-    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
     goto cleanup;
   }
 
@@ -4689,25 +5777,25 @@
 
   /* create a buffer for random numbers */
   numRandomNumberPerBuffer = (inputImage->columns*numRowsPerKernelLaunch)*numRandomNumberPerPixel;
-  randomNumberBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, numRandomNumberPerBuffer*sizeof(float)
+  randomNumberBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, numRandomNumberPerBuffer*sizeof(float)
     , NULL, &clStatus);
 
   {
     /* setup the random number generators */
     unsigned long* seeds;
     numRandomNumberGenerators = 512;
-    randomNumberSeedsBuffer = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR|CL_MEM_READ_WRITE
+    randomNumberSeedsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR|CL_MEM_READ_WRITE
                                             , numRandomNumberGenerators * 4 * sizeof(unsigned long), NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clCreateBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
       goto cleanup;
     }
-    seeds = (unsigned long*) clEnqueueMapBuffer(queue, randomNumberSeedsBuffer, CL_TRUE, CL_MAP_WRITE, 0
+    seeds = (unsigned long*) clEnv->library->clEnqueueMapBuffer(queue, randomNumberSeedsBuffer, CL_TRUE, CL_MAP_WRITE, 0
                                                 , numRandomNumberGenerators*4*sizeof(unsigned long), 0, NULL, NULL, &clStatus);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueMapBuffer failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
       goto cleanup;
     }
 
@@ -4722,10 +5810,10 @@
       randomInfo = DestroyRandomInfo(randomInfo);
     }
 
-    clStatus = clEnqueueUnmapMemObject(queue, randomNumberSeedsBuffer, seeds, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, randomNumberSeedsBuffer, seeds, 0, NULL, NULL);
     if (clStatus != CL_SUCCESS)
     {
-      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueueUnmapMemObject failed.",".");
+      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.",".");
       goto cleanup;
     }
 
@@ -4733,63 +5821,63 @@
                                                         ,"randomNumberGeneratorKernel");
     
     k = 0;
-    clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberSeedsBuffer);
-    clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(float),(void *)&fNormalize);
-    clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
+    clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberSeedsBuffer);
+    clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(float),(void *)&fNormalize);
+    clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
     initRandom = 1;
-    clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&initRandom);
-    clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerBuffer);
+    clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&initRandom);
+    clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerBuffer);
 
     random_work_size = numRandomNumberGenerators;
   }
 
   addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoiseImage");
   k = 0;
-  clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&inputImageBuffer);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
   inputColumns = inputImage->columns;
-  clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputColumns);
   inputRows = inputImage->rows;
-  clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&inputRows);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
   attenuate=1.0f;
   option=GetImageArtifact(inputImage,"attenuate");
   if (option != (char *) NULL)
     attenuate=(float)StringToDouble(option,(char **) NULL);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
-  clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&randomNumberBuffer);
+  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
 
   global_work_size[0] = inputColumns;
   for (r = 0; r < inputRows; r+=numRowsPerKernelLaunch) 
   {
     size_t generator_local_size = 64;
     /* Generate random numbers in the buffer */
-    clEnqueueNDRangeKernel(queue,randomNumberGeneratorKernel,1,NULL
+    clEnv->library->clEnqueueNDRangeKernel(queue,randomNumberGeneratorKernel,1,NULL
                             ,&random_work_size,&generator_local_size,0,NULL,NULL);
     if (initRandom != 0)
     {
       /* make sure we only do init once */
       initRandom = 0;
-      clSetKernelArg(randomNumberGeneratorKernel,3,sizeof(unsigned int),(void *)&initRandom);
+      clEnv->library->clSetKernelArg(randomNumberGeneratorKernel,3,sizeof(unsigned int),(void *)&initRandom);
     }
 
     /* set the row offset */
-    clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
+    clEnv->library->clSetKernelArg(addNoiseKernel,k,sizeof(unsigned int),(void *)&r);
     global_work_size[1] = MAGICK_MIN(numRowsPerKernelLaunch, inputRows - r);
-    clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
+    clEnv->library->clEnqueueNDRangeKernel(queue,addNoiseKernel,2,NULL,global_work_size,NULL,0,NULL,NULL);
   }
 
   if (ALIGNED(filteredPixels,CLPixelPacket)) 
   {
     length = inputImage->columns * inputImage->rows;
-    clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
   }
   else 
   {
     length = inputImage->columns * inputImage->rows;
-    clStatus = clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
   }
   if (clStatus != CL_SUCCESS)
   {
@@ -4805,10 +5893,10 @@
   if (queue!=NULL)                  RelinquishOpenCLCommandQueue(clEnv, queue);
   if (addNoiseKernel!=NULL)         RelinquishOpenCLKernel(clEnv, addNoiseKernel);
   if (randomNumberGeneratorKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomNumberGeneratorKernel);
-  if (inputImageBuffer!=NULL)		    clReleaseMemObject(inputImageBuffer);
-  if (randomNumberBuffer!=NULL)     clReleaseMemObject(randomNumberBuffer);
-  if (filteredImageBuffer!=NULL)	  clReleaseMemObject(filteredImageBuffer);
-  if (randomNumberSeedsBuffer!=NULL) clReleaseMemObject(randomNumberSeedsBuffer);
+  if (inputImageBuffer!=NULL)		    clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (randomNumberBuffer!=NULL)     clEnv->library->clReleaseMemObject(randomNumberBuffer);
+  if (filteredImageBuffer!=NULL)	  clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (randomNumberSeedsBuffer!=NULL) clEnv->library->clReleaseMemObject(randomNumberSeedsBuffer);
   if (outputReady == MagickFalse
       && filteredImage != NULL) 
   {
@@ -4848,6 +5936,725 @@
   return filteredImage;
 }
 
+/*
+  LaunchRandomImageKernel() binds the arguments of the "RandomImage" kernel and
+  enqueues it on queue as a 1-D range of numGenerators work-items in groups of
+  64.  Returns MagickTrue once the launch is enqueued; on any failure an
+  exception is recorded and MagickFalse is returned.
+*/
+static MagickBooleanType LaunchRandomImageKernel(MagickCLEnv clEnv,
+  cl_command_queue queue,cl_mem inputImageBuffer,
+  const unsigned int imageColumns,const unsigned int imageRows,
+  cl_mem seedBuffer,const unsigned int numGenerators,ExceptionInfo *exception)
+{
+  const float randNormNumerator = 1.0f;
+  const unsigned int randNormDenominator = (unsigned int)(~0UL);
+  MagickBooleanType status = MagickFalse;
+  size_t global_work_size = numGenerators;
+  size_t local_work_size = 64;
+  cl_int clStatus;
+  cl_kernel randomImageKernel;
+  int k = 0;
+
+  randomImageKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RandomImage");
+  if (randomImageKernel == NULL)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  /* accumulate the status so a rejected argument is reported, not ignored */
+  clStatus=clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_mem),(void*)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_uint),(void*)&imageColumns);
+  clStatus|=clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_uint),(void*)&imageRows);
+  clStatus|=clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_mem),(void*)&seedBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(float),(void*)&randNormNumerator);
+  clStatus|=clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_uint),(void*)&randNormDenominator);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue,randomImageKernel,1,NULL,&global_work_size,&local_work_size,0,NULL,NULL);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  status = MagickTrue;
+
+cleanup:
+  if (randomImageKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomImageKernel);
+  return status;
+}
+
+/*
+  ComputeRandomImage() wraps the pixel cache of inputImage in an OpenCL buffer,
+  runs the "RandomImage" kernel on it through LaunchRandomImageKernel() and
+  reads the result back into the same host pixels.  Returns MagickTrue only
+  when that read-back succeeded.
+*/
+static MagickBooleanType ComputeRandomImage(Image* inputImage,
+                                            ExceptionInfo* exception)
+{
+  MagickBooleanType status = MagickFalse;
+  MagickBooleanType outputReady = MagickFalse;
+  /* MagickTrue once GetAndLockRandSeedBuffer() has been called; cleanup must
+     not unlock the seed buffer on the early-exit paths that precede it */
+  MagickBooleanType seedBufferLocked = MagickFalse;
+  MagickCLEnv clEnv = NULL;
+
+  cl_int clStatus;
+  void *inputPixels = NULL;
+  MagickSizeType length;
+
+  cl_mem_flags mem_flags;
+  cl_context context = NULL;
+  cl_mem inputImageBuffer = NULL;
+  cl_command_queue queue = NULL;
+
+  /* Don't release this buffer in this function !!! */
+  cl_mem randomNumberSeedsBuffer = NULL;
+
+  clEnv = GetDefaultOpenCLEnv();
+  context = GetOpenCLContext(clEnv);
+
+  /* Create and initialize OpenCL buffers. */
+  inputPixels = GetPixelCachePixels(inputImage, &length, exception);
+  if (inputPixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",inputImage->filename);
+    goto cleanup;
+  }
+
+  /* If the host pointer is aligned to the size of CLPixelPacket,
+     then use the host buffer directly from the GPU; otherwise,
+     create a buffer on the GPU and copy the data over */
+  if (ALIGNED(inputPixels,CLPixelPacket))
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
+  }
+  else
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  queue = AcquireOpenCLCommandQueue(clEnv);
+
+  randomNumberSeedsBuffer = GetAndLockRandSeedBuffer(clEnv);
+  seedBufferLocked = MagickTrue;
+  if (randomNumberSeedsBuffer==NULL)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(),
+           ResourceLimitWarning, "Failed to get GPU random number generators.",
+           "'%s'", ".");
+    goto cleanup;
+  }
+
+  status = LaunchRandomImageKernel(clEnv,queue,inputImageBuffer,
+    inputImage->columns,inputImage->rows,randomNumberSeedsBuffer,
+    GetNumRandGenerators(clEnv),exception);
+  if (status==MagickFalse)
+    goto cleanup;
+
+  /* blocking map or read so the host pixels hold the kernel output */
+  length = inputImage->columns * inputImage->rows;
+  if (ALIGNED(inputPixels,CLPixelPacket))
+    clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
+  else
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  outputReady = MagickTrue;
+
+cleanup:
+  OpenCLLogException(__FUNCTION__,__LINE__,exception);
+
+  /* release only what this call acquired */
+  if (seedBufferLocked != MagickFalse)
+    UnlockRandSeedBuffer(clEnv);
+  if (inputImageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (queue != NULL)                  RelinquishOpenCLCommandQueue(clEnv, queue);
+  return outputReady;
+}
+
+/*
+  AccelerateRandomImage() is the exported entry point for the OpenCL
+  random-image path: it returns MagickFalse as soon as checkOpenCLEnvironment()
+  or checkAccelerateCondition() fails, else the result of ComputeRandomImage().
+*/
+MagickExport MagickBooleanType AccelerateRandomImage(Image* image, ExceptionInfo* exception)
+{
+  if (checkOpenCLEnvironment(exception) == MagickFalse)
+    return MagickFalse;
+
+  if (checkAccelerateCondition(image, AllChannels) == MagickFalse)
+    return MagickFalse;
+
+  return ComputeRandomImage(image,exception);
+}
+
+static Image* ComputeMotionBlurImage(const Image *inputImage, 
+  const ChannelType channel, const double *kernel, const size_t width, 
+  const OffsetInfo *offset, ExceptionInfo *exception)
+{
+  MagickBooleanType outputReady;
+  Image* filteredImage;
+  MagickCLEnv clEnv;
+
+  cl_int clStatus;
+  size_t global_work_size[2];
+  size_t local_work_size[2];
+
+  cl_context context;
+  cl_mem_flags mem_flags;
+  cl_mem inputImageBuffer, filteredImageBuffer, imageKernelBuffer, 
+    offsetBuffer;
+  cl_kernel motionBlurKernel;
+  cl_command_queue queue;
+
+  const void *inputPixels;
+  void *filteredPixels;
+  void* hostPtr;
+  float* kernelBufferPtr;
+  int* offsetBufferPtr;
+  MagickSizeType length;
+  unsigned int matte;
+  MagickPixelPacket bias;
+  cl_float4 biasPixel;
+  unsigned int imageWidth, imageHeight;
+
+  unsigned int i;
+
+  outputReady = MagickFalse;
+  context = NULL;
+  filteredImage = NULL;
+  inputImageBuffer = NULL;
+  filteredImageBuffer = NULL;
+  imageKernelBuffer = NULL;
+  motionBlurKernel = NULL;
+  queue = NULL;
+
+
+  clEnv = GetDefaultOpenCLEnv();
+  context = GetOpenCLContext(clEnv);
+
+  /* Create and initialize OpenCL buffers. */
+
+  inputPixels = NULL;
+  inputPixels = AcquirePixelCachePixels(inputImage, &length, exception);
+  if (inputPixels == (const void *) NULL)
+  {
+    (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
+      "UnableToReadPixelCache.","`%s'",inputImage->filename);
+    goto cleanup;
+  }
+
+  // If the host pointer is aligned to the size of CLPixelPacket, 
+  // then use the host buffer directly from the GPU; otherwise, 
+  // create a buffer on the GPU and copy the data over
+  if (ALIGNED(inputPixels,CLPixelPacket)) 
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
+  }
+  else 
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
+  }
+  // create a CL buffer from image pixel buffer
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, 
+    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(),
+      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+
+  filteredImage = CloneImage(inputImage,inputImage->columns,inputImage->rows,
+    MagickTrue,exception);
+  assert(filteredImage != NULL);
+  if (SetImageStorageClass(filteredImage,DirectClass) != MagickTrue)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitError, "CloneImage failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  filteredPixels = GetPixelCachePixels(filteredImage, &length, exception);
+  if (filteredPixels == (void *) NULL)
+  {
+    (void) ThrowMagickException(exception,GetMagickModule(),CacheError, 
+      "UnableToReadPixelCache.","`%s'",filteredImage->filename);
+    goto cleanup;
+  }
+
+  if (ALIGNED(filteredPixels,CLPixelPacket)) 
+  {
+    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
+    hostPtr = filteredPixels;
+  }
+  else 
+  {
+    mem_flags = CL_MEM_WRITE_ONLY;
+    hostPtr = NULL;
+  }
+  // create a CL buffer from image pixel buffer
+  length = inputImage->columns * inputImage->rows;
+  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, 
+    length * sizeof(CLPixelPacket), hostPtr, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+
+  imageKernelBuffer = clEnv->library->clCreateBuffer(context, 
+    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
+    &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  queue = AcquireOpenCLCommandQueue(clEnv);
+  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, 
+    CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
+    goto cleanup;
+  }
+  for (i = 0; i < width; i++)
+  {
+    kernelBufferPtr[i] = (float) kernel[i];
+  }
+  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
+    0, NULL, NULL);
+ if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, 
+      "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  offsetBuffer = clEnv->library->clCreateBuffer(context, 
+    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
+    &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+  offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE, 
+    CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
+    goto cleanup;
+  }
+  for (i = 0; i < width; i++)
+  {
+    offsetBufferPtr[2*i] = (int)offset[i].x;
+    offsetBufferPtr[2*i+1] = (int)offset[i].y;
+  }
+  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0, 
+    NULL, NULL);
+ if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
+      "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+
+ // get the OpenCL kernel
+  motionBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, 
+    "MotionBlur");
+  if (motionBlurKernel == NULL)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
+      "AcquireOpenCLKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  
+  // set the kernel arguments
+  i = 0;
+  clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
+    (void *)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
+    (void *)&filteredImageBuffer);
+  imageWidth = inputImage->columns;
+  imageHeight = inputImage->rows;
+  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
+    &imageWidth);
+  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
+    &imageHeight);
+  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
+    (void *)&imageKernelBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
+    &width);
+  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
+    (void *)&offsetBuffer);
+
+  GetMagickPixelPacket(inputImage,&bias);
+  biasPixel.s[0] = bias.red;
+  biasPixel.s[1] = bias.green;
+  biasPixel.s[2] = bias.blue;
+  biasPixel.s[3] = bias.opacity;
+  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
+
+  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &channel);
+  matte = (inputImage->matte == MagickTrue)?1:0;
+  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
+      "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
+    goto cleanup;
+  }
+
+  // launch the kernel
+  local_work_size[0] = 16;
+  local_work_size[1] = 16;
+  global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
+                                inputImage->columns,local_work_size[0]);
+  global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
+                                inputImage->rows,local_work_size[1]);
+  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL, 
+    global_work_size, local_work_size, 0, NULL, NULL);
+
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
+      "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  clEnv->library->clFlush(queue);
+
+  if (ALIGNED(filteredPixels,CLPixelPacket)) 
+  {
+    length = inputImage->columns * inputImage->rows;
+    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, 
+      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, 
+      NULL, &clStatus);
+  }
+  else 
+  {
+    length = inputImage->columns * inputImage->rows;
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, 
+      length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
+  }
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
+      "Reading output image from CL buffer failed.", "'%s'", ".");
+    goto cleanup;
+  }
+  outputReady = MagickTrue;
+
+cleanup:
+
+  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
+  if (inputImageBuffer!=NULL)     clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
+  if (motionBlurKernel!=NULL)  RelinquishOpenCLKernel(clEnv, motionBlurKernel);
+  if (queue != NULL)           RelinquishOpenCLCommandQueue(clEnv, queue);
+  if (outputReady == MagickFalse)
+  {
+    if (filteredImage != NULL)
+    {
+      DestroyImage(filteredImage);
+      filteredImage = NULL;
+    }
+  }
+
+  return filteredImage;
+}
+
+/*
+  AccelerateMotionBlurImage() returns the image produced by
+  ComputeMotionBlurImage(), or NULL when one of the OpenCL pre-checks fails.
+*/
+MagickExport
+Image* AccelerateMotionBlurImage(const Image *image, const ChannelType channel,
+  const double* kernel, const size_t width, const OffsetInfo *offset, 
+  ExceptionInfo *exception)
+{
+  /* Required arguments must be present. */
+  assert(image != NULL);
+  assert(kernel != (double *) NULL);
+  assert(offset != (OffsetInfo *) NULL);
+  assert(exception != (ExceptionInfo *) NULL);
+
+  /* Give up unless the OpenCL environment check passes ... */
+  if (checkOpenCLEnvironment(exception) == MagickFalse)
+    return NULL;
+
+  /* ... and the image/channel pair passes the accelerate check. */
+  if (checkAccelerateCondition(image, channel) == MagickFalse)
+    return NULL;
+
+  /* ComputeMotionBlurImage() itself yields NULL if it does not complete. */
+  return ComputeMotionBlurImage(image, channel, kernel, width, offset,
+    exception);
+}
+
+/* Set up and enqueue the "Composite" OpenCL kernel; MagickTrue if it was enqueued. */
+static MagickBooleanType LaunchCompositeKernel(MagickCLEnv clEnv,
+    cl_command_queue queue,
+  cl_mem inputImageBuffer, 
+  const unsigned int inputWidth, const unsigned int inputHeight,
+  const unsigned int matte,
+  const ChannelType channel,const CompositeOperator compose,
+  const cl_mem compositeImageBuffer,
+  const unsigned int compositeWidth, 
+  const unsigned int compositeHeight,
+  const float destination_dissolve,const float source_dissolve,
+  ExceptionInfo *magick_unused(exception))
+{
+  size_t global_work_size[2];
+  size_t local_work_size[2];
+  unsigned int composeOp;
+  int k;
+  cl_int clStatus;
+  cl_kernel compositeKernel = NULL;
+
+  magick_unreferenced(exception);
+  /* A NULL kernel means the acquire failed: there is nothing to launch or release. */
+  compositeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE,
+    "Composite");
+  if (compositeKernel == NULL)
+    return MagickFalse;
+
+  /* Bind the kernel arguments; k is the running argument index. */
+  k = 0;
+  clStatus=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&inputImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputWidth);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputHeight);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&compositeImageBuffer);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeWidth);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeHeight);
+  composeOp = (unsigned int)compose;
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&composeOp);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(ChannelType),(void*)&channel);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&matte);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&destination_dissolve);
+  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&source_dissolve);
+
+  if (clStatus==CL_SUCCESS)
+  {
+    /* 64x1 work-groups; global size is the helper-adjusted width by inputHeight. */
+    local_work_size[0] = 64;
+    local_work_size[1] = 1;
+    global_work_size[0] = padGlobalWorkgroupSizeToLocalWorkgroupSize(inputWidth,
+      local_work_size[0]);
+    global_work_size[1] = inputHeight;
+    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, compositeKernel, 2, NULL, 
+      global_work_size, local_work_size, 0, NULL, NULL);
+  }
+  /* Release the kernel on every path, including a failed clSetKernelArg. */
+  RelinquishOpenCLKernel(clEnv, compositeKernel);
+  return (clStatus==CL_SUCCESS)?MagickTrue:MagickFalse;
+}
+
+/* Run the OpenCL composite; the result is read back into inputImage's pixel cache. */
+static MagickBooleanType ComputeCompositeImage(Image *inputImage,
+  const ChannelType channel,const CompositeOperator compose,
+  const Image *compositeImage,const ssize_t magick_unused(x_offset),const ssize_t magick_unused(y_offset),
+  const float destination_dissolve,const float source_dissolve,
+  ExceptionInfo *exception)
+{
+  MagickBooleanType status = MagickFalse;
+  /* outputReady is the return value: set only after the result is read back. */
+  MagickBooleanType outputReady = MagickFalse;
+  MagickCLEnv clEnv = NULL;
+
+  cl_int clStatus;
+  
+  void *inputPixels = NULL;
+  const void *composePixels = NULL;
+  MagickSizeType length;
+
+  cl_mem_flags mem_flags;
+  cl_context context = NULL;
+  cl_mem inputImageBuffer = NULL;
+  cl_mem compositeImageBuffer = NULL;
+  cl_command_queue queue = NULL;
+  /* Offsets are unused: AccelerateCompositeImage rejects non-zero offsets before calling. */
+  magick_unreferenced(x_offset);
+  magick_unreferenced(y_offset);
+
+  clEnv = GetDefaultOpenCLEnv();
+  context = GetOpenCLContext(clEnv);
+  queue = AcquireOpenCLCommandQueue(clEnv);
+
+  /* Destination image: fetch its pixel cache and wrap it in a read-write CL buffer. */
+  inputPixels = GetPixelCachePixels(inputImage, &length, exception);
+  if (inputPixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
+      "UnableToReadPixelCache.","`%s'",inputImage->filename);
+    goto cleanup;
+  }
+
+  /* If the host pointer is aligned to the size of CLPixelPacket, 
+     then use the host buffer directly from the GPU; otherwise, 
+     create a buffer on the GPU and copy the data over */
+  if (ALIGNED(inputPixels,CLPixelPacket)) 
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
+  }
+  else 
+  {
+    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = inputImage->columns * inputImage->rows;
+  inputImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, 
+    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+
+
+  /* Source (composite) image: same procedure, but its CL buffer is read-only. */
+  composePixels = AcquirePixelCachePixels(compositeImage, &length, exception); 
+  if (composePixels == (void *) NULL)
+  {
+    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
+      "UnableToReadPixelCache.","`%s'",compositeImage->filename);
+    goto cleanup;
+  }
+
+  /* If the host pointer is aligned to the size of CLPixelPacket, 
+     then use the host buffer directly from the GPU; otherwise, 
+     create a buffer on the GPU and copy the data over */
+  if (ALIGNED(composePixels,CLPixelPacket)) 
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
+  }
+  else 
+  {
+    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
+  }
+  /* create a CL buffer from image pixel buffer */
+  length = compositeImage->columns * compositeImage->rows;
+  compositeImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, 
+    length * sizeof(CLPixelPacket), (void*)composePixels, &clStatus);
+  if (clStatus != CL_SUCCESS)
+  {
+    (void) OpenCLThrowMagickException(exception, GetMagickModule(), 
+      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
+    goto cleanup;
+  }
+  /* Enqueue the kernel; image dimensions and matte are narrowed to unsigned int. */
+  status = LaunchCompositeKernel(clEnv,queue,inputImageBuffer,
+           (unsigned int) inputImage->columns,
+           (unsigned int) inputImage->rows,
+           (unsigned int) inputImage->matte,
+           channel, compose, compositeImageBuffer,
+           (unsigned int) compositeImage->columns,
+           (unsigned int) compositeImage->rows,
+           destination_dissolve,source_dissolve,
+           exception);
+
+  if (status==MagickFalse)
+    goto cleanup;
+  /* Blocking read-back. NOTE(review): the mapped pointer is discarded and never unmapped here. */
+  length = inputImage->columns * inputImage->rows;
+  if (ALIGNED(inputPixels,CLPixelPacket)) 
+  {
+    clEnv->library->clEnqueueMapBuffer(queue, inputImageBuffer, CL_TRUE, 
+      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, 
+      NULL, &clStatus);
+  }
+  else
+  {
+    clStatus = clEnv->library->clEnqueueReadBuffer(queue, inputImageBuffer, CL_TRUE, 0, 
+      length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
+  }
+  if (clStatus==CL_SUCCESS)
+    outputReady = MagickTrue;
+
+cleanup:
+  if (inputImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(inputImageBuffer);
+  if (compositeImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(compositeImageBuffer);
+  if (queue != NULL)               RelinquishOpenCLCommandQueue(clEnv, queue);
+
+  return outputReady;
+}
+
+/*
+  AccelerateCompositeImage() is the OpenCL front end for compositing
+  `composite` with `image`.  It returns MagickFalse, without calling
+  ComputeCompositeImage(), when any of the following holds:
+
+    o checkOpenCLEnvironment() or checkAccelerateCondition() fails,
+    o x_offset or y_offset is non-zero,
+    o the two images differ in columns or rows,
+    o compose is neither ColorDodgeCompositeOp nor BlendCompositeOp.
+
+  Otherwise the result of ComputeCompositeImage() is returned.
+*/
+MagickExport
+MagickBooleanType AccelerateCompositeImage(Image *image,
+  const ChannelType channel,const CompositeOperator compose,
+  const Image *composite,const ssize_t x_offset,const ssize_t y_offset,
+  const float destination_dissolve,const float source_dissolve,
+  ExceptionInfo *exception)
+{
+  assert(image != NULL);
+  assert(composite != NULL);
+  assert(exception != (ExceptionInfo *) NULL);
+
+  /* The OpenCL environment and the image/channel pair must both pass. */
+  if (checkOpenCLEnvironment(exception) == MagickFalse)
+    return MagickFalse;
+  if (checkAccelerateCondition(image, channel) == MagickFalse)
+    return MagickFalse;
+
+  /* Only zero offsets and images of the same size are supported for now. */
+  if ((x_offset != 0) || (y_offset != 0))
+    return MagickFalse;
+  if ((image->columns != composite->columns) ||
+      (image->rows != composite->rows))
+    return MagickFalse;
+
+  /* Any compose operator other than these two is unsupported. */
+  if ((compose != ColorDodgeCompositeOp) && (compose != BlendCompositeOp))
+    return MagickFalse;
+
+  /* All pre-checks passed: hand over to the OpenCL implementation. */
+  return ComputeCompositeImage(image,channel,compose,composite,
+    x_offset,y_offset,destination_dissolve,source_dissolve,exception);
+}
+
+
 
 #else  /* MAGICKCORE_OPENCL_SUPPORT  */
 
@@ -4936,6 +6743,19 @@
   return MagickFalse;
 }
 
+MagickExport MagickBooleanType AccelerateContrastStretchImageChannel(
+    Image * image, const ChannelType channel, const double black_point, const double white_point, 
+    ExceptionInfo* magick_unused(exception))
+{
+  magick_unreferenced(image);
+  magick_unreferenced(channel);
+  magick_unreferenced(black_point);
+  magick_unreferenced(white_point);
+  magick_unreferenced(exception);
+  /* Stub for builds without MAGICKCORE_OPENCL_SUPPORT: nothing is accelerated. */
+  return MagickFalse;
+}
+
 MagickExport MagickBooleanType AccelerateEqualizeImage(
   Image* magick_unused(image), const ChannelType magick_unused(channel),
   ExceptionInfo* magick_unused(exception))
@@ -4971,7 +6791,6 @@
   return NULL;
 }
 
-
 MagickExport
 MagickBooleanType AccelerateModulateImage(
   Image* image, double percent_brightness, double percent_hue, 
@@ -4986,6 +6805,27 @@
   return(MagickFalse);
 }
 
+MagickExport
+MagickBooleanType AccelerateNegateImageChannel(
+  Image* image, const ChannelType channel, const MagickBooleanType grayscale, ExceptionInfo* exception)
+{
+  magick_unreferenced(image);
+  magick_unreferenced(channel);
+  magick_unreferenced(grayscale);
+  magick_unreferenced(exception);
+  return(MagickFalse);
+}
+
+MagickExport
+MagickBooleanType AccelerateGrayscaleImage(
+  Image* image, const PixelIntensityMethod method, ExceptionInfo* exception)
+{
+  magick_unreferenced(image);
+  magick_unreferenced(method);
+  magick_unreferenced(exception);
+  return(MagickFalse);
+}
+
 MagickExport Image *AccelerateAddNoiseImage(const Image *image, 
   const ChannelType channel, const NoiseType noise_type,ExceptionInfo *exception) 
 {
@@ -4996,6 +6836,29 @@
   return NULL;
 }
 
+
+MagickExport MagickBooleanType AccelerateRandomImage(Image* image, ExceptionInfo* exception)
+{
+  magick_unreferenced(image);
+  magick_unreferenced(exception);
+  return MagickFalse;
+}
+
+MagickExport
+Image* AccelerateMotionBlurImage(const Image *image, const ChannelType channel,
+                                const double* kernel, const size_t width,
+                                const OffsetInfo *offset, 
+                                ExceptionInfo *exception)
+{
+  magick_unreferenced(image);
+  magick_unreferenced(channel);
+  magick_unreferenced(kernel);
+  magick_unreferenced(width);
+  magick_unreferenced(offset);
+  magick_unreferenced(exception);
+  return NULL;
+}
+
 #endif /* MAGICKCORE_OPENCL_SUPPORT */
 
 MagickExport MagickBooleanType AccelerateConvolveImage(
diff --git a/MagickCore/accelerate.h b/MagickCore/accelerate.h
index 1030e2a..24e072c 100644
--- a/MagickCore/accelerate.h
+++ b/MagickCore/accelerate.h
@@ -29,14 +29,23 @@
 #endif
 
 extern MagickExport MagickBooleanType
+  AccelerateCompositeImage(Image *,const ChannelType,const CompositeOperator,
+    const Image *,const ssize_t,const ssize_t,const float,const float,ExceptionInfo *),
   AccelerateContrastImage(Image *,const MagickBooleanType,ExceptionInfo *),
   AccelerateConvolveImage(const Image *,const KernelInfo *,Image *,
     ExceptionInfo *),
+  AccelerateContrastStretchImageChannel(Image *, const ChannelType, const double, const double, 
+    ExceptionInfo*),
   AccelerateEqualizeImage(Image *,const ChannelType,ExceptionInfo *),
   AccelerateFunctionImage(Image *,const ChannelType,const MagickFunction,
     const size_t,const double *,ExceptionInfo *),
+  AccelerateGrayscaleImage(Image*, const PixelIntensityMethod,
+    ExceptionInfo *),
   AccelerateModulateImage(Image*, double, double, double, 
-    ColorspaceType, ExceptionInfo*);
+    ColorspaceType, ExceptionInfo*),
+  AccelerateNegateImageChannel(Image*, const ChannelType, const MagickBooleanType,
+    ExceptionInfo *),
+  AccelerateRandomImage(Image*, ExceptionInfo*);
 
 extern MagickExport Image
   *AccelerateAddNoiseImage(const Image*,const ChannelType,const NoiseType,
@@ -46,6 +55,8 @@
   *AccelerateConvolveImageChannel(const Image *,const ChannelType,
     const KernelInfo *,ExceptionInfo *),
   *AccelerateDespeckleImage(const Image *,ExceptionInfo *),
+  *AccelerateMotionBlurImage(const Image*, const ChannelType,
+    const double*,const size_t,const OffsetInfo*,ExceptionInfo*),
   *AccelerateRadialBlurImage(const Image *,const ChannelType,const double,
     ExceptionInfo *),
   *AccelerateResizeImage(const Image *,const size_t,const size_t,
diff --git a/MagickCore/opencl-private.h b/MagickCore/opencl-private.h
index 657166f..2e69e3c 100644
--- a/MagickCore/opencl-private.h
+++ b/MagickCore/opencl-private.h
@@ -1,23 +1,26 @@
 /*
-  Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization
-  dedicated to making software imaging solutions freely available.
+Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization
+dedicated to making software imaging solutions freely available.
 
-  You may not use this file except in compliance with the License.
-  obtain a copy of the License at
+You may not use this file except in compliance with the License.
+obtain a copy of the License at
 
-  http://www.imagemagick.org/script/license.php
+http://www.imagemagick.org/script/license.php
 
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
 
-  MagickCore OpenCL private methods.
+MagickCore OpenCL private methods.
 */
 #ifndef _MAGICKCORE_OPENCL_PRIVATE_H
 #define _MAGICKCORE_OPENCL_PRIVATE_H
 
+/*
+Include declarations.
+*/
 #include "MagickCore/studio.h"
 #include "MagickCore/opencl.h"
 
@@ -31,7 +34,258 @@
   typedef void* cl_context;
   typedef void* cl_command_queue;
   typedef void* cl_kernel;
+  typedef void* cl_mem;
   typedef struct { unsigned char t[8]; } cl_device_type; /* 64-bit */
+#else
+/*
+ *
+ * function pointer typedefs
+ *
+ */
+
+/* Platform APIs */
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetPlatformIDs)(
+                 cl_uint          num_entries,
+                 cl_platform_id * platforms,
+                 cl_uint *        num_platforms) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetPlatformInfo)(
+    cl_platform_id   platform, 
+    cl_platform_info param_name,
+    size_t           param_value_size, 
+    void *           param_value,
+    size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
+
+/* Device APIs */
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetDeviceIDs)(
+    cl_platform_id   platform,
+    cl_device_type   device_type, 
+    cl_uint          num_entries, 
+    cl_device_id *   devices, 
+    cl_uint *        num_devices) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetDeviceInfo)(
+    cl_device_id    device,
+    cl_device_info  param_name, 
+    size_t          param_value_size, 
+    void *          param_value,
+    size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
+
+/* Context APIs */
+typedef CL_API_ENTRY cl_context (CL_API_CALL *MAGICKpfn_clCreateContext)(
+    const cl_context_properties * properties,
+    cl_uint                 num_devices,
+    const cl_device_id *    devices,
+    void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
+    void *                  user_data,
+    cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clReleaseContext)(
+    cl_context context) CL_API_SUFFIX__VERSION_1_0;
+
+/* Command Queue APIs */
+typedef CL_API_ENTRY cl_command_queue (CL_API_CALL *MAGICKpfn_clCreateCommandQueue)(
+    cl_context                     context, 
+    cl_device_id                   device, 
+    cl_command_queue_properties    properties,
+    cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clReleaseCommandQueue)(
+    cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
+
+/* Memory Object APIs */
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *MAGICKpfn_clCreateBuffer)(
+    cl_context   context,
+    cl_mem_flags flags,
+    size_t       size,
+    void *       host_ptr,
+    cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clReleaseMemObject)(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0;
+
+/* Program Object APIs */
+typedef CL_API_ENTRY cl_program (CL_API_CALL *MAGICKpfn_clCreateProgramWithSource)(
+    cl_context        context,
+    cl_uint           count,
+    const char **     strings,
+    const size_t *    lengths,
+    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_program (CL_API_CALL *MAGICKpfn_clCreateProgramWithBinary)(
+    cl_context                     context,
+    cl_uint                        num_devices,
+    const cl_device_id *           device_list,
+    const size_t *                 lengths,
+    const unsigned char **         binaries,
+    cl_int *                       binary_status,
+    cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clReleaseProgram)(cl_program program) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clBuildProgram)(
+    cl_program           program,
+    cl_uint              num_devices,
+    const cl_device_id * device_list,
+    const char *         options, 
+    void (CL_CALLBACK *pfn_notify)(cl_program program, void * user_data),
+    void *               user_data) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetProgramInfo)(
+    cl_program         program,
+    cl_program_info    param_name,
+    size_t             param_value_size,
+    void *             param_value,
+    size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clGetProgramBuildInfo)(
+    cl_program            program,
+    cl_device_id          device,
+    cl_program_build_info param_name,
+    size_t                param_value_size,
+    void *                param_value,
+    size_t *              param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
+                            
+/* Kernel Object APIs */
+typedef CL_API_ENTRY cl_kernel (CL_API_CALL *MAGICKpfn_clCreateKernel)(
+    cl_program      program,
+    const char *    kernel_name,
+    cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clReleaseKernel)(cl_kernel   kernel) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clSetKernelArg)(
+    cl_kernel    kernel,
+    cl_uint      arg_index,
+    size_t       arg_size,
+    const void * arg_value) CL_API_SUFFIX__VERSION_1_0;
+
+/* Flush and Finish APIs */
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clFlush)(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clFinish)(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
+
+/* Enqueued Commands APIs */
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clEnqueueReadBuffer)(
+    cl_command_queue    command_queue,
+    cl_mem              buffer,
+    cl_bool             blocking_read,
+    size_t              offset,
+    size_t              cb, 
+    void *              ptr,
+    cl_uint             num_events_in_wait_list,
+    const cl_event *    event_wait_list,
+    cl_event *          event) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clEnqueueWriteBuffer)(
+    cl_command_queue   command_queue, 
+    cl_mem             buffer, 
+    cl_bool            blocking_write, 
+    size_t             offset, 
+    size_t             cb, 
+    const void *       ptr, 
+    cl_uint            num_events_in_wait_list, 
+    const cl_event *   event_wait_list, 
+    cl_event *         event) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY void * (CL_API_CALL *MAGICKpfn_clEnqueueMapBuffer)(
+    cl_command_queue command_queue,
+    cl_mem           buffer,
+    cl_bool          blocking_map, 
+    cl_map_flags     map_flags,
+    size_t           offset,
+    size_t           cb,
+    cl_uint          num_events_in_wait_list,
+    const cl_event * event_wait_list,
+    cl_event *       event,
+    cl_int *         errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clEnqueueUnmapMemObject)(
+    cl_command_queue command_queue,
+    cl_mem           memobj,
+    void *           mapped_ptr,
+    cl_uint          num_events_in_wait_list,
+    const cl_event *  event_wait_list,
+    cl_event *        event) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *MAGICKpfn_clEnqueueNDRangeKernel)(
+    cl_command_queue command_queue,
+    cl_kernel        kernel,
+    cl_uint          work_dim,
+    const size_t *   global_work_offset,
+    const size_t *   global_work_size,
+    const size_t *   local_work_size,
+    cl_uint          num_events_in_wait_list,
+    const cl_event * event_wait_list,
+    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
+
+/*
+ *
+ * OpenCL library dispatch table
+ *
+ * each function-pointer member of MagickLibraryRec carries the name of an
+ * OpenCL API function and is typed with the matching MAGICKpfn_* typedef
+ *
+ */
+
+typedef struct MagickLibraryRec MagickLibrary;
+
+struct MagickLibraryRec
+{
+  void * base;
+
+  MAGICKpfn_clGetPlatformIDs                         clGetPlatformIDs;
+  MAGICKpfn_clGetPlatformInfo                        clGetPlatformInfo;
+  MAGICKpfn_clGetDeviceIDs                           clGetDeviceIDs;
+  MAGICKpfn_clGetDeviceInfo                          clGetDeviceInfo;
+  MAGICKpfn_clCreateContext                          clCreateContext;
+  MAGICKpfn_clCreateCommandQueue                     clCreateCommandQueue;
+  MAGICKpfn_clReleaseCommandQueue                    clReleaseCommandQueue;
+  MAGICKpfn_clCreateBuffer                           clCreateBuffer;
+  MAGICKpfn_clReleaseMemObject                       clReleaseMemObject;
+  MAGICKpfn_clCreateProgramWithSource                clCreateProgramWithSource;
+  MAGICKpfn_clCreateProgramWithBinary                clCreateProgramWithBinary;
+  MAGICKpfn_clReleaseProgram                         clReleaseProgram;
+  MAGICKpfn_clBuildProgram                           clBuildProgram;
+  MAGICKpfn_clGetProgramInfo                         clGetProgramInfo;
+  MAGICKpfn_clGetProgramBuildInfo                    clGetProgramBuildInfo;
+  MAGICKpfn_clCreateKernel                           clCreateKernel;
+  MAGICKpfn_clReleaseKernel                          clReleaseKernel;
+  MAGICKpfn_clSetKernelArg                           clSetKernelArg;
+  MAGICKpfn_clFlush                                  clFlush;
+  MAGICKpfn_clFinish                                 clFinish;
+  MAGICKpfn_clEnqueueReadBuffer                      clEnqueueReadBuffer;
+  MAGICKpfn_clEnqueueWriteBuffer                     clEnqueueWriteBuffer;
+  MAGICKpfn_clEnqueueMapBuffer                       clEnqueueMapBuffer;
+  MAGICKpfn_clEnqueueUnmapMemObject                  clEnqueueUnmapMemObject;
+  MAGICKpfn_clEnqueueNDRangeKernel                   clEnqueueNDRangeKernel;
+};
+
+struct _MagickCLEnv {
+  MagickBooleanType OpenCLInitialized;  /* whether the OpenCL environment is initialized. */
+  MagickBooleanType OpenCLDisabled;	/* whether OpenCL has been explicitly disabled. */
+
+  MagickLibrary * library;  /* table of OpenCL API function pointers (MagickLibraryRec) */
+
+  /* OpenCL objects */
+  cl_platform_id platform;
+  cl_device_type deviceType;
+  cl_device_id device;
+  cl_context context;
+
+  MagickBooleanType disableProgramCache; /* disable the OpenCL program cache */
+  cl_program programs[MAGICK_OPENCL_NUM_PROGRAMS]; /* one program object maps one kernel source file */
+
+  MagickBooleanType regenerateProfile;   /* re-run the microbenchmark in auto device selection mode */ 
+
+  /* Random number generator seeds */
+  unsigned int numGenerators;
+  float randNormalize;
+  cl_mem seeds;
+  SemaphoreInfo* seedsLock;  /* presumably guards seeds (cf. GetAndLockRandSeedBuffer) - confirm */
+
+  SemaphoreInfo* lock;
+};
+
 #endif
 
 #if defined(MAGICKCORE_HDRI_SUPPORT)
@@ -43,8 +297,8 @@
 #define CLCharQuantumScale 1.0f
 #elif (MAGICKCORE_QUANTUM_DEPTH == 8)
 #define CLOptions "-cl-single-precision-constant -cl-mad-enable " \
-  "-DCLQuantum=uchar -DCLSignedQuantum=char -DCLPixelType=uchar4 -DQuantumRange=%f " \
-  "-DQuantumScale=%f -DCharQuantumScale=%f -DMagickEpsilon=%f -DMagickPI=%f "\
+  "-DCLQuantum=uchar -DCLSignedQuantum=char -DCLPixelType=uchar4 -DQuantumRange=%ff " \
+  "-DQuantumScale=%ff -DCharQuantumScale=%ff -DMagickEpsilon=%ff -DMagickPI=%ff "\
   "-DMaxMap=%u -DMAGICKCORE_QUANTUM_DEPTH=%u"
 #define CLPixelPacket  cl_uchar4
 #define CLCharQuantumScale 1.0f
@@ -81,6 +335,9 @@
   AcquireOpenCLCommandQueue(MagickCLEnv);
 
 extern MagickPrivate MagickBooleanType 
+  OpenCLThrowMagickException(ExceptionInfo *,
+    const char *,const char *,const size_t,
+    const ExceptionType,const char *,const char *,...),
   RelinquishOpenCLCommandQueue(MagickCLEnv, cl_command_queue),
   RelinquishOpenCLKernel(MagickCLEnv, cl_kernel);
 
@@ -91,9 +348,32 @@
 extern MagickPrivate const char* 
   GetOpenCLCachedFilesDirectory();
 
-extern MagickPrivate void 
+extern MagickPrivate void
+  UnlockRandSeedBuffer(MagickCLEnv),
   OpenCLLog(const char*);
 
+extern MagickPrivate cl_mem 
+  GetAndLockRandSeedBuffer(MagickCLEnv);
+
+extern MagickPrivate unsigned int 
+  GetNumRandGenerators(MagickCLEnv);
+
+extern MagickPrivate float 
+  GetRandNormalize(MagickCLEnv clEnv);
+
+typedef struct _AccelerateTimer {
+  long long _freq;	
+  long long _clocks;
+  long long _start;
+} AccelerateTimer;
+
+
+void startAccelerateTimer(AccelerateTimer* timer);
+void stopAccelerateTimer(AccelerateTimer* timer);
+void resetAccelerateTimer(AccelerateTimer* timer);
+void initAccelerateTimer(AccelerateTimer* timer);
+double readAccelerateTimer(AccelerateTimer* timer);
+
 /* #define OPENCLLOG_ENABLED 1 */
 static inline void OpenCLLogException(const char* function, 
                         const unsigned int line, 
@@ -102,8 +382,8 @@
   if (exception->severity!=0) {
     char message[MaxTextExtent];
     /*  dump the source into a file */
-    (void) FormatLocaleString(message,MaxTextExtent,"%s:%d Exception(%d)"
-      ,function,line,exception->severity);
+    (void) FormatLocaleString(message,MaxTextExtent,"%s:%d Exception(%d):%s "
+        ,function,line,exception->severity,exception->reason);
     OpenCLLog(message);
   }
 #else
@@ -113,6 +393,7 @@
 #endif
 }
 
+
 #if defined(__cplusplus) || defined(c_plusplus)
 }
 #endif
diff --git a/MagickCore/opencl.c b/MagickCore/opencl.c
index 000f44c..98c9888 100644
--- a/MagickCore/opencl.c
+++ b/MagickCore/opencl.c
@@ -71,6 +71,8 @@
 #include "MagickCore/property.h"
 #include "MagickCore/quantize.h"
 #include "MagickCore/quantum.h"
+#include "MagickCore/random_.h"
+#include "MagickCore/random-private.h"
 #include "MagickCore/resample.h"
 #include "MagickCore/resource_.h"
 #include "MagickCore/splay-tree.h"
@@ -87,22 +89,60 @@
 
 #if defined(MAGICKCORE_OPENCL_SUPPORT)
 
-struct _MagickCLEnv {
-  MagickBooleanType OpenCLInitialized;  /* whether OpenCL environment is initialized. */
-  MagickBooleanType OpenCLDisabled;	/* whether if OpenCL has been explicitely disabled. */
+#ifdef MAGICKCORE_HAVE_OPENCL_CL_H
+#define MAGICKCORE_OPENCL_MACOSX  1
+#endif
 
-  /*OpenCL objects */
-  cl_platform_id platform;
-  cl_device_type deviceType;
-  cl_device_id device;
-  cl_context context;
 
-  MagickBooleanType disableProgramCache; /* disable the OpenCL program cache */
-  cl_program programs[MAGICK_OPENCL_NUM_PROGRAMS]; /* one program object maps one kernel source file */
+#define NUM_CL_RAND_GENERATORS 1024  /* number of random number generators running in parallel */ 
 
-  MagickBooleanType regenerateProfile;   /* re-run the microbenchmark in auto device selection mode */ 
-  SemaphoreInfo* lock;
-};
+/*
+ * 
+ * Dynamic library loading functions
+ *
+ */
+#ifdef MAGICKCORE_WINDOWS_SUPPORT
+#else
+#include <dlfcn.h>
+#endif
+
+/* Dynamically load the named library (LoadLibraryA on Windows, dlopen with RTLD_NOW elsewhere); returns NULL on failure. */
+void *OsLibraryLoad(const char *libraryName)
+{
+#ifdef MAGICKCORE_WINDOWS_SUPPORT
+    return (void *)LoadLibraryA(libraryName);
+#else 
+    return (void *)dlopen(libraryName, RTLD_NOW);
+#endif
+}
+
+/*
+ * Look up functionName in a library handle returned by OsLibraryLoad.
+ * Uses GetProcAddress on Windows and dlsym elsewhere.
+ * Returns NULL when either argument is NULL or the lookup fails.
+ */
+void *OsLibraryGetFunctionAddress(void *library, const char *functionName)
+{
+    /* The argument check is the same on every platform, so do it once. */
+    if ((library == NULL) || (functionName == NULL))
+        return NULL;
+
+#ifdef MAGICKCORE_WINDOWS_SUPPORT
+    return (void *) GetProcAddress((HMODULE) library, functionName);
+#else
+    return (void *) dlsym(library, functionName);
+#endif
+}
+
+/* Unload a library obtained from OsLibraryLoad; a NULL handle is ignored instead of reaching the OS call. */
+void OsLibraryUnload(void *library)
+{
+#ifdef MAGICKCORE_WINDOWS_SUPPORT
+    if (library) FreeLibrary( (HMODULE)library);
+#else
+    if (library) dlclose(library);
+#endif
+}
 
 
 /*
@@ -160,7 +200,7 @@
 {
   if (clEnv != (MagickCLEnv)NULL)
   {
-    RelinquishSemaphoreInfo(clEnv->lock);
+    DestroySemaphoreInfo(&clEnv->lock);
     RelinquishMagickMemory(clEnv);
     return MagickTrue;
   }
@@ -174,6 +214,103 @@
 MagickCLEnv defaultCLEnv;
 SemaphoreInfo* defaultCLEnvLock;
 
+/*
+* OpenCL library
+*/
+MagickLibrary * OpenCLLib;
+SemaphoreInfo* OpenCLLibLock;
+
+/* Fill the global OpenCLLib table: direct symbol addresses under MAGICKCORE_OPENCL_MACOSX, otherwise looked up in 'library'; MagickFalse if a lookup fails. */
+static MagickBooleanType bindOpenCLFunctions(void* library)
+{
+#ifdef MAGICKCORE_OPENCL_MACOSX
+#define BIND(X) OpenCLLib->X= &X;
+#else
+#define BIND(X)\
+  if ((OpenCLLib->X=(MAGICKpfn_##X)OsLibraryGetFunctionAddress(library,#X)) == NULL)\
+  return MagickFalse;
+#endif
+  /* Each BIND below stores one entry point in the OpenCLLib member of the same name. */
+  BIND(clGetPlatformIDs);
+  BIND(clGetPlatformInfo);
+
+  BIND(clGetDeviceIDs);
+  BIND(clGetDeviceInfo);
+
+  BIND(clCreateContext);
+
+  BIND(clCreateBuffer);
+  BIND(clReleaseMemObject);
+
+  BIND(clCreateProgramWithSource);
+  BIND(clCreateProgramWithBinary);
+  BIND(clBuildProgram);
+  BIND(clGetProgramInfo);
+  BIND(clGetProgramBuildInfo);
+
+  BIND(clCreateKernel);
+  BIND(clReleaseKernel);
+  BIND(clSetKernelArg);
+
+  BIND(clFlush);
+  BIND(clFinish);
+
+  BIND(clEnqueueNDRangeKernel);
+  BIND(clEnqueueReadBuffer);
+  BIND(clEnqueueMapBuffer);
+  BIND(clEnqueueUnmapMemObject);
+
+  BIND(clCreateCommandQueue);
+  BIND(clReleaseCommandQueue);
+
+  return MagickTrue;
+}
+
+/* Return the shared OpenCL function table, creating it on first use; NULL if it cannot be allocated, loaded or bound. */
+MagickLibrary * GetOpenCLLib()
+{
+  MagickLibrary *result;
+
+  if (OpenCLLibLock == NULL)
+    ActivateSemaphoreInfo(&OpenCLLibLock);
+  /* Test and build under the lock so no caller sees a half-bound table or allocates a second one. */
+  LockSemaphoreInfo(OpenCLLibLock);
+  if (OpenCLLib == NULL)
+  {
+    OpenCLLib = (MagickLibrary *) AcquireMagickMemory (sizeof (MagickLibrary));
+    if (OpenCLLib != NULL)
+    {
+      MagickBooleanType status = MagickFalse;
+      void * library = NULL;
+
+      /* Zero the table on every platform so 'base' is never left uninitialized. */
+      memset(OpenCLLib, 0, sizeof(MagickLibrary));
+#ifdef MAGICKCORE_OPENCL_MACOSX
+      status = bindOpenCLFunctions(library);
+#else
+#ifdef MAGICKCORE_WINDOWS_SUPPORT
+      library = OsLibraryLoad("OpenCL.dll");
+#else
+      library = OsLibraryLoad("libOpenCL.so");
+#endif
+      if (library)
+        status = bindOpenCLFunctions(library);
+#endif
+      if (status==MagickTrue)
+        OpenCLLib->base=library;
+      else
+      {
+        /* Binding failed: release the library handle together with the table. */
+        if (library) OsLibraryUnload(library);
+        OpenCLLib=(MagickLibrary *)RelinquishMagickMemory(OpenCLLib);
+      }
+    }
+  }
+  result = OpenCLLib;
+  UnlockSemaphoreInfo(OpenCLLibLock);
+  return result;
+}
+
 
 /*
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -498,7 +635,7 @@
   char path[MaxTextExtent];
   char deviceName[MaxTextExtent];
   const char* prefix = "magick_opencl";
-  clGetDeviceInfo(clEnv->device, CL_DEVICE_NAME, MaxTextExtent, deviceName, NULL);
+  clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_NAME, MaxTextExtent, deviceName, NULL);
   ptr=deviceName;
   /* strip out illegal characters for file names */
   while (*ptr != '\0')
@@ -536,7 +673,7 @@
   fileHandle = NULL;
   saveSuccessful = MagickFalse;
 
-  clStatus = clGetProgramInfo(clEnv->programs[prog], CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binaryProgramSize, NULL);
+  clStatus = clEnv->library->clGetProgramInfo(clEnv->programs[prog], CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binaryProgramSize, NULL);
   if (clStatus != CL_SUCCESS)
   {
     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clGetProgramInfo failed.", "'%s'", ".");
@@ -544,7 +681,7 @@
   }
 
   binaryProgram = (unsigned char*) AcquireMagickMemory(binaryProgramSize);
-  clStatus = clGetProgramInfo(clEnv->programs[prog], CL_PROGRAM_BINARIES, sizeof(char*), &binaryProgram, NULL);
+  clStatus = clEnv->library->clGetProgramInfo(clEnv->programs[prog], CL_PROGRAM_BINARIES, sizeof(char*), &binaryProgram, NULL);
   if (clStatus != CL_SUCCESS)
   {
     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "clGetProgramInfo failed.", "'%s'", ".");
@@ -619,7 +756,7 @@
     memset(binaryProgram, 0, length);
     b_error |= fread(binaryProgram, 1, length, fileHandle) != length;
 
-    clEnv->programs[prog] = clCreateProgramWithBinary(clEnv->context, 1, &clEnv->device, &length, (const unsigned char**)&binaryProgram, &clBinaryStatus, &clStatus);
+    clEnv->programs[prog] = clEnv->library->clCreateProgramWithBinary(clEnv->context, 1, &clEnv->device, &length, (const unsigned char**)&binaryProgram, &clBinaryStatus, &clStatus);
     if (clStatus != CL_SUCCESS
         || clBinaryStatus != CL_SUCCESS)
       goto cleanup;
@@ -745,7 +882,7 @@
     {
       /* Binary CL program unavailable, compile the program from source */
       size_t programLength = strlen(MagickOpenCLProgramStrings[i]);
-      clEnv->programs[i] = clCreateProgramWithSource(clEnv->context, 1, &(MagickOpenCLProgramStrings[i]), &programLength, &clStatus);
+      clEnv->programs[i] = clEnv->library->clCreateProgramWithSource(clEnv->context, 1, &(MagickOpenCLProgramStrings[i]), &programLength, &clStatus);
       if (clStatus!=CL_SUCCESS)
       {
         (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning,
@@ -755,7 +892,7 @@
       }
     }
 
-    clStatus = clBuildProgram(clEnv->programs[i], 1, &clEnv->device, options, NULL, NULL);
+    clStatus = clEnv->library->clBuildProgram(clEnv->programs[i], 1, &clEnv->device, options, NULL, NULL);
     if (clStatus!=CL_SUCCESS)
     {
       (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning,
@@ -781,9 +918,9 @@
         {
           char* log;
           size_t logSize;
-          clGetProgramBuildInfo(clEnv->programs[i], clEnv->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
+          clEnv->library->clGetProgramBuildInfo(clEnv->programs[i], clEnv->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
           log = (char*)AcquireMagickMemory(logSize);
-          clGetProgramBuildInfo(clEnv->programs[i], clEnv->device, CL_PROGRAM_BUILD_LOG, logSize, log, &logSize);
+          clEnv->library->clGetProgramBuildInfo(clEnv->programs[i], clEnv->device, CL_PROGRAM_BUILD_LOG, logSize, log, &logSize);
 
           (void) FormatLocaleString(path,MaxTextExtent,"%s%s%s"
            ,GetOpenCLCachedFilesDirectory()
@@ -860,7 +997,7 @@
 
   if (clEnv->device != NULL)
   {
-    status = clGetDeviceInfo(clEnv->device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &clEnv->platform, NULL);
+    status = clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &clEnv->platform, NULL);
     if (status != CL_SUCCESS) {
       (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning,
           "Failed to get OpenCL platform from the selected device.", "(%d)", status);
@@ -884,7 +1021,7 @@
     clEnv->device = NULL;
 
     /* Get the number of OpenCL platforms available */
-    status = clGetPlatformIDs(0, NULL, &numPlatforms);
+    status = clEnv->library->clGetPlatformIDs(0, NULL, &numPlatforms);
     if (status != CL_SUCCESS)
     {
       (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning, 
@@ -905,7 +1042,7 @@
       goto cleanup;
     }
 
-    status = clGetPlatformIDs(numPlatforms, platforms, NULL);
+    status = clEnv->library->clGetPlatformIDs(numPlatforms, platforms, NULL);
     if (status != CL_SUCCESS)
     {
       (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning,
@@ -937,11 +1074,11 @@
     for (i = 0; i < numPlatforms; i++)
     {
       cl_uint numDevices;
-      status = clGetDeviceIDs(platforms[i], deviceType, 1, &(clEnv->device), &numDevices);
+      status = clEnv->library->clGetDeviceIDs(platforms[i], deviceType, 1, &(clEnv->device), &numDevices);
       if (status != CL_SUCCESS)
       {
         (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning,
-          "clGetPlatformIDs failed.", "(%d)", status);
+          "clGetDeviceIDs failed.", "(%d)", status);
         goto cleanup;
       }
       if (clEnv->device != NULL)
@@ -1010,8 +1147,31 @@
   cl_int clStatus;
   cl_context_properties cps[3];
 
-
+#ifdef MAGICKCORE_CLPERFMARKER
+  {
+    int status = clInitializePerfMarkerAMD();
+    if (status == AP_SUCCESS) {
+      //printf("PerfMarker successfully initialized\n");
+    }
+  }
+#endif
   clEnv->OpenCLInitialized = MagickTrue;
+
+  /* check and init the global lib */
+  OpenCLLib=GetOpenCLLib();
+  if (OpenCLLib)
+  {
+    clEnv->library=OpenCLLib;
+  }
+  else
+  {
+    /* turn off opencl */
+    MagickBooleanType flag;
+    flag = MagickTrue;
+    SetMagickOpenCLEnvParamInternal(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED
+        , sizeof(MagickBooleanType), &flag, exception);
+  }
+  
   if (clEnv->OpenCLDisabled != MagickFalse)
     goto cleanup;
 
@@ -1027,7 +1187,7 @@
   cps[0] = CL_CONTEXT_PLATFORM;
   cps[1] = (cl_context_properties)clEnv->platform;
   cps[2] = 0;
-  clEnv->context = clCreateContext(cps, 1, &(clEnv->device), NULL, NULL, &clStatus);
+  clEnv->context = clEnv->library->clCreateContext(cps, 1, &(clEnv->device), NULL, NULL, &clStatus);
   if (clStatus != CL_SUCCESS)
   {
     (void) ThrowMagickException(exception, GetMagickModule(), DelegateWarning,
@@ -1046,6 +1206,7 @@
   }
 
   status = EnableOpenCLInternal(clEnv);
+
 cleanup:
   return status;
 }
@@ -1106,7 +1267,7 @@
 cl_command_queue AcquireOpenCLCommandQueue(MagickCLEnv clEnv)
 {
   if (clEnv != NULL)
-    return clCreateCommandQueue(clEnv->context, clEnv->device, 0, NULL);
+    return clEnv->library->clCreateCommandQueue(clEnv->context, clEnv->device, 0, NULL);
   else
     return NULL;
 }
@@ -1143,7 +1304,7 @@
 {
   if (clEnv != NULL)
   {
-    return ((clReleaseCommandQueue(queue) == CL_SUCCESS) ? MagickTrue:MagickFalse);
+    return ((clEnv->library->clReleaseCommandQueue(queue) == CL_SUCCESS) ? MagickTrue:MagickFalse);
   }
   else
     return MagickFalse;
@@ -1186,7 +1347,7 @@
   cl_kernel kernel = NULL;
   if (clEnv != NULL && kernelName!=NULL)
   {
-    kernel = clCreateKernel(clEnv->programs[program], kernelName, &clStatus);
+    kernel = clEnv->library->clCreateKernel(clEnv->programs[program], kernelName, &clStatus);
   }
   return kernel;
 }
@@ -1225,7 +1386,7 @@
   MagickBooleanType status = MagickFalse;
   if (clEnv != NULL && kernel != NULL)
   {
-    status = ((clReleaseKernel(kernel) == CL_SUCCESS)?MagickTrue:MagickFalse);
+    status = ((clEnv->library->clReleaseKernel(kernel) == CL_SUCCESS)?MagickTrue:MagickFalse);
   }
   return status;
 }
@@ -1258,7 +1419,7 @@
  unsigned long GetOpenCLDeviceLocalMemorySize(MagickCLEnv clEnv)
 {
   cl_ulong localMemorySize;
-  clGetDeviceInfo(clEnv->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMemorySize, NULL);
+  clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMemorySize, NULL);
   return (unsigned long)localMemorySize;
 }
 
@@ -1266,7 +1427,7 @@
   unsigned long GetOpenCLDeviceMaxMemAllocSize(MagickCLEnv clEnv)
 {
   cl_ulong maxMemAllocSize;
-  clGetDeviceInfo(clEnv->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &maxMemAllocSize, NULL);
+  clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &maxMemAllocSize, NULL);
   return (unsigned long)maxMemAllocSize;
 }
 
@@ -1363,17 +1524,17 @@
   
   memset(profile, 0, sizeof(ds_profile));
 
-  clGetPlatformIDs(0, NULL, &numPlatforms);
+  OpenCLLib->clGetPlatformIDs(0, NULL, &numPlatforms);
   if (numPlatforms > 0) {
     platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));
     if (platforms == NULL) {
       status = DS_MEMORY_ERROR;
       goto cleanup;
     }
-    clGetPlatformIDs(numPlatforms, platforms, NULL);
+    OpenCLLib->clGetPlatformIDs(numPlatforms, platforms, NULL);
     for (i = 0; i < (unsigned int)numPlatforms; i++) {
       cl_uint num;
-      if (clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, 0, NULL, &num) == CL_SUCCESS)
+      if (OpenCLLib->clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, 0, NULL, &num) == CL_SUCCESS)
         numDevices+=num;
     }
   }
@@ -1412,7 +1573,7 @@
           continue;
           break;
         }
-        if (clGetDeviceIDs(platforms[i], deviceType, numDevices, devices, &num) != CL_SUCCESS)
+        if (OpenCLLib->clGetDeviceIDs(platforms[i], deviceType, numDevices, devices, &num) != CL_SUCCESS)
           continue;
         for (j = 0; j < num; j++, next++) {
           size_t length;
@@ -1420,22 +1581,22 @@
           profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE;
           profile->devices[next].oclDeviceID = devices[j];
 
-          clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME
+          OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME
             , 0, NULL, &length);
           profile->devices[next].oclDeviceName = (char*)malloc(sizeof(char)*length);
-          clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME
+          OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME
             , length, profile->devices[next].oclDeviceName, NULL);
 
-          clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION
+          OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION
             , 0, NULL, &length);
           profile->devices[next].oclDriverVersion = (char*)malloc(sizeof(char)*length);
-          clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION
+          OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION
             , length, profile->devices[next].oclDriverVersion, NULL);
 
-          clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_MAX_CLOCK_FREQUENCY
+          OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_MAX_CLOCK_FREQUENCY
             , sizeof(cl_uint), &profile->devices[next].oclMaxClockFrequency, NULL);
 
-          clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_MAX_COMPUTE_UNITS
+          OpenCLLib->clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_MAX_COMPUTE_UNITS
             , sizeof(cl_uint), &profile->devices[next].oclMaxComputeUnits, NULL);
         }
       }
@@ -1923,57 +2084,6 @@
 */
 
 
-
-typedef struct _AccelerateTimer {
-  long long _freq;	
-  long long _clocks;
-  long long _start;
-} AccelerateTimer;
-
-static void startAccelerateTimer(AccelerateTimer* timer) {
-#ifdef _WIN32
-      QueryPerformanceCounter((LARGE_INTEGER*)&timer->_start);	
-
-
-#else
-      struct timeval s;
-      gettimeofday(&s, 0);
-      timer->_start = (long long)s.tv_sec * (long long)1.0E3 + (long long)s.tv_usec / (long long)1.0E3;
-#endif  
-}
-
-static void stopAccelerateTimer(AccelerateTimer* timer) {
-      long long n=0;
-#ifdef _WIN32
-      QueryPerformanceCounter((LARGE_INTEGER*)&(n));	
-#else
-      struct timeval s;
-      gettimeofday(&s, 0);
-      n = (long long)s.tv_sec * (long long)1.0E3+ (long long)s.tv_usec / (long long)1.0E3;
-#endif
-      n -= timer->_start;
-      timer->_start = 0;
-      timer->_clocks += n;
-}
-
-static void resetAccelerateTimer(AccelerateTimer* timer) {
-   timer->_clocks = 0; 
-   timer->_start = 0;
-}
-
-
-static void initAccelerateTimer(AccelerateTimer* timer) {
-#ifdef _WIN32
-    QueryPerformanceFrequency((LARGE_INTEGER*)&timer->_freq);
-#else
-    timer->_freq = (long long)1.0E3;
-#endif
-   resetAccelerateTimer(timer);
-}
-
-double readAccelerateTimer(AccelerateTimer* timer) { return (double)timer->_clocks/(double)timer->_freq; };
-
-
 typedef double AccelerateScoreType;
 
 static ds_status AcceleratePerfEvaluator(ds_device *device,
@@ -2067,7 +2177,7 @@
       bluredImage=BlurImage(inputImage,10.0f,3.5f,exception);
       unsharpedImage=UnsharpMaskImage(bluredImage,2.0f,2.0f,50.0f,10.0f,
         exception);
-      resizedImage=ResizeImage(unsharpedImage,640,480,LanczosFilter,
+      resizedImage=ResizeImage(unsharpedImage,640,480,LanczosFilter,1.0,
         exception);
 
 #ifdef MAGICKCORE_CLPERFMARKER
@@ -2156,6 +2266,14 @@
   SetMagickOpenCLEnvParamInternal(clEnv, MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED
     , sizeof(MagickBooleanType), &flag, exception);
 
+  /* check and init the global lib */
+  OpenCLLib=GetOpenCLLib();
+  if (OpenCLLib==NULL)
+  {
+    mStatus=InitOpenCLEnvInternal(clEnv, exception);
+    goto cleanup;
+  }
+
   status = initDSProfile(&profile, IMAGEMAGICK_PROFILE_VERSION);
   if (status!=DS_SUCCESS) {
     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError, "Error when initializing the profile", "'%s'", ".");
@@ -2353,10 +2471,10 @@
 
   if (severity!=0) {
     cl_device_type dType;
-    clGetDeviceInfo(clEnv->device,CL_DEVICE_TYPE ,sizeof(cl_device_type),&dType,NULL);
+    clEnv->library->clGetDeviceInfo(clEnv->device,CL_DEVICE_TYPE ,sizeof(cl_device_type),&dType,NULL);
     if (dType == CL_DEVICE_TYPE_CPU) {
       char buffer[MaxTextExtent];
-      clGetPlatformInfo(clEnv->platform, CL_PLATFORM_NAME, MaxTextExtent, buffer, NULL);
+      clEnv->library->clGetPlatformInfo(clEnv->platform, CL_PLATFORM_NAME, MaxTextExtent, buffer, NULL);
 
       /* Workaround for Intel OpenCL CPU runtime bug */
       /* Turn off OpenCL when a problem is detected! */
@@ -2386,6 +2504,88 @@
   return(status);
 }
 
+/* Lock the seed buffer and return it (created on first use, NULL on failure); pair with UnlockRandSeedBuffer. */
+MagickPrivate cl_mem GetAndLockRandSeedBuffer(MagickCLEnv clEnv)
+{
+  LockSemaphoreInfo(clEnv->lock);
+  if (clEnv->seedsLock == NULL)
+    ActivateSemaphoreInfo(&clEnv->seedsLock);
+  /* seedsLock stays held on return; only clEnv->lock is released below. */
+  LockSemaphoreInfo(clEnv->seedsLock);
+
+  if (clEnv->seeds == NULL)
+  {
+    cl_int clStatus;
+    clEnv->numGenerators = NUM_CL_RAND_GENERATORS;
+    clEnv->seeds = clEnv->library->clCreateBuffer(clEnv->context, CL_MEM_READ_WRITE,
+                                  clEnv->numGenerators*4*sizeof(unsigned int),
+                                  NULL, &clStatus);
+    if (clStatus != CL_SUCCESS)
+      clEnv->seeds = NULL;
+    else
+    {
+      unsigned int i;
+      cl_command_queue queue = NULL;
+      unsigned int *seeds;
+
+      queue = AcquireOpenCLCommandQueue(clEnv);
+      seeds = (unsigned int*) clEnv->library->clEnqueueMapBuffer(queue, clEnv->seeds, CL_TRUE,
+                                                  CL_MAP_WRITE, 0,
+                                                  clEnv->numGenerators*4
+                                                  *sizeof(unsigned int),
+                                                  0, NULL, NULL, &clStatus);
+      if (clStatus!=CL_SUCCESS)
+      {
+        /* Forget the released buffer so this and later calls never return a stale handle. */
+        clEnv->library->clReleaseMemObject(clEnv->seeds);
+        clEnv->seeds = NULL;
+        goto cleanup;
+      }
+
+      for (i = 0; i < clEnv->numGenerators; i++) {
+        RandomInfo* randomInfo = AcquireRandomInfo();
+        const unsigned long* s = GetRandomInfoSeed(randomInfo);
+        if (i == 0)
+          clEnv->randNormalize = GetRandomInfoNormalize(randomInfo);
+
+        seeds[i*4]   = (unsigned int) s[0];
+        seeds[i*4+1] = (unsigned int) 0x50a7f451;
+        seeds[i*4+2] = (unsigned int) 0x5365417e;
+        seeds[i*4+3] = (unsigned int) 0xc3a4171a;
+
+        randomInfo = DestroyRandomInfo(randomInfo);
+      }
+      clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, clEnv->seeds, seeds, 0,
+                                          NULL, NULL);
+      clEnv->library->clFinish(queue);
+cleanup:
+      if (queue != NULL)
+        RelinquishOpenCLCommandQueue(clEnv, queue);
+    }
+  }
+  UnlockSemaphoreInfo(clEnv->lock);
+  return clEnv->seeds;
+}
+
+MagickPrivate void UnlockRandSeedBuffer(MagickCLEnv clEnv)
+{
+  /* Release the seed-buffer lock; if it was never created, create it instead. */
+  if (clEnv->seedsLock != NULL)
+    UnlockSemaphoreInfo(clEnv->seedsLock);
+  else
+    ActivateSemaphoreInfo(&clEnv->seedsLock);
+}
+/* Return clEnv->numGenerators, which GetAndLockRandSeedBuffer sets to NUM_CL_RAND_GENERATORS when it creates the seed buffer. */
+MagickPrivate unsigned int GetNumRandGenerators(MagickCLEnv clEnv)
+{
+  return clEnv->numGenerators;
+}
+
+/* Return clEnv->randNormalize, which GetAndLockRandSeedBuffer takes from GetRandomInfoNormalize of the first generator. */
+MagickPrivate float GetRandNormalize(MagickCLEnv clEnv)
+{
+  return clEnv->randNormalize;
+}
 
 #else
 
@@ -2393,12 +2593,12 @@
   MagickBooleanType OpenCLInitialized;  /* whether OpenCL environment is initialized. */
 };
 
-extern MagickExport MagickCLEnv AcquireMagickOpenCLEnv()
+MagickExport MagickCLEnv AcquireMagickOpenCLEnv()
 {
   return NULL;
 }
 
-extern MagickExport MagickBooleanType RelinquishMagickOpenCLEnv(
+MagickExport MagickBooleanType RelinquishMagickOpenCLEnv(
   MagickCLEnv magick_unused(clEnv))
 {
   magick_unreferenced(clEnv);
@@ -2470,7 +2670,7 @@
   return (cl_command_queue) NULL;
 }
 
-MagickExport MagickBooleanType RelinquishCommandQueue(
+MagickPrivate MagickBooleanType RelinquishCommandQueue(
   MagickCLEnv magick_unused(clEnv),cl_command_queue magick_unused(queue))
 {
   magick_unreferenced(clEnv);
@@ -2534,6 +2734,32 @@
   magick_unreferenced(format);
   return(MagickFalse);
 }
+
+/* Stub for builds without MAGICKCORE_OPENCL_SUPPORT: takes no lock and always returns NULL. */
+MagickPrivate cl_mem GetAndLockRandSeedBuffer(MagickCLEnv clEnv)
+{
+  magick_unreferenced(clEnv);
+  return NULL;
+}
+
+/* Stub for builds without MAGICKCORE_OPENCL_SUPPORT: does nothing. */
+MagickPrivate void UnlockRandSeedBuffer(MagickCLEnv clEnv)
+{
+  magick_unreferenced(clEnv);
+}
+/* Stub for builds without MAGICKCORE_OPENCL_SUPPORT: always reports 0 generators. */
+MagickPrivate unsigned int GetNumRandGenerators(MagickCLEnv clEnv)
+{
+  magick_unreferenced(clEnv);
+  return 0;
+}
+/* Stub for builds without MAGICKCORE_OPENCL_SUPPORT: always returns 0.0f. */
+MagickPrivate float GetRandNormalize(MagickCLEnv clEnv)
+{
+  magick_unreferenced(clEnv);
+  return 0.0f;
+}
+
 #endif /* MAGICKCORE_OPENCL_SUPPORT */
 
 char* openclCachedFilesDirectory;
@@ -2554,43 +2780,65 @@
       struct stat attributes;
       MagickBooleanType status;
 
+
+
+      home=GetEnvironmentValue("IMAGEMAGICK_OPENCL_CACHE_DIR");
+      if (home == (char *) NULL)
+      {
 #ifdef MAGICKCORE_WINDOWS_SUPPORT
-      home=GetEnvironmentValue("LOCALAPPDATA");
-      if (home == (char *) NULL)
-        home=GetEnvironmentValue("APPDATA");
-      if (home == (char *) NULL)
-        home=GetEnvironmentValue("USERPROFILE");
+        home=GetEnvironmentValue("LOCALAPPDATA");
+        if (home == (char *) NULL)
+          home=GetEnvironmentValue("APPDATA");
+        if (home == (char *) NULL)
+          home=GetEnvironmentValue("USERPROFILE");
 #else
-      home=GetEnvironmentValue("HOME");
+        home=GetEnvironmentValue("HOME");
 #endif
+      }
+      
       if (home != (char *) NULL)
       {
+        int mkdirStatus = 0;
         /*
-          Search $HOME/.config/ImageMagick.
         */
-        (void) FormatLocaleString(path,MaxTextExtent,"%s%s.config",home,
-          DirectorySeparator);
+
+        /* first check if $HOME/.config exists */
+        (void) FormatLocaleString(path,MaxTextExtent,"%s%s.config",
+          home,DirectorySeparator);
         status=GetPathAttributes(path,&attributes);
-        if (status == MagickFalse) {
+        if (status == MagickFalse) 
+        {
+          
 #ifdef MAGICKCORE_WINDOWS_SUPPORT
-          mkdir(path);
+          mkdirStatus = mkdir(path);
 #else
-          mkdir(path, 0777);
+          mkdirStatus = mkdir(path, 0777);
 #endif
         }
-        (void) FormatLocaleString(path,MaxTextExtent,"%s%s.config%sImageMagick",
-          home,DirectorySeparator,DirectorySeparator);
+        
+        /* then check if $HOME/.config/ImageMagick exists */
+        if (mkdirStatus==0) 
+        {
+            (void) FormatLocaleString(path,MaxTextExtent,"%s%s.config%sImageMagick",
+              home,DirectorySeparator,DirectorySeparator);
+                    
+            status=GetPathAttributes(path,&attributes);
+            if (status == MagickFalse) 
+            {
+#ifdef MAGICKCORE_WINDOWS_SUPPORT
+              mkdirStatus = mkdir(path);
+#else
+              mkdirStatus = mkdir(path, 0777);
+#endif
+            }
+        }
+
+        if (mkdirStatus==0)
+        {
+          temp = (char*)AcquireMagickMemory(strlen(path)+1);
+          CopyMagickString(temp,path,strlen(path)+1);
+        }
         home=DestroyString(home);
-        temp = (char*)AcquireMagickMemory(strlen(path)+1);
-        CopyMagickString(temp,path,strlen(path)+1);
-        status=GetPathAttributes(path,&attributes);
-        if (status == MagickFalse) {
-#ifdef MAGICKCORE_WINDOWS_SUPPORT
-          mkdir(path);
-#else
-          mkdir(path, 0777);
-#endif
-        }
       }
       openclCachedFilesDirectory = temp;
     }
@@ -2599,6 +2847,52 @@
   return openclCachedFilesDirectory;
 }
 
+void startAccelerateTimer(AccelerateTimer* timer) {
+  /* Stamp timer->_start with the current time (milliseconds off Windows). */
+#ifdef _WIN32
+  QueryPerformanceCounter((LARGE_INTEGER*)&timer->_start);
+#else
+  struct timeval now;
+  gettimeofday(&now, 0);
+  timer->_start = (long long)now.tv_sec * 1000LL
+                + (long long)now.tv_usec / 1000LL;
+#endif
+}
+
+void stopAccelerateTimer(AccelerateTimer* timer) {
+  /* Add the time elapsed since timer->_start to timer->_clocks, then clear the stamp. */
+  long long now=0;
+#ifdef _WIN32
+  QueryPerformanceCounter((LARGE_INTEGER*)&now);
+#else
+  struct timeval tv;
+  gettimeofday(&tv, 0);
+  now = (long long)tv.tv_sec * 1000LL + (long long)tv.tv_usec / 1000LL;
+#endif
+  timer->_clocks += now - timer->_start;
+  timer->_start = 0;
+}
+
+void resetAccelerateTimer(AccelerateTimer* timer) {
+  /* Clear both the accumulated total and the pending start stamp. */
+  timer->_start = timer->_clocks = 0;
+}
+
+/* Set the divisor that readAccelerateTimer applies (1000 off Windows), then zero the counters. */
+void initAccelerateTimer(AccelerateTimer* timer) {
+#ifdef _WIN32
+  QueryPerformanceFrequency((LARGE_INTEGER*)&timer->_freq);
+#else
+  timer->_freq = 1000LL;
+#endif
+  resetAccelerateTimer(timer);
+}
+/* Return the accumulated total (timer->_clocks) divided by the rate set in initAccelerateTimer (timer->_freq). */
+double readAccelerateTimer(AccelerateTimer* timer) {
+  return (double)timer->_clocks/(double)timer->_freq;
+}
+
+
 /* create a function for OpenCL log */
 MagickPrivate
 void OpenCLLog(const char* message) {
@@ -2640,3 +2934,5 @@
   magick_unreferenced(message);
 #endif
 }
+
+
diff --git a/MagickCore/version.h b/MagickCore/version.h
index efc89ca..a876602 100644
--- a/MagickCore/version.h
+++ b/MagickCore/version.h
@@ -27,7 +27,7 @@
 */
 #define MagickPackageName "ImageMagick"
 #define MagickCopyright  "Copyright (C) 1999-2014 ImageMagick Studio LLC"
-#define MagickSVNRevision  "15035M"
+#define MagickSVNRevision  "15038M"
 #define MagickLibVersion  0x700
 #define MagickLibVersionText  "7.0.0"
 #define MagickLibVersionNumber  1,0,0
@@ -48,7 +48,7 @@
 #define MagickppLibAddendum  "-0"
 #define MagickppLibInterface  1
 #define MagickppLibMinInterface  1
-#define MagickReleaseDate  "2014-03-06"
+#define MagickReleaseDate  "2014-03-07"
 #define MagickChangeDate   "20120427"
 #define MagickAuthoritativeURL  "http://www.imagemagick.org"
 #define MagickFeatures "DPC HDRI OpenMP"
diff --git a/configure b/configure
index 16ddf16..a482eaf 100755
--- a/configure
+++ b/configure
@@ -3702,7 +3702,7 @@
 
 MAGICK_VERSION=7.0.0-0
 
-MAGICK_SVN_REVISION=15035M
+MAGICK_SVN_REVISION=15038M
 
 
 # Substitute library versioning
@@ -10524,6 +10524,9 @@
 
 
 
+# Drop -lOpenCL from the link line (also when it is the last word): opencl.c loads the library at run time.
+CL_LIBS=`echo $CL_LIBS | $SED -e 's/-lOpenCL *//'`
+
 
 CFLAGS="$CL_CFLAGS $CFLAGS"
 CPPFLAGS="$CL_CFLAGS $CPPFLAGS"
diff --git a/m4/ax_opencl.m4 b/m4/ax_opencl.m4
index 834b618..a646346 100644
--- a/m4/ax_opencl.m4
+++ b/m4/ax_opencl.m4
@@ -143,5 +143,8 @@
 fi
   
 AC_SUBST([CL_CFLAGS])
+
+# Drop -lOpenCL from the link line (also when it is the last word): opencl.c loads the library at run time.
+CL_LIBS=`echo $CL_LIBS | $SED -e 's/-lOpenCL *//'`
 AC_SUBST([CL_LIBS])
 ])dnl
