Jason Sams | d85e283 | 2012-09-11 16:04:27 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2012 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | |
| 18 | #include "rsdCore.h" |
| 19 | #include "rsdIntrinsics.h" |
| 20 | #include "rsdAllocation.h" |
| 21 | |
| 22 | #include "rsdIntrinsicInlines.h" |
| 23 | |
| 24 | using namespace android; |
| 25 | using namespace android::renderscript; |
| 26 | |
| 27 | struct ConvolveParams { |
| 28 | float fp[104]; |
| 29 | short ip[104]; |
| 30 | float radius; |
| 31 | int iradius; |
| 32 | ObjectBaseRef<Allocation> alloc; |
| 33 | }; |
| 34 | |
| 35 | static void ComputeGaussianWeights(ConvolveParams *cp) { |
| 36 | // Compute gaussian weights for the blur |
| 37 | // e is the euler's number |
| 38 | float e = 2.718281828459045f; |
| 39 | float pi = 3.1415926535897932f; |
| 40 | // g(x) = ( 1 / sqrt( 2 * pi ) * sigma) * e ^ ( -x^2 / 2 * sigma^2 ) |
| 41 | // x is of the form [-radius .. 0 .. radius] |
| 42 | // and sigma varies with radius. |
| 43 | // Based on some experimental radius values and sigma's |
| 44 | // we approximately fit sigma = f(radius) as |
| 45 | // sigma = radius * 0.4 + 0.6 |
| 46 | // The larger the radius gets, the more our gaussian blur |
| 47 | // will resemble a box blur since with large sigma |
| 48 | // the gaussian curve begins to lose its shape |
| 49 | float sigma = 0.4f * cp->radius + 0.6f; |
| 50 | |
| 51 | // Now compute the coefficients. We will store some redundant values to save |
| 52 | // some math during the blur calculations precompute some values |
| 53 | float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma); |
| 54 | float coeff2 = - 1.0f / (2.0f * sigma * sigma); |
| 55 | |
| 56 | float normalizeFactor = 0.0f; |
| 57 | float floatR = 0.0f; |
| 58 | int r; |
| 59 | cp->iradius = (float)ceil(cp->radius) + 0.5f; |
| 60 | for (r = -cp->iradius; r <= cp->iradius; r ++) { |
| 61 | floatR = (float)r; |
| 62 | cp->fp[r + cp->iradius] = coeff1 * powf(e, floatR * floatR * coeff2); |
| 63 | normalizeFactor += cp->fp[r + cp->iradius]; |
| 64 | } |
| 65 | |
| 66 | //Now we need to normalize the weights because all our coefficients need to add up to one |
| 67 | normalizeFactor = 1.0f / normalizeFactor; |
| 68 | for (r = -cp->iradius; r <= cp->iradius; r ++) { |
| 69 | cp->fp[r + cp->iradius] *= normalizeFactor; |
| 70 | cp->ip[r + cp->iradius] = (short)(cp->ip[r + cp->iradius] * 32768); |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | static void Blur_Bind(const Context *dc, const Script *script, |
| 75 | void * intrinsicData, uint32_t slot, Allocation *data) { |
| 76 | ConvolveParams *cp = (ConvolveParams *)intrinsicData; |
| 77 | rsAssert(slot == 1); |
| 78 | cp->alloc.set(data); |
| 79 | } |
| 80 | |
| 81 | static void Blur_SetVar(const Context *dc, const Script *script, void * intrinsicData, |
| 82 | uint32_t slot, void *data, size_t dataLength) { |
| 83 | ConvolveParams *cp = (ConvolveParams *)intrinsicData; |
| 84 | rsAssert(slot == 0); |
| 85 | |
| 86 | cp->radius = ((const float *)data)[0]; |
| 87 | ComputeGaussianWeights(cp); |
| 88 | } |
| 89 | |
Jason Sams | d85e283 | 2012-09-11 16:04:27 -0700 | [diff] [blame] | 90 | |
| 91 | |
| 92 | static void OneV(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y, |
| 93 | const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { |
| 94 | |
| 95 | const uchar *pi = ptrIn + x*4; |
| 96 | |
| 97 | float4 blurredPixel = 0; |
| 98 | for (int r = -iradius; r <= iradius; r ++) { |
| 99 | int validY = rsMax((y + r), 0); |
| 100 | validY = rsMin(validY, (int)(p->dimY - 1)); |
| 101 | const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride]; |
| 102 | float4 pf = convert_float4(pvy[0]); |
| 103 | blurredPixel += pf * gPtr[0]; |
| 104 | gPtr++; |
| 105 | } |
| 106 | |
| 107 | out->xyzw = blurredPixel; |
| 108 | } |
| 109 | |
Jason Sams | e78e514 | 2012-09-19 00:46:31 -0700 | [diff] [blame] | 110 | extern "C" void rsdIntrinsicBlurVF_K(void *dst, const void *pin, int stride, const void *gptr, int rct, int x1, int x2); |
| 111 | extern "C" void rsdIntrinsicBlurHF_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int x2); |
| 112 | |
| 113 | static void OneVF(float4 *out, |
| 114 | const uchar *ptrIn, int iStride, const float* gPtr, int ct, |
| 115 | int x1, int x2) { |
| 116 | |
| 117 | #if defined(ARCH_ARM_HAVE_NEON) |
Jason Sams | 2207ab7 | 2012-09-19 13:44:55 -0700 | [diff] [blame^] | 118 | { |
| 119 | int t = (x2 - x1); |
| 120 | t &= ~1; |
| 121 | if(t) { |
| 122 | rsdIntrinsicBlurVF_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t); |
| 123 | } |
| 124 | x1 += t; |
| 125 | } |
Jason Sams | e78e514 | 2012-09-19 00:46:31 -0700 | [diff] [blame] | 126 | #endif |
| 127 | |
| 128 | while(x2 > x1) { |
| 129 | const uchar *pi = ptrIn + x1 * 4; |
| 130 | float4 blurredPixel = 0; |
| 131 | const float* gp = gPtr; |
| 132 | |
| 133 | for (int r = 0; r < ct; r++) { |
| 134 | float4 pf = convert_float4(((const uchar4 *)pi)[0]); |
| 135 | blurredPixel += pf * gp[0]; |
| 136 | pi += iStride; |
| 137 | gp++; |
| 138 | } |
| 139 | out->xyzw = blurredPixel; |
| 140 | x1++; |
| 141 | out++; |
| 142 | } |
| 143 | } |
| 144 | |
Jason Sams | d85e283 | 2012-09-11 16:04:27 -0700 | [diff] [blame] | 145 | static void OneH(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x, |
| 146 | const float4 *ptrIn, const float* gPtr, int iradius) { |
| 147 | |
| 148 | float4 blurredPixel = 0; |
| 149 | for (int r = -iradius; r <= iradius; r ++) { |
| 150 | int validX = rsMax((x + r), 0); |
| 151 | validX = rsMin(validX, (int)(p->dimX - 1)); |
| 152 | float4 pf = ptrIn[validX]; |
| 153 | blurredPixel += pf * gPtr[0]; |
| 154 | gPtr++; |
| 155 | } |
| 156 | |
| 157 | out->xyzw = convert_uchar4(blurredPixel); |
| 158 | } |
| 159 | |
| 160 | |
| 161 | static void Blur_uchar4(const RsForEachStubParamStruct *p, |
| 162 | uint32_t xstart, uint32_t xend, |
| 163 | uint32_t instep, uint32_t outstep) { |
| 164 | float buf[4 * 2048]; |
| 165 | ConvolveParams *cp = (ConvolveParams *)p->usr; |
| 166 | DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv; |
| 167 | const uchar *pin = (const uchar *)din->lod[0].mallocPtr; |
| 168 | |
| 169 | uchar4 *out = (uchar4 *)p->out; |
| 170 | uint32_t x1 = xstart; |
| 171 | uint32_t x2 = xend; |
| 172 | |
| 173 | float4 *fout = (float4 *)buf; |
Jason Sams | e78e514 | 2012-09-19 00:46:31 -0700 | [diff] [blame] | 174 | int y = p->y; |
| 175 | if ((y > cp->iradius) && (y < ((int)p->dimY - cp->iradius))) { |
| 176 | const uchar *pi = pin + (y - cp->iradius) * din->lod[0].stride; |
| 177 | OneVF(fout, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, x1, x2); |
| 178 | } else { |
| 179 | while(x2 > x1) { |
| 180 | OneV(p, fout, x1, y, pin, din->lod[0].stride, cp->fp, cp->iradius); |
| 181 | fout++; |
| 182 | x1++; |
| 183 | } |
Jason Sams | d85e283 | 2012-09-11 16:04:27 -0700 | [diff] [blame] | 184 | } |
| 185 | |
| 186 | x1 = xstart; |
Jason Sams | e78e514 | 2012-09-19 00:46:31 -0700 | [diff] [blame] | 187 | while ((x1 < (uint32_t)cp->iradius) && (x1 < x2)) { |
| 188 | OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius); |
| 189 | out++; |
| 190 | x1++; |
| 191 | } |
| 192 | #if defined(ARCH_ARM_HAVE_NEON) |
| 193 | if ((x1 + cp->iradius) < x2) { |
| 194 | rsdIntrinsicBlurHF_K(out, ((float4 *)buf) - cp->iradius, cp->fp, cp->iradius * 2 + 1, x1, x2 - cp->iradius); |
| 195 | out += (x2 - cp->iradius) - x1; |
| 196 | x1 = x2 - cp->iradius; |
| 197 | } |
| 198 | #endif |
Jason Sams | d85e283 | 2012-09-11 16:04:27 -0700 | [diff] [blame] | 199 | while(x2 > x1) { |
| 200 | OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius); |
| 201 | out++; |
| 202 | x1++; |
| 203 | } |
| 204 | |
| 205 | } |
| 206 | |
| 207 | void * rsdIntrinsic_InitBlur(const android::renderscript::Context *dc, |
| 208 | android::renderscript::Script *script, |
| 209 | RsdIntriniscFuncs_t *funcs) { |
| 210 | |
| 211 | script->mHal.info.exportedVariableCount = 2; |
| 212 | funcs->bind = Blur_Bind; |
| 213 | funcs->setVar = Blur_SetVar; |
| 214 | funcs->root = Blur_uchar4; |
| 215 | |
| 216 | ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams)); |
| 217 | cp->radius = 5; |
| 218 | ComputeGaussianWeights(cp); |
| 219 | return cp; |
| 220 | } |
| 221 | |
| 222 | |