When building the circle blur profile, evaluate the kernel vertically once per column
BUG=skia:5224
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1991413002
Review-Url: https://codereview.chromium.org/1991413002
diff --git a/src/effects/GrCircleBlurFragmentProcessor.cpp b/src/effects/GrCircleBlurFragmentProcessor.cpp
index ff6985f..bd8adee 100644
--- a/src/effects/GrCircleBlurFragmentProcessor.cpp
+++ b/src/effects/GrCircleBlurFragmentProcessor.cpp
@@ -137,47 +137,55 @@
}
}
-// Applies the 1D half kernel vertically at a point (x, 0) to a circle centered at the origin with
-// radius circleR.
-static float eval_vertically(float x, float circleR, const float* summedHalfKernelTable,
- int halfKernelSize) {
- // Given x find the positive y that is on the edge of the circle.
- float y = sqrtf(fabs(circleR * circleR - x * x));
- // In the column at x we exit the circle at +y and -y
- // table entry j is actually the kernel evaluated at j + 0.5.
- y -= 0.5f;
- int yInt = SkScalarFloorToInt(y);
- SkASSERT(yInt >= -1);
- if (y < 0) {
- return (y + 0.5f) * summedHalfKernelTable[0];
- } else if (yInt >= halfKernelSize - 1) {
- return 0.5f;
- } else {
- float yFrac = y - yInt;
- return (1.f - yFrac) * summedHalfKernelTable[yInt] +
- yFrac * summedHalfKernelTable[yInt + 1];
+// Applies the 1D half kernel vertically at numSteps points along the x axis, starting at firstX
+// and advancing one unit per step, to a circle centered at the origin with radius circleR.
+void apply_kernel_in_y(float* results, int numSteps, float firstX, float circleR,
+ int halfKernelSize, const float* summedHalfKernelTable) {
+ float x = firstX;
+ for (int i = 0; i < numSteps; ++i, x += 1.f) {
+ if (x < -circleR || x > circleR) {
+ results[i] = 0;  // The column at x lies entirely outside the circle.
+ continue;
+ }
+ float y = sqrtf(circleR * circleR - x * x);
+ // In the column at x we exit the circle at +y and -y.
+ // Summed table entry j actually reflects an offset of j + 0.5, so shift y to match.
+ y -= 0.5f;
+ int yInt = SkScalarFloorToInt(y);
+ SkASSERT(yInt >= -1);
+ if (y < 0) {  // Unshifted y is below 0.5: scale the first table entry by the unshifted y.
+ results[i] = (y + 0.5f) * summedHalfKernelTable[0];
+ } else if (yInt >= halfKernelSize - 1) {  // yInt + 1 would index past the table.
+ results[i] = 0.5f;  // NOTE(review): presumably the full sum of the half kernel - confirm.
+ } else {  // Linearly interpolate between the two adjacent table entries.
+ float yFrac = y - yInt;
+ results[i] = (1.f - yFrac) * summedHalfKernelTable[yInt] +
+ yFrac * summedHalfKernelTable[yInt + 1];
+ }
}
}
-// Apply the kernel at point (t, 0) to a circle centered at the origin with radius circleR.
-static uint8_t eval_at(float t, float circleR, const float* halfKernel,
- const float* summedHalfKernelTable, int halfKernelSize) {
+// Apply a Gaussian at point (evalX, 0) to a circle centered at the origin with radius circleR.
+// This relies on having a half kernel computed for the Gaussian and a table of applications of
+// the half kernel in y to the 2 * halfKernelSize columns at (evalX - halfKernelSize,
+// evalX - halfKernelSize + 1, ..., evalX + halfKernelSize - 1) passed in as yKernelEvaluations.
+static uint8_t eval_at(float evalX, float circleR, const float* halfKernel, int halfKernelSize,
+ const float* yKernelEvaluations) {
float acc = 0;
- for (int i = 0; i < halfKernelSize; ++i) {
- float x = t - i - 0.5f;
+ float x = evalX - halfKernelSize;
+ for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {
if (x < -circleR || x > circleR) {
continue;
}
- float verticalEval = eval_vertically(x, circleR, summedHalfKernelTable, halfKernelSize);
- acc += verticalEval * halfKernel[i];
+ float verticalEval = yKernelEvaluations[i];
+ acc += verticalEval * halfKernel[halfKernelSize - i - 1];
}
- for (int i = 0; i < halfKernelSize; ++i) {
- float x = t + i + 0.5f;
+ for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {
if (x < -circleR || x > circleR) {
continue;
}
- float verticalEval = eval_vertically(x, circleR, summedHalfKernelTable, halfKernelSize);
+ float verticalEval = yKernelEvaluations[i + halfKernelSize];
acc += verticalEval * halfKernel[i];
}
// Since we applied a half kernel in y we multiply acc by 2 (the circle is symmetric about the
@@ -201,11 +209,12 @@
}
// This function creates a profile of a blurred circle. It does this by computing a kernel for
-// half the Gaussian and a matching summed area table. To compute a profile value at x = r it steps
-// outward in x from (r, 0) in both directions. There is a step for each direction for each entry
-// in the half kernel. The y contribution at each step is computed from the summed area table using
-// the height of the circle above the step point. Each y contribution is multiplied by the half
-// kernel value corresponding to the step in x.
+// half the Gaussian and a matching summed area table. The summed area table is used to compute
+// an array of vertical applications of the half kernel to the circle along the x axis. The table
+// of y evaluations has 2 * k + n entries where k is the size of the half kernel and n is the size
+// of the profile being computed. Then for each of the n profile entries we walk out k steps in each
+// horizontal direction multiplying the corresponding y evaluation by the half kernel entry and
+// sum these values to compute the profile entry.
static uint8_t* create_profile(float circleR, float sigma) {
float offset;
int numSteps;
@@ -217,13 +226,22 @@
int halfKernelSize = SkScalarCeilToInt(6.0f*sigma);
// round up to next multiple of 2 and then divide by 2
halfKernelSize = ((halfKernelSize + 1) & ~1) >> 1;
- SkAutoTArray<float> halfKernel(halfKernelSize);
- SkAutoTArray<float> summedKernel(halfKernelSize);
- make_half_kernel_and_summed_table(halfKernel.get(), summedKernel.get(), halfKernelSize,
- sigma);
+
+ // Number of x steps at which to apply kernel in y to cover all the profile samples in x.
+ int numYSteps = numSteps + 2 * halfKernelSize;
+
+ SkAutoTArray<float> bulkAlloc(halfKernelSize + halfKernelSize + numYSteps);
+ float* halfKernel = bulkAlloc.get();
+ float* summedKernel = bulkAlloc.get() + halfKernelSize;
+ float* yEvals = bulkAlloc.get() + 2 * halfKernelSize;
+ make_half_kernel_and_summed_table(halfKernel, summedKernel, halfKernelSize, sigma);
+
+ float firstX = offset - halfKernelSize + 0.5f;
+ apply_kernel_in_y(yEvals, numYSteps, firstX, circleR, halfKernelSize, summedKernel);
+
for (int i = 0; i < numSteps - 1; ++i) {
- weights[i] = eval_at(offset+i, circleR, halfKernel.get(), summedKernel.get(),
- halfKernelSize);
+ float evalX = offset + i + 0.5f;
+ weights[i] = eval_at(evalX, circleR, halfKernel, halfKernelSize, yEvals + i);
}
// Ensure the tail of the Gaussian goes to zero.
weights[numSteps - 1] = 0;