humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "SkConvolver.h" |
xiangze.zhang | d2265e5 | 2016-11-17 18:39:38 -0800 | [diff] [blame] | 6 | #include "SkOpts.h" |
caryclark | 368342c | 2016-01-12 10:44:02 -0800 | [diff] [blame] | 7 | #include "SkTArray.h" |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 8 | |
| 9 | namespace { |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 10 | // Stores a list of rows in a circular buffer. The usage is you write into it |
| 11 | // by calling AdvanceRow. It will keep track of which row in the buffer it |
| 12 | // should use next, and the total number of rows added. |
| 13 | class CircularRowBuffer { |
| 14 | public: |
| 15 | // The number of pixels in each row is given in |sourceRowPixelWidth|. |
| 16 | // The maximum number of rows needed in the buffer is |maxYFilterSize| |
| 17 | // (we only need to store enough rows for the biggest filter). |
| 18 | // |
| 19 | // We use the |firstInputRow| to compute the coordinates of all of the |
| 20 | // following rows returned by Advance(). |
| 21 | CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize, |
| 22 | int firstInputRow) |
| 23 | : fRowByteWidth(destRowPixelWidth * 4), |
| 24 | fNumRows(maxYFilterSize), |
| 25 | fNextRow(0), |
| 26 | fNextRowCoordinate(firstInputRow) { |
| 27 | fBuffer.reset(fRowByteWidth * maxYFilterSize); |
| 28 | fRowAddresses.reset(fNumRows); |
| 29 | } |
| 30 | |
| 31 | // Moves to the next row in the buffer, returning a pointer to the beginning |
| 32 | // of it. |
| 33 | unsigned char* advanceRow() { |
| 34 | unsigned char* row = &fBuffer[fNextRow * fRowByteWidth]; |
| 35 | fNextRowCoordinate++; |
| 36 | |
| 37 | // Set the pointer to the next row to use, wrapping around if necessary. |
| 38 | fNextRow++; |
| 39 | if (fNextRow == fNumRows) { |
| 40 | fNextRow = 0; |
| 41 | } |
| 42 | return row; |
| 43 | } |
| 44 | |
| 45 | // Returns a pointer to an "unrolled" array of rows. These rows will start |
| 46 | // at the y coordinate placed into |*firstRowIndex| and will continue in |
| 47 | // order for the maximum number of rows in this circular buffer. |
| 48 | // |
| 49 | // The |firstRowIndex_| may be negative. This means the circular buffer |
| 50 | // starts before the top of the image (it hasn't been filled yet). |
| 51 | unsigned char* const* GetRowAddresses(int* firstRowIndex) { |
| 52 | // Example for a 4-element circular buffer holding coords 6-9. |
| 53 | // Row 0 Coord 8 |
| 54 | // Row 1 Coord 9 |
| 55 | // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10. |
| 56 | // Row 3 Coord 7 |
| 57 | // |
| 58 | // The "next" row is also the first (lowest) coordinate. This computation |
| 59 | // may yield a negative value, but that's OK, the math will work out |
| 60 | // since the user of this buffer will compute the offset relative |
| 61 | // to the firstRowIndex and the negative rows will never be used. |
| 62 | *firstRowIndex = fNextRowCoordinate - fNumRows; |
| 63 | |
| 64 | int curRow = fNextRow; |
| 65 | for (int i = 0; i < fNumRows; i++) { |
| 66 | fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth]; |
| 67 | |
| 68 | // Advance to the next row, wrapping if necessary. |
| 69 | curRow++; |
| 70 | if (curRow == fNumRows) { |
| 71 | curRow = 0; |
| 72 | } |
| 73 | } |
| 74 | return &fRowAddresses[0]; |
| 75 | } |
| 76 | |
| 77 | private: |
| 78 | // The buffer storing the rows. They are packed, each one fRowByteWidth. |
| 79 | SkTArray<unsigned char> fBuffer; |
| 80 | |
| 81 | // Number of bytes per row in the |buffer|. |
| 82 | int fRowByteWidth; |
| 83 | |
| 84 | // The number of rows available in the buffer. |
| 85 | int fNumRows; |
| 86 | |
| 87 | // The next row index we should write into. This wraps around as the |
| 88 | // circular buffer is used. |
| 89 | int fNextRow; |
| 90 | |
| 91 | // The y coordinate of the |fNextRow|. This is incremented each time a |
| 92 | // new row is appended and does not wrap. |
| 93 | int fNextRowCoordinate; |
| 94 | |
| 95 | // Buffer used by GetRowAddresses(). |
| 96 | SkTArray<unsigned char*> fRowAddresses; |
| 97 | }; |
| 98 | |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 99 | } // namespace |
| 100 | |
| 101 | // SkConvolutionFilter1D --------------------------------------------------------- |
| 102 | |
| 103 | SkConvolutionFilter1D::SkConvolutionFilter1D() |
| 104 | : fMaxFilter(0) { |
| 105 | } |
| 106 | |
| 107 | SkConvolutionFilter1D::~SkConvolutionFilter1D() { |
| 108 | } |
| 109 | |
| 110 | void SkConvolutionFilter1D::AddFilter(int filterOffset, |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 111 | const ConvolutionFixed* filterValues, |
| 112 | int filterLength) { |
| 113 | // It is common for leading/trailing filter values to be zeros. In such |
| 114 | // cases it is beneficial to only store the central factors. |
| 115 | // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on |
| 116 | // a 1080p image this optimization gives a ~10% speed improvement. |
| 117 | int filterSize = filterLength; |
| 118 | int firstNonZero = 0; |
| 119 | while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) { |
| 120 | firstNonZero++; |
| 121 | } |
| 122 | |
| 123 | if (firstNonZero < filterLength) { |
| 124 | // Here we have at least one non-zero factor. |
| 125 | int lastNonZero = filterLength - 1; |
| 126 | while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) { |
| 127 | lastNonZero--; |
| 128 | } |
| 129 | |
| 130 | filterOffset += firstNonZero; |
| 131 | filterLength = lastNonZero + 1 - firstNonZero; |
| 132 | SkASSERT(filterLength > 0); |
| 133 | |
caryclark | 368342c | 2016-01-12 10:44:02 -0800 | [diff] [blame] | 134 | fFilterValues.append(filterLength, &filterValues[firstNonZero]); |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 135 | } else { |
| 136 | // Here all the factors were zeroes. |
| 137 | filterLength = 0; |
| 138 | } |
| 139 | |
| 140 | FilterInstance instance; |
| 141 | |
| 142 | // We pushed filterLength elements onto fFilterValues |
| 143 | instance.fDataLocation = (static_cast<int>(fFilterValues.count()) - |
| 144 | filterLength); |
| 145 | instance.fOffset = filterOffset; |
| 146 | instance.fTrimmedLength = filterLength; |
| 147 | instance.fLength = filterSize; |
caryclark | 368342c | 2016-01-12 10:44:02 -0800 | [diff] [blame] | 148 | fFilters.push(instance); |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 149 | |
| 150 | fMaxFilter = SkTMax(fMaxFilter, filterLength); |
| 151 | } |
| 152 | |
| 153 | const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter( |
| 154 | int* specifiedFilterlength, |
| 155 | int* filterOffset, |
| 156 | int* filterLength) const { |
| 157 | const FilterInstance& filter = fFilters[0]; |
| 158 | *filterOffset = filter.fOffset; |
| 159 | *filterLength = filter.fTrimmedLength; |
| 160 | *specifiedFilterlength = filter.fLength; |
| 161 | if (filter.fTrimmedLength == 0) { |
halcanary | 96fcdcc | 2015-08-27 07:41:13 -0700 | [diff] [blame] | 162 | return nullptr; |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 163 | } |
| 164 | |
| 165 | return &fFilterValues[filter.fDataLocation]; |
| 166 | } |
| 167 | |
reed | d1d4460 | 2015-10-01 11:21:57 -0700 | [diff] [blame] | 168 | bool BGRAConvolve2D(const unsigned char* sourceData, |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 169 | int sourceByteRowStride, |
| 170 | bool sourceHasAlpha, |
| 171 | const SkConvolutionFilter1D& filterX, |
| 172 | const SkConvolutionFilter1D& filterY, |
| 173 | int outputByteRowStride, |
xiangze.zhang | d2265e5 | 2016-11-17 18:39:38 -0800 | [diff] [blame] | 174 | unsigned char* output) { |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 175 | |
| 176 | int maxYFilterSize = filterY.maxFilter(); |
| 177 | |
| 178 | // The next row in the input that we will generate a horizontally |
| 179 | // convolved row for. If the filter doesn't start at the beginning of the |
| 180 | // image (this is the case when we are only resizing a subset), then we |
| 181 | // don't want to generate any output rows before that. Compute the starting |
| 182 | // row for convolution as the first pixel for the first vertical filter. |
| 183 | int filterOffset, filterLength; |
| 184 | const SkConvolutionFilter1D::ConvolutionFixed* filterValues = |
| 185 | filterY.FilterForValue(0, &filterOffset, &filterLength); |
| 186 | int nextXRow = filterOffset; |
| 187 | |
| 188 | // We loop over each row in the input doing a horizontal convolution. This |
| 189 | // will result in a horizontally convolved image. We write the results into |
| 190 | // a circular buffer of convolved rows and do vertical convolution as rows |
| 191 | // are available. This prevents us from having to store the entire |
| 192 | // intermediate image and helps cache coherency. |
| 193 | // We will need four extra rows to allow horizontal convolution could be done |
| 194 | // simultaneously. We also pad each row in row buffer to be aligned-up to |
xiangze.zhang | 4adac2e | 2016-12-07 17:54:04 -0800 | [diff] [blame] | 195 | // 32 bytes. |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 196 | // TODO(jiesun): We do not use aligned load from row buffer in vertical |
| 197 | // convolution pass yet. Somehow Windows does not like it. |
xiangze.zhang | 4adac2e | 2016-12-07 17:54:04 -0800 | [diff] [blame] | 198 | int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F; |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 199 | int rowBufferHeight = maxYFilterSize + |
xiangze.zhang | d2265e5 | 2016-11-17 18:39:38 -0800 | [diff] [blame] | 200 | (SkOpts::convolve_4_rows_horizontally != nullptr ? 4 : 0); |
reed | d1d4460 | 2015-10-01 11:21:57 -0700 | [diff] [blame] | 201 | |
| 202 | // check for too-big allocation requests : crbug.com/528628 |
| 203 | { |
| 204 | int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight); |
| 205 | // need some limit, to avoid over-committing success from malloc, but then |
| 206 | // crashing when we try to actually use the memory. |
| 207 | // 100meg seems big enough to allow "normal" zoom factors and image sizes through |
| 208 | // while avoiding the crash seen by the bug (crbug.com/528628) |
| 209 | if (size > 100 * 1024 * 1024) { |
| 210 | // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size); |
| 211 | return false; |
| 212 | } |
| 213 | } |
| 214 | |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 215 | CircularRowBuffer rowBuffer(rowBufferWidth, |
| 216 | rowBufferHeight, |
| 217 | filterOffset); |
| 218 | |
| 219 | // Loop over every possible output row, processing just enough horizontal |
| 220 | // convolutions to run each subsequent vertical convolution. |
| 221 | SkASSERT(outputByteRowStride >= filterX.numValues() * 4); |
| 222 | int numOutputRows = filterY.numValues(); |
| 223 | |
| 224 | // We need to check which is the last line to convolve before we advance 4 |
| 225 | // lines in one iteration. |
| 226 | int lastFilterOffset, lastFilterLength; |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 227 | filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, |
| 228 | &lastFilterLength); |
| 229 | |
| 230 | for (int outY = 0; outY < numOutputRows; outY++) { |
| 231 | filterValues = filterY.FilterForValue(outY, |
| 232 | &filterOffset, &filterLength); |
| 233 | |
| 234 | // Generate output rows until we have enough to run the current filter. |
| 235 | while (nextXRow < filterOffset + filterLength) { |
xiangze.zhang | d2265e5 | 2016-11-17 18:39:38 -0800 | [diff] [blame] | 236 | if (SkOpts::convolve_4_rows_horizontally != nullptr && |
xiangze.zhang | 3f32135 | 2016-11-10 05:44:32 -0800 | [diff] [blame] | 237 | nextXRow + 3 < lastFilterOffset + lastFilterLength) { |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 238 | const unsigned char* src[4]; |
| 239 | unsigned char* outRow[4]; |
| 240 | for (int i = 0; i < 4; ++i) { |
sugoi | 35fcd15 | 2014-06-11 06:31:29 -0700 | [diff] [blame] | 241 | src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride]; |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 242 | outRow[i] = rowBuffer.advanceRow(); |
| 243 | } |
xiangze.zhang | d2265e5 | 2016-11-17 18:39:38 -0800 | [diff] [blame] | 244 | SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*rowBufferWidth); |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 245 | nextXRow += 4; |
| 246 | } else { |
xiangze.zhang | d2265e5 | 2016-11-17 18:39:38 -0800 | [diff] [blame] | 247 | SkOpts::convolve_horizontally( |
sugoi | 35fcd15 | 2014-06-11 06:31:29 -0700 | [diff] [blame] | 248 | &sourceData[(uint64_t)nextXRow * sourceByteRowStride], |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 249 | filterX, rowBuffer.advanceRow(), sourceHasAlpha); |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 250 | nextXRow++; |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | // Compute where in the output image this row of final data will go. |
sugoi | c197c8a | 2014-07-03 10:44:26 -0700 | [diff] [blame] | 255 | unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride]; |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 256 | |
| 257 | // Get the list of rows that the circular buffer has, in order. |
| 258 | int firstRowInCircularBuffer; |
| 259 | unsigned char* const* rowsToConvolve = |
| 260 | rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); |
| 261 | |
xiangze.zhang | d2265e5 | 2016-11-17 18:39:38 -0800 | [diff] [blame] | 262 | // Now compute the start of the subset of those rows that the filter needs. |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 263 | unsigned char* const* firstRowForFilter = |
| 264 | &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; |
| 265 | |
xiangze.zhang | d2265e5 | 2016-11-17 18:39:38 -0800 | [diff] [blame] | 266 | SkOpts::convolve_vertically(filterValues, filterLength, |
| 267 | firstRowForFilter, |
| 268 | filterX.numValues(), curOutputRow, |
| 269 | sourceHasAlpha); |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 270 | } |
reed | d1d4460 | 2015-10-01 11:21:57 -0700 | [diff] [blame] | 271 | return true; |
humper@google.com | 138ebc3 | 2013-07-19 20:20:04 +0000 | [diff] [blame] | 272 | } |