blob: 9d42b4f71dcdeff40544ac7f9ae907bf407d3b0a [file] [log] [blame]
krajcevski6c354882014-07-22 07:44:00 -07001/*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkTextureCompressor_LATC.h"
krajcevskib5294e82014-07-30 08:34:51 -07009#include "SkTextureCompressor_Blitter.h"
krajcevski6c354882014-07-22 07:44:00 -070010
11#include "SkEndian.h"
12
krajcevskib5294e82014-07-30 08:34:51 -070013// Compression options. In general, the slow version is much more accurate, but
14// much slower. The fast option is much faster, but much less accurate. YMMV.
15#define COMPRESS_LATC_SLOW 0
16#define COMPRESS_LATC_FAST 1
17
18////////////////////////////////////////////////////////////////////////////////
19
20#if COMPRESS_LATC_SLOW
21
krajcevski6c354882014-07-22 07:44:00 -070022////////////////////////////////////////////////////////////////////////////////
23//
24// Utility Functions
25//
26////////////////////////////////////////////////////////////////////////////////
27
28// Absolute difference between two values. More correct than SkTAbs(a - b)
29// because it works on unsigned values.
30template <typename T> inline T abs_diff(const T &a, const T &b) {
31 return (a > b) ? (a - b) : (b - a);
32}
33
34static bool is_extremal(uint8_t pixel) {
35 return 0 == pixel || 255 == pixel;
36}
37
38typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
39
40// This function is used by both R11 EAC and LATC to compress 4x4 blocks
41// of 8-bit alpha into 64-bit values that comprise the compressed data.
42// For both formats, we need to make sure that the dimensions of the
43// src pixels are divisible by 4, and copy 4x4 blocks one at a time
44// for compression.
45static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
46 int width, int height, int rowBytes,
47 A84x4To64BitProc proc) {
48 // Make sure that our data is well-formed enough to be considered for compression
49 if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
50 return false;
51 }
52
53 int blocksX = width >> 2;
54 int blocksY = height >> 2;
55
56 uint8_t block[16];
57 uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
58 for (int y = 0; y < blocksY; ++y) {
59 for (int x = 0; x < blocksX; ++x) {
60 // Load block
61 for (int k = 0; k < 4; ++k) {
62 memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
63 }
64
65 // Compress it
66 *encPtr = proc(block);
67 ++encPtr;
68 }
69 src += 4 * rowBytes;
70 }
71
72 return true;
73}
74
75////////////////////////////////////////////////////////////////////////////////
76//
77// LATC compressor
78//
79////////////////////////////////////////////////////////////////////////////////
80
81// LATC compressed texels down into square 4x4 blocks
82static const int kLATCPaletteSize = 8;
83static const int kLATCBlockSize = 4;
84static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
85
86// Generates an LATC palette. LATC constructs
87// a palette of eight colors from LUM0 and LUM1 using the algorithm:
88//
89// LUM0, if lum0 > lum1 and code(x,y) == 0
90// LUM1, if lum0 > lum1 and code(x,y) == 1
91// (6*LUM0+ LUM1)/7, if lum0 > lum1 and code(x,y) == 2
92// (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
93// (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
94// (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
95// (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
96// ( LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
97//
98// LUM0, if lum0 <= lum1 and code(x,y) == 0
99// LUM1, if lum0 <= lum1 and code(x,y) == 1
100// (4*LUM0+ LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
101// (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
102// (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
103// ( LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
104// 0, if lum0 <= lum1 and code(x,y) == 6
105// 255, if lum0 <= lum1 and code(x,y) == 7
106
107static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
108 palette[0] = lum0;
109 palette[1] = lum1;
110 if (lum0 > lum1) {
111 for (int i = 1; i < 7; i++) {
112 palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
113 }
114 } else {
115 for (int i = 1; i < 5; i++) {
116 palette[i+1] = ((5-i)*lum0 + i*lum1) / 5;
117 }
118 palette[6] = 0;
119 palette[7] = 255;
120 }
121}
122
123// Compress a block by using the bounding box of the pixels. It is assumed that
124// there are no extremal pixels in this block otherwise we would have used
125// compressBlockBBIgnoreExtremal.
126static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
127 uint8_t minVal = 255;
128 uint8_t maxVal = 0;
129 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
130 minVal = SkTMin(pixels[i], minVal);
131 maxVal = SkTMax(pixels[i], maxVal);
132 }
133
134 SkASSERT(!is_extremal(minVal));
135 SkASSERT(!is_extremal(maxVal));
136
137 uint8_t palette[kLATCPaletteSize];
138 generate_latc_palette(palette, maxVal, minVal);
139
140 uint64_t indices = 0;
141 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
142
143 // Find the best palette index
144 uint8_t bestError = abs_diff(pixels[i], palette[0]);
145 uint8_t idx = 0;
146 for (int j = 1; j < kLATCPaletteSize; ++j) {
147 uint8_t error = abs_diff(pixels[i], palette[j]);
148 if (error < bestError) {
149 bestError = error;
150 idx = j;
151 }
152 }
153
154 indices <<= 3;
155 indices |= idx;
156 }
157
158 return
159 SkEndian_SwapLE64(
160 static_cast<uint64_t>(maxVal) |
161 (static_cast<uint64_t>(minVal) << 8) |
162 (indices << 16));
163}
164
165// Compress a block by using the bounding box of the pixels without taking into
166// account the extremal values. The generated palette will contain extremal values
167// and fewer points along the line segment to interpolate.
168static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
169 uint8_t minVal = 255;
170 uint8_t maxVal = 0;
171 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
172 if (is_extremal(pixels[i])) {
173 continue;
174 }
175
176 minVal = SkTMin(pixels[i], minVal);
177 maxVal = SkTMax(pixels[i], maxVal);
178 }
179
180 SkASSERT(!is_extremal(minVal));
181 SkASSERT(!is_extremal(maxVal));
182
183 uint8_t palette[kLATCPaletteSize];
184 generate_latc_palette(palette, minVal, maxVal);
185
186 uint64_t indices = 0;
187 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
188
189 // Find the best palette index
190 uint8_t idx = 0;
191 if (is_extremal(pixels[i])) {
192 if (0xFF == pixels[i]) {
193 idx = 7;
194 } else if (0 == pixels[i]) {
195 idx = 6;
196 } else {
197 SkFAIL("Pixel is extremal but not really?!");
198 }
199 } else {
200 uint8_t bestError = abs_diff(pixels[i], palette[0]);
201 for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
202 uint8_t error = abs_diff(pixels[i], palette[j]);
203 if (error < bestError) {
204 bestError = error;
205 idx = j;
206 }
207 }
208 }
209
210 indices <<= 3;
211 indices |= idx;
212 }
213
214 return
215 SkEndian_SwapLE64(
216 static_cast<uint64_t>(minVal) |
217 (static_cast<uint64_t>(maxVal) << 8) |
218 (indices << 16));
219}
220
221
222// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
223// values LUM0 and LUM1, and an index into the generated palette. Details of how
224// the palette is generated can be found in the comments of generatePalette above.
225//
226// We choose which palette type to use based on whether or not 'pixels' contains
227// any extremal values (0 or 255). If there are extremal values, then we use the
228// palette that has the extremal values built in. Otherwise, we use the full bounding
229// box.
230
231static uint64_t compress_latc_block(const uint8_t pixels[]) {
232 // Collect unique pixels
233 int nUniquePixels = 0;
234 uint8_t uniquePixels[kLATCPixelsPerBlock];
235 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
236 bool foundPixel = false;
237 for (int j = 0; j < nUniquePixels; ++j) {
238 foundPixel = foundPixel || uniquePixels[j] == pixels[i];
239 }
240
241 if (!foundPixel) {
242 uniquePixels[nUniquePixels] = pixels[i];
243 ++nUniquePixels;
244 }
245 }
246
247 // If there's only one unique pixel, then our compression is easy.
248 if (1 == nUniquePixels) {
249 return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));
250
251 // Similarly, if there are only two unique pixels, then our compression is
252 // easy again: place the pixels in the block header, and assign the indices
253 // with one or zero depending on which pixel they belong to.
254 } else if (2 == nUniquePixels) {
255 uint64_t outBlock = 0;
256 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
257 int idx = 0;
258 if (pixels[i] == uniquePixels[1]) {
259 idx = 1;
260 }
261
262 outBlock <<= 3;
263 outBlock |= idx;
264 }
265 outBlock <<= 16;
266 outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
267 return SkEndian_SwapLE64(outBlock);
268 }
269
270 // Count non-maximal pixel values
271 int nonExtremalPixels = 0;
272 for (int i = 0; i < nUniquePixels; ++i) {
273 if (!is_extremal(uniquePixels[i])) {
274 ++nonExtremalPixels;
275 }
276 }
277
278 // If all the pixels are nonmaximal then compute the palette using
279 // the bounding box of all the pixels.
280 if (nonExtremalPixels == nUniquePixels) {
281 // This is really just for correctness, in all of my tests we
282 // never take this step. We don't lose too much perf here because
283 // most of the processing in this function is worth it for the
284 // 1 == nUniquePixels optimization.
285 return compress_latc_block_bb(pixels);
286 } else {
287 return compress_latc_block_bb_ignore_extremal(pixels);
288 }
289}
290
krajcevskib5294e82014-07-30 08:34:51 -0700291#endif // COMPRESS_LATC_SLOW
292
293////////////////////////////////////////////////////////////////////////////////
294
295#if COMPRESS_LATC_FAST
296
297// Take the top three indices of each int and pack them into the low 12
298// bits of the integer.
299static inline uint32_t convert_index(uint32_t x) {
300 // Since the palette is
301 // 255, 0, 219, 182, 146, 109, 73, 36
302 // we need to map the high three bits of each byte in the integer
303 // from
304 // 0 1 2 3 4 5 6 7
305 // to
306 // 1 7 6 5 4 3 2 0
307 //
308 // This first operation takes the mapping from
309 // 0 1 2 3 4 5 6 7 --> 7 6 5 4 3 2 1 0
310 x = 0x07070707 - ((x >> 5) & 0x07070707);
311
312 // mask is 1 if index is non-zero
313 const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101;
314
315 // add mask:
316 // 7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0
317 x = (x + mask);
318
319 // Handle overflow:
320 // 8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0
321 x |= (x >> 3) & 0x01010101;
322
323 // Mask out high bits:
324 // 9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0
325 x &= 0x07070707;
326
327 // Pack it in...
328#if defined (SK_CPU_BENDIAN)
329 return
330 (x >> 24) |
331 ((x >> 13) & 0x38) |
332 ((x >> 2) & 0x1C0) |
333 ((x << 9) & 0xE00);
334#else
335 return
336 (x & 0x7) |
337 ((x >> 5) & 0x38) |
338 ((x >> 10) & 0x1C0) |
339 ((x >> 15) & 0xE00);
340#endif
341}
342
343typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes);
344template<PackIndicesProc packIndicesProc>
345static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int rowBytes) {
346 *(reinterpret_cast<uint64_t*>(*dstPtr)) =
347 SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16));
348 *dstPtr += 8;
349}
350
351inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) {
352 uint64_t result = 0;
353 for (int i = 0; i < 4; ++i) {
354 const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes));
355 result |= static_cast<uint64_t>(convert_index(idx)) << 12*i;
356 }
357 return result;
358}
359
360inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) {
361 // !SPEED! Blarg, this is kind of annoying. SSE4 can make this
362 // a LOT faster.
363 uint8_t transposed[16];
364 for (int i = 0; i < 4; ++i) {
365 for (int j = 0; j < 4; ++j) {
366 transposed[j*4+i] = indices[i*rowBytes + j];
367 }
368 }
369
370 return PackRowMajor(transposed, 4);
371}
372
373static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
374 int width, int height, int rowBytes) {
375
376 if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) {
377 return false;
378 }
379
380 uint8_t** dstPtr = &dst;
381 for (int y = 0; y < height; y += 4) {
382 for (int x = 0; x < width; x += 4) {
383 compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
384 }
385 }
386
387 return true;
388}
389
390void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
391 compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
392}
393
394#endif // COMPRESS_LATC_FAST
395
krajcevski6c354882014-07-22 07:44:00 -0700396////////////////////////////////////////////////////////////////////////////////
397
398namespace SkTextureCompressor {
399
400bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
krajcevskib5294e82014-07-30 08:34:51 -0700401#if COMPRESS_LATC_FAST
402 return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
403#elif COMPRESS_LATC_SLOW
krajcevski6c354882014-07-22 07:44:00 -0700404 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
krajcevskib5294e82014-07-30 08:34:51 -0700405#else
406#error "Must choose either fast or slow LATC compression"
407#endif
krajcevski6c354882014-07-22 07:44:00 -0700408}
409
410SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer) {
krajcevskib5294e82014-07-30 08:34:51 -0700411#if COMPRESS_LATC_FAST
412 return new
413 SkTCompressedAlphaBlitter<4, 8, CompressA8LATCBlockVertical>
414 (width, height, outputBuffer);
415#elif COMPRESS_LATC_SLOW
krajcevski6c354882014-07-22 07:44:00 -0700416 // TODO (krajcevski)
417 return NULL;
krajcevskib5294e82014-07-30 08:34:51 -0700418#endif
krajcevski6c354882014-07-22 07:44:00 -0700419}
420
421} // SkTextureCompressor