blob: f0cf4a90ac3d09cf3c50caf37a82c90d4b925d27 [file] [log] [blame]
krajcevski6c354882014-07-22 07:44:00 -07001/*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkTextureCompressor_LATC.h"
krajcevskib5294e82014-07-30 08:34:51 -07009#include "SkTextureCompressor_Blitter.h"
pavel47eedcc2014-10-23 13:18:50 -070010#include "SkTextureCompressor_Utils.h"
krajcevski6c354882014-07-22 07:44:00 -070011
krajcevskib8ccc2f2014-08-07 08:15:14 -070012#include "SkBlitter.h"
krajcevski6c354882014-07-22 07:44:00 -070013#include "SkEndian.h"
14
krajcevskib5294e82014-07-30 08:34:51 -070015// Compression options. In general, the slow version is much more accurate, but
16// much slower. The fast option is much faster, but much less accurate. YMMV.
17#define COMPRESS_LATC_SLOW 0
18#define COMPRESS_LATC_FAST 1
19
20////////////////////////////////////////////////////////////////////////////////
21
krajcevski4ad76e32014-07-31 14:12:50 -070022// Generates an LATC palette. LATC constructs
23// a palette of eight colors from LUM0 and LUM1 using the algorithm:
24//
25// LUM0, if lum0 > lum1 and code(x,y) == 0
26// LUM1, if lum0 > lum1 and code(x,y) == 1
27// (6*LUM0+ LUM1)/7, if lum0 > lum1 and code(x,y) == 2
28// (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
29// (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
30// (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
31// (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
32// ( LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
33//
34// LUM0, if lum0 <= lum1 and code(x,y) == 0
35// LUM1, if lum0 <= lum1 and code(x,y) == 1
36// (4*LUM0+ LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
37// (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
38// (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
39// ( LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
40// 0, if lum0 <= lum1 and code(x,y) == 6
41// 255, if lum0 <= lum1 and code(x,y) == 7
42
// Number of entries in an LATC palette (one per 3-bit index value).
static const int kLATCPaletteSize = 8;

// Fills palette[0..7] from the two block endpoints.
//
// palette[0] and palette[1] are always lum0 and lum1. When lum0 > lum1 the
// remaining six entries are evenly weighted blends in sevenths; otherwise
// entries 2..5 are blends in fifths and entries 6 and 7 are the constants
// 0 and 255. All divisions are truncating integer divisions.
//
// palette must have room for at least eight entries.
static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
    // The two modes differ only in how many interpolation steps separate
    // the endpoints.
    const int steps = (lum0 > lum1) ? 7 : 5;

    palette[0] = lum0;
    palette[1] = lum1;

    for (int w = 1; w < steps; ++w) {
        palette[w + 1] = ((steps - w) * lum0 + w * lum1) / steps;
    }

    if (5 == steps) {
        // The five-step mode reserves the last two slots for the extremes.
        palette[6] = 0;
        palette[7] = 255;
    }
}
59
60////////////////////////////////////////////////////////////////////////////////
61
krajcevskib5294e82014-07-30 08:34:51 -070062#if COMPRESS_LATC_SLOW
63
krajcevski6c354882014-07-22 07:44:00 -070064////////////////////////////////////////////////////////////////////////////////
65//
66// Utility Functions
67//
68////////////////////////////////////////////////////////////////////////////////
69
// Returns |a - b| by always subtracting the smaller operand from the larger
// one, so the subtraction itself never goes negative. This makes it usable
// with unsigned types, where taking the absolute value of (a - b) would not
// work.
template <typename T> inline T abs_diff(const T &a, const T &b) {
    if (a > b) {
        return a - b;
    }
    return b - a;
}
75
// True when the pixel sits at either end of the 8-bit range (0 or 255).
static bool is_extremal(uint8_t pixel) {
    if (0 == pixel) {
        return true;
    }
    return 255 == pixel;
}
79
// Encodes one 4x4 block of 8-bit alpha (16 bytes, four rows of four pixels
// stored back to back) into a single 64-bit compressed block.
typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);

// Compresses an A8 image into a stream of 64-bit blocks, one per 4x4 tile,
// visiting tiles left to right and then top to bottom.
//
// dst       receives 8 bytes per tile; it needs no particular alignment.
// src       points at the top-left pixel of the image.
// width     image width in pixels; must be positive and a multiple of 4.
// height    image height in pixels; must be positive and a multiple of 4.
// rowBytes  distance in bytes between the starts of consecutive source rows.
// proc      encoder invoked once per tile on a tightly packed 16-byte copy.
//
// Returns false, without touching dst, when the dimensions are not
// acceptable; returns true once every tile has been written.
static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
                                     int width, int height, size_t rowBytes,
                                     A84x4To64BitProc proc) {
    // Make sure that our data is well-formed enough to be considered for
    // compression. Negative sizes are rejected along with zero: they used to
    // slip through whenever they were divisible by four and reported success
    // without producing any output.
    if (width <= 0 || height <= 0 || (width % 4) != 0 || (height % 4) != 0) {
        return false;
    }

    const int blocksX = width >> 2;
    const int blocksY = height >> 2;

    uint8_t block[16];
    uint8_t* out = dst;
    for (int y = 0; y < blocksY; ++y) {
        for (int x = 0; x < blocksX; ++x) {
            // Gather the four source rows of this tile into one contiguous
            // buffer so the encoder does not need to know about rowBytes.
            for (int k = 0; k < 4; ++k) {
                memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
            }

            // Store with memcpy rather than through a uint64_t*: dst is a
            // byte buffer and is not guaranteed to be 8-byte aligned.
            const uint64_t encoded = proc(block);
            memcpy(out, &encoded, sizeof(encoded));
            out += sizeof(encoded);
        }
        src += 4 * rowBytes;
    }

    return true;
}
116
117////////////////////////////////////////////////////////////////////////////////
118//
119// LATC compressor
120//
121////////////////////////////////////////////////////////////////////////////////
122
123// LATC compressed texels down into square 4x4 blocks
krajcevski6c354882014-07-22 07:44:00 -0700124static const int kLATCBlockSize = 4;
125static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
126
krajcevski6c354882014-07-22 07:44:00 -0700127// Compress a block by using the bounding box of the pixels. It is assumed that
128// there are no extremal pixels in this block otherwise we would have used
129// compressBlockBBIgnoreExtremal.
130static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
131 uint8_t minVal = 255;
132 uint8_t maxVal = 0;
133 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
134 minVal = SkTMin(pixels[i], minVal);
135 maxVal = SkTMax(pixels[i], maxVal);
136 }
137
138 SkASSERT(!is_extremal(minVal));
139 SkASSERT(!is_extremal(maxVal));
140
141 uint8_t palette[kLATCPaletteSize];
142 generate_latc_palette(palette, maxVal, minVal);
143
144 uint64_t indices = 0;
145 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
146
147 // Find the best palette index
148 uint8_t bestError = abs_diff(pixels[i], palette[0]);
149 uint8_t idx = 0;
150 for (int j = 1; j < kLATCPaletteSize; ++j) {
151 uint8_t error = abs_diff(pixels[i], palette[j]);
152 if (error < bestError) {
153 bestError = error;
154 idx = j;
155 }
156 }
157
158 indices <<= 3;
159 indices |= idx;
160 }
161
162 return
163 SkEndian_SwapLE64(
164 static_cast<uint64_t>(maxVal) |
165 (static_cast<uint64_t>(minVal) << 8) |
166 (indices << 16));
167}
168
169// Compress a block by using the bounding box of the pixels without taking into
170// account the extremal values. The generated palette will contain extremal values
171// and fewer points along the line segment to interpolate.
172static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
173 uint8_t minVal = 255;
174 uint8_t maxVal = 0;
175 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
176 if (is_extremal(pixels[i])) {
177 continue;
178 }
179
180 minVal = SkTMin(pixels[i], minVal);
181 maxVal = SkTMax(pixels[i], maxVal);
182 }
183
184 SkASSERT(!is_extremal(minVal));
185 SkASSERT(!is_extremal(maxVal));
186
187 uint8_t palette[kLATCPaletteSize];
188 generate_latc_palette(palette, minVal, maxVal);
189
190 uint64_t indices = 0;
191 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
192
193 // Find the best palette index
194 uint8_t idx = 0;
195 if (is_extremal(pixels[i])) {
196 if (0xFF == pixels[i]) {
197 idx = 7;
198 } else if (0 == pixels[i]) {
199 idx = 6;
200 } else {
201 SkFAIL("Pixel is extremal but not really?!");
202 }
203 } else {
204 uint8_t bestError = abs_diff(pixels[i], palette[0]);
205 for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
206 uint8_t error = abs_diff(pixels[i], palette[j]);
207 if (error < bestError) {
208 bestError = error;
209 idx = j;
210 }
211 }
212 }
213
214 indices <<= 3;
215 indices |= idx;
216 }
217
218 return
219 SkEndian_SwapLE64(
220 static_cast<uint64_t>(minVal) |
221 (static_cast<uint64_t>(maxVal) << 8) |
222 (indices << 16));
223}
224
225
226// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
227// values LUM0 and LUM1, and an index into the generated palette. Details of how
228// the palette is generated can be found in the comments of generatePalette above.
229//
230// We choose which palette type to use based on whether or not 'pixels' contains
231// any extremal values (0 or 255). If there are extremal values, then we use the
232// palette that has the extremal values built in. Otherwise, we use the full bounding
233// box.
234
235static uint64_t compress_latc_block(const uint8_t pixels[]) {
236 // Collect unique pixels
237 int nUniquePixels = 0;
238 uint8_t uniquePixels[kLATCPixelsPerBlock];
239 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
240 bool foundPixel = false;
241 for (int j = 0; j < nUniquePixels; ++j) {
242 foundPixel = foundPixel || uniquePixels[j] == pixels[i];
243 }
244
245 if (!foundPixel) {
246 uniquePixels[nUniquePixels] = pixels[i];
247 ++nUniquePixels;
248 }
249 }
250
251 // If there's only one unique pixel, then our compression is easy.
252 if (1 == nUniquePixels) {
253 return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));
254
255 // Similarly, if there are only two unique pixels, then our compression is
256 // easy again: place the pixels in the block header, and assign the indices
257 // with one or zero depending on which pixel they belong to.
258 } else if (2 == nUniquePixels) {
259 uint64_t outBlock = 0;
260 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
261 int idx = 0;
262 if (pixels[i] == uniquePixels[1]) {
263 idx = 1;
264 }
265
266 outBlock <<= 3;
267 outBlock |= idx;
268 }
269 outBlock <<= 16;
270 outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
271 return SkEndian_SwapLE64(outBlock);
272 }
273
274 // Count non-maximal pixel values
275 int nonExtremalPixels = 0;
276 for (int i = 0; i < nUniquePixels; ++i) {
277 if (!is_extremal(uniquePixels[i])) {
278 ++nonExtremalPixels;
279 }
280 }
281
282 // If all the pixels are nonmaximal then compute the palette using
283 // the bounding box of all the pixels.
284 if (nonExtremalPixels == nUniquePixels) {
285 // This is really just for correctness, in all of my tests we
286 // never take this step. We don't lose too much perf here because
287 // most of the processing in this function is worth it for the
288 // 1 == nUniquePixels optimization.
289 return compress_latc_block_bb(pixels);
290 } else {
291 return compress_latc_block_bb_ignore_extremal(pixels);
292 }
293}
294
krajcevskib5294e82014-07-30 08:34:51 -0700295#endif // COMPRESS_LATC_SLOW
296
297////////////////////////////////////////////////////////////////////////////////
298
299#if COMPRESS_LATC_FAST
300
// Takes the low three bits of each of the four bytes in x and packs them into
// the low 12 bits of the result. The byte that comes first in memory (when x
// was loaded as a native-endian word) ends up in bits 0-2, the next in bits
// 3-5, and so on. Bits 12 and above of the result are always zero.
static inline uint32_t pack_index(uint32_t x) {
    // Pack it in...
#if defined (SK_CPU_BENDIAN)
    // The first byte in memory is the most significant one. It is masked like
    // the other three so that stray high bits in that byte cannot leak into
    // neighbouring index fields.
    return
        ((x >> 24) & 0x7)   |
        ((x >> 13) & 0x38)  |
        ((x >> 2)  & 0x1C0) |
        ((x << 9)  & 0xE00);
#else
    // The first byte in memory is the least significant one.
    return
        (x & 0x7)           |
        ((x >> 5)  & 0x38)  |
        ((x >> 10) & 0x1C0) |
        ((x >> 15) & 0xE00);
#endif
}
319
320// Converts each 8-bit byte in the integer into an LATC index, and then packs
321// the indices into the low 12 bits of the integer.
krajcevskib5294e82014-07-30 08:34:51 -0700322static inline uint32_t convert_index(uint32_t x) {
323 // Since the palette is
324 // 255, 0, 219, 182, 146, 109, 73, 36
325 // we need to map the high three bits of each byte in the integer
326 // from
327 // 0 1 2 3 4 5 6 7
328 // to
329 // 1 7 6 5 4 3 2 0
330 //
331 // This first operation takes the mapping from
332 // 0 1 2 3 4 5 6 7 --> 7 6 5 4 3 2 1 0
pavel47eedcc2014-10-23 13:18:50 -0700333 x = 0x07070707 - SkTextureCompressor::ConvertToThreeBitIndex(x);
krajcevskib5294e82014-07-30 08:34:51 -0700334
335 // mask is 1 if index is non-zero
336 const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101;
337
338 // add mask:
339 // 7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0
340 x = (x + mask);
341
342 // Handle overflow:
343 // 8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0
344 x |= (x >> 3) & 0x01010101;
345
346 // Mask out high bits:
347 // 9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0
348 x &= 0x07070707;
krajcevskia10555a2014-08-11 13:34:22 -0700349
350 return pack_index(x);
krajcevskib5294e82014-07-30 08:34:51 -0700351}
352
bsalomon98806072014-12-12 15:11:17 -0800353typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, size_t rowBytes);
krajcevskib5294e82014-07-30 08:34:51 -0700354template<PackIndicesProc packIndicesProc>
bsalomon98806072014-12-12 15:11:17 -0800355static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, size_t rowBytes) {
krajcevskib5294e82014-07-30 08:34:51 -0700356 *(reinterpret_cast<uint64_t*>(*dstPtr)) =
357 SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16));
358 *dstPtr += 8;
359}
360
bsalomon98806072014-12-12 15:11:17 -0800361inline uint64_t PackRowMajor(const uint8_t *indices, size_t rowBytes) {
krajcevskib5294e82014-07-30 08:34:51 -0700362 uint64_t result = 0;
363 for (int i = 0; i < 4; ++i) {
364 const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes));
365 result |= static_cast<uint64_t>(convert_index(idx)) << 12*i;
366 }
367 return result;
368}
369
bsalomon98806072014-12-12 15:11:17 -0800370inline uint64_t PackColumnMajor(const uint8_t *indices, size_t rowBytes) {
krajcevskib5294e82014-07-30 08:34:51 -0700371 // !SPEED! Blarg, this is kind of annoying. SSE4 can make this
372 // a LOT faster.
373 uint8_t transposed[16];
374 for (int i = 0; i < 4; ++i) {
375 for (int j = 0; j < 4; ++j) {
376 transposed[j*4+i] = indices[i*rowBytes + j];
377 }
378 }
379
380 return PackRowMajor(transposed, 4);
381}
382
383static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
bsalomon98806072014-12-12 15:11:17 -0800384 int width, int height, size_t rowBytes) {
krajcevskib5294e82014-07-30 08:34:51 -0700385
386 if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) {
387 return false;
388 }
389
390 uint8_t** dstPtr = &dst;
391 for (int y = 0; y < height; y += 4) {
392 for (int x = 0; x < width; x += 4) {
393 compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
394 }
395 }
396
397 return true;
398}
399
400void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
401 compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
402}
403
404#endif // COMPRESS_LATC_FAST
405
krajcevski4ad76e32014-07-31 14:12:50 -0700406void decompress_latc_block(uint8_t* dst, int dstRowBytes, const uint8_t* src) {
407 uint64_t block = SkEndian_SwapLE64(*(reinterpret_cast<const uint64_t *>(src)));
408 uint8_t lum0 = block & 0xFF;
409 uint8_t lum1 = (block >> 8) & 0xFF;
410
411 uint8_t palette[kLATCPaletteSize];
412 generate_latc_palette(palette, lum0, lum1);
413
414 block >>= 16;
415 for (int j = 0; j < 4; ++j) {
416 for (int i = 0; i < 4; ++i) {
417 dst[i] = palette[block & 0x7];
418 block >>= 3;
419 }
420 dst += dstRowBytes;
421 }
422}
423
krajcevski45a0bf52014-08-07 11:10:22 -0700424// This is the type passed as the CompressorType argument of the compressed
425// blitter for the LATC format. The static functions required to be in this
426// struct are documented in SkTextureCompressor_Blitter.h
427struct CompressorLATC {
428 static inline void CompressA8Vertical(uint8_t* dst, const uint8_t block[]) {
429 compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
430 }
431
432 static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
433 int srcRowBytes) {
434 compress_a8_latc_block<PackRowMajor>(&dst, src, srcRowBytes);
435 }
436
krajcevskia10555a2014-08-11 13:34:22 -0700437#if PEDANTIC_BLIT_RECT
438 static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
439 const uint8_t* mask) {
440 // Pack the mask
441 uint64_t cmpMask = 0;
442 for (int i = 0; i < 4; ++i) {
443 const uint32_t idx = *(reinterpret_cast<const uint32_t*>(src + i*srcRowBytes));
444 cmpMask |= static_cast<uint64_t>(pack_index(idx)) << 12*i;
445 }
446 cmpMask = SkEndian_SwapLE64(cmpMask << 16); // avoid header
447
448 uint64_t cmpSrc;
449 uint8_t *cmpSrcPtr = reinterpret_cast<uint8_t*>(&cmpSrc);
450 compress_a8_latc_block<PackRowMajor>(&cmpSrcPtr, src, srcRowBytes);
451
452 // Mask out header
453 cmpSrc = cmpSrc & cmpMask;
454
455 // Read destination encoding
456 uint64_t *cmpDst = reinterpret_cast<uint64_t*>(dst);
457
458 // If the destination is the encoding for a blank block, then we need
459 // to properly set the header
460 if (0 == cmpDst) {
461 *cmpDst = SkTEndian_SwapLE64(0x24924924924900FFULL);
462 }
463
464 // Set the new indices
465 *cmpDst &= ~cmpMask;
466 *cmpDst |= cmpSrc;
krajcevski45a0bf52014-08-07 11:10:22 -0700467 }
krajcevskia10555a2014-08-11 13:34:22 -0700468#endif // PEDANTIC_BLIT_RECT
krajcevski45a0bf52014-08-07 11:10:22 -0700469};
470
krajcevski6c354882014-07-22 07:44:00 -0700471////////////////////////////////////////////////////////////////////////////////
472
473namespace SkTextureCompressor {
474
bsalomon98806072014-12-12 15:11:17 -0800475bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, size_t rowBytes) {
krajcevskib5294e82014-07-30 08:34:51 -0700476#if COMPRESS_LATC_FAST
477 return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
478#elif COMPRESS_LATC_SLOW
krajcevski6c354882014-07-22 07:44:00 -0700479 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
krajcevskib5294e82014-07-30 08:34:51 -0700480#else
481#error "Must choose either fast or slow LATC compression"
482#endif
krajcevski6c354882014-07-22 07:44:00 -0700483}
484
krajcevskib8ccc2f2014-08-07 08:15:14 -0700485SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer,
486 SkTBlitterAllocator* allocator) {
487 if ((width % 4) != 0 || (height % 4) != 0) {
488 return NULL;
489 }
490
krajcevskib5294e82014-07-30 08:34:51 -0700491#if COMPRESS_LATC_FAST
krajcevskib8ccc2f2014-08-07 08:15:14 -0700492 // Memset the output buffer to an encoding that decodes to zero. We must do this
493 // in order to avoid having uninitialized values in the buffer if the blitter
494 // decides not to write certain scanlines (and skip entire rows of blocks).
495 // In the case of LATC, if everything is zero, then LUM0 and LUM1 are also zero,
496 // and they will only be non-zero (0xFF) if the index is 7. So bzero will do just fine.
497 // (8 bytes per block) * (w * h / 16 blocks) = w * h / 2
498 sk_bzero(outputBuffer, width * height / 2);
499
500 return allocator->createT<
krajcevski45a0bf52014-08-07 11:10:22 -0700501 SkTCompressedAlphaBlitter<4, 8, CompressorLATC>, int, int, void* >
krajcevskib5294e82014-07-30 08:34:51 -0700502 (width, height, outputBuffer);
503#elif COMPRESS_LATC_SLOW
krajcevski6c354882014-07-22 07:44:00 -0700504 // TODO (krajcevski)
505 return NULL;
krajcevskib5294e82014-07-30 08:34:51 -0700506#endif
krajcevski6c354882014-07-22 07:44:00 -0700507}
508
krajcevski4ad76e32014-07-31 14:12:50 -0700509void DecompressLATC(uint8_t* dst, int dstRowBytes, const uint8_t* src, int width, int height) {
510 for (int j = 0; j < height; j += 4) {
511 for (int i = 0; i < width; i += 4) {
512 decompress_latc_block(dst + i, dstRowBytes, src);
513 src += 8;
514 }
515 dst += 4 * dstRowBytes;
516 }
517}
518
krajcevski6c354882014-07-22 07:44:00 -0700519} // SkTextureCompressor