blob: 1e5e142ddb8d9911eb5de3f419b8f379f978a70f [file] [log] [blame]
krajcevski6c354882014-07-22 07:44:00 -07001/*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkTextureCompressor_LATC.h"
krajcevskib5294e82014-07-30 08:34:51 -07009#include "SkTextureCompressor_Blitter.h"
krajcevski6c354882014-07-22 07:44:00 -070010
krajcevskib8ccc2f2014-08-07 08:15:14 -070011#include "SkBlitter.h"
krajcevski6c354882014-07-22 07:44:00 -070012#include "SkEndian.h"
13
krajcevskib5294e82014-07-30 08:34:51 -070014// Compression options. In general, the slow version is much more accurate, but
15// much slower. The fast option is much faster, but much less accurate. YMMV.
16#define COMPRESS_LATC_SLOW 0
17#define COMPRESS_LATC_FAST 1
18
19////////////////////////////////////////////////////////////////////////////////
20
krajcevski4ad76e32014-07-31 14:12:50 -070021// Generates an LATC palette. LATC constructs
22// a palette of eight colors from LUM0 and LUM1 using the algorithm:
23//
24// LUM0, if lum0 > lum1 and code(x,y) == 0
25// LUM1, if lum0 > lum1 and code(x,y) == 1
26// (6*LUM0+ LUM1)/7, if lum0 > lum1 and code(x,y) == 2
27// (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
28// (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
29// (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
30// (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
31// ( LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
32//
33// LUM0, if lum0 <= lum1 and code(x,y) == 0
34// LUM1, if lum0 <= lum1 and code(x,y) == 1
35// (4*LUM0+ LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
36// (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
37// (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
38// ( LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
39// 0, if lum0 <= lum1 and code(x,y) == 6
40// 255, if lum0 <= lum1 and code(x,y) == 7
41
42static const int kLATCPaletteSize = 8;
43static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
44 palette[0] = lum0;
45 palette[1] = lum1;
46 if (lum0 > lum1) {
47 for (int i = 1; i < 7; i++) {
48 palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
49 }
50 } else {
51 for (int i = 1; i < 5; i++) {
52 palette[i+1] = ((5-i)*lum0 + i*lum1) / 5;
53 }
54 palette[6] = 0;
55 palette[7] = 255;
56 }
57}
58
59////////////////////////////////////////////////////////////////////////////////
60
krajcevskib5294e82014-07-30 08:34:51 -070061#if COMPRESS_LATC_SLOW
62
krajcevski6c354882014-07-22 07:44:00 -070063////////////////////////////////////////////////////////////////////////////////
64//
65// Utility Functions
66//
67////////////////////////////////////////////////////////////////////////////////
68
69// Absolute difference between two values. More correct than SkTAbs(a - b)
70// because it works on unsigned values.
71template <typename T> inline T abs_diff(const T &a, const T &b) {
72 return (a > b) ? (a - b) : (b - a);
73}
74
75static bool is_extremal(uint8_t pixel) {
76 return 0 == pixel || 255 == pixel;
77}
78
79typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
80
81// This function is used by both R11 EAC and LATC to compress 4x4 blocks
82// of 8-bit alpha into 64-bit values that comprise the compressed data.
83// For both formats, we need to make sure that the dimensions of the
84// src pixels are divisible by 4, and copy 4x4 blocks one at a time
85// for compression.
86static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
87 int width, int height, int rowBytes,
88 A84x4To64BitProc proc) {
89 // Make sure that our data is well-formed enough to be considered for compression
90 if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
91 return false;
92 }
93
94 int blocksX = width >> 2;
95 int blocksY = height >> 2;
96
97 uint8_t block[16];
98 uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
99 for (int y = 0; y < blocksY; ++y) {
100 for (int x = 0; x < blocksX; ++x) {
101 // Load block
102 for (int k = 0; k < 4; ++k) {
103 memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
104 }
105
106 // Compress it
107 *encPtr = proc(block);
108 ++encPtr;
109 }
110 src += 4 * rowBytes;
111 }
112
113 return true;
114}
115
116////////////////////////////////////////////////////////////////////////////////
117//
118// LATC compressor
119//
120////////////////////////////////////////////////////////////////////////////////
121
122// LATC compressed texels down into square 4x4 blocks
krajcevski6c354882014-07-22 07:44:00 -0700123static const int kLATCBlockSize = 4;
124static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
125
krajcevski6c354882014-07-22 07:44:00 -0700126// Compress a block by using the bounding box of the pixels. It is assumed that
127// there are no extremal pixels in this block otherwise we would have used
128// compressBlockBBIgnoreExtremal.
129static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
130 uint8_t minVal = 255;
131 uint8_t maxVal = 0;
132 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
133 minVal = SkTMin(pixels[i], minVal);
134 maxVal = SkTMax(pixels[i], maxVal);
135 }
136
137 SkASSERT(!is_extremal(minVal));
138 SkASSERT(!is_extremal(maxVal));
139
140 uint8_t palette[kLATCPaletteSize];
141 generate_latc_palette(palette, maxVal, minVal);
142
143 uint64_t indices = 0;
144 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
145
146 // Find the best palette index
147 uint8_t bestError = abs_diff(pixels[i], palette[0]);
148 uint8_t idx = 0;
149 for (int j = 1; j < kLATCPaletteSize; ++j) {
150 uint8_t error = abs_diff(pixels[i], palette[j]);
151 if (error < bestError) {
152 bestError = error;
153 idx = j;
154 }
155 }
156
157 indices <<= 3;
158 indices |= idx;
159 }
160
161 return
162 SkEndian_SwapLE64(
163 static_cast<uint64_t>(maxVal) |
164 (static_cast<uint64_t>(minVal) << 8) |
165 (indices << 16));
166}
167
168// Compress a block by using the bounding box of the pixels without taking into
169// account the extremal values. The generated palette will contain extremal values
170// and fewer points along the line segment to interpolate.
171static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
172 uint8_t minVal = 255;
173 uint8_t maxVal = 0;
174 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
175 if (is_extremal(pixels[i])) {
176 continue;
177 }
178
179 minVal = SkTMin(pixels[i], minVal);
180 maxVal = SkTMax(pixels[i], maxVal);
181 }
182
183 SkASSERT(!is_extremal(minVal));
184 SkASSERT(!is_extremal(maxVal));
185
186 uint8_t palette[kLATCPaletteSize];
187 generate_latc_palette(palette, minVal, maxVal);
188
189 uint64_t indices = 0;
190 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
191
192 // Find the best palette index
193 uint8_t idx = 0;
194 if (is_extremal(pixels[i])) {
195 if (0xFF == pixels[i]) {
196 idx = 7;
197 } else if (0 == pixels[i]) {
198 idx = 6;
199 } else {
200 SkFAIL("Pixel is extremal but not really?!");
201 }
202 } else {
203 uint8_t bestError = abs_diff(pixels[i], palette[0]);
204 for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
205 uint8_t error = abs_diff(pixels[i], palette[j]);
206 if (error < bestError) {
207 bestError = error;
208 idx = j;
209 }
210 }
211 }
212
213 indices <<= 3;
214 indices |= idx;
215 }
216
217 return
218 SkEndian_SwapLE64(
219 static_cast<uint64_t>(minVal) |
220 (static_cast<uint64_t>(maxVal) << 8) |
221 (indices << 16));
222}
223
224
225// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
226// values LUM0 and LUM1, and an index into the generated palette. Details of how
227// the palette is generated can be found in the comments of generatePalette above.
228//
229// We choose which palette type to use based on whether or not 'pixels' contains
230// any extremal values (0 or 255). If there are extremal values, then we use the
231// palette that has the extremal values built in. Otherwise, we use the full bounding
232// box.
233
234static uint64_t compress_latc_block(const uint8_t pixels[]) {
235 // Collect unique pixels
236 int nUniquePixels = 0;
237 uint8_t uniquePixels[kLATCPixelsPerBlock];
238 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
239 bool foundPixel = false;
240 for (int j = 0; j < nUniquePixels; ++j) {
241 foundPixel = foundPixel || uniquePixels[j] == pixels[i];
242 }
243
244 if (!foundPixel) {
245 uniquePixels[nUniquePixels] = pixels[i];
246 ++nUniquePixels;
247 }
248 }
249
250 // If there's only one unique pixel, then our compression is easy.
251 if (1 == nUniquePixels) {
252 return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));
253
254 // Similarly, if there are only two unique pixels, then our compression is
255 // easy again: place the pixels in the block header, and assign the indices
256 // with one or zero depending on which pixel they belong to.
257 } else if (2 == nUniquePixels) {
258 uint64_t outBlock = 0;
259 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
260 int idx = 0;
261 if (pixels[i] == uniquePixels[1]) {
262 idx = 1;
263 }
264
265 outBlock <<= 3;
266 outBlock |= idx;
267 }
268 outBlock <<= 16;
269 outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
270 return SkEndian_SwapLE64(outBlock);
271 }
272
273 // Count non-maximal pixel values
274 int nonExtremalPixels = 0;
275 for (int i = 0; i < nUniquePixels; ++i) {
276 if (!is_extremal(uniquePixels[i])) {
277 ++nonExtremalPixels;
278 }
279 }
280
281 // If all the pixels are nonmaximal then compute the palette using
282 // the bounding box of all the pixels.
283 if (nonExtremalPixels == nUniquePixels) {
284 // This is really just for correctness, in all of my tests we
285 // never take this step. We don't lose too much perf here because
286 // most of the processing in this function is worth it for the
287 // 1 == nUniquePixels optimization.
288 return compress_latc_block_bb(pixels);
289 } else {
290 return compress_latc_block_bb_ignore_extremal(pixels);
291 }
292}
293
krajcevskib5294e82014-07-30 08:34:51 -0700294#endif // COMPRESS_LATC_SLOW
295
296////////////////////////////////////////////////////////////////////////////////
297
298#if COMPRESS_LATC_FAST
299
krajcevskia10555a2014-08-11 13:34:22 -0700300// Take the top three bits of each index and pack them into the low 12
krajcevskib5294e82014-07-30 08:34:51 -0700301// bits of the integer.
krajcevskia10555a2014-08-11 13:34:22 -0700302static inline uint32_t pack_index(uint32_t x) {
303 // Pack it in...
304#if defined (SK_CPU_BENDIAN)
305 return
306 (x >> 24) |
307 ((x >> 13) & 0x38) |
308 ((x >> 2) & 0x1C0) |
309 ((x << 9) & 0xE00);
310#else
311 return
312 (x & 0x7) |
313 ((x >> 5) & 0x38) |
314 ((x >> 10) & 0x1C0) |
315 ((x >> 15) & 0xE00);
316#endif
317}
318
319// Converts each 8-bit byte in the integer into an LATC index, and then packs
320// the indices into the low 12 bits of the integer.
krajcevskib5294e82014-07-30 08:34:51 -0700321static inline uint32_t convert_index(uint32_t x) {
322 // Since the palette is
323 // 255, 0, 219, 182, 146, 109, 73, 36
324 // we need to map the high three bits of each byte in the integer
325 // from
326 // 0 1 2 3 4 5 6 7
327 // to
328 // 1 7 6 5 4 3 2 0
329 //
330 // This first operation takes the mapping from
331 // 0 1 2 3 4 5 6 7 --> 7 6 5 4 3 2 1 0
332 x = 0x07070707 - ((x >> 5) & 0x07070707);
333
334 // mask is 1 if index is non-zero
335 const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101;
336
337 // add mask:
338 // 7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0
339 x = (x + mask);
340
341 // Handle overflow:
342 // 8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0
343 x |= (x >> 3) & 0x01010101;
344
345 // Mask out high bits:
346 // 9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0
347 x &= 0x07070707;
krajcevskia10555a2014-08-11 13:34:22 -0700348
349 return pack_index(x);
krajcevskib5294e82014-07-30 08:34:51 -0700350}
351
352typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes);
353template<PackIndicesProc packIndicesProc>
354static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int rowBytes) {
355 *(reinterpret_cast<uint64_t*>(*dstPtr)) =
356 SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16));
357 *dstPtr += 8;
358}
359
360inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) {
361 uint64_t result = 0;
362 for (int i = 0; i < 4; ++i) {
363 const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes));
364 result |= static_cast<uint64_t>(convert_index(idx)) << 12*i;
365 }
366 return result;
367}
368
369inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) {
370 // !SPEED! Blarg, this is kind of annoying. SSE4 can make this
371 // a LOT faster.
372 uint8_t transposed[16];
373 for (int i = 0; i < 4; ++i) {
374 for (int j = 0; j < 4; ++j) {
375 transposed[j*4+i] = indices[i*rowBytes + j];
376 }
377 }
378
379 return PackRowMajor(transposed, 4);
380}
381
382static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
383 int width, int height, int rowBytes) {
384
385 if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) {
386 return false;
387 }
388
389 uint8_t** dstPtr = &dst;
390 for (int y = 0; y < height; y += 4) {
391 for (int x = 0; x < width; x += 4) {
392 compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
393 }
394 }
395
396 return true;
397}
398
399void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
400 compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
401}
402
403#endif // COMPRESS_LATC_FAST
404
krajcevski4ad76e32014-07-31 14:12:50 -0700405void decompress_latc_block(uint8_t* dst, int dstRowBytes, const uint8_t* src) {
406 uint64_t block = SkEndian_SwapLE64(*(reinterpret_cast<const uint64_t *>(src)));
407 uint8_t lum0 = block & 0xFF;
408 uint8_t lum1 = (block >> 8) & 0xFF;
409
410 uint8_t palette[kLATCPaletteSize];
411 generate_latc_palette(palette, lum0, lum1);
412
413 block >>= 16;
414 for (int j = 0; j < 4; ++j) {
415 for (int i = 0; i < 4; ++i) {
416 dst[i] = palette[block & 0x7];
417 block >>= 3;
418 }
419 dst += dstRowBytes;
420 }
421}
422
krajcevski45a0bf52014-08-07 11:10:22 -0700423// This is the type passed as the CompressorType argument of the compressed
424// blitter for the LATC format. The static functions required to be in this
425// struct are documented in SkTextureCompressor_Blitter.h
426struct CompressorLATC {
427 static inline void CompressA8Vertical(uint8_t* dst, const uint8_t block[]) {
428 compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
429 }
430
431 static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
432 int srcRowBytes) {
433 compress_a8_latc_block<PackRowMajor>(&dst, src, srcRowBytes);
434 }
435
krajcevskia10555a2014-08-11 13:34:22 -0700436#if PEDANTIC_BLIT_RECT
437 static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
438 const uint8_t* mask) {
439 // Pack the mask
440 uint64_t cmpMask = 0;
441 for (int i = 0; i < 4; ++i) {
442 const uint32_t idx = *(reinterpret_cast<const uint32_t*>(src + i*srcRowBytes));
443 cmpMask |= static_cast<uint64_t>(pack_index(idx)) << 12*i;
444 }
445 cmpMask = SkEndian_SwapLE64(cmpMask << 16); // avoid header
446
447 uint64_t cmpSrc;
448 uint8_t *cmpSrcPtr = reinterpret_cast<uint8_t*>(&cmpSrc);
449 compress_a8_latc_block<PackRowMajor>(&cmpSrcPtr, src, srcRowBytes);
450
451 // Mask out header
452 cmpSrc = cmpSrc & cmpMask;
453
454 // Read destination encoding
455 uint64_t *cmpDst = reinterpret_cast<uint64_t*>(dst);
456
457 // If the destination is the encoding for a blank block, then we need
458 // to properly set the header
459 if (0 == cmpDst) {
460 *cmpDst = SkTEndian_SwapLE64(0x24924924924900FFULL);
461 }
462
463 // Set the new indices
464 *cmpDst &= ~cmpMask;
465 *cmpDst |= cmpSrc;
krajcevski45a0bf52014-08-07 11:10:22 -0700466 }
krajcevskia10555a2014-08-11 13:34:22 -0700467#endif // PEDANTIC_BLIT_RECT
krajcevski45a0bf52014-08-07 11:10:22 -0700468};
469
krajcevski6c354882014-07-22 07:44:00 -0700470////////////////////////////////////////////////////////////////////////////////
471
472namespace SkTextureCompressor {
473
474bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
krajcevskib5294e82014-07-30 08:34:51 -0700475#if COMPRESS_LATC_FAST
476 return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
477#elif COMPRESS_LATC_SLOW
krajcevski6c354882014-07-22 07:44:00 -0700478 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
krajcevskib5294e82014-07-30 08:34:51 -0700479#else
480#error "Must choose either fast or slow LATC compression"
481#endif
krajcevski6c354882014-07-22 07:44:00 -0700482}
483
krajcevskib8ccc2f2014-08-07 08:15:14 -0700484SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer,
485 SkTBlitterAllocator* allocator) {
486 if ((width % 4) != 0 || (height % 4) != 0) {
487 return NULL;
488 }
489
krajcevskib5294e82014-07-30 08:34:51 -0700490#if COMPRESS_LATC_FAST
krajcevskib8ccc2f2014-08-07 08:15:14 -0700491 // Memset the output buffer to an encoding that decodes to zero. We must do this
492 // in order to avoid having uninitialized values in the buffer if the blitter
493 // decides not to write certain scanlines (and skip entire rows of blocks).
494 // In the case of LATC, if everything is zero, then LUM0 and LUM1 are also zero,
495 // and they will only be non-zero (0xFF) if the index is 7. So bzero will do just fine.
496 // (8 bytes per block) * (w * h / 16 blocks) = w * h / 2
497 sk_bzero(outputBuffer, width * height / 2);
498
499 return allocator->createT<
krajcevski45a0bf52014-08-07 11:10:22 -0700500 SkTCompressedAlphaBlitter<4, 8, CompressorLATC>, int, int, void* >
krajcevskib5294e82014-07-30 08:34:51 -0700501 (width, height, outputBuffer);
502#elif COMPRESS_LATC_SLOW
krajcevski6c354882014-07-22 07:44:00 -0700503 // TODO (krajcevski)
504 return NULL;
krajcevskib5294e82014-07-30 08:34:51 -0700505#endif
krajcevski6c354882014-07-22 07:44:00 -0700506}
507
krajcevski4ad76e32014-07-31 14:12:50 -0700508void DecompressLATC(uint8_t* dst, int dstRowBytes, const uint8_t* src, int width, int height) {
509 for (int j = 0; j < height; j += 4) {
510 for (int i = 0; i < width; i += 4) {
511 decompress_latc_block(dst + i, dstRowBytes, src);
512 src += 8;
513 }
514 dst += 4 * dstRowBytes;
515 }
516}
517
krajcevski6c354882014-07-22 07:44:00 -0700518} // SkTextureCompressor