blob: 5db0fc6c04e609c4ad3c72a0393a3ec9ee870e51 [file] [log] [blame]
krajcevski6c354882014-07-22 07:44:00 -07001/*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
#include "SkTextureCompressor_LATC.h"
#include "SkTextureCompressor_Blitter.h"

#include "SkEndian.h"

#include <string.h>
12
// Encoder selection. Each switch compiles its encoder in when non-zero.
//   COMPRESS_LATC_FAST: much faster, but much less accurate.
//   COMPRESS_LATC_SLOW: much more accurate, but much slower.
// CompressA8ToLATC tests COMPRESS_LATC_FAST first, so the fast encoder is
// used whenever it is enabled. YMMV.
#define COMPRESS_LATC_FAST 1
#define COMPRESS_LATC_SLOW 0
17
18////////////////////////////////////////////////////////////////////////////////
19
krajcevski4ad76e32014-07-31 14:12:50 -070020// Generates an LATC palette. LATC constructs
21// a palette of eight colors from LUM0 and LUM1 using the algorithm:
22//
23// LUM0, if lum0 > lum1 and code(x,y) == 0
24// LUM1, if lum0 > lum1 and code(x,y) == 1
25// (6*LUM0+ LUM1)/7, if lum0 > lum1 and code(x,y) == 2
26// (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
27// (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
28// (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
29// (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
30// ( LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
31//
32// LUM0, if lum0 <= lum1 and code(x,y) == 0
33// LUM1, if lum0 <= lum1 and code(x,y) == 1
34// (4*LUM0+ LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
35// (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
36// (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
37// ( LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
38// 0, if lum0 <= lum1 and code(x,y) == 6
39// 255, if lum0 <= lum1 and code(x,y) == 7
40
// Number of entries in an LATC palette.
static const int kLATCPaletteSize = 8;

// Fills the first eight entries of 'palette' from the two endpoints.
//
// Entries 0 and 1 are always lum0 and lum1. If lum0 > lum1, entries 2..7 are
// six values interpolated between the endpoints in steps of 1/7. Otherwise
// entries 2..5 are four values interpolated in steps of 1/5, and entries 6
// and 7 are the constants 0 and 255.
static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
    palette[0] = lum0;
    palette[1] = lum1;

    if (lum0 <= lum1) {
        // Six-value mode: four interpolants plus the two extremal values.
        for (int slot = 2; slot < 6; ++slot) {
            const int w1 = slot - 1;  // weight given to lum1
            palette[slot] = ((5 - w1)*lum0 + w1*lum1) / 5;
        }
        palette[6] = 0;
        palette[7] = 255;
        return;
    }

    // Eight-value mode: six interpolants.
    for (int slot = 2; slot < 8; ++slot) {
        const int w1 = slot - 1;  // weight given to lum1
        palette[slot] = ((7 - w1)*lum0 + w1*lum1) / 7;
    }
}
57
58////////////////////////////////////////////////////////////////////////////////
59
krajcevskib5294e82014-07-30 08:34:51 -070060#if COMPRESS_LATC_SLOW
61
krajcevski6c354882014-07-22 07:44:00 -070062////////////////////////////////////////////////////////////////////////////////
63//
64// Utility Functions
65//
66////////////////////////////////////////////////////////////////////////////////
67
// Returns the magnitude of the difference between 'a' and 'b'. The smaller
// operand is always subtracted from the larger one, so this is safe for
// unsigned types where SkTAbs(a - b) would not be.
template <typename T> inline T abs_diff(const T &a, const T &b) {
    if (a > b) {
        return a - b;
    }
    return b - a;
}
73
// Returns true when 'pixel' is one of the two extreme 8-bit values, 0 or 255.
static bool is_extremal(uint8_t pixel) {
    switch (pixel) {
        case 0:
        case 255:
            return true;
        default:
            return false;
    }
}
77
// Encodes one 4x4 block of 8-bit values, supplied as 16 contiguous bytes in
// row-major order, into a single 64-bit compressed block.
typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);

// Compresses an 8-bit image into a stream of 64-bit blocks, one per 4x4 tile,
// visiting tiles left to right and then top to bottom. The encoding itself is
// delegated to 'proc', so this routine is independent of the block format.
//
// dst       receives 8 bytes per tile; no alignment is required.
// src       top-left pixel of the source image.
// width     image width in pixels; must be a positive multiple of 4.
// height    image height in pixels; must be a positive multiple of 4.
// rowBytes  distance in bytes between the starts of consecutive source rows.
// proc      encoder invoked once per tile.
//
// Returns false, without touching dst, if the dimensions are not positive
// multiples of 4. Returns true otherwise.
static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
                                     int width, int height, int rowBytes,
                                     A84x4To64BitProc proc) {
    // Negative multiples of 4 also satisfy (n % 4) == 0, so test the sign
    // explicitly rather than only comparing against zero.
    if (width <= 0 || height <= 0 || (width % 4) != 0 || (height % 4) != 0) {
        return false;
    }

    const int blocksX = width >> 2;
    const int blocksY = height >> 2;

    uint8_t block[16];
    for (int y = 0; y < blocksY; ++y) {
        for (int x = 0; x < blocksX; ++x) {
            // Gather the tile's four rows into one contiguous buffer.
            for (int k = 0; k < 4; ++k) {
                memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
            }

            // Encode and store. dst is a byte pointer with no alignment
            // guarantee, so copy the result instead of storing through a
            // uint64_t pointer.
            const uint64_t encoded = proc(block);
            memcpy(dst, &encoded, sizeof(encoded));
            dst += sizeof(encoded);
        }
        src += 4 * rowBytes;
    }

    return true;
}
114
115////////////////////////////////////////////////////////////////////////////////
116//
117// LATC compressor
118//
119////////////////////////////////////////////////////////////////////////////////
120
121// LATC compressed texels down into square 4x4 blocks
krajcevski6c354882014-07-22 07:44:00 -0700122static const int kLATCBlockSize = 4;
123static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
124
krajcevski6c354882014-07-22 07:44:00 -0700125// Compress a block by using the bounding box of the pixels. It is assumed that
126// there are no extremal pixels in this block otherwise we would have used
127// compressBlockBBIgnoreExtremal.
128static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
129 uint8_t minVal = 255;
130 uint8_t maxVal = 0;
131 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
132 minVal = SkTMin(pixels[i], minVal);
133 maxVal = SkTMax(pixels[i], maxVal);
134 }
135
136 SkASSERT(!is_extremal(minVal));
137 SkASSERT(!is_extremal(maxVal));
138
139 uint8_t palette[kLATCPaletteSize];
140 generate_latc_palette(palette, maxVal, minVal);
141
142 uint64_t indices = 0;
143 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
144
145 // Find the best palette index
146 uint8_t bestError = abs_diff(pixels[i], palette[0]);
147 uint8_t idx = 0;
148 for (int j = 1; j < kLATCPaletteSize; ++j) {
149 uint8_t error = abs_diff(pixels[i], palette[j]);
150 if (error < bestError) {
151 bestError = error;
152 idx = j;
153 }
154 }
155
156 indices <<= 3;
157 indices |= idx;
158 }
159
160 return
161 SkEndian_SwapLE64(
162 static_cast<uint64_t>(maxVal) |
163 (static_cast<uint64_t>(minVal) << 8) |
164 (indices << 16));
165}
166
167// Compress a block by using the bounding box of the pixels without taking into
168// account the extremal values. The generated palette will contain extremal values
169// and fewer points along the line segment to interpolate.
170static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
171 uint8_t minVal = 255;
172 uint8_t maxVal = 0;
173 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
174 if (is_extremal(pixels[i])) {
175 continue;
176 }
177
178 minVal = SkTMin(pixels[i], minVal);
179 maxVal = SkTMax(pixels[i], maxVal);
180 }
181
182 SkASSERT(!is_extremal(minVal));
183 SkASSERT(!is_extremal(maxVal));
184
185 uint8_t palette[kLATCPaletteSize];
186 generate_latc_palette(palette, minVal, maxVal);
187
188 uint64_t indices = 0;
189 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
190
191 // Find the best palette index
192 uint8_t idx = 0;
193 if (is_extremal(pixels[i])) {
194 if (0xFF == pixels[i]) {
195 idx = 7;
196 } else if (0 == pixels[i]) {
197 idx = 6;
198 } else {
199 SkFAIL("Pixel is extremal but not really?!");
200 }
201 } else {
202 uint8_t bestError = abs_diff(pixels[i], palette[0]);
203 for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
204 uint8_t error = abs_diff(pixels[i], palette[j]);
205 if (error < bestError) {
206 bestError = error;
207 idx = j;
208 }
209 }
210 }
211
212 indices <<= 3;
213 indices |= idx;
214 }
215
216 return
217 SkEndian_SwapLE64(
218 static_cast<uint64_t>(minVal) |
219 (static_cast<uint64_t>(maxVal) << 8) |
220 (indices << 16));
221}
222
223
224// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
225// values LUM0 and LUM1, and an index into the generated palette. Details of how
226// the palette is generated can be found in the comments of generatePalette above.
227//
228// We choose which palette type to use based on whether or not 'pixels' contains
229// any extremal values (0 or 255). If there are extremal values, then we use the
230// palette that has the extremal values built in. Otherwise, we use the full bounding
231// box.
232
233static uint64_t compress_latc_block(const uint8_t pixels[]) {
234 // Collect unique pixels
235 int nUniquePixels = 0;
236 uint8_t uniquePixels[kLATCPixelsPerBlock];
237 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
238 bool foundPixel = false;
239 for (int j = 0; j < nUniquePixels; ++j) {
240 foundPixel = foundPixel || uniquePixels[j] == pixels[i];
241 }
242
243 if (!foundPixel) {
244 uniquePixels[nUniquePixels] = pixels[i];
245 ++nUniquePixels;
246 }
247 }
248
249 // If there's only one unique pixel, then our compression is easy.
250 if (1 == nUniquePixels) {
251 return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));
252
253 // Similarly, if there are only two unique pixels, then our compression is
254 // easy again: place the pixels in the block header, and assign the indices
255 // with one or zero depending on which pixel they belong to.
256 } else if (2 == nUniquePixels) {
257 uint64_t outBlock = 0;
258 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
259 int idx = 0;
260 if (pixels[i] == uniquePixels[1]) {
261 idx = 1;
262 }
263
264 outBlock <<= 3;
265 outBlock |= idx;
266 }
267 outBlock <<= 16;
268 outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
269 return SkEndian_SwapLE64(outBlock);
270 }
271
272 // Count non-maximal pixel values
273 int nonExtremalPixels = 0;
274 for (int i = 0; i < nUniquePixels; ++i) {
275 if (!is_extremal(uniquePixels[i])) {
276 ++nonExtremalPixels;
277 }
278 }
279
280 // If all the pixels are nonmaximal then compute the palette using
281 // the bounding box of all the pixels.
282 if (nonExtremalPixels == nUniquePixels) {
283 // This is really just for correctness, in all of my tests we
284 // never take this step. We don't lose too much perf here because
285 // most of the processing in this function is worth it for the
286 // 1 == nUniquePixels optimization.
287 return compress_latc_block_bb(pixels);
288 } else {
289 return compress_latc_block_bb_ignore_extremal(pixels);
290 }
291}
292
krajcevskib5294e82014-07-30 08:34:51 -0700293#endif // COMPRESS_LATC_SLOW
294
295////////////////////////////////////////////////////////////////////////////////
296
297#if COMPRESS_LATC_FAST
298
// Converts four 8-bit pixels, packed one per byte in 'x', into four three-bit
// palette indices packed into the low 12 bits of the result. The pixel that
// comes first in memory ends up in the lowest three bits.
static inline uint32_t convert_index(uint32_t x) {
    // With endpoints 255 and 0 the palette is approximately
    //    255, 0, 219, 182, 146, 109, 73, 36
    // so the top three bits of each pixel
    //    0 1 2 3 4 5 6 7
    // must become the palette index
    //    1 7 6 5 4 3 2 0
    const uint32_t kLowThreeBits = 0x07070707;
    const uint32_t kLowBit = 0x01010101;

    // Step 1: invert the range within every byte.
    //    0 1 2 3 4 5 6 7 --> 7 6 5 4 3 2 1 0
    uint32_t idx = kLowThreeBits - ((x >> 5) & kLowThreeBits);

    // Step 2: add one to every non-zero byte.
    //    7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0
    const uint32_t isNonZero = (idx | (idx >> 1) | (idx >> 2)) & kLowBit;
    idx += isNonZero;

    // Step 3: fold the overflow bit of an 8 back into bit zero.
    //    8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0
    idx |= (idx >> 3) & kLowBit;

    // Step 4: keep only the low three bits of every byte.
    //    9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0
    idx &= kLowThreeBits;

    // Step 5: squeeze the four three-bit fields together, ordered by the
    // position their source bytes had in memory.
    const uint32_t byte0 = idx & 0x7;
    const uint32_t byte1 = (idx >> 8) & 0x7;
    const uint32_t byte2 = (idx >> 16) & 0x7;
    const uint32_t byte3 = (idx >> 24) & 0x7;
#if defined (SK_CPU_BENDIAN)
    return byte3 | (byte2 << 3) | (byte1 << 6) | (byte0 << 9);
#else
    return byte0 | (byte1 << 3) | (byte2 << 6) | (byte3 << 9);
#endif
}
344
345typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes);
346template<PackIndicesProc packIndicesProc>
347static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int rowBytes) {
348 *(reinterpret_cast<uint64_t*>(*dstPtr)) =
349 SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16));
350 *dstPtr += 8;
351}
352
353inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) {
354 uint64_t result = 0;
355 for (int i = 0; i < 4; ++i) {
356 const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes));
357 result |= static_cast<uint64_t>(convert_index(idx)) << 12*i;
358 }
359 return result;
360}
361
362inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) {
363 // !SPEED! Blarg, this is kind of annoying. SSE4 can make this
364 // a LOT faster.
365 uint8_t transposed[16];
366 for (int i = 0; i < 4; ++i) {
367 for (int j = 0; j < 4; ++j) {
368 transposed[j*4+i] = indices[i*rowBytes + j];
369 }
370 }
371
372 return PackRowMajor(transposed, 4);
373}
374
375static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
376 int width, int height, int rowBytes) {
377
378 if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) {
379 return false;
380 }
381
382 uint8_t** dstPtr = &dst;
383 for (int y = 0; y < height; y += 4) {
384 for (int x = 0; x < width; x += 4) {
385 compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
386 }
387 }
388
389 return true;
390}
391
392void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
393 compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
394}
395
396#endif // COMPRESS_LATC_FAST
397
krajcevski4ad76e32014-07-31 14:12:50 -0700398void decompress_latc_block(uint8_t* dst, int dstRowBytes, const uint8_t* src) {
399 uint64_t block = SkEndian_SwapLE64(*(reinterpret_cast<const uint64_t *>(src)));
400 uint8_t lum0 = block & 0xFF;
401 uint8_t lum1 = (block >> 8) & 0xFF;
402
403 uint8_t palette[kLATCPaletteSize];
404 generate_latc_palette(palette, lum0, lum1);
405
406 block >>= 16;
407 for (int j = 0; j < 4; ++j) {
408 for (int i = 0; i < 4; ++i) {
409 dst[i] = palette[block & 0x7];
410 block >>= 3;
411 }
412 dst += dstRowBytes;
413 }
414}
415
krajcevski6c354882014-07-22 07:44:00 -0700416////////////////////////////////////////////////////////////////////////////////
417
418namespace SkTextureCompressor {
419
420bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
krajcevskib5294e82014-07-30 08:34:51 -0700421#if COMPRESS_LATC_FAST
422 return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
423#elif COMPRESS_LATC_SLOW
krajcevski6c354882014-07-22 07:44:00 -0700424 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
krajcevskib5294e82014-07-30 08:34:51 -0700425#else
426#error "Must choose either fast or slow LATC compression"
427#endif
krajcevski6c354882014-07-22 07:44:00 -0700428}
429
430SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer) {
krajcevskib5294e82014-07-30 08:34:51 -0700431#if COMPRESS_LATC_FAST
432 return new
433 SkTCompressedAlphaBlitter<4, 8, CompressA8LATCBlockVertical>
434 (width, height, outputBuffer);
435#elif COMPRESS_LATC_SLOW
krajcevski6c354882014-07-22 07:44:00 -0700436 // TODO (krajcevski)
437 return NULL;
krajcevskib5294e82014-07-30 08:34:51 -0700438#endif
krajcevski6c354882014-07-22 07:44:00 -0700439}
440
krajcevski4ad76e32014-07-31 14:12:50 -0700441void DecompressLATC(uint8_t* dst, int dstRowBytes, const uint8_t* src, int width, int height) {
442 for (int j = 0; j < height; j += 4) {
443 for (int i = 0; i < width; i += 4) {
444 decompress_latc_block(dst + i, dstRowBytes, src);
445 src += 8;
446 }
447 dst += 4 * dstRowBytes;
448 }
449}
450
krajcevski6c354882014-07-22 07:44:00 -0700451} // SkTextureCompressor