Blame - src/utils/SkTextureCompressor_LATC.cpp - platform/external/skia

blob: 5db0fc6c04e609c4ad3c72a0393a3ec9ee870e51 [file] [log] [blame]

krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	1	/*
				2	* Copyright 2014 Google Inc.
				3	*
				4	* Use of this source code is governed by a BSD-style license that can be
				5	* found in the LICENSE file.
				6	*/
				7
				8	#include "SkTextureCompressor_LATC.h"
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	9	#include "SkTextureCompressor_Blitter.h"
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	10
				11	#include "SkEndian.h"
				12
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	13	// Compression options. In general, the slow version is much more accurate, but
				14	// much slower. The fast option is much faster, but much less accurate. YMMV.
				15	#define COMPRESS_LATC_SLOW 0
				16	#define COMPRESS_LATC_FAST 1
				17
				18	////////////////////////////////////////////////////////////////////////////////
				19
// Generates an LATC palette. LATC constructs a palette of eight colors from
// LUM0 and LUM1 using the algorithm:
//
// LUM0,              if lum0 > lum1 and code(x,y) == 0
// LUM1,              if lum0 > lum1 and code(x,y) == 1
// (6*LUM0+  LUM1)/7, if lum0 > lum1 and code(x,y) == 2
// (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
// (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
// (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
// (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
// (  LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
//
// LUM0,              if lum0 <= lum1 and code(x,y) == 0
// LUM1,              if lum0 <= lum1 and code(x,y) == 1
// (4*LUM0+  LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
// (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
// (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
// (  LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
// 0,                 if lum0 <= lum1 and code(x,y) == 6
// 255,               if lum0 <= lum1 and code(x,y) == 7

// Number of entries in an LATC palette (one per 3-bit index value).
static const int kLATCPaletteSize = 8;

// Fills palette[0..7] from the two endpoint values following the table above.
// 'palette' must have room for at least eight entries. The interpolated
// entries use truncating integer division.
static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
    palette[0] = lum0;
    palette[1] = lum1;
    if (lum0 > lum1) {
        // Six interpolated values between the endpoints.
        for (int i = 1; i < 7; i++) {
            palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
        }
    } else {
        // Four interpolated values, plus the two extremal values.
        for (int i = 1; i < 5; i++) {
            palette[i+1] = ((5-i)*lum0 + i*lum1) / 5;
        }
        palette[6] = 0;
        palette[7] = 255;
    }
}
				57
				58	////////////////////////////////////////////////////////////////////////////////
				59
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	60	#if COMPRESS_LATC_SLOW
				61
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	62	////////////////////////////////////////////////////////////////////////////////
				63	//
				64	// Utility Functions
				65	//
				66	////////////////////////////////////////////////////////////////////////////////
				67
				68	// Absolute difference between two values. More correct than SkTAbs(a - b)
				69	// because it works on unsigned values.
				70	template <typename T> inline T abs_diff(const T &a, const T &b) {
				71	return (a > b) ? (a - b) : (b - a);
				72	}
				73
				74	static bool is_extremal(uint8_t pixel) {
				75	return 0 == pixel \|\| 255 == pixel;
				76	}
				77
				78	typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
				79
				80	// This function is used by both R11 EAC and LATC to compress 4x4 blocks
				81	// of 8-bit alpha into 64-bit values that comprise the compressed data.
				82	// For both formats, we need to make sure that the dimensions of the
				83	// src pixels are divisible by 4, and copy 4x4 blocks one at a time
				84	// for compression.
				85	static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
				86	int width, int height, int rowBytes,
				87	A84x4To64BitProc proc) {
				88	// Make sure that our data is well-formed enough to be considered for compression
				89	if (0 == width \|\| 0 == height \|\| (width % 4) != 0 \|\| (height % 4) != 0) {
				90	return false;
				91	}
				92
				93	int blocksX = width >> 2;
				94	int blocksY = height >> 2;
				95
				96	uint8_t block[16];
				97	uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
				98	for (int y = 0; y < blocksY; ++y) {
				99	for (int x = 0; x < blocksX; ++x) {
				100	// Load block
				101	for (int k = 0; k < 4; ++k) {
				102	memcpy(block + k4, src + krowBytes + 4*x, 4);
				103	}
				104
				105	// Compress it
				106	*encPtr = proc(block);
				107	++encPtr;
				108	}
				109	src += 4 * rowBytes;
				110	}
				111
				112	return true;
				113	}
				114
				115	////////////////////////////////////////////////////////////////////////////////
				116	//
				117	// LATC compressor
				118	//
				119	////////////////////////////////////////////////////////////////////////////////
				120
				121	// LATC compressed texels down into square 4x4 blocks
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	122	static const int kLATCBlockSize = 4;
				123	static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
				124
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	125	// Compress a block by using the bounding box of the pixels. It is assumed that
				126	// there are no extremal pixels in this block otherwise we would have used
				127	// compressBlockBBIgnoreExtremal.
				128	static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
				129	uint8_t minVal = 255;
				130	uint8_t maxVal = 0;
				131	for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
				132	minVal = SkTMin(pixels[i], minVal);
				133	maxVal = SkTMax(pixels[i], maxVal);
				134	}
				135
				136	SkASSERT(!is_extremal(minVal));
				137	SkASSERT(!is_extremal(maxVal));
				138
				139	uint8_t palette[kLATCPaletteSize];
				140	generate_latc_palette(palette, maxVal, minVal);
				141
				142	uint64_t indices = 0;
				143	for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
				144
				145	// Find the best palette index
				146	uint8_t bestError = abs_diff(pixels[i], palette[0]);
				147	uint8_t idx = 0;
				148	for (int j = 1; j < kLATCPaletteSize; ++j) {
				149	uint8_t error = abs_diff(pixels[i], palette[j]);
				150	if (error < bestError) {
				151	bestError = error;
				152	idx = j;
				153	}
				154	}
				155
				156	indices <<= 3;
				157	indices \|= idx;
				158	}
				159
				160	return
				161	SkEndian_SwapLE64(
				162	static_cast<uint64_t>(maxVal) \|
				163	(static_cast<uint64_t>(minVal) << 8) \|
				164	(indices << 16));
				165	}
				166
				167	// Compress a block by using the bounding box of the pixels without taking into
				168	// account the extremal values. The generated palette will contain extremal values
				169	// and fewer points along the line segment to interpolate.
				170	static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
				171	uint8_t minVal = 255;
				172	uint8_t maxVal = 0;
				173	for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
				174	if (is_extremal(pixels[i])) {
				175	continue;
				176	}
				177
				178	minVal = SkTMin(pixels[i], minVal);
				179	maxVal = SkTMax(pixels[i], maxVal);
				180	}
				181
				182	SkASSERT(!is_extremal(minVal));
				183	SkASSERT(!is_extremal(maxVal));
				184
				185	uint8_t palette[kLATCPaletteSize];
				186	generate_latc_palette(palette, minVal, maxVal);
				187
				188	uint64_t indices = 0;
				189	for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
				190
				191	// Find the best palette index
				192	uint8_t idx = 0;
				193	if (is_extremal(pixels[i])) {
				194	if (0xFF == pixels[i]) {
				195	idx = 7;
				196	} else if (0 == pixels[i]) {
				197	idx = 6;
				198	} else {
				199	SkFAIL("Pixel is extremal but not really?!");
				200	}
				201	} else {
				202	uint8_t bestError = abs_diff(pixels[i], palette[0]);
				203	for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
				204	uint8_t error = abs_diff(pixels[i], palette[j]);
				205	if (error < bestError) {
				206	bestError = error;
				207	idx = j;
				208	}
				209	}
				210	}
				211
				212	indices <<= 3;
				213	indices \|= idx;
				214	}
				215
				216	return
				217	SkEndian_SwapLE64(
				218	static_cast<uint64_t>(minVal) \|
				219	(static_cast<uint64_t>(maxVal) << 8) \|
				220	(indices << 16));
				221	}
				222
				223
				224	// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
				225	// values LUM0 and LUM1, and an index into the generated palette. Details of how
				226	// the palette is generated can be found in the comments of generatePalette above.
				227	//
				228	// We choose which palette type to use based on whether or not 'pixels' contains
				229	// any extremal values (0 or 255). If there are extremal values, then we use the
				230	// palette that has the extremal values built in. Otherwise, we use the full bounding
				231	// box.
				232
				233	static uint64_t compress_latc_block(const uint8_t pixels[]) {
				234	// Collect unique pixels
				235	int nUniquePixels = 0;
				236	uint8_t uniquePixels[kLATCPixelsPerBlock];
				237	for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
				238	bool foundPixel = false;
				239	for (int j = 0; j < nUniquePixels; ++j) {
				240	foundPixel = foundPixel \|\| uniquePixels[j] == pixels[i];
				241	}
				242
				243	if (!foundPixel) {
				244	uniquePixels[nUniquePixels] = pixels[i];
				245	++nUniquePixels;
				246	}
				247	}
				248
				249	// If there's only one unique pixel, then our compression is easy.
				250	if (1 == nUniquePixels) {
				251	return SkEndian_SwapLE64(pixels[0] \| (pixels[0] << 8));
				252
				253	// Similarly, if there are only two unique pixels, then our compression is
				254	// easy again: place the pixels in the block header, and assign the indices
				255	// with one or zero depending on which pixel they belong to.
				256	} else if (2 == nUniquePixels) {
				257	uint64_t outBlock = 0;
				258	for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
				259	int idx = 0;
				260	if (pixels[i] == uniquePixels[1]) {
				261	idx = 1;
				262	}
				263
				264	outBlock <<= 3;
				265	outBlock \|= idx;
				266	}
				267	outBlock <<= 16;
				268	outBlock \|= (uniquePixels[0] \| (uniquePixels[1] << 8));
				269	return SkEndian_SwapLE64(outBlock);
				270	}
				271
				272	// Count non-maximal pixel values
				273	int nonExtremalPixels = 0;
				274	for (int i = 0; i < nUniquePixels; ++i) {
				275	if (!is_extremal(uniquePixels[i])) {
				276	++nonExtremalPixels;
				277	}
				278	}
				279
				280	// If all the pixels are nonmaximal then compute the palette using
				281	// the bounding box of all the pixels.
				282	if (nonExtremalPixels == nUniquePixels) {
				283	// This is really just for correctness, in all of my tests we
				284	// never take this step. We don't lose too much perf here because
				285	// most of the processing in this function is worth it for the
				286	// 1 == nUniquePixels optimization.
				287	return compress_latc_block_bb(pixels);
				288	} else {
				289	return compress_latc_block_bb_ignore_extremal(pixels);
				290	}
				291	}
				292
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	293	#endif // COMPRESS_LATC_SLOW
				294
				295	////////////////////////////////////////////////////////////////////////////////
				296
				297	#if COMPRESS_LATC_FAST
				298
				299	// Take the top three indices of each int and pack them into the low 12
				300	// bits of the integer.
				301	static inline uint32_t convert_index(uint32_t x) {
				302	// Since the palette is
				303	// 255, 0, 219, 182, 146, 109, 73, 36
				304	// we need to map the high three bits of each byte in the integer
				305	// from
				306	// 0 1 2 3 4 5 6 7
				307	// to
				308	// 1 7 6 5 4 3 2 0
				309	//
				310	// This first operation takes the mapping from
				311	// 0 1 2 3 4 5 6 7 --> 7 6 5 4 3 2 1 0
				312	x = 0x07070707 - ((x >> 5) & 0x07070707);
				313
				314	// mask is 1 if index is non-zero
				315	const uint32_t mask = (x \| (x >> 1) \| (x >> 2)) & 0x01010101;
				316
				317	// add mask:
				318	// 7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0
				319	x = (x + mask);
				320
				321	// Handle overflow:
				322	// 8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0
				323	x \|= (x >> 3) & 0x01010101;
				324
				325	// Mask out high bits:
				326	// 9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0
				327	x &= 0x07070707;
				328
				329	// Pack it in...
				330	#if defined (SK_CPU_BENDIAN)
				331	return
				332	(x >> 24) \|
				333	((x >> 13) & 0x38) \|
				334	((x >> 2) & 0x1C0) \|
				335	((x << 9) & 0xE00);
				336	#else
				337	return
				338	(x & 0x7) \|
				339	((x >> 5) & 0x38) \|
				340	((x >> 10) & 0x1C0) \|
				341	((x >> 15) & 0xE00);
				342	#endif
				343	}
				344
				345	typedef uint64_t (PackIndicesProc)(const uint8_t alpha, int rowBytes);
				346	template<PackIndicesProc packIndicesProc>
				347	static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int rowBytes) {
				348	(reinterpret_cast<uint64_t>(*dstPtr)) =
				349	SkEndian_SwapLE64(0xFF \| (packIndicesProc(src, rowBytes) << 16));
				350	*dstPtr += 8;
				351	}
				352
				353	inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) {
				354	uint64_t result = 0;
				355	for (int i = 0; i < 4; ++i) {
				356	const uint32_t idx = (reinterpret_cast<const uint32_t>(indices + i*rowBytes));
				357	result \|= static_cast<uint64_t>(convert_index(idx)) << 12*i;
				358	}
				359	return result;
				360	}
				361
				362	inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) {
				363	// !SPEED! Blarg, this is kind of annoying. SSE4 can make this
				364	// a LOT faster.
				365	uint8_t transposed[16];
				366	for (int i = 0; i < 4; ++i) {
				367	for (int j = 0; j < 4; ++j) {
				368	transposed[j4+i] = indices[irowBytes + j];
				369	}
				370	}
				371
				372	return PackRowMajor(transposed, 4);
				373	}
				374
				375	static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
				376	int width, int height, int rowBytes) {
				377
				378	if (width < 0 \|\| ((width % 4) != 0) \|\| height < 0 \|\| ((height % 4) != 0)) {
				379	return false;
				380	}
				381
				382	uint8_t** dstPtr = &dst;
				383	for (int y = 0; y < height; y += 4) {
				384	for (int x = 0; x < width; x += 4) {
				385	compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
				386	}
				387	}
				388
				389	return true;
				390	}
				391
				392	void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
				393	compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
				394	}
				395
				396	#endif // COMPRESS_LATC_FAST
				397
krajcevski	4ad76e3	2014-07-31 14:12:50 -0700	[diff] [blame^]	398	void decompress_latc_block(uint8_t* dst, int dstRowBytes, const uint8_t* src) {
				399	uint64_t block = SkEndian_SwapLE64((reinterpret_cast<const uint64_t >(src)));
				400	uint8_t lum0 = block & 0xFF;
				401	uint8_t lum1 = (block >> 8) & 0xFF;
				402
				403	uint8_t palette[kLATCPaletteSize];
				404	generate_latc_palette(palette, lum0, lum1);
				405
				406	block >>= 16;
				407	for (int j = 0; j < 4; ++j) {
				408	for (int i = 0; i < 4; ++i) {
				409	dst[i] = palette[block & 0x7];
				410	block >>= 3;
				411	}
				412	dst += dstRowBytes;
				413	}
				414	}
				415
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	416	////////////////////////////////////////////////////////////////////////////////
				417
				418	namespace SkTextureCompressor {
				419
				420	bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	421	#if COMPRESS_LATC_FAST
				422	return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
				423	#elif COMPRESS_LATC_SLOW
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	424	return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	425	#else
				426	#error "Must choose either fast or slow LATC compression"
				427	#endif
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	428	}
				429
				430	SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer) {
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	431	#if COMPRESS_LATC_FAST
				432	return new
				433	SkTCompressedAlphaBlitter<4, 8, CompressA8LATCBlockVertical>
				434	(width, height, outputBuffer);
				435	#elif COMPRESS_LATC_SLOW
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	436	// TODO (krajcevski)
				437	return NULL;
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	438	#endif
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	439	}
				440
krajcevski	4ad76e3	2014-07-31 14:12:50 -0700	[diff] [blame^]	441	void DecompressLATC(uint8_t* dst, int dstRowBytes, const uint8_t* src, int width, int height) {
				442	for (int j = 0; j < height; j += 4) {
				443	for (int i = 0; i < width; i += 4) {
				444	decompress_latc_block(dst + i, dstRowBytes, src);
				445	src += 8;
				446	}
				447	dst += 4 * dstRowBytes;
				448	}
				449	}
				450
krajcevski	6c35488	2014-07-22 07:44:00 -0700	[diff] [blame]	451	} // SkTextureCompressor