Blame - src/utils/SkTextureCompressor_ASTC.cpp - platform/external/skqp

blob: 7969359e98495f51c3878970af103dc2216beb0c [file] [log] [blame]

krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	1	/*
				2	* Copyright 2014 Google Inc.
				3	*
				4	* Use of this source code is governed by a BSD-style license that can be
				5	* found in the LICENSE file.
				6	*/
				7
#include "SkTextureCompressor_ASTC.h"
#include "SkTextureCompressor_Blitter.h"

#include "SkBlitter.h"
#include "SkEndian.h"
#include "SkMath.h"

#include <string.h>
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	14
				15	// This table contains the weight values for each texel. This is used in determining
				16	// how to convert a 12x12 grid of alpha values into a 6x5 grid of index values. Since
				17	// we have a 6x5 grid, that gives 30 values that we have to compute. For each index,
				18	// we store up to 20 different triplets of values. In order the triplets are:
				19	// weight, texel-x, texel-y
				20	// The weight value corresponds to the amount that this index contributes to the final
				21	// index value of the given texel. Hence, we need to reconstruct the 6x5 index grid
				22	// from their relative contribution to the 12x12 texel grid.
				23	//
				24	// The algorithm is something like this:
				25	// foreach index i:
				26	// total-weight = 0;
				27	// total-alpha = 0;
				28	// for w = 1 to 20:
				29	// weight = table[i][w*3];
				30	// texel-x = table[i][w*3 + 1];
				31	// texel-y = table[i][w*3 + 2];
				32	// if weight >= 0:
				33	// total-weight += weight;
				34	// total-alpha += weight * alphas[texel-x][texel-y];
				35	//
				36	// total-alpha /= total-weight;
				37	// index = top three bits of total-alpha
				38	//
				39	// If the associated index does not contribute to 20 different texels (e.g. it's in
				40	// a corner), then the extra texels are stored with -1's in the table.
				41
krajcevski	4881a4d	2014-07-25 10:23:42 -0700	[diff] [blame]	42	static const int8_t k6x5To12x12Table[30][60] = {
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	43	{ 16, 0, 0, 9, 1, 0, 1, 2, 0, 10, 0, 1, 6, 1, 1, 1, 2, 1, 4, 0, 2, 2,
				44	1, 2, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
				45	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				46	{ 7, 1, 0, 15, 2, 0, 10, 3, 0, 3, 4, 0, 4, 1, 1, 9, 2, 1, 6, 3, 1, 2,
				47	4, 1, 2, 1, 2, 4, 2, 2, 3, 3, 2, 1, 4, 2, -1, 0, 0, -1, 0, 0, -1, 0,
				48	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				49	{ 6, 3, 0, 13, 4, 0, 12, 5, 0, 4, 6, 0, 4, 3, 1, 8, 4, 1, 8, 5, 1, 3,
				50	6, 1, 1, 3, 2, 3, 4, 2, 3, 5, 2, 1, 6, 2, -1, 0, 0, -1, 0, 0, -1, 0,
				51	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				52	{ 4, 5, 0, 12, 6, 0, 13, 7, 0, 6, 8, 0, 2, 5, 1, 7, 6, 1, 8, 7, 1, 4,
				53	8, 1, 1, 5, 2, 3, 6, 2, 3, 7, 2, 2, 8, 2, -1, 0, 0, -1, 0, 0, -1, 0,
				54	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				55	{ 3, 7, 0, 10, 8, 0, 15, 9, 0, 7, 10, 0, 2, 7, 1, 6, 8, 1, 9, 9, 1, 4,
				56	10, 1, 1, 7, 2, 2, 8, 2, 4, 9, 2, 2, 10, 2, -1, 0, 0, -1, 0, 0, -1, 0,
				57	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				58	{ 1, 9, 0, 9, 10, 0, 16, 11, 0, 1, 9, 1, 6, 10, 1, 10, 11, 1, 2, 10, 2, 4,
				59	11, 2, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
				60	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				61	{ 6, 0, 1, 3, 1, 1, 12, 0, 2, 7, 1, 2, 1, 2, 2, 15, 0, 3, 8, 1, 3, 1,
				62	2, 3, 9, 0, 4, 5, 1, 4, 1, 2, 4, 3, 0, 5, 2, 1, 5, -1, 0, 0, -1, 0,
				63	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				64	{ 3, 1, 1, 6, 2, 1, 4, 3, 1, 1, 4, 1, 5, 1, 2, 11, 2, 2, 7, 3, 2, 2,
				65	4, 2, 7, 1, 3, 14, 2, 3, 9, 3, 3, 3, 4, 3, 4, 1, 4, 8, 2, 4, 6, 3,
				66	4, 2, 4, 4, 1, 1, 5, 3, 2, 5, 2, 3, 5, 1, 4, 5}, // n = 20
				67	{ 2, 3, 1, 5, 4, 1, 4, 5, 1, 1, 6, 1, 5, 3, 2, 10, 4, 2, 9, 5, 2, 3,
				68	6, 2, 6, 3, 3, 12, 4, 3, 11, 5, 3, 4, 6, 3, 3, 3, 4, 7, 4, 4, 7, 5,
				69	4, 2, 6, 4, 1, 3, 5, 2, 4, 5, 2, 5, 5, 1, 6, 5}, // n = 20
				70	{ 2, 5, 1, 5, 6, 1, 5, 7, 1, 2, 8, 1, 3, 5, 2, 9, 6, 2, 10, 7, 2, 4,
				71	8, 2, 4, 5, 3, 11, 6, 3, 12, 7, 3, 6, 8, 3, 2, 5, 4, 7, 6, 4, 7, 7,
				72	4, 3, 8, 4, 1, 5, 5, 2, 6, 5, 2, 7, 5, 1, 8, 5}, // n = 20
				73	{ 1, 7, 1, 4, 8, 1, 6, 9, 1, 3, 10, 1, 2, 7, 2, 8, 8, 2, 11, 9, 2, 5,
				74	10, 2, 3, 7, 3, 9, 8, 3, 14, 9, 3, 7, 10, 3, 2, 7, 4, 6, 8, 4, 8, 9,
				75	4, 4, 10, 4, 1, 7, 5, 2, 8, 5, 3, 9, 5, 1, 10, 5}, // n = 20
				76	{ 3, 10, 1, 6, 11, 1, 1, 9, 2, 7, 10, 2, 12, 11, 2, 1, 9, 3, 8, 10, 3, 15,
				77	11, 3, 1, 9, 4, 5, 10, 4, 9, 11, 4, 2, 10, 5, 3, 11, 5, -1, 0, 0, -1, 0,
				78	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				79	{ 1, 0, 3, 1, 1, 3, 7, 0, 4, 4, 1, 4, 13, 0, 5, 7, 1, 5, 1, 2, 5, 13,
				80	0, 6, 7, 1, 6, 1, 2, 6, 7, 0, 7, 4, 1, 7, 1, 0, 8, 1, 1, 8, -1, 0,
				81	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				82	{ 1, 2, 3, 1, 3, 3, 3, 1, 4, 7, 2, 4, 4, 3, 4, 1, 4, 4, 6, 1, 5, 12,
				83	2, 5, 8, 3, 5, 2, 4, 5, 6, 1, 6, 12, 2, 6, 8, 3, 6, 2, 4, 6, 3, 1,
				84	7, 7, 2, 7, 4, 3, 7, 1, 4, 7, 1, 2, 8, 1, 3, 8}, // n = 20
				85	{ 1, 4, 3, 1, 5, 3, 3, 3, 4, 6, 4, 4, 5, 5, 4, 2, 6, 4, 5, 3, 5, 11,
				86	4, 5, 10, 5, 5, 3, 6, 5, 5, 3, 6, 11, 4, 6, 10, 5, 6, 3, 6, 6, 3, 3,
				87	7, 6, 4, 7, 5, 5, 7, 2, 6, 7, 1, 4, 8, 1, 5, 8}, // n = 20
				88	{ 1, 6, 3, 1, 7, 3, 2, 5, 4, 5, 6, 4, 6, 7, 4, 3, 8, 4, 3, 5, 5, 10,
				89	6, 5, 11, 7, 5, 5, 8, 5, 3, 5, 6, 10, 6, 6, 11, 7, 6, 5, 8, 6, 2, 5,
				90	7, 5, 6, 7, 6, 7, 7, 3, 8, 7, 1, 6, 8, 1, 7, 8}, // n = 20
				91	{ 1, 8, 3, 1, 9, 3, 1, 7, 4, 4, 8, 4, 7, 9, 4, 3, 10, 4, 2, 7, 5, 8,
				92	8, 5, 12, 9, 5, 6, 10, 5, 2, 7, 6, 8, 8, 6, 12, 9, 6, 6, 10, 6, 1, 7,
				93	7, 4, 8, 7, 7, 9, 7, 3, 10, 7, 1, 8, 8, 1, 9, 8}, // n = 20
				94	{ 1, 10, 3, 1, 11, 3, 4, 10, 4, 7, 11, 4, 1, 9, 5, 7, 10, 5, 13, 11, 5, 1,
				95	9, 6, 7, 10, 6, 13, 11, 6, 4, 10, 7, 7, 11, 7, 1, 10, 8, 1, 11, 8, -1, 0,
				96	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				97	{ 3, 0, 6, 2, 1, 6, 9, 0, 7, 5, 1, 7, 1, 2, 7, 15, 0, 8, 8, 1, 8, 1,
				98	2, 8, 12, 0, 9, 7, 1, 9, 1, 2, 9, 6, 0, 10, 3, 1, 10, -1, 0, 0, -1, 0,
				99	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				100	{ 1, 1, 6, 3, 2, 6, 2, 3, 6, 1, 4, 6, 4, 1, 7, 8, 2, 7, 6, 3, 7, 2,
				101	4, 7, 7, 1, 8, 14, 2, 8, 9, 3, 8, 3, 4, 8, 5, 1, 9, 11, 2, 9, 8, 3,
				102	9, 2, 4, 9, 3, 1, 10, 6, 2, 10, 4, 3, 10, 1, 4, 10}, // n = 20
				103	{ 1, 3, 6, 2, 4, 6, 2, 5, 6, 1, 6, 6, 3, 3, 7, 7, 4, 7, 7, 5, 7, 2,
				104	6, 7, 6, 3, 8, 12, 4, 8, 11, 5, 8, 4, 6, 8, 4, 3, 9, 10, 4, 9, 9, 5,
				105	9, 3, 6, 9, 2, 3, 10, 5, 4, 10, 5, 5, 10, 2, 6, 10}, // n = 20
				106	{ 1, 5, 6, 2, 6, 6, 2, 7, 6, 1, 8, 6, 2, 5, 7, 7, 6, 7, 7, 7, 7, 3,
				107	8, 7, 4, 5, 8, 11, 6, 8, 12, 7, 8, 6, 8, 8, 3, 5, 9, 9, 6, 9, 10, 7,
				108	9, 5, 8, 9, 1, 5, 10, 4, 6, 10, 5, 7, 10, 2, 8, 10}, // n = 20
				109	{ 1, 7, 6, 2, 8, 6, 3, 9, 6, 1, 10, 6, 2, 7, 7, 6, 8, 7, 8, 9, 7, 4,
				110	10, 7, 3, 7, 8, 9, 8, 8, 14, 9, 8, 7, 10, 8, 2, 7, 9, 7, 8, 9, 11, 9,
				111	9, 5, 10, 9, 1, 7, 10, 4, 8, 10, 6, 9, 10, 3, 10, 10}, // n = 20
				112	{ 2, 10, 6, 3, 11, 6, 1, 9, 7, 5, 10, 7, 9, 11, 7, 1, 9, 8, 8, 10, 8, 15,
				113	11, 8, 1, 9, 9, 7, 10, 9, 12, 11, 9, 3, 10, 10, 6, 11, 10, -1, 0, 0, -1, 0,
				114	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				115	{ 4, 0, 9, 2, 1, 9, 10, 0, 10, 6, 1, 10, 1, 2, 10, 16, 0, 11, 9, 1, 11, 1,
				116	2, 11, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
				117	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				118	{ 2, 1, 9, 4, 2, 9, 2, 3, 9, 1, 4, 9, 4, 1, 10, 9, 2, 10, 6, 3, 10, 2,
				119	4, 10, 7, 1, 11, 15, 2, 11, 10, 3, 11, 3, 4, 11, -1, 0, 0, -1, 0, 0, -1, 0,
				120	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				121	{ 2, 3, 9, 3, 4, 9, 3, 5, 9, 1, 6, 9, 4, 3, 10, 8, 4, 10, 7, 5, 10, 2,
				122	6, 10, 6, 3, 11, 13, 4, 11, 12, 5, 11, 4, 6, 11, -1, 0, 0, -1, 0, 0, -1, 0,
				123	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				124	{ 1, 5, 9, 3, 6, 9, 3, 7, 9, 1, 8, 9, 3, 5, 10, 8, 6, 10, 8, 7, 10, 4,
				125	8, 10, 4, 5, 11, 12, 6, 11, 13, 7, 11, 6, 8, 11, -1, 0, 0, -1, 0, 0, -1, 0,
				126	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				127	{ 1, 7, 9, 3, 8, 9, 4, 9, 9, 2, 10, 9, 2, 7, 10, 6, 8, 10, 9, 9, 10, 4,
				128	10, 10, 3, 7, 11, 10, 8, 11, 15, 9, 11, 7, 10, 11, -1, 0, 0, -1, 0, 0, -1, 0,
				129	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // n = 20
				130	{ 2, 10, 9, 4, 11, 9, 1, 9, 10, 6, 10, 10, 10, 11, 10, 1, 9, 11, 9, 10, 11, 16,
				131	11, 11, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
				132	0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0} // n = 20
				133	};
				134
				135	// Returns the alpha value of a texel at position (x, y) from src.
				136	// (x, y) are assumed to be in the range [0, 12).
bsalomon	9880607	2014-12-12 15:11:17 -0800	[diff] [blame]	137	inline uint8_t GetAlpha(const uint8_t *src, size_t rowBytes, int x, int y) {
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	138	SkASSERT(x >= 0 && x < 12);
				139	SkASSERT(y >= 0 && y < 12);
				140	SkASSERT(rowBytes >= 12);
				141	return (src + yrowBytes + x);
				142	}
				143
bsalomon	9880607	2014-12-12 15:11:17 -0800	[diff] [blame]	144	inline uint8_t GetAlphaTranspose(const uint8_t *src, size_t rowBytes, int x, int y) {
krajcevski	10a350c	2014-07-29 07:24:58 -0700	[diff] [blame]	145	return GetAlpha(src, rowBytes, y, x);
				146	}
				147
// Output the 16 bytes stored in top and bottom and advance the pointer. The bytes
// are stored as the integers are represented in memory, so they should be swapped
// by the caller if necessary.
//
// dst    - in/out: address of the write cursor; on return it points 16 bytes further.
// top    - written to bytes [0, 8) of the output.
// bottom - written to bytes [8, 16) of the output.
static inline void send_packing(uint8_t** dst, const uint64_t top, const uint64_t bottom) {
    // memcpy rather than a store through a cast uint64_t*: the destination is a
    // byte buffer with no 8-byte alignment guarantee.
    memcpy(*dst, &top, sizeof(top));
    memcpy(*dst + sizeof(top), &bottom, sizeof(bottom));
    *dst += 16;
}
				157
				158	// Compresses an ASTC block, by looking up the proper contributions from
				159	// k6x5To12x12Table and computing an index from the associated values.
bsalomon	9880607	2014-12-12 15:11:17 -0800	[diff] [blame]	160	typedef uint8_t (GetAlphaProc)(const uint8_t src, size_t rowBytes, int x, int y);
krajcevski	10a350c	2014-07-29 07:24:58 -0700	[diff] [blame]	161
				162	template<GetAlphaProc getAlphaProc>
bsalomon	9880607	2014-12-12 15:11:17 -0800	[diff] [blame]	163	static void compress_a8_astc_block(uint8_t** dst, const uint8_t* src, size_t rowBytes) {
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	164	// Check for single color
				165	bool constant = true;
				166	const uint32_t firstInt = (reinterpret_cast<const uint32_t>(src));
				167	for (int i = 0; i < 12; ++i) {
				168	const uint32_t rowInt = reinterpret_cast<const uint32_t >(src + i*rowBytes);
				169	constant = constant && (rowInt[0] == firstInt);
				170	constant = constant && (rowInt[1] == firstInt);
				171	constant = constant && (rowInt[2] == firstInt);
				172	}
				173
				174	if (constant) {
				175	if (0 == firstInt) {
				176	// All of the indices are set to zero, and the colors are
				177	// v0 = 0, v1 = 255, so everything will be transparent.
				178	send_packing(dst, SkTEndian_SwapLE64(0x0000000001FE000173ULL), 0);
				179	return;
				180	} else if (0xFFFFFFFF == firstInt) {
				181	// All of the indices are set to zero, and the colors are
				182	// v0 = 255, v1 = 0, so everything will be opaque.
				183	send_packing(dst, SkTEndian_SwapLE64(0x000000000001FE0173ULL), 0);
				184	return;
				185	}
				186	}
				187
				188	uint8_t indices[30]; // 6x5 index grid
				189	for (int idx = 0; idx < 30; ++idx) {
				190	int weightTot = 0;
				191	int alphaTot = 0;
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	192	for (int w = 0; w < 20; ++w) {
krajcevski	4881a4d	2014-07-25 10:23:42 -0700	[diff] [blame]	193	const int8_t weight = k6x5To12x12Table[idx][w*3];
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	194	if (weight > 0) {
				195	const int x = k6x5To12x12Table[idx][w*3 + 1];
				196	const int y = k6x5To12x12Table[idx][w*3 + 2];
				197	weightTot += weight;
krajcevski	10a350c	2014-07-29 07:24:58 -0700	[diff] [blame]	198	alphaTot += weight * getAlphaProc(src, rowBytes, x, y);
krajcevski	4881a4d	2014-07-25 10:23:42 -0700	[diff] [blame]	199	} else {
				200	// In our table, not every entry has 20 weights, and all
				201	// of them are nonzero. Once we hit a negative weight, we
				202	// know that all of the other weights are not valid either.
				203	break;
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	204	}
				205	}
				206
krajcevski	4881a4d	2014-07-25 10:23:42 -0700	[diff] [blame]	207	indices[idx] = (alphaTot / weightTot) >> 5;
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	208	}
				209
				210	// Pack indices... The ASTC block layout is fairly complicated. An extensive
				211	// description can be found here:
				212	// https://www.opengl.org/registry/specs/KHR/texture_compression_astc_hdr.txt
				213	//
				214	// Here is a summary of the options that we've chosen:
				215	// 1. Block mode: 0b00101110011
				216	// - 6x5 texel grid
				217	// - Single plane
				218	// - Low-precision index values
				219	// - Index range 0-7 (three bits per index)
				220	// 2. Partitions: 0b00
				221	// - One partition
				222	// 3. Color Endpoint Mode: 0b0000
				223	// - Direct luminance -- e0=(v0,v0,v0,0xFF); e1=(v1,v1,v1,0xFF);
				224	// 4. 8-bit endpoints:
				225	// v0 = 0, v1 = 255
				226	//
				227	// The rest of the block contains the 30 index values from before, which
				228	// are currently stored in the indices variable.
				229
				230	uint64_t top = 0x0000000001FE000173ULL;
				231	uint64_t bottom = 0;
				232
				233	for (int idx = 0; idx <= 20; ++idx) {
krajcevski	4881a4d	2014-07-25 10:23:42 -0700	[diff] [blame]	234	const uint8_t index = indices[idx];
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	235	bottom \|= static_cast<uint64_t>(index) << (61-(idx*3));
				236	}
				237
				238	// index 21 straddles top and bottom
				239	{
krajcevski	4881a4d	2014-07-25 10:23:42 -0700	[diff] [blame]	240	const uint8_t index = indices[21];
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	241	bottom \|= index & 1;
				242	top \|= static_cast<uint64_t>((index >> 2) \| (index & 2)) << 62;
				243	}
				244
				245	for (int idx = 22; idx < 30; ++idx) {
krajcevski	4881a4d	2014-07-25 10:23:42 -0700	[diff] [blame]	246	const uint8_t index = indices[idx];
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	247	top \|= static_cast<uint64_t>(index) << (59-(idx-22)*3);
				248	}
				249
				250	// Reverse each 3-bit index since indices are read in reverse order...
				251	uint64_t t = (bottom ^ (bottom >> 2)) & 0x2492492492492492ULL;
				252	bottom = bottom ^ t ^ (t << 2);
				253
				254	t = (top ^ (top >> 2)) & 0x0924924000000000ULL;
				255	top = top ^ t ^ (t << 2);
				256
				257	send_packing(dst, SkEndian_SwapLE64(top), SkEndian_SwapLE64(bottom));
				258	}
				259
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	260	inline void CompressA8ASTCBlockVertical(uint8_t* dst, const uint8_t* src) {
krajcevski	10a350c	2014-07-29 07:24:58 -0700	[diff] [blame]	261	compress_a8_astc_block<GetAlphaTranspose>(&dst, src, 12);
				262	}
				263
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	264	////////////////////////////////////////////////////////////////////////////////
krajcevski	3c7edda	2014-08-06 12:47:59 -0700	[diff] [blame]	265	//
				266	// ASTC Decoder
				267	//
				268	// Full details available in the spec:
				269	// http://www.khronos.org/registry/gles/extensions/OES/OES_texture_compression_astc.txt
				270	//
				271	////////////////////////////////////////////////////////////////////////////////
				272
				273	// Enable this to assert whenever a decoded block has invalid ASTC values. Otherwise,
				274	// each invalid block will result in a disgusting magenta color.
				275	#define ASSERT_ASTC_DECODE_ERROR 0
				276
				277	// Reverse 64-bit integer taken from TAOCP 4a, although it's better
				278	// documented at this site:
				279	// http://matthewarcus.wordpress.com/2012/11/18/reversing-a-64-bit-word/
				280
				281	template <typename T, T m, int k>
				282	static inline T swap_bits(T p) {
				283	T q = ((p>>k)^p) & m;
				284	return p^q^(q<<k);
				285	}
				286
				287	static inline uint64_t reverse64(uint64_t n) {
halcanary	dea60f6	2014-08-12 09:28:57 -0700	[diff] [blame]	288	static const uint64_t m0 = 0x5555555555555555ULL;
				289	static const uint64_t m1 = 0x0300c0303030c303ULL;
				290	static const uint64_t m2 = 0x00c0300c03f0003fULL;
				291	static const uint64_t m3 = 0x00000ffc00003fffULL;
krajcevski	3c7edda	2014-08-06 12:47:59 -0700	[diff] [blame]	292	n = ((n>>1)&m0) \| (n&m0)<<1;
				293	n = swap_bits<uint64_t, m1, 4>(n);
				294	n = swap_bits<uint64_t, m2, 8>(n);
				295	n = swap_bits<uint64_t, m3, 20>(n);
				296	n = (n >> 34) \| (n << 30);
				297	return n;
				298	}
				299
				300	// An ASTC block is 128 bits. We represent it as two 64-bit integers in order
				301	// to efficiently operate on the block using bitwise operations.
				302	struct ASTCBlock {
				303	uint64_t fLow;
				304	uint64_t fHigh;
				305
				306	// Reverses the bits of an ASTC block, making the LSB of the
				307	// 128 bit block the MSB.
				308	inline void reverse() {
				309	const uint64_t newLow = reverse64(this->fHigh);
				310	this->fHigh = reverse64(this->fLow);
				311	this->fLow = newLow;
				312	}
				313	};
				314
				315	// Writes the given color to every pixel in the block. This is used by void-extent
				316	// blocks (a special constant-color encoding of a block) and by the error function.
				317	static inline void write_constant_color(uint8_t* dst, int blockDimX, int blockDimY,
				318	int dstRowBytes, SkColor color) {
				319	for (int y = 0; y < blockDimY; ++y) {
				320	SkColor dstColors = reinterpret_cast<SkColor>(dst);
				321	for (int x = 0; x < blockDimX; ++x) {
				322	dstColors[x] = color;
				323	}
				324	dst += dstRowBytes;
				325	}
				326	}
				327
				328	// Sets the entire block to the ASTC "error" color, a disgusting magenta
				329	// that's not supposed to appear in natural images.
				330	static inline void write_error_color(uint8_t* dst, int blockDimX, int blockDimY,
				331	int dstRowBytes) {
				332	static const SkColor kASTCErrorColor = SkColorSetRGB(0xFF, 0, 0xFF);
				333
				334	#if ASSERT_ASTC_DECODE_ERROR
				335	SkDEBUGFAIL("ASTC decoding error!\n");
				336	#endif
				337
				338	write_constant_color(dst, blockDimX, blockDimY, dstRowBytes, kASTCErrorColor);
				339	}
				340
				341	// Reads up to 64 bits of the ASTC block starting from bit
				342	// 'from' and going up to but not including bit 'to'. 'from' starts
				343	// counting from the LSB, counting up to the MSB. Returns -1 on
				344	// error.
				345	static uint64_t read_astc_bits(const ASTCBlock &block, int from, int to) {
				346	SkASSERT(0 <= from && from <= 128);
				347	SkASSERT(0 <= to && to <= 128);
				348
				349	const int nBits = to - from;
				350	if (0 == nBits) {
				351	return 0;
				352	}
				353
				354	if (nBits < 0 \|\| 64 <= nBits) {
				355	SkDEBUGFAIL("ASTC -- shouldn't read more than 64 bits");
				356	return -1;
				357	}
				358
				359	// Remember, the 'to' bit isn't read.
				360	uint64_t result = 0;
				361	if (to <= 64) {
				362	// All desired bits are in the low 64-bits.
				363	result = (block.fLow >> from) & ((1ULL << nBits) - 1);
				364	} else if (from >= 64) {
				365	// All desired bits are in the high 64-bits.
				366	result = (block.fHigh >> (from - 64)) & ((1ULL << nBits) - 1);
				367	} else {
				368	// from < 64 && to > 64
				369	SkASSERT(nBits > (64 - from));
				370	const int nLow = 64 - from;
				371	const int nHigh = nBits - nLow;
				372	result =
				373	((block.fLow >> from) & ((1ULL << nLow) - 1)) \|
				374	((block.fHigh & ((1ULL << nHigh) - 1)) << nLow);
				375	}
				376
				377	return result;
				378	}
				379
				380	// Returns the number of bits needed to represent a number
				381	// in the given power-of-two range (excluding the power of two itself).
				382	static inline int bits_for_range(int x) {
				383	SkASSERT(SkIsPow2(x));
				384	SkASSERT(0 != x);
				385	// Since we know it's a power of two, there should only be one bit set,
				386	// meaning the number of trailing zeros is 31 minus the number of leading
				387	// zeros.
				388	return 31 - SkCLZ(x);
				389	}
				390
				391	// Clamps an integer to the range [0, 255]
				392	static inline int clamp_byte(int x) {
				393	return SkClampMax(x, 255);
				394	}
				395
				396	// Helper function defined in the ASTC spec, section C.2.14
				397	// It transfers a few bits of precision from one value to another.
				398	static inline void bit_transfer_signed(int a, int b) {
				399	*b >>= 1;
				400	b \|= a & 0x80;
				401	*a >>= 1;
				402	*a &= 0x3F;
				403	if ( (*a & 0x20) != 0 ) {
				404	*a -= 0x40;
				405	}
				406	}
				407
				408	// Helper function defined in the ASTC spec, section C.2.14
				409	// It uses the value in the blue channel to tint the red and green
				410	static inline SkColor blue_contract(int a, int r, int g, int b) {
				411	return SkColorSetARGB(a, (r + b) >> 1, (g + b) >> 1, b);
				412	}
				413
				414	// Helper function that decodes two colors from eight values. If isRGB is true,
				415	// then the pointer 'v' contains six values and the last two are considered to be
				416	// 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This
				417	// corresponds to the decode procedure for the following endpoint modes:
				418	// kLDR_RGB_Direct_ColorEndpointMode
				419	// kLDR_RGBA_Direct_ColorEndpointMode
				420	static inline void decode_rgba_direct(const int v, SkColor endpoints, bool isRGB) {
				421
				422	int v6 = 0xFF;
				423	int v7 = 0xFF;
				424	if (!isRGB) {
				425	v6 = v[6];
				426	v7 = v[7];
				427	}
				428
				429	const int s0 = v[0] + v[2] + v[4];
				430	const int s1 = v[1] + v[3] + v[5];
				431
				432	if (s1 >= s0) {
				433	endpoints[0] = SkColorSetARGB(v6, v[0], v[2], v[4]);
				434	endpoints[1] = SkColorSetARGB(v7, v[1], v[3], v[5]);
				435	} else {
				436	endpoints[0] = blue_contract(v7, v[1], v[3], v[5]);
				437	endpoints[1] = blue_contract(v6, v[0], v[2], v[4]);
				438	}
				439	}
				440
				441	// Helper function that decodes two colors from six values. If isRGB is true,
				442	// then the pointer 'v' contains four values and the last two are considered to be
				443	// 0xFF. If isRGB is false, then all six values come from the pointer 'v'. This
				444	// corresponds to the decode procedure for the following endpoint modes:
				445	// kLDR_RGB_BaseScale_ColorEndpointMode
				446	// kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode
				447	static inline void decode_rgba_basescale(const int v, SkColor endpoints, bool isRGB) {
				448
				449	int v4 = 0xFF;
				450	int v5 = 0xFF;
				451	if (!isRGB) {
				452	v4 = v[4];
				453	v5 = v[5];
				454	}
				455
				456	endpoints[0] = SkColorSetARGB(v4,
				457	(v[0]*v[3]) >> 8,
				458	(v[1]*v[3]) >> 8,
				459	(v[2]*v[3]) >> 8);
				460	endpoints[1] = SkColorSetARGB(v5, v[0], v[1], v[2]);
				461	}
				462
				463	// Helper function that decodes two colors from eight values. If isRGB is true,
				464	// then the pointer 'v' contains six values and the last two are considered to be
				465	// 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This
				466	// corresponds to the decode procedure for the following endpoint modes:
				467	// kLDR_RGB_BaseOffset_ColorEndpointMode
				468	// kLDR_RGBA_BaseOffset_ColorEndpointMode
				469	//
				470	// If isRGB is true, then treat this as if v6 and v7 are meant to encode full alpha values.
				471	static inline void decode_rgba_baseoffset(const int v, SkColor endpoints, bool isRGB) {
				472	int v0 = v[0];
				473	int v1 = v[1];
				474	int v2 = v[2];
				475	int v3 = v[3];
				476	int v4 = v[4];
				477	int v5 = v[5];
				478	int v6 = isRGB ? 0xFF : v[6];
				479	// The 0 is here because this is an offset, not a direct value
				480	int v7 = isRGB ? 0 : v[7];
				481
				482	bit_transfer_signed(&v1, &v0);
				483	bit_transfer_signed(&v3, &v2);
				484	bit_transfer_signed(&v5, &v4);
				485	if (!isRGB) {
				486	bit_transfer_signed(&v7, &v6);
				487	}
				488
				489	int c[2][4];
				490	if ((v1 + v3 + v5) >= 0) {
				491	c[0][0] = v6;
				492	c[0][1] = v0;
				493	c[0][2] = v2;
				494	c[0][3] = v4;
				495
				496	c[1][0] = v6 + v7;
				497	c[1][1] = v0 + v1;
				498	c[1][2] = v2 + v3;
				499	c[1][3] = v4 + v5;
				500	} else {
				501	c[0][0] = v6 + v7;
				502	c[0][1] = (v0 + v1 + v4 + v5) >> 1;
				503	c[0][2] = (v2 + v3 + v4 + v5) >> 1;
				504	c[0][3] = v4 + v5;
				505
				506	c[1][0] = v6;
				507	c[1][1] = (v0 + v4) >> 1;
				508	c[1][2] = (v2 + v4) >> 1;
				509	c[1][3] = v4;
				510	}
				511
				512	endpoints[0] = SkColorSetARGB(clamp_byte(c[0][0]),
				513	clamp_byte(c[0][1]),
				514	clamp_byte(c[0][2]),
				515	clamp_byte(c[0][3]));
				516
				517	endpoints[1] = SkColorSetARGB(clamp_byte(c[1][0]),
				518	clamp_byte(c[1][1]),
				519	clamp_byte(c[1][2]),
				520	clamp_byte(c[1][3]));
				521	}
				522
				523
				524	// A helper class used to decode bit values from standard integer values.
				525	// We can't use this class with ASTCBlock because then it would need to
				526	// handle multi-value ranges, and it's non-trivial to lookup a range of bits
				527	// that splits across two different ints.
				528	template <typename T>
				529	class SkTBits {
				530	public:
				531	SkTBits(const T val) : fVal(val) { }
				532
				533	// Returns the bit at the given position
				534	T operator [](const int idx) const {
				535	return (fVal >> idx) & 1;
				536	}
				537
				538	// Returns the bits in the given range, inclusive
				539	T operator ()(const int end, const int start) const {
				540	SkASSERT(end >= start);
				541	return (fVal >> start) & ((1ULL << ((end - start) + 1)) - 1);
				542	}
				543
				544	private:
				545	const T fVal;
				546	};
				547
				548	// This algorithm matches the trit block decoding in the spec (Table C.2.14)
				549	static void decode_trit_block(int* dst, int nBits, const uint64_t &block) {
				550
				551	SkTBits<uint64_t> blockBits(block);
				552
				553	// According to the spec, a trit block, which contains five values,
				554	// has the following layout:
				555	//
				556	// 27 26 25 24 23 22 21 20 19 18 17 16
				557	// -----------------------------------------------
				558	// \|T7 \| m4 \|T6 T5 \| m3 \|T4 \|
				559	// -----------------------------------------------
				560	//
				561	// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
				562	// --------------------------------------------------------------
				563	// \| m2 \|T3 T2 \| m1 \|T1 T0 \| m0 \|
				564	// --------------------------------------------------------------
				565	//
				566	// Where the m's are variable width depending on the number of bits used
				567	// to encode the values (anywhere from 0 to 6). Since 3^5 = 243, the extra
				568	// byte labeled T (whose bits are interleaved where 0 is the LSB and 7 is
				569	// the MSB), contains five trit values. To decode the trit values, the spec
				570	// says that we need to follow the following algorithm:
				571	//
				572	// if T[4:2] = 111
				573	// C = { T[7:5], T[1:0] }; t4 = t3 = 2
				574	// else
				575	// C = T[4:0]
				576	//
				577	// if T[6:5] = 11
				578	// t4 = 2; t3 = T[7]
				579	// else
				580	// t4 = T[7]; t3 = T[6:5]
				581	//
				582	// if C[1:0] = 11
				583	// t2 = 2; t1 = C[4]; t0 = { C[3], C[2]&~C[3] }
				584	// else if C[3:2] = 11
				585	// t2 = 2; t1 = 2; t0 = C[1:0]
				586	// else
				587	// t2 = C[4]; t1 = C[3:2]; t0 = { C[1], C[0]&~C[1] }
				588	//
				589	// The following C++ code is meant to mirror this layout and algorithm as
				590	// closely as possible.
				591
				592	int m[5];
				593	if (0 == nBits) {
				594	memset(m, 0, sizeof(m));
				595	} else {
				596	SkASSERT(nBits < 8);
				597	m[0] = static_cast<int>(blockBits(nBits - 1, 0));
				598	m[1] = static_cast<int>(blockBits(2*nBits - 1 + 2, nBits + 2));
				599	m[2] = static_cast<int>(blockBits(3nBits - 1 + 4, 2nBits + 4));
				600	m[3] = static_cast<int>(blockBits(4nBits - 1 + 5, 3nBits + 5));
				601	m[4] = static_cast<int>(blockBits(5nBits - 1 + 7, 4nBits + 7));
				602	}
				603
				604	int T =
				605	static_cast<int>(blockBits(nBits + 1, nBits)) \|
				606	(static_cast<int>(blockBits(2nBits + 2 + 1, 2nBits + 2)) << 2) \|
				607	(static_cast<int>(blockBits[3*nBits + 4] << 4)) \|
				608	(static_cast<int>(blockBits(4nBits + 5 + 1, 4nBits + 5)) << 5) \|
				609	(static_cast<int>(blockBits[5*nBits + 7] << 7));
				610
				611	int t[5];
				612
				613	int C;
				614	SkTBits<int> Tbits(T);
				615	if (0x7 == Tbits(4, 2)) {
				616	C = (Tbits(7, 5) << 2) \| Tbits(1, 0);
				617	t[3] = t[4] = 2;
				618	} else {
				619	C = Tbits(4, 0);
				620	if (Tbits(6, 5) == 0x3) {
				621	t[4] = 2; t[3] = Tbits[7];
				622	} else {
				623	t[4] = Tbits[7]; t[3] = Tbits(6, 5);
				624	}
				625	}
				626
				627	SkTBits<int> Cbits(C);
				628	if (Cbits(1, 0) == 0x3) {
				629	t[2] = 2;
				630	t[1] = Cbits[4];
				631	t[0] = (Cbits[3] << 1) \| (Cbits[2] & (0x1 & ~(Cbits[3])));
				632	} else if (Cbits(3, 2) == 0x3) {
				633	t[2] = 2;
				634	t[1] = 2;
				635	t[0] = Cbits(1, 0);
				636	} else {
				637	t[2] = Cbits[4];
				638	t[1] = Cbits(3, 2);
				639	t[0] = (Cbits[1] << 1) \| (Cbits[0] & (0x1 & ~(Cbits[1])));
				640	}
				641
				642	#ifdef SK_DEBUG
				643	// Make sure all of the decoded values have a trit less than three
				644	// and a bit value within the range of the allocated bits.
				645	for (int i = 0; i < 5; ++i) {
				646	SkASSERT(t[i] < 3);
				647	SkASSERT(m[i] < (1 << nBits));
				648	}
				649	#endif
				650
				651	for (int i = 0; i < 5; ++i) {
				652	*dst = (t[i] << nBits) + m[i];
				653	++dst;
				654	}
				655	}
				656
				657	// This algorithm matches the quint block decoding in the spec (Table C.2.15)
				658	static void decode_quint_block(int* dst, int nBits, const uint64_t &block) {
				659	SkTBits<uint64_t> blockBits(block);
				660
				661	// According to the spec, a quint block, which contains three values,
				662	// has the following layout:
				663	//
				664	//
				665	// 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
				666	// --------------------------------------------------------------------------
				667	// \|Q6 Q5 \| m2 \|Q4 Q3 \| m1 \|Q2 Q1 Q0 \| m0 \|
				668	// --------------------------------------------------------------------------
				669	//
				670	// Where the m's are variable width depending on the number of bits used
				671	// to encode the values (anywhere from 0 to 4). Since 5^3 = 125, the extra
				672	// 7-bit value labeled Q (whose bits are interleaved where 0 is the LSB and 6 is
				673	// the MSB), contains three quint values. To decode the quint values, the spec
				674	// says that we need to follow the following algorithm:
				675	//
				676	// if Q[2:1] = 11 and Q[6:5] = 00
				677	// q2 = { Q[0], Q[4]&~Q[0], Q[3]&~Q[0] }; q1 = q0 = 4
				678	// else
				679	// if Q[2:1] = 11
				680	// q2 = 4; C = { Q[4:3], ~Q[6:5], Q[0] }
				681	// else
				682	// q2 = T[6:5]; C = Q[4:0]
				683	//
				684	// if C[2:0] = 101
				685	// q1 = 4; q0 = C[4:3]
				686	// else
				687	// q1 = C[4:3]; q0 = C[2:0]
				688	//
				689	// The following C++ code is meant to mirror this layout and algorithm as
				690	// closely as possible.
				691
				692	int m[3];
				693	if (0 == nBits) {
				694	memset(m, 0, sizeof(m));
				695	} else {
				696	SkASSERT(nBits < 8);
				697	m[0] = static_cast<int>(blockBits(nBits - 1, 0));
				698	m[1] = static_cast<int>(blockBits(2*nBits - 1 + 3, nBits + 3));
				699	m[2] = static_cast<int>(blockBits(3nBits - 1 + 5, 2nBits + 5));
				700	}
				701
				702	int Q =
				703	static_cast<int>(blockBits(nBits + 2, nBits)) \|
				704	(static_cast<int>(blockBits(2nBits + 3 + 1, 2nBits + 3)) << 3) \|
				705	(static_cast<int>(blockBits(3nBits + 5 + 1, 3nBits + 5)) << 5);
				706
				707	int q[3];
				708	SkTBits<int> Qbits(Q); // quantum?
				709
				710	if (Qbits(2, 1) == 0x3 && Qbits(6, 5) == 0) {
				711	const int notBitZero = (0x1 & ~(Qbits[0]));
				712	q[2] = (Qbits[0] << 2) \| ((Qbits[4] & notBitZero) << 1) \| (Qbits[3] & notBitZero);
				713	q[1] = 4;
				714	q[0] = 4;
				715	} else {
				716	int C;
				717	if (Qbits(2, 1) == 0x3) {
				718	q[2] = 4;
				719	C = (Qbits(4, 3) << 3) \| ((0x3 & ~(Qbits(6, 5))) << 1) \| Qbits[0];
				720	} else {
				721	q[2] = Qbits(6, 5);
				722	C = Qbits(4, 0);
				723	}
				724
				725	SkTBits<int> Cbits(C);
				726	if (Cbits(2, 0) == 0x5) {
				727	q[1] = 4;
				728	q[0] = Cbits(4, 3);
				729	} else {
				730	q[1] = Cbits(4, 3);
				731	q[0] = Cbits(2, 0);
				732	}
				733	}
				734
				735	#ifdef SK_DEBUG
				736	for (int i = 0; i < 3; ++i) {
				737	SkASSERT(q[i] < 5);
				738	SkASSERT(m[i] < (1 << nBits));
				739	}
				740	#endif
				741
				742	for (int i = 0; i < 3; ++i) {
				743	*dst = (q[i] << nBits) + m[i];
				744	++dst;
				745	}
				746	}
				747
				748	// Function that decodes a sequence of integers stored as an ISE (Integer
				749	// Sequence Encoding) bit stream. The full details of this function are outlined
				750	// in section C.2.12 of the ASTC spec. A brief overview is as follows:
				751	//
				752	// - Each integer in the sequence is bounded by a specific range r.
				753	// - The range of each value determines the way the bit stream is interpreted,
				754	// - If the range is a power of two, then the sequence is a sequence of bits
				755	// - If the range is of the form 3*2^n, then the sequence is stored as a
				756	// sequence of blocks, each block contains 5 trits and 5 bit sequences, which
				757	// decodes into 5 values.
				758	// - Similarly, if the range is of the form 5*2^n, then the sequence is stored as a
				759	// sequence of blocks, each block contains 3 quints and 3 bit sequences, which
				760	// decodes into 3 values.
				761	static bool decode_integer_sequence(
				762	int* dst, // The array holding the destination bits
				763	int dstSize, // The maximum size of the array
				764	int nVals, // The number of values that we'd like to decode
				765	const ASTCBlock &block, // The block that we're decoding from
				766	int startBit, // The bit from which we're going to do the reading
				767	int endBit, // The bit at which we stop reading (not inclusive)
				768	bool bReadForward, // If true, then read LSB -> MSB, else read MSB -> LSB
				769	int nBits, // The number of bits representing this encoding
				770	int nTrits, // The number of trits representing this encoding
				771	int nQuints // The number of quints representing this encoding
				772	) {
				773	// If we want more values than we have, then fail.
				774	if (nVals > dstSize) {
				775	return false;
				776	}
				777
				778	ASTCBlock src = block;
				779
				780	if (!bReadForward) {
				781	src.reverse();
				782	startBit = 128 - startBit;
				783	endBit = 128 - endBit;
				784	}
				785
				786	while (nVals > 0) {
				787
				788	if (nTrits > 0) {
				789	SkASSERT(0 == nQuints);
				790
				791	int endBlockBit = startBit + 8 + 5*nBits;
				792	if (endBlockBit > endBit) {
				793	endBlockBit = endBit;
				794	}
				795
krajcevski	95b1b3d	2014-08-07 12:58:38 -0700	[diff] [blame]	796	// Trit blocks are three values large.
				797	int trits[5];
				798	decode_trit_block(trits, nBits, read_astc_bits(src, startBit, endBlockBit));
				799	memcpy(dst, trits, SkMin32(nVals, 5)*sizeof(int));
				800
krajcevski	3c7edda	2014-08-06 12:47:59 -0700	[diff] [blame]	801	dst += 5;
				802	nVals -= 5;
				803	startBit = endBlockBit;
				804
				805	} else if (nQuints > 0) {
				806	SkASSERT(0 == nTrits);
				807
				808	int endBlockBit = startBit + 7 + 3*nBits;
				809	if (endBlockBit > endBit) {
				810	endBlockBit = endBit;
				811	}
				812
krajcevski	95b1b3d	2014-08-07 12:58:38 -0700	[diff] [blame]	813	// Quint blocks are three values large
				814	int quints[3];
				815	decode_quint_block(quints, nBits, read_astc_bits(src, startBit, endBlockBit));
				816	memcpy(dst, quints, SkMin32(nVals, 3)*sizeof(int));
				817
krajcevski	3c7edda	2014-08-06 12:47:59 -0700	[diff] [blame]	818	dst += 3;
				819	nVals -= 3;
				820	startBit = endBlockBit;
				821
				822	} else {
				823	// Just read the bits, but don't read more than we have...
				824	int endValBit = startBit + nBits;
				825	if (endValBit > endBit) {
				826	endValBit = endBit;
				827	}
				828
				829	SkASSERT(endValBit - startBit < 31);
				830	*dst = static_cast<int>(read_astc_bits(src, startBit, endValBit));
				831	++dst;
				832	--nVals;
				833	startBit = endValBit;
				834	}
				835	}
				836
				837	return true;
				838	}
				839
				840	// Helper function that unquantizes some (seemingly random) generated
				841	// numbers... meant to match the ASTC hardware. This function is used
				842	// to unquantize both colors (Table C.2.16) and weights (Table C.2.26)
				843	static inline int unquantize_value(unsigned mask, int A, int B, int C, int D) {
				844	int T = D * C + B;
				845	T = T ^ A;
				846	T = (A & mask) \| (T >> 2);
				847	SkASSERT(T < 256);
				848	return T;
				849	}
				850
				851	// Helper function to replicate the bits in x that represents an oldPrec
				852	// precision integer into a prec precision integer. For example:
				853	// 255 == replicate_bits(7, 3, 8);
				854	static inline int replicate_bits(int x, int oldPrec, int prec) {
				855	while (oldPrec < prec) {
				856	const int toShift = SkMin32(prec-oldPrec, oldPrec);
				857	x = (x << toShift) \| (x >> (oldPrec - toShift));
				858	oldPrec += toShift;
				859	}
				860
				861	// Make sure that no bits are set outside the desired precision.
				862	SkASSERT((-(1 << prec) & x) == 0);
				863	return x;
				864	}
				865
				866	// Returns the unquantized value of a color that's represented only as
				867	// a set of bits.
				868	static inline int unquantize_bits_color(int val, int nBits) {
				869	return replicate_bits(val, nBits, 8);
				870	}
				871
				872	// Returns the unquantized value of a color that's represented as a
				873	// trit followed by nBits bits. This algorithm follows the sequence
				874	// defined in section C.2.13 of the ASTC spec.
				875	static inline int unquantize_trit_color(int val, int nBits) {
				876	SkASSERT(nBits > 0);
				877	SkASSERT(nBits < 7);
				878
				879	const int D = (val >> nBits) & 0x3;
				880	SkASSERT(D < 3);
				881
				882	const int A = -(val & 0x1) & 0x1FF;
				883
				884	static const int Cvals[6] = { 204, 93, 44, 22, 11, 5 };
				885	const int C = Cvals[nBits - 1];
				886
				887	int B = 0;
				888	const SkTBits<int> valBits(val);
				889	switch (nBits) {
				890	case 1:
				891	B = 0;
				892	break;
				893
				894	case 2: {
				895	const int b = valBits[1];
				896	B = (b << 1) \| (b << 2) \| (b << 4) \| (b << 8);
				897	}
				898	break;
				899
				900	case 3: {
				901	const int cb = valBits(2, 1);
				902	B = cb \| (cb << 2) \| (cb << 7);
				903	}
				904	break;
				905
				906	case 4: {
				907	const int dcb = valBits(3, 1);
				908	B = dcb \| (dcb << 6);
				909	}
				910	break;
				911
				912	case 5: {
				913	const int edcb = valBits(4, 1);
				914	B = (edcb << 5) \| (edcb >> 2);
				915	}
				916	break;
				917
				918	case 6: {
				919	const int fedcb = valBits(5, 1);
				920	B = (fedcb << 4) \| (fedcb >> 4);
				921	}
				922	break;
				923	}
				924
				925	return unquantize_value(0x80, A, B, C, D);
				926	}
				927
				928	// Returns the unquantized value of a color that's represented as a
				929	// quint followed by nBits bits. This algorithm follows the sequence
				930	// defined in section C.2.13 of the ASTC spec.
				931	static inline int unquantize_quint_color(int val, int nBits) {
				932	const int D = (val >> nBits) & 0x7;
				933	SkASSERT(D < 5);
				934
				935	const int A = -(val & 0x1) & 0x1FF;
				936
				937	static const int Cvals[5] = { 113, 54, 26, 13, 6 };
				938	SkASSERT(nBits > 0);
				939	SkASSERT(nBits < 6);
				940
				941	const int C = Cvals[nBits - 1];
				942
				943	int B = 0;
				944	const SkTBits<int> valBits(val);
				945	switch (nBits) {
				946	case 1:
				947	B = 0;
				948	break;
				949
				950	case 2: {
				951	const int b = valBits[1];
				952	B = (b << 2) \| (b << 3) \| (b << 8);
				953	}
				954	break;
				955
				956	case 3: {
				957	const int cb = valBits(2, 1);
				958	B = (cb >> 1) \| (cb << 1) \| (cb << 7);
				959	}
				960	break;
				961
				962	case 4: {
				963	const int dcb = valBits(3, 1);
				964	B = (dcb >> 1) \| (dcb << 6);
				965	}
				966	break;
				967
				968	case 5: {
				969	const int edcb = valBits(4, 1);
				970	B = (edcb << 5) \| (edcb >> 3);
				971	}
				972	break;
				973	}
				974
				975	return unquantize_value(0x80, A, B, C, D);
				976	}
				977
				978	// This algorithm takes a list of integers, stored in vals, and unquantizes them
				979	// in place. This follows the algorithm laid out in section C.2.13 of the ASTC spec.
				980	static void unquantize_colors(int *vals, int nVals, int nBits, int nTrits, int nQuints) {
				981	for (int i = 0; i < nVals; ++i) {
				982	if (nTrits > 0) {
				983	SkASSERT(nQuints == 0);
				984	vals[i] = unquantize_trit_color(vals[i], nBits);
				985	} else if (nQuints > 0) {
				986	SkASSERT(nTrits == 0);
				987	vals[i] = unquantize_quint_color(vals[i], nBits);
				988	} else {
				989	SkASSERT(nQuints == 0 && nTrits == 0);
				990	vals[i] = unquantize_bits_color(vals[i], nBits);
				991	}
				992	}
				993	}
				994
				995	// Returns an interpolated value between c0 and c1 based on the weight. This
				996	// follows the algorithm laid out in section C.2.19 of the ASTC spec.
				997	static int interpolate_channel(int c0, int c1, int weight) {
				998	SkASSERT(0 <= c0 && c0 < 256);
				999	SkASSERT(0 <= c1 && c1 < 256);
				1000
				1001	c0 = (c0 << 8) \| c0;
				1002	c1 = (c1 << 8) \| c1;
				1003
				1004	const int result = ((c0(64 - weight) + c1weight + 32) / 64) >> 8;
				1005
				1006	if (result > 255) {
				1007	return 255;
				1008	}
				1009
				1010	SkASSERT(result >= 0);
				1011	return result;
				1012	}
				1013
				1014	// Returns an interpolated color between the two endpoints based on the weight.
				1015	static SkColor interpolate_endpoints(const SkColor endpoints[2], int weight) {
				1016	return SkColorSetARGB(
				1017	interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight),
				1018	interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight),
				1019	interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight),
				1020	interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight));
				1021	}
				1022
				1023	// Returns an interpolated color between the two endpoints based on the weight.
				1024	// It uses separate weights for the channel depending on the value of the 'plane'
				1025	// variable. By default, all channels will use weight 0, and the value of plane
				1026	// means that weight1 will be used for:
				1027	// 0: red
				1028	// 1: green
				1029	// 2: blue
				1030	// 3: alpha
				1031	static SkColor interpolate_dual_endpoints(
				1032	const SkColor endpoints[2], int weight0, int weight1, int plane) {
				1033	int a = interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight0);
				1034	int r = interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight0);
				1035	int g = interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight0);
				1036	int b = interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight0);
				1037
				1038	switch (plane) {
				1039
				1040	case 0:
				1041	r = interpolate_channel(
				1042	SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight1);
				1043	break;
				1044
				1045	case 1:
				1046	g = interpolate_channel(
				1047	SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight1);
				1048	break;
				1049
				1050	case 2:
				1051	b = interpolate_channel(
				1052	SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight1);
				1053	break;
				1054
				1055	case 3:
				1056	a = interpolate_channel(
				1057	SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight1);
				1058	break;
				1059
				1060	default:
				1061	SkDEBUGFAIL("Plane should be 0-3");
				1062	break;
				1063	}
				1064
				1065	return SkColorSetARGB(a, r, g, b);
				1066	}
				1067
				1068	// A struct of decoded values that we use to carry around information
				1069	// about the block. dimX and dimY are the dimension in texels of the block,
				1070	// for which there is only a limited subset of valid values:
				1071	//
				1072	// 4x4, 5x4, 5x5, 6x5, 6x6, 8x5, 8x6, 8x8, 10x5, 10x6, 10x8, 10x10, 12x10, 12x12
				1073
				1074	struct ASTCDecompressionData {
				1075	ASTCDecompressionData(int dimX, int dimY) : fDimX(dimX), fDimY(dimY) { }
				1076	const int fDimX; // the X dimension of the decompressed block
				1077	const int fDimY; // the Y dimension of the decompressed block
				1078	ASTCBlock fBlock; // the block data
				1079	int fBlockMode; // the block header that contains the block mode.
				1080
				1081	bool fDualPlaneEnabled; // is this block compressing dual weight planes?
				1082	int fDualPlane; // the independent plane in dual plane mode.
				1083
				1084	bool fVoidExtent; // is this block a single color?
				1085	bool fError; // does this block have an error encoding?
				1086
				1087	int fWeightDimX; // the x dimension of the weight grid
				1088	int fWeightDimY; // the y dimension of the weight grid
				1089
				1090	int fWeightBits; // the number of bits used for each weight value
				1091	int fWeightTrits; // the number of trits used for each weight value
				1092	int fWeightQuints; // the number of quints used for each weight value
				1093
				1094	int fPartCount; // the number of partitions in this block
				1095	int fPartIndex; // the partition index: only relevant if fPartCount > 0
				1096
				1097	// CEM values can be anything in the range 0-15, and each corresponds to a different
				1098	// mode that represents the color data. We only support LDR modes.
				1099	enum ColorEndpointMode {
				1100	kLDR_Luminance_Direct_ColorEndpointMode = 0,
				1101	kLDR_Luminance_BaseOffset_ColorEndpointMode = 1,
				1102	kHDR_Luminance_LargeRange_ColorEndpointMode = 2,
				1103	kHDR_Luminance_SmallRange_ColorEndpointMode = 3,
				1104	kLDR_LuminanceAlpha_Direct_ColorEndpointMode = 4,
				1105	kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode = 5,
				1106	kLDR_RGB_BaseScale_ColorEndpointMode = 6,
				1107	kHDR_RGB_BaseScale_ColorEndpointMode = 7,
				1108	kLDR_RGB_Direct_ColorEndpointMode = 8,
				1109	kLDR_RGB_BaseOffset_ColorEndpointMode = 9,
				1110	kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode = 10,
				1111	kHDR_RGB_ColorEndpointMode = 11,
				1112	kLDR_RGBA_Direct_ColorEndpointMode = 12,
				1113	kLDR_RGBA_BaseOffset_ColorEndpointMode = 13,
				1114	kHDR_RGB_LDRAlpha_ColorEndpointMode = 14,
				1115	kHDR_RGB_HDRAlpha_ColorEndpointMode = 15
				1116	};
				1117	static const int kMaxColorEndpointModes = 16;
				1118
				1119	// the color endpoint modes for this block.
				1120	static const int kMaxPartitions = 4;
				1121	ColorEndpointMode fCEM[kMaxPartitions];
				1122
				1123	int fColorStartBit; // The bit position of the first bit of the color data
				1124	int fColorEndBit; // The bit position of the last possible bit of the color data
				1125
				1126	// Returns the number of partitions for this block.
				1127	int numPartitions() const {
				1128	return fPartCount;
				1129	}
				1130
				1131	// Returns the total number of weight values that are stored in this block
				1132	int numWeights() const {
				1133	return fWeightDimX * fWeightDimY * (fDualPlaneEnabled ? 2 : 1);
				1134	}
				1135
				1136	#ifdef SK_DEBUG
				1137	// Returns the maximum value that any weight can take. We really only use
				1138	// this function for debugging.
				1139	int maxWeightValue() const {
				1140	int maxVal = (1 << fWeightBits);
				1141	if (fWeightTrits > 0) {
				1142	SkASSERT(0 == fWeightQuints);
				1143	maxVal *= 3;
				1144	} else if (fWeightQuints > 0) {
				1145	SkASSERT(0 == fWeightTrits);
				1146	maxVal *= 5;
				1147	}
				1148	return maxVal - 1;
				1149	}
				1150	#endif
				1151
				1152	// The number of bits needed to represent the texel weight data. This
				1153	// comes from the 'data size determination' section of the ASTC spec (C.2.22)
				1154	int numWeightBits() const {
				1155	const int nWeights = this->numWeights();
				1156	return
				1157	((nWeights8fWeightTrits + 4) / 5) +
				1158	((nWeights7fWeightQuints + 2) / 3) +
				1159	(nWeights*fWeightBits);
				1160	}
				1161
				1162	// Returns the number of color values stored in this block. The number of
				1163	// values stored is directly a function of the color endpoint modes.
				1164	int numColorValues() const {
				1165	int numValues = 0;
				1166	for (int i = 0; i < this->numPartitions(); ++i) {
				1167	int cemInt = static_cast<int>(fCEM[i]);
				1168	numValues += ((cemInt >> 2) + 1) * 2;
				1169	}
				1170
				1171	return numValues;
				1172	}
				1173
				1174	// Figures out the number of bits available for color values, and fills
				1175	// in the maximum encoding that will fit the number of color values that
				1176	// we need. Returns false on error. (See section C.2.22 of the spec)
				1177	bool getColorValueEncoding(int nBits, int nTrits, int *nQuints) const {
				1178	if (NULL == nBits \|\| NULL == nTrits \|\| NULL == nQuints) {
				1179	return false;
				1180	}
				1181
				1182	const int nColorVals = this->numColorValues();
				1183	if (nColorVals <= 0) {
				1184	return false;
				1185	}
				1186
				1187	const int colorBits = fColorEndBit - fColorStartBit;
				1188	SkASSERT(colorBits > 0);
				1189
				1190	// This is the minimum amount of accuracy required by the spec.
				1191	if (colorBits < ((13 * nColorVals + 4) / 5)) {
				1192	return false;
				1193	}
				1194
				1195	// Values can be represented as at most 8-bit values.
				1196	// !SPEED! place this in a lookup table based on colorBits and nColorVals
				1197	for (int i = 255; i > 0; --i) {
				1198	int range = i + 1;
				1199	int bits = 0, trits = 0, quints = 0;
				1200	bool valid = false;
				1201	if (SkIsPow2(range)) {
				1202	bits = bits_for_range(range);
				1203	valid = true;
				1204	} else if ((range % 3) == 0 && SkIsPow2(range/3)) {
				1205	trits = 1;
				1206	bits = bits_for_range(range/3);
				1207	valid = true;
				1208	} else if ((range % 5) == 0 && SkIsPow2(range/5)) {
				1209	quints = 1;
				1210	bits = bits_for_range(range/5);
				1211	valid = true;
				1212	}
				1213
				1214	if (valid) {
				1215	const int actualColorBits =
				1216	((nColorVals8trits + 4) / 5) +
				1217	((nColorVals7quints + 2) / 3) +
				1218	(nColorVals*bits);
				1219	if (actualColorBits <= colorBits) {
				1220	*nTrits = trits;
				1221	*nQuints = quints;
				1222	*nBits = bits;
				1223	return true;
				1224	}
				1225	}
				1226	}
				1227
				1228	return false;
				1229	}
				1230
				1231	// Converts the sequence of color values into endpoints. The algorithm here
				1232	// corresponds to the values determined by section C.2.14 of the ASTC spec
				1233	void colorEndpoints(SkColor endpoints[4][2], const int* colorValues) const {
				1234	for (int i = 0; i < this->numPartitions(); ++i) {
				1235	switch (fCEM[i]) {
				1236	case kLDR_Luminance_Direct_ColorEndpointMode: {
				1237	const int* v = colorValues;
				1238	endpoints[i][0] = SkColorSetARGB(0xFF, v[0], v[0], v[0]);
				1239	endpoints[i][1] = SkColorSetARGB(0xFF, v[1], v[1], v[1]);
				1240
				1241	colorValues += 2;
				1242	}
				1243	break;
				1244
				1245	case kLDR_Luminance_BaseOffset_ColorEndpointMode: {
				1246	const int* v = colorValues;
				1247	const int L0 = (v[0] >> 2) \| (v[1] & 0xC0);
				1248	const int L1 = clamp_byte(L0 + (v[1] & 0x3F));
				1249
				1250	endpoints[i][0] = SkColorSetARGB(0xFF, L0, L0, L0);
				1251	endpoints[i][1] = SkColorSetARGB(0xFF, L1, L1, L1);
				1252
				1253	colorValues += 2;
				1254	}
				1255	break;
				1256
				1257	case kLDR_LuminanceAlpha_Direct_ColorEndpointMode: {
				1258	const int* v = colorValues;
				1259
				1260	endpoints[i][0] = SkColorSetARGB(v[2], v[0], v[0], v[0]);
				1261	endpoints[i][1] = SkColorSetARGB(v[3], v[1], v[1], v[1]);
				1262
				1263	colorValues += 4;
				1264	}
				1265	break;
				1266
				1267	case kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode: {
				1268	int v0 = colorValues[0];
				1269	int v1 = colorValues[1];
				1270	int v2 = colorValues[2];
				1271	int v3 = colorValues[3];
				1272
				1273	bit_transfer_signed(&v1, &v0);
				1274	bit_transfer_signed(&v3, &v2);
				1275
				1276	endpoints[i][0] = SkColorSetARGB(v2, v0, v0, v0);
				1277	endpoints[i][1] = SkColorSetARGB(
				1278	clamp_byte(v3+v2),
				1279	clamp_byte(v1+v0),
				1280	clamp_byte(v1+v0),
				1281	clamp_byte(v1+v0));
				1282
				1283	colorValues += 4;
				1284	}
				1285	break;
				1286
				1287	case kLDR_RGB_BaseScale_ColorEndpointMode: {
				1288	decode_rgba_basescale(colorValues, endpoints[i], true);
				1289	colorValues += 4;
				1290	}
				1291	break;
				1292
				1293	case kLDR_RGB_Direct_ColorEndpointMode: {
				1294	decode_rgba_direct(colorValues, endpoints[i], true);
				1295	colorValues += 6;
				1296	}
				1297	break;
				1298
				1299	case kLDR_RGB_BaseOffset_ColorEndpointMode: {
				1300	decode_rgba_baseoffset(colorValues, endpoints[i], true);
				1301	colorValues += 6;
				1302	}
				1303	break;
				1304
				1305	case kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode: {
				1306	decode_rgba_basescale(colorValues, endpoints[i], false);
				1307	colorValues += 6;
				1308	}
				1309	break;
				1310
				1311	case kLDR_RGBA_Direct_ColorEndpointMode: {
				1312	decode_rgba_direct(colorValues, endpoints[i], false);
				1313	colorValues += 8;
				1314	}
				1315	break;
				1316
				1317	case kLDR_RGBA_BaseOffset_ColorEndpointMode: {
				1318	decode_rgba_baseoffset(colorValues, endpoints[i], false);
				1319	colorValues += 8;
				1320	}
				1321	break;
				1322
				1323	default:
				1324	SkDEBUGFAIL("HDR mode unsupported! This should be caught sooner.");
				1325	break;
				1326	}
				1327	}
				1328	}
				1329
				1330	// Follows the procedure from section C.2.17 of the ASTC specification
				1331	int unquantizeWeight(int x) const {
				1332	SkASSERT(x <= this->maxWeightValue());
				1333
				1334	const int D = (x >> fWeightBits) & 0x7;
				1335	const int A = -(x & 0x1) & 0x7F;
				1336
				1337	SkTBits<int> xbits(x);
				1338
				1339	int T = 0;
				1340	if (fWeightTrits > 0) {
				1341	SkASSERT(0 == fWeightQuints);
				1342	switch (fWeightBits) {
				1343	case 0: {
				1344	// x is a single trit
				1345	SkASSERT(x < 3);
				1346
				1347	static const int kUnquantizationTable[3] = { 0, 32, 63 };
				1348	T = kUnquantizationTable[x];
				1349	}
				1350	break;
				1351
				1352	case 1: {
				1353	const int B = 0;
				1354	const int C = 50;
				1355	T = unquantize_value(0x20, A, B, C, D);
				1356	}
				1357	break;
				1358
				1359	case 2: {
				1360	const int b = xbits[1];
				1361	const int B = b \| (b << 2) \| (b << 6);
				1362	const int C = 23;
				1363	T = unquantize_value(0x20, A, B, C, D);
				1364	}
				1365	break;
				1366
				1367	case 3: {
				1368	const int cb = xbits(2, 1);
				1369	const int B = cb \| (cb << 5);
				1370	const int C = 11;
				1371	T = unquantize_value(0x20, A, B, C, D);
				1372	}
				1373	break;
				1374
				1375	default:
				1376	SkDEBUGFAIL("Too many bits for trit encoding");
				1377	break;
				1378	}
				1379
				1380	} else if (fWeightQuints > 0) {
				1381	SkASSERT(0 == fWeightTrits);
				1382	switch (fWeightBits) {
				1383	case 0: {
				1384	// x is a single quint
				1385	SkASSERT(x < 5);
				1386
				1387	static const int kUnquantizationTable[5] = { 0, 16, 32, 47, 63 };
				1388	T = kUnquantizationTable[x];
				1389	}
				1390	break;
				1391
				1392	case 1: {
				1393	const int B = 0;
				1394	const int C = 28;
				1395	T = unquantize_value(0x20, A, B, C, D);
				1396	}
				1397	break;
				1398
				1399	case 2: {
				1400	const int b = xbits[1];
				1401	const int B = (b << 1) \| (b << 6);
				1402	const int C = 13;
				1403	T = unquantize_value(0x20, A, B, C, D);
				1404	}
				1405	break;
				1406
				1407	default:
				1408	SkDEBUGFAIL("Too many bits for quint encoding");
				1409	break;
				1410	}
				1411	} else {
				1412	SkASSERT(0 == fWeightTrits);
				1413	SkASSERT(0 == fWeightQuints);
				1414
				1415	T = replicate_bits(x, fWeightBits, 6);
				1416	}
				1417
				1418	// This should bring the value within [0, 63]..
				1419	SkASSERT(T <= 63);
				1420
				1421	if (T > 32) {
				1422	T += 1;
				1423	}
				1424
				1425	SkASSERT(T <= 64);
				1426
				1427	return T;
				1428	}
				1429
				1430	// Returns the weight at the associated index. If the index is out of bounds, it
				1431	// returns zero. It also chooses the weight appropriately based on the given dual
				1432	// plane.
				1433	int getWeight(const int* unquantizedWeights, int idx, bool dualPlane) const {
				1434	const int maxIdx = (fDualPlaneEnabled ? 2 : 1) * fWeightDimX * fWeightDimY - 1;
				1435	if (fDualPlaneEnabled) {
				1436	const int effectiveIdx = 2*idx + (dualPlane ? 1 : 0);
				1437	if (effectiveIdx > maxIdx) {
				1438	return 0;
				1439	}
				1440	return unquantizedWeights[effectiveIdx];
				1441	}
				1442
				1443	SkASSERT(!dualPlane);
				1444
				1445	if (idx > maxIdx) {
				1446	return 0;
				1447	} else {
				1448	return unquantizedWeights[idx];
				1449	}
				1450	}
				1451
				1452	// This computes the effective weight at location (s, t) of the block. This
				1453	// weight is computed by sampling the texel weight grid (it's usually not 1-1), and
				1454	// then applying a bilerp. The algorithm outlined here follows the algorithm
				1455	// defined in section C.2.18 of the ASTC spec.
				1456	int infillWeight(const int* unquantizedValues, int s, int t, bool dualPlane) const {
				1457	const int Ds = (1024 + fDimX/2) / (fDimX - 1);
				1458	const int Dt = (1024 + fDimY/2) / (fDimY - 1);
				1459
				1460	const int cs = Ds * s;
				1461	const int ct = Dt * t;
				1462
				1463	const int gs = (cs*(fWeightDimX - 1) + 32) >> 6;
				1464	const int gt = (ct*(fWeightDimY - 1) + 32) >> 6;
				1465
				1466	const int js = gs >> 4;
				1467	const int jt = gt >> 4;
				1468
				1469	const int fs = gs & 0xF;
				1470	const int ft = gt & 0xF;
				1471
				1472	const int idx = js + jt*fWeightDimX;
				1473	const int p00 = this->getWeight(unquantizedValues, idx, dualPlane);
				1474	const int p01 = this->getWeight(unquantizedValues, idx + 1, dualPlane);
				1475	const int p10 = this->getWeight(unquantizedValues, idx + fWeightDimX, dualPlane);
				1476	const int p11 = this->getWeight(unquantizedValues, idx + fWeightDimX + 1, dualPlane);
				1477
				1478	const int w11 = (fs*ft + 8) >> 4;
				1479	const int w10 = ft - w11;
				1480	const int w01 = fs - w11;
				1481	const int w00 = 16 - fs - ft + w11;
				1482
				1483	const int weight = (p00w00 + p01w01 + p10w10 + p11w11 + 8) >> 4;
				1484	SkASSERT(weight <= 64);
				1485	return weight;
				1486	}
				1487
				1488	// Unquantizes the decoded texel weights as described in section C.2.17 of
				1489	// the ASTC specification. Additionally, it populates texelWeights with
				1490	// the expanded weight grid, which is computed according to section C.2.18
				1491	void texelWeights(int texelWeights[2][12][12], const int* texelValues) const {
				1492	// Unquantized texel weights...
				1493	int unquantizedValues[144*2]; // 12x12 blocks with dual plane decoding...
				1494	SkASSERT(this->numWeights() <= 144*2);
				1495
				1496	// Unquantize the weights and cache them
				1497	for (int j = 0; j < this->numWeights(); ++j) {
				1498	unquantizedValues[j] = this->unquantizeWeight(texelValues[j]);
				1499	}
				1500
				1501	// Do weight infill...
				1502	for (int y = 0; y < fDimY; ++y) {
				1503	for (int x = 0; x < fDimX; ++x) {
				1504	texelWeights[0][x][y] = this->infillWeight(unquantizedValues, x, y, false);
				1505	if (fDualPlaneEnabled) {
				1506	texelWeights[1][x][y] = this->infillWeight(unquantizedValues, x, y, true);
				1507	}
				1508	}
				1509	}
				1510	}
				1511
				1512	// Returns the partition for the texel located at position (x, y).
				1513	// Adapted from C.2.21 of the ASTC specification
				1514	int getPartition(int x, int y) const {
				1515	const int partitionCount = this->numPartitions();
				1516	int seed = fPartIndex;
				1517	if ((fDimX * fDimY) < 31) {
				1518	x <<= 1;
				1519	y <<= 1;
				1520	}
				1521
				1522	seed += (partitionCount - 1) * 1024;
				1523
				1524	uint32_t p = seed;
				1525	p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4;
				1526	p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3;
				1527	p ^= p << 6; p ^= p >> 17;
				1528
				1529	uint32_t rnum = p;
				1530	uint8_t seed1 = rnum & 0xF;
				1531	uint8_t seed2 = (rnum >> 4) & 0xF;
				1532	uint8_t seed3 = (rnum >> 8) & 0xF;
				1533	uint8_t seed4 = (rnum >> 12) & 0xF;
				1534	uint8_t seed5 = (rnum >> 16) & 0xF;
				1535	uint8_t seed6 = (rnum >> 20) & 0xF;
				1536	uint8_t seed7 = (rnum >> 24) & 0xF;
				1537	uint8_t seed8 = (rnum >> 28) & 0xF;
				1538	uint8_t seed9 = (rnum >> 18) & 0xF;
				1539	uint8_t seed10 = (rnum >> 22) & 0xF;
				1540	uint8_t seed11 = (rnum >> 26) & 0xF;
				1541	uint8_t seed12 = ((rnum >> 30) \| (rnum << 2)) & 0xF;
				1542
				1543	seed1 = seed1; seed2 = seed2;
				1544	seed3 = seed3; seed4 = seed4;
				1545	seed5 = seed5; seed6 = seed6;
				1546	seed7 = seed7; seed8 = seed8;
				1547	seed9 = seed9; seed10 = seed10;
				1548	seed11 = seed11; seed12 = seed12;
				1549
				1550	int sh1, sh2, sh3;
				1551	if (0 != (seed & 1)) {
				1552	sh1 = (0 != (seed & 2))? 4 : 5;
				1553	sh2 = (partitionCount == 3)? 6 : 5;
				1554	} else {
				1555	sh1 = (partitionCount==3)? 6 : 5;
				1556	sh2 = (0 != (seed & 2))? 4 : 5;
				1557	}
				1558	sh3 = (0 != (seed & 0x10))? sh1 : sh2;
				1559
				1560	seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
				1561	seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
				1562	seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
				1563
				1564	const int z = 0;
				1565	int a = seed1x + seed2y + seed11*z + (rnum >> 14);
				1566	int b = seed3x + seed4y + seed12*z + (rnum >> 10);
				1567	int c = seed5x + seed6y + seed9 *z + (rnum >> 6);
				1568	int d = seed7x + seed8y + seed10*z + (rnum >> 2);
				1569
				1570	a &= 0x3F;
				1571	b &= 0x3F;
				1572	c &= 0x3F;
				1573	d &= 0x3F;
				1574
				1575	if (partitionCount < 4) {
				1576	d = 0;
				1577	}
				1578
				1579	if (partitionCount < 3) {
				1580	c = 0;
				1581	}
				1582
				1583	if (a >= b && a >= c && a >= d) {
				1584	return 0;
				1585	} else if (b >= c && b >= d) {
				1586	return 1;
				1587	} else if (c >= d) {
				1588	return 2;
				1589	} else {
				1590	return 3;
				1591	}
				1592	}
				1593
				1594	// Performs the proper interpolation of the texel based on the
				1595	// endpoints and weights.
				1596	SkColor getTexel(const SkColor endpoints[4][2],
				1597	const int weights[2][12][12],
				1598	int x, int y) const {
				1599	int part = 0;
				1600	if (this->numPartitions() > 1) {
				1601	part = this->getPartition(x, y);
				1602	}
				1603
				1604	SkColor result;
				1605	if (fDualPlaneEnabled) {
				1606	result = interpolate_dual_endpoints(
				1607	endpoints[part], weights[0][x][y], weights[1][x][y], fDualPlane);
				1608	} else {
				1609	result = interpolate_endpoints(endpoints[part], weights[0][x][y]);
				1610	}
				1611
				1612	#if 1
				1613	// !FIXME! if we're writing directly to a bitmap, then we don't need
				1614	// to swap the red and blue channels, but since we're usually being used
				1615	// by the SkImageDecoder_astc module, the results are expected to be in RGBA.
				1616	result = SkColorSetARGB(
				1617	SkColorGetA(result), SkColorGetB(result), SkColorGetG(result), SkColorGetR(result));
				1618	#endif
				1619
				1620	return result;
				1621	}
				1622
				1623	void decode() {
				1624	// First decode the block mode.
				1625	this->decodeBlockMode();
				1626
				1627	// Now we can decode the partition information.
				1628	fPartIndex = static_cast<int>(read_astc_bits(fBlock, 11, 23));
				1629	fPartCount = (fPartIndex & 0x3) + 1;
				1630	fPartIndex >>= 2;
				1631
				1632	// This is illegal
				1633	if (fDualPlaneEnabled && this->numPartitions() == 4) {
				1634	fError = true;
				1635	return;
				1636	}
				1637
				1638	// Based on the partition info, we can decode the color information.
				1639	this->decodeColorData();
				1640	}
				1641
				1642	// Decodes the dual plane based on the given bit location. The final
				1643	// location, if the dual plane is enabled, is also the end of our color data.
				1644	// This function is only meant to be used from this->decodeColorData()
				1645	void decodeDualPlane(int bitLoc) {
				1646	if (fDualPlaneEnabled) {
				1647	fDualPlane = static_cast<int>(read_astc_bits(fBlock, bitLoc - 2, bitLoc));
				1648	fColorEndBit = bitLoc - 2;
				1649	} else {
				1650	fColorEndBit = bitLoc;
				1651	}
				1652	}
				1653
				1654	// Decodes the color information based on the ASTC spec.
				1655	void decodeColorData() {
				1656
				1657	// By default, the last color bit is at the end of the texel weights
				1658	const int lastWeight = 128 - this->numWeightBits();
				1659
				1660	// If we have a dual plane then it will be at this location, too.
				1661	int dualPlaneBitLoc = lastWeight;
				1662
				1663	// If there's only one partition, then our job is (relatively) easy.
				1664	if (this->numPartitions() == 1) {
				1665	fCEM[0] = static_cast<ColorEndpointMode>(read_astc_bits(fBlock, 13, 17));
				1666	fColorStartBit = 17;
				1667
				1668	// Handle dual plane mode...
				1669	this->decodeDualPlane(dualPlaneBitLoc);
				1670
				1671	return;
				1672	}
				1673
				1674	// If we have more than one partition, then we need to make
				1675	// room for the partition index.
				1676	fColorStartBit = 29;
				1677
				1678	// Read the base CEM. If it's zero, then we have no additional
				1679	// CEM data and the endpoints for each partition share the same CEM.
				1680	const int baseCEM = static_cast<int>(read_astc_bits(fBlock, 23, 25));
				1681	if (0 == baseCEM) {
				1682
				1683	const ColorEndpointMode sameCEM =
				1684	static_cast<ColorEndpointMode>(read_astc_bits(fBlock, 25, 29));
				1685
				1686	for (int i = 0; i < kMaxPartitions; ++i) {
				1687	fCEM[i] = sameCEM;
				1688	}
				1689
				1690	// Handle dual plane mode...
				1691	this->decodeDualPlane(dualPlaneBitLoc);
				1692
				1693	return;
				1694	}
				1695
				1696	// Move the dual plane selector bits down based on how many
				1697	// partitions the block contains.
				1698	switch (this->numPartitions()) {
				1699	case 2:
				1700	dualPlaneBitLoc -= 2;
				1701	break;
				1702
				1703	case 3:
				1704	dualPlaneBitLoc -= 5;
				1705	break;
				1706
				1707	case 4:
				1708	dualPlaneBitLoc -= 8;
				1709	break;
				1710
				1711	default:
				1712	SkDEBUGFAIL("Internal ASTC decoding error.");
				1713	break;
				1714	}
				1715
				1716	// The rest of the CEM config will be between the dual plane bit selector
				1717	// and the texel weight grid.
				1718	const int lowCEM = static_cast<int>(read_astc_bits(fBlock, 23, 29));
krajcevski	95b1b3d	2014-08-07 12:58:38 -0700	[diff] [blame]	1719	SkASSERT(lastWeight >= dualPlaneBitLoc);
				1720	SkASSERT(lastWeight - dualPlaneBitLoc < 31);
krajcevski	3c7edda	2014-08-06 12:47:59 -0700	[diff] [blame]	1721	int fullCEM = static_cast<int>(read_astc_bits(fBlock, dualPlaneBitLoc, lastWeight));
				1722
				1723	// Attach the config at the end of the weight grid to the CEM values
				1724	// in the beginning of the block.
				1725	fullCEM = (fullCEM << 6) \| lowCEM;
				1726
				1727	// Ignore the two least significant bits, since those are our baseCEM above.
				1728	fullCEM = fullCEM >> 2;
				1729
				1730	int C[kMaxPartitions]; // Next, decode C and M from the spec (Table C.2.12)
				1731	for (int i = 0; i < this->numPartitions(); ++i) {
				1732	C[i] = fullCEM & 1;
				1733	fullCEM = fullCEM >> 1;
				1734	}
				1735
				1736	int M[kMaxPartitions];
				1737	for (int i = 0; i < this->numPartitions(); ++i) {
				1738	M[i] = fullCEM & 0x3;
				1739	fullCEM = fullCEM >> 2;
				1740	}
				1741
				1742	// Construct our CEMs..
				1743	SkASSERT(baseCEM > 0);
				1744	for (int i = 0; i < this->numPartitions(); ++i) {
				1745	int cem = (baseCEM - 1) * 4;
				1746	cem += (0 == C[i])? 0 : 4;
				1747	cem += M[i];
				1748
				1749	SkASSERT(cem < 16);
				1750	fCEM[i] = static_cast<ColorEndpointMode>(cem);
				1751	}
				1752
				1753	// Finally, if we have dual plane mode, then read the plane selector.
				1754	this->decodeDualPlane(dualPlaneBitLoc);
				1755	}
				1756
				1757	// Decodes the block mode. This function determines whether or not we use
				1758	// dual plane encoding, the size of the texel weight grid, and the number of
				1759	// bits, trits and quints that are used to encode it. For more information,
				1760	// see section C.2.10 of the ASTC spec.
				1761	//
				1762	// For 2D blocks, the Block Mode field is laid out as follows:
				1763	//
				1764	// -------------------------------------------------------------------------
				1765	// 10 9 8 7 6 5 4 3 2 1 0 Width Height Notes
				1766	// -------------------------------------------------------------------------
				1767	// D H B A R0 0 0 R2 R1 B+4 A+2
				1768	// D H B A R0 0 1 R2 R1 B+8 A+2
				1769	// D H B A R0 1 0 R2 R1 A+2 B+8
				1770	// D H 0 B A R0 1 1 R2 R1 A+2 B+6
				1771	// D H 1 B A R0 1 1 R2 R1 B+2 A+2
				1772	// D H 0 0 A R0 R2 R1 0 0 12 A+2
				1773	// D H 0 1 A R0 R2 R1 0 0 A+2 12
				1774	// D H 1 1 0 0 R0 R2 R1 0 0 6 10
				1775	// D H 1 1 0 1 R0 R2 R1 0 0 10 6
				1776	// B 1 0 A R0 R2 R1 0 0 A+6 B+6 D=0, H=0
				1777	// x x 1 1 1 1 1 1 1 0 0 - - Void-extent
				1778	// x x 1 1 1 x x x x 0 0 - - Reserved*
				1779	// x x x x x x x 0 0 0 0 - - Reserved
				1780	// -------------------------------------------------------------------------
				1781	//
				1782	// D - dual plane enabled
				1783	// H, R - used to determine the number of bits/trits/quints in texel weight encoding
				1784	// R is a three bit value whose LSB is R0 and MSB is R1
				1785	// Width, Height - dimensions of the texel weight grid (determined by A and B)
				1786
				1787	void decodeBlockMode() {
				1788	const int blockMode = static_cast<int>(read_astc_bits(fBlock, 0, 11));
				1789
				1790	// Check for special void extent encoding
				1791	fVoidExtent = (blockMode & 0x1FF) == 0x1FC;
				1792
				1793	// Check for reserved block modes
				1794	fError = ((blockMode & 0x1C3) == 0x1C0) \|\| ((blockMode & 0xF) == 0);
				1795
				1796	// Neither reserved nor void-extent, decode as usual
				1797	// This code corresponds to table C.2.8 of the ASTC spec
				1798	bool highPrecision = false;
				1799	int R = 0;
				1800	if ((blockMode & 0x3) == 0) {
				1801	R = ((0xC & blockMode) >> 1) \| ((0x10 & blockMode) >> 4);
				1802	const int bitsSevenAndEight = (blockMode & 0x180) >> 7;
				1803	SkASSERT(0 <= bitsSevenAndEight && bitsSevenAndEight < 4);
				1804
				1805	const int A = (blockMode >> 5) & 0x3;
				1806	const int B = (blockMode >> 9) & 0x3;
				1807
				1808	fDualPlaneEnabled = (blockMode >> 10) & 0x1;
				1809	highPrecision = (blockMode >> 9) & 0x1;
				1810
				1811	switch (bitsSevenAndEight) {
				1812	default:
				1813	case 0:
				1814	fWeightDimX = 12;
				1815	fWeightDimY = A + 2;
				1816	break;
				1817
				1818	case 1:
				1819	fWeightDimX = A + 2;
				1820	fWeightDimY = 12;
				1821	break;
				1822
				1823	case 2:
				1824	fWeightDimX = A + 6;
				1825	fWeightDimY = B + 6;
				1826	fDualPlaneEnabled = false;
				1827	highPrecision = false;
				1828	break;
				1829
				1830	case 3:
				1831	if (0 == A) {
				1832	fWeightDimX = 6;
				1833	fWeightDimY = 10;
				1834	} else {
				1835	fWeightDimX = 10;
				1836	fWeightDimY = 6;
				1837	}
				1838	break;
				1839	}
				1840	} else { // (blockMode & 0x3) != 0
				1841	R = ((blockMode & 0x3) << 1) \| ((blockMode & 0x10) >> 4);
				1842
				1843	const int bitsTwoAndThree = (blockMode >> 2) & 0x3;
				1844	SkASSERT(0 <= bitsTwoAndThree && bitsTwoAndThree < 4);
				1845
				1846	const int A = (blockMode >> 5) & 0x3;
				1847	const int B = (blockMode >> 7) & 0x3;
				1848
				1849	fDualPlaneEnabled = (blockMode >> 10) & 0x1;
				1850	highPrecision = (blockMode >> 9) & 0x1;
				1851
				1852	switch (bitsTwoAndThree) {
				1853	case 0:
				1854	fWeightDimX = B + 4;
				1855	fWeightDimY = A + 2;
				1856	break;
				1857	case 1:
				1858	fWeightDimX = B + 8;
				1859	fWeightDimY = A + 2;
				1860	break;
				1861	case 2:
				1862	fWeightDimX = A + 2;
				1863	fWeightDimY = B + 8;
				1864	break;
				1865	case 3:
				1866	if ((B & 0x2) == 0) {
				1867	fWeightDimX = A + 2;
				1868	fWeightDimY = (B & 1) + 6;
				1869	} else {
				1870	fWeightDimX = (B & 1) + 2;
				1871	fWeightDimY = A + 2;
				1872	}
				1873	break;
				1874	}
				1875	}
				1876
				1877	// We should have set the values of R and highPrecision
				1878	// from decoding the block mode, these are used to determine
				1879	// the proper dimensions of our weight grid.
				1880	if ((R & 0x6) == 0) {
				1881	fError = true;
				1882	} else {
				1883	static const int kBitAllocationTable[2][6][3] = {
				1884	{
				1885	{ 1, 0, 0 },
				1886	{ 0, 1, 0 },
				1887	{ 2, 0, 0 },
				1888	{ 0, 0, 1 },
				1889	{ 1, 1, 0 },
				1890	{ 3, 0, 0 }
				1891	},
				1892	{
				1893	{ 1, 0, 1 },
				1894	{ 2, 1, 0 },
				1895	{ 4, 0, 0 },
				1896	{ 2, 0, 1 },
				1897	{ 3, 1, 0 },
				1898	{ 5, 0, 0 }
				1899	}
				1900	};
				1901
				1902	fWeightBits = kBitAllocationTable[highPrecision][R - 2][0];
				1903	fWeightTrits = kBitAllocationTable[highPrecision][R - 2][1];
				1904	fWeightQuints = kBitAllocationTable[highPrecision][R - 2][2];
				1905	}
				1906	}
				1907	};
				1908
				1909	// Reads an ASTC block from the given pointer.
				1910	static inline void read_astc_block(ASTCDecompressionData dst, const uint8_t src) {
				1911	const uint64_t* qword = reinterpret_cast<const uint64_t*>(src);
				1912	dst->fBlock.fLow = SkEndian_SwapLE64(qword[0]);
				1913	dst->fBlock.fHigh = SkEndian_SwapLE64(qword[1]);
				1914	dst->decode();
				1915	}
				1916
				1917	// Take a known void-extent block, and write out the values as a constant color.
				1918	static void decompress_void_extent(uint8_t* dst, int dstRowBytes,
				1919	const ASTCDecompressionData &data) {
				1920	// The top 64 bits contain 4 16-bit RGBA values.
				1921	int a = (static_cast<int>(read_astc_bits(data.fBlock, 112, 128)) + 255) >> 8;
				1922	int b = (static_cast<int>(read_astc_bits(data.fBlock, 96, 112)) + 255) >> 8;
				1923	int g = (static_cast<int>(read_astc_bits(data.fBlock, 80, 96)) + 255) >> 8;
				1924	int r = (static_cast<int>(read_astc_bits(data.fBlock, 64, 80)) + 255) >> 8;
				1925
				1926	write_constant_color(dst, data.fDimX, data.fDimY, dstRowBytes, SkColorSetARGB(a, r, g, b));
				1927	}
				1928
				1929	// Decompresses a single ASTC block. It's assumed that data.fDimX and data.fDimY are
				1930	// set and that the block has already been decoded (i.e. data.decode() has been called)
				1931	static void decompress_astc_block(uint8_t* dst, int dstRowBytes,
				1932	const ASTCDecompressionData &data) {
				1933	if (data.fError) {
				1934	write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
				1935	return;
				1936	}
				1937
				1938	if (data.fVoidExtent) {
				1939	decompress_void_extent(dst, dstRowBytes, data);
				1940	return;
				1941	}
				1942
				1943	// According to the spec, any more than 64 values is illegal. (C.2.24)
				1944	static const int kMaxTexelValues = 64;
				1945
				1946	// Decode the texel weights.
				1947	int texelValues[kMaxTexelValues];
				1948	bool success = decode_integer_sequence(
				1949	texelValues, kMaxTexelValues, data.numWeights(),
				1950	// texel data goes to the end of the 128 bit block.
				1951	data.fBlock, 128, 128 - data.numWeightBits(), false,
				1952	data.fWeightBits, data.fWeightTrits, data.fWeightQuints);
				1953
				1954	if (!success) {
				1955	write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
				1956	return;
				1957	}
				1958
				1959	// Decode the color endpoints
				1960	int colorBits, colorTrits, colorQuints;
				1961	if (!data.getColorValueEncoding(&colorBits, &colorTrits, &colorQuints)) {
				1962	write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
				1963	return;
				1964	}
				1965
				1966	// According to the spec, any more than 18 color values is illegal. (C.2.24)
				1967	static const int kMaxColorValues = 18;
				1968
				1969	int colorValues[kMaxColorValues];
				1970	success = decode_integer_sequence(
				1971	colorValues, kMaxColorValues, data.numColorValues(),
				1972	data.fBlock, data.fColorStartBit, data.fColorEndBit, true,
				1973	colorBits, colorTrits, colorQuints);
				1974
				1975	if (!success) {
				1976	write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
				1977	return;
				1978	}
				1979
				1980	// Unquantize the color values after they've been decoded.
				1981	unquantize_colors(colorValues, data.numColorValues(), colorBits, colorTrits, colorQuints);
				1982
				1983	// Decode the colors into the appropriate endpoints.
				1984	SkColor endpoints[4][2];
				1985	data.colorEndpoints(endpoints, colorValues);
				1986
				1987	// Do texel infill and decode the texel values.
				1988	int texelWeights[2][12][12];
				1989	data.texelWeights(texelWeights, texelValues);
				1990
				1991	// Write the texels by interpolating them based on the information
				1992	// stored in the block.
				1993	dst += data.fDimY * dstRowBytes;
				1994	for (int y = 0; y < data.fDimY; ++y) {
				1995	dst -= dstRowBytes;
				1996	SkColor* colorPtr = reinterpret_cast<SkColor*>(dst);
				1997	for (int x = 0; x < data.fDimX; ++x) {
				1998	colorPtr[x] = data.getTexel(endpoints, texelWeights, x, y);
				1999	}
				2000	}
				2001	}
				2002
krajcevski	a10555a	2014-08-11 13:34:22 -0700	[diff] [blame]	2003	////////////////////////////////////////////////////////////////////////////////
				2004	//
				2005	// ASTC Compression Struct
				2006	//
				2007	////////////////////////////////////////////////////////////////////////////////
				2008
krajcevski	45a0bf5	2014-08-07 11:10:22 -0700	[diff] [blame]	2009	// This is the type passed as the CompressorType argument of the compressed
				2010	// blitter for the ASTC format. The static functions required to be in this
				2011	// struct are documented in SkTextureCompressor_Blitter.h
				2012	struct CompressorASTC {
				2013	static inline void CompressA8Vertical(uint8_t* dst, const uint8_t* src) {
				2014	compress_a8_astc_block<GetAlphaTranspose>(&dst, src, 12);
				2015	}
				2016
				2017	static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
				2018	int srcRowBytes) {
				2019	compress_a8_astc_block<GetAlpha>(&dst, src, srcRowBytes);
				2020	}
				2021
krajcevski	a10555a	2014-08-11 13:34:22 -0700	[diff] [blame]	2022	#if PEDANTIC_BLIT_RECT
				2023	static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
				2024	const uint8_t* mask) {
				2025	// TODO: krajcevski
				2026	// This is kind of difficult for ASTC because the weight values are calculated
				2027	// as an average of the actual weights. The best we can do is decompress the
				2028	// weights and recalculate them based on the new texel values. This should
				2029	// be "not too bad" since we know that anytime we hit this function, we're
				2030	// compressing 12x12 block dimension alpha-only, and we know the layout
				2031	// of the block
				2032	SkFAIL("Implement me!");
krajcevski	45a0bf5	2014-08-07 11:10:22 -0700	[diff] [blame]	2033	}
krajcevski	a10555a	2014-08-11 13:34:22 -0700	[diff] [blame]	2034	#endif
krajcevski	45a0bf5	2014-08-07 11:10:22 -0700	[diff] [blame]	2035	};
				2036
krajcevski	3c7edda	2014-08-06 12:47:59 -0700	[diff] [blame]	2037	////////////////////////////////////////////////////////////////////////////////
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	2038
				2039	namespace SkTextureCompressor {
				2040
krajcevski	3c7edda	2014-08-06 12:47:59 -0700	[diff] [blame]	2041	bool CompressA8To12x12ASTC(uint8_t* dst, const uint8_t* src,
bsalomon	9880607	2014-12-12 15:11:17 -0800	[diff] [blame]	2042	int width, int height, size_t rowBytes) {
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	2043	if (width < 0 \|\| ((width % 12) != 0) \|\| height < 0 \|\| ((height % 12) != 0)) {
				2044	return false;
				2045	}
				2046
				2047	uint8_t** dstPtr = &dst;
krajcevski	b5294e8	2014-07-30 08:34:51 -0700	[diff] [blame]	2048	for (int y = 0; y < height; y += 12) {
				2049	for (int x = 0; x < width; x += 12) {
krajcevski	10a350c	2014-07-29 07:24:58 -0700	[diff] [blame]	2050	compress_a8_astc_block<GetAlpha>(dstPtr, src + y*rowBytes + x, rowBytes);
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	2051	}
				2052	}
				2053
				2054	return true;
				2055	}
				2056
krajcevski	b8ccc2f	2014-08-07 08:15:14 -0700	[diff] [blame]	2057	SkBlitter* CreateASTCBlitter(int width, int height, void* outputBuffer,
				2058	SkTBlitterAllocator* allocator) {
				2059	if ((width % 12) != 0 \|\| (height % 12) != 0) {
				2060	return NULL;
				2061	}
				2062
				2063	// Memset the output buffer to an encoding that decodes to zero. We must do this
				2064	// in order to avoid having uninitialized values in the buffer if the blitter
				2065	// decides not to write certain scanlines (and skip entire rows of blocks).
				2066	// In the case of ASTC, if everything index is zero, then the interpolated value
				2067	// will decode to zero provided we have the right header. We use the encoding
				2068	// from recognizing all zero blocks from above.
				2069	const int nBlocks = (width * height / 144);
				2070	uint8_t dst = reinterpret_cast<uint8_t >(outputBuffer);
				2071	for (int i = 0; i < nBlocks; ++i) {
				2072	send_packing(&dst, SkTEndian_SwapLE64(0x0000000001FE000173ULL), 0);
				2073	}
				2074
				2075	return allocator->createT<
krajcevski	45a0bf5	2014-08-07 11:10:22 -0700	[diff] [blame]	2076	SkTCompressedAlphaBlitter<12, 16, CompressorASTC>, int, int, void* >
krajcevski	10a350c	2014-07-29 07:24:58 -0700	[diff] [blame]	2077	(width, height, outputBuffer);
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	2078	}
				2079
krajcevski	3c7edda	2014-08-06 12:47:59 -0700	[diff] [blame]	2080	void DecompressASTC(uint8_t* dst, int dstRowBytes, const uint8_t* src,
				2081	int width, int height, int blockDimX, int blockDimY) {
				2082	// ASTC is encoded in what they call "raster order", so that the first
				2083	// block is the bottom-left block in the image, and the first pixel
				2084	// is the bottom-left pixel of the image
				2085	dst += height * dstRowBytes;
				2086
				2087	ASTCDecompressionData data(blockDimX, blockDimY);
				2088	for (int y = 0; y < height; y += blockDimY) {
				2089	dst -= blockDimY * dstRowBytes;
				2090	SkColor colorPtr = reinterpret_cast<SkColor>(dst);
				2091	for (int x = 0; x < width; x += blockDimX) {
				2092	read_astc_block(&data, src);
				2093	decompress_astc_block(reinterpret_cast<uint8_t*>(colorPtr + x), dstRowBytes, data);
				2094
				2095	// ASTC encoded blocks are 16 bytes (128 bits) large.
				2096	src += 16;
				2097	}
				2098	}
				2099	}
				2100
krajcevski	b2ef181	2014-07-25 07:33:01 -0700	[diff] [blame]	2101	} // SkTextureCompressor