| /* libs/opengles/dxt.cpp |
| ** |
| ** Copyright 2007, The Android Open Source Project |
| ** |
| ** Licensed under the Apache License, Version 2.0 (the "License"); |
| ** you may not use this file except in compliance with the License. |
| ** You may obtain a copy of the License at |
| ** |
| ** http://www.apache.org/licenses/LICENSE-2.0 |
| ** |
| ** Unless required by applicable law or agreed to in writing, software |
| ** distributed under the License is distributed on an "AS IS" BASIS, |
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ** See the License for the specific language governing permissions and |
| ** limitations under the License. |
| */ |
| |
| #define TIMING 0 |
| |
| #if TIMING |
| #include <sys/time.h> // for optimization timing |
| #include <stdio.h> |
| #include <stdlib.h> |
| #endif |
| |
| #include <GLES/gl.h> |
| #include <utils/Endian.h> |
| |
| #include "context.h" |
| |
| #define TIMING 0 |
| |
| namespace android { |
| |
| static uint8_t avg23tab[64*64]; |
| static volatile int tables_initialized = 0; |
| |
| // Definitions below are equivalent to these over the valid range of arguments |
| // #define div5(x) ((x)/5) |
| // #define div7(x) ((x)/7) |
| |
| // Use fixed-point to divide by 5 and 7 |
| // 3277 = 2^14/5 + 1 |
| // 2341 = 2^14/7 + 1 |
| #define div5(x) (((x)*3277) >> 14) |
| #define div7(x) (((x)*2341) >> 14) |
| |
| // Table with entry [a << 6 | b] = (2*a + b)/3 for 0 <= a,b < 64 |
| #define avg23(x0,x1) avg23tab[((x0) << 6) | (x1)] |
| |
| // Extract 5/6/5 RGB |
| #define red(x) (((x) >> 11) & 0x1f) |
| #define green(x) (((x) >> 5) & 0x3f) |
| #define blue(x) ( (x) & 0x1f) |
| |
| /* |
| * Convert 5/6/5 RGB (as 3 ints) to 8/8/8 |
| * |
| * Operation count: 8 <<, 0 &, 5 | |
| */ |
| inline static int rgb565SepTo888(int r, int g, int b) |
| |
| { |
| return ((((r << 3) | (r >> 2)) << 16) | |
| (((g << 2) | (g >> 4)) << 8) | |
| ((b << 3) | (b >> 2))); |
| } |
| |
| /* |
| * Convert 5/6/5 RGB (as a single 16-bit word) to 8/8/8 |
| * |
| * r4r3r2r1 r0g5g4g3 g2g1g0b4 b3b2b1b0 rgb |
| * r4r3r2 r1r0g5g4 g3g2g1g0 b4b3b2b1 b0 0 0 0 rgb << 3 |
| * r4r3r2r1 r0r4r3r2 g5g4g3g2 g1g0g5g4 b4b3b2b1 b0b4b3b2 desired result |
| * |
| * Construct the 24-bit RGB word as: |
| * |
| * r4r3r2r1 r0------ -------- -------- -------- -------- (rgb << 8) & 0xf80000 |
| * r4r3r2 -------- -------- -------- -------- (rgb << 3) & 0x070000 |
| * g5g4g3g2 g1g0---- -------- -------- (rgb << 5) & 0x00fc00 |
| * g5g4 -------- -------- (rgb >> 1) & 0x000300 |
| * b4b3b2b1 b0------ (rgb << 3) & 0x0000f8 |
| * b4b3b2 (rgb >> 2) & 0x000007 |
| * |
| * Operation count: 5 <<, 6 &, 5 | (n.b. rgb >> 3 is used twice) |
| */ |
| inline static int rgb565To888(int rgb) |
| |
| { |
| int rgb3 = rgb >> 3; |
| return (((rgb << 8) & 0xf80000) | |
| ( rgb3 & 0x070000) | |
| ((rgb << 5) & 0x00fc00) | |
| ((rgb >> 1) & 0x000300) | |
| ( rgb3 & 0x0000f8) | |
| ((rgb >> 2) & 0x000007)); |
| } |
| |
| #if __BYTE_ORDER == __BIG_ENDIAN |
| static uint32_t swap(uint32_t x) { |
| int b0 = (x >> 24) & 0xff; |
| int b1 = (x >> 16) & 0xff; |
| int b2 = (x >> 8) & 0xff; |
| int b3 = (x ) & 0xff; |
| |
| return (uint32_t)((b3 << 24) | (b2 << 16) | (b1 << 8) | b0); |
| } |
| #endif |
| |
| static void |
| init_tables() |
| { |
| if (tables_initialized) { |
| return; |
| } |
| |
| for (int i = 0; i < 64; i++) { |
| for (int j = 0; j < 64; j++) { |
| int avg = (2*i + j)/3; |
| avg23tab[(i << 6) | j] = avg; |
| } |
| } |
| |
| asm volatile ("" : : : "memory"); |
| tables_initialized = 1; |
| } |
| |
| /* |
| * Utility to scan a DXT1 compressed texture to determine whether it |
| * contains a transparent pixel (color0 < color1, code == 3). This |
| * may be useful if the application lacks information as to whether |
| * the true format is GL_COMPRESSED_RGB_S3TC_DXT1_EXT or |
| * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT. |
| */ |
| bool |
| DXT1HasAlpha(const GLvoid *data, int width, int height) { |
| #if TIMING |
| struct timeval start_t, end_t; |
| struct timezone tz; |
| |
| gettimeofday(&start_t, &tz); |
| #endif |
| |
| bool hasAlpha = false; |
| |
| int xblocks = (width + 3)/4; |
| int yblocks = (height + 3)/4; |
| int numblocks = xblocks*yblocks; |
| |
| uint32_t const *d32 = (uint32_t *)data; |
| for (int b = 0; b < numblocks; b++) { |
| uint32_t colors = *d32++; |
| |
| #if __BYTE_ORDER == __BIG_ENDIAN |
| colors = swap(colors); |
| #endif |
| |
| uint16_t color0 = colors & 0xffff; |
| uint16_t color1 = colors >> 16; |
| |
| if (color0 < color1) { |
| // There's no need to endian-swap within 'bits' |
| // since we don't care which pixel is the transparent one |
| uint32_t bits = *d32++; |
| |
| // Detect if any (odd, even) pair of bits are '11' |
| // bits: b31 b30 b29 ... b3 b2 b1 b0 |
| // bits >> 1: b31 b31 b30 ... b4 b3 b2 b1 |
| // &: b31 (b31 & b30) (b29 & b28) ... (b2 & b1) (b1 & b0) |
| // & 0x55..: 0 (b31 & b30) 0 ... 0 (b1 & b0) |
| if (((bits & (bits >> 1)) & 0x55555555) != 0) { |
| hasAlpha = true; |
| goto done; |
| } |
| } else { |
| // Skip 4 bytes |
| ++d32; |
| } |
| } |
| |
| done: |
| #if TIMING |
| gettimeofday(&end_t, &tz); |
| long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 + |
| (end_t.tv_usec - start_t.tv_usec); |
| |
| printf("Scanned w=%d h=%d in %ld usec\n", width, height, usec); |
| #endif |
| |
| return hasAlpha; |
| } |
| |
| static void |
| decodeDXT1(const GLvoid *data, int width, int height, |
| void *surface, int stride, |
| bool hasAlpha) |
| |
| { |
| init_tables(); |
| |
| uint32_t const *d32 = (uint32_t *)data; |
| |
| // Color table for the current block |
| uint16_t c[4]; |
| c[0] = c[1] = c[2] = c[3] = 0; |
| |
| // Specified colors from the previous block |
| uint16_t prev_color0 = 0x0000; |
| uint16_t prev_color1 = 0x0000; |
| |
| uint16_t* rowPtr = (uint16_t*)surface; |
| for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) { |
| uint16_t *blockPtr = rowPtr; |
| for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) { |
| uint32_t colors = *d32++; |
| uint32_t bits = *d32++; |
| |
| #if __BYTE_ORDER == __BIG_ENDIAN |
| colors = swap(colors); |
| bits = swap(bits); |
| #endif |
| |
| // Raw colors |
| uint16_t color0 = colors & 0xffff; |
| uint16_t color1 = colors >> 16; |
| |
| // If the new block has the same base colors as the |
| // previous one, we don't need to recompute the color |
| // table c[] |
| if (color0 != prev_color0 || color1 != prev_color1) { |
| // Store raw colors for comparison with next block |
| prev_color0 = color0; |
| prev_color1 = color1; |
| |
| int r0 = red(color0); |
| int g0 = green(color0); |
| int b0 = blue(color0); |
| |
| int r1 = red(color1); |
| int g1 = green(color1); |
| int b1 = blue(color1); |
| |
| if (hasAlpha) { |
| c[0] = (r0 << 11) | ((g0 >> 1) << 6) | (b0 << 1) | 0x1; |
| c[1] = (r1 << 11) | ((g1 >> 1) << 6) | (b1 << 1) | 0x1; |
| } else { |
| c[0] = color0; |
| c[1] = color1; |
| } |
| |
| int r2, g2, b2, r3, g3, b3, a3; |
| |
| int bbits = bits >> 1; |
| bool has2 = ((bbits & ~bits) & 0x55555555) != 0; |
| bool has3 = ((bbits & bits) & 0x55555555) != 0; |
| |
| if (has2 || has3) { |
| if (color0 > color1) { |
| r2 = avg23(r0, r1); |
| g2 = avg23(g0, g1); |
| b2 = avg23(b0, b1); |
| |
| r3 = avg23(r1, r0); |
| g3 = avg23(g1, g0); |
| b3 = avg23(b1, b0); |
| a3 = 1; |
| } else { |
| r2 = (r0 + r1) >> 1; |
| g2 = (g0 + g1) >> 1; |
| b2 = (b0 + b1) >> 1; |
| |
| r3 = g3 = b3 = a3 = 0; |
| } |
| if (hasAlpha) { |
| c[2] = (r2 << 11) | ((g2 >> 1) << 6) | |
| (b2 << 1) | 0x1; |
| c[3] = (r3 << 11) | ((g3 >> 1) << 6) | |
| (b3 << 1) | a3; |
| } else { |
| c[2] = (r2 << 11) | (g2 << 5) | b2; |
| c[3] = (r3 << 11) | (g3 << 5) | b3; |
| } |
| } |
| } |
| |
| uint16_t* blockRowPtr = blockPtr; |
| for (int y = 0; y < 4; y++, blockRowPtr += stride) { |
| // Don't process rows past the botom |
| if (base_y + y >= height) { |
| break; |
| } |
| |
| int w = min(width - base_x, 4); |
| for (int x = 0; x < w; x++) { |
| int code = bits & 0x3; |
| bits >>= 2; |
| |
| blockRowPtr[x] = c[code]; |
| } |
| } |
| } |
| } |
| } |
| |
| // Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE |
| static void |
| decodeDXT3(const GLvoid *data, int width, int height, |
| void *surface, int stride) |
| |
| { |
| init_tables(); |
| |
| uint32_t const *d32 = (uint32_t *)data; |
| |
| // Specified colors from the previous block |
| uint16_t prev_color0 = 0x0000; |
| uint16_t prev_color1 = 0x0000; |
| |
| // Color table for the current block |
| uint32_t c[4]; |
| c[0] = c[1] = c[2] = c[3] = 0; |
| |
| uint32_t* rowPtr = (uint32_t*)surface; |
| for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) { |
| uint32_t *blockPtr = rowPtr; |
| for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) { |
| |
| #if __BYTE_ORDER == __BIG_ENDIAN |
| uint32_t alphahi = *d32++; |
| uint32_t alphalo = *d32++; |
| alphahi = swap(alphahi); |
| alphalo = swap(alphalo); |
| #else |
| uint32_t alphalo = *d32++; |
| uint32_t alphahi = *d32++; |
| #endif |
| |
| uint32_t colors = *d32++; |
| uint32_t bits = *d32++; |
| |
| #if __BYTE_ORDER == __BIG_ENDIAN |
| colors = swap(colors); |
| bits = swap(bits); |
| #endif |
| |
| uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo; |
| |
| // Raw colors |
| uint16_t color0 = colors & 0xffff; |
| uint16_t color1 = colors >> 16; |
| |
| // If the new block has the same base colors as the |
| // previous one, we don't need to recompute the color |
| // table c[] |
| if (color0 != prev_color0 || color1 != prev_color1) { |
| // Store raw colors for comparison with next block |
| prev_color0 = color0; |
| prev_color1 = color1; |
| |
| int bbits = bits >> 1; |
| bool has2 = ((bbits & ~bits) & 0x55555555) != 0; |
| bool has3 = ((bbits & bits) & 0x55555555) != 0; |
| |
| if (has2 || has3) { |
| int r0 = red(color0); |
| int g0 = green(color0); |
| int b0 = blue(color0); |
| |
| int r1 = red(color1); |
| int g1 = green(color1); |
| int b1 = blue(color1); |
| |
| int r2 = avg23(r0, r1); |
| int g2 = avg23(g0, g1); |
| int b2 = avg23(b0, b1); |
| |
| int r3 = avg23(r1, r0); |
| int g3 = avg23(g1, g0); |
| int b3 = avg23(b1, b0); |
| |
| c[0] = rgb565SepTo888(r0, g0, b0); |
| c[1] = rgb565SepTo888(r1, g1, b1); |
| c[2] = rgb565SepTo888(r2, g2, b2); |
| c[3] = rgb565SepTo888(r3, g3, b3); |
| } else { |
| // Convert to 8 bits |
| c[0] = rgb565To888(color0); |
| c[1] = rgb565To888(color1); |
| } |
| } |
| |
| uint32_t* blockRowPtr = blockPtr; |
| for (int y = 0; y < 4; y++, blockRowPtr += stride) { |
| // Don't process rows past the botom |
| if (base_y + y >= height) { |
| break; |
| } |
| |
| int w = min(width - base_x, 4); |
| for (int x = 0; x < w; x++) { |
| int a = alpha & 0xf; |
| alpha >>= 4; |
| |
| int code = bits & 0x3; |
| bits >>= 2; |
| |
| blockRowPtr[x] = c[code] | (a << 28) | (a << 24); |
| } |
| } |
| } |
| } |
| } |
| |
| // Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE |
| static void |
| decodeDXT5(const GLvoid *data, int width, int height, |
| void *surface, int stride) |
| |
| { |
| init_tables(); |
| |
| uint32_t const *d32 = (uint32_t *)data; |
| |
| // Specified alphas from the previous block |
| uint8_t prev_alpha0 = 0x00; |
| uint8_t prev_alpha1 = 0x00; |
| |
| // Specified colors from the previous block |
| uint16_t prev_color0 = 0x0000; |
| uint16_t prev_color1 = 0x0000; |
| |
| // Alpha table for the current block |
| uint8_t a[8]; |
| a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = 0; |
| |
| // Color table for the current block |
| uint32_t c[4]; |
| c[0] = c[1] = c[2] = c[3] = 0; |
| |
| int good_a5 = 0; |
| int bad_a5 = 0; |
| int good_a6 = 0; |
| int bad_a6 = 0; |
| int good_a7 = 0; |
| int bad_a7 = 0; |
| |
| uint32_t* rowPtr = (uint32_t*)surface; |
| for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) { |
| uint32_t *blockPtr = rowPtr; |
| for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) { |
| |
| #if __BYTE_ORDER == __BIG_ENDIAN |
| uint32_t alphahi = *d32++; |
| uint32_t alphalo = *d32++; |
| alphahi = swap(alphahi); |
| alphalo = swap(alphalo); |
| #else |
| uint32_t alphalo = *d32++; |
| uint32_t alphahi = *d32++; |
| #endif |
| |
| uint32_t colors = *d32++; |
| uint32_t bits = *d32++; |
| |
| #if __BYTE_ORDER == __BIG_ENDIANx |
| colors = swap(colors); |
| bits = swap(bits); |
| #endif |
| |
| uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo; |
| uint64_t alpha0 = alpha & 0xff; |
| alpha >>= 8; |
| uint64_t alpha1 = alpha & 0xff; |
| alpha >>= 8; |
| |
| if (alpha0 != prev_alpha0 || alpha1 != prev_alpha1) { |
| prev_alpha0 = alpha0; |
| prev_alpha1 = alpha1; |
| |
| a[0] = alpha0; |
| a[1] = alpha1; |
| int a01 = alpha0 + alpha1 - 1; |
| if (alpha0 > alpha1) { |
| a[2] = div7(6*alpha0 + alpha1); |
| a[4] = div7(4*alpha0 + 3*alpha1); |
| a[6] = div7(2*alpha0 + 5*alpha1); |
| |
| // Use symmetry to derive half of the values |
| // A few values will be off by 1 (~.5%) |
| // Alternate which values are computed directly |
| // and which are derived to try to reduce bias |
| a[3] = a01 - a[6]; |
| a[5] = a01 - a[4]; |
| a[7] = a01 - a[2]; |
| } else { |
| a[2] = div5(4*alpha0 + alpha1); |
| a[4] = div5(2*alpha0 + 3*alpha1); |
| a[3] = a01 - a[4]; |
| a[5] = a01 - a[2]; |
| a[6] = 0x00; |
| a[7] = 0xff; |
| } |
| } |
| |
| // Raw colors |
| uint16_t color0 = colors & 0xffff; |
| uint16_t color1 = colors >> 16; |
| |
| // If the new block has the same base colors as the |
| // previous one, we don't need to recompute the color |
| // table c[] |
| if (color0 != prev_color0 || color1 != prev_color1) { |
| // Store raw colors for comparison with next block |
| prev_color0 = color0; |
| prev_color1 = color1; |
| |
| int bbits = bits >> 1; |
| bool has2 = ((bbits & ~bits) & 0x55555555) != 0; |
| bool has3 = ((bbits & bits) & 0x55555555) != 0; |
| |
| if (has2 || has3) { |
| int r0 = red(color0); |
| int g0 = green(color0); |
| int b0 = blue(color0); |
| |
| int r1 = red(color1); |
| int g1 = green(color1); |
| int b1 = blue(color1); |
| |
| int r2 = avg23(r0, r1); |
| int g2 = avg23(g0, g1); |
| int b2 = avg23(b0, b1); |
| |
| int r3 = avg23(r1, r0); |
| int g3 = avg23(g1, g0); |
| int b3 = avg23(b1, b0); |
| |
| c[0] = rgb565SepTo888(r0, g0, b0); |
| c[1] = rgb565SepTo888(r1, g1, b1); |
| c[2] = rgb565SepTo888(r2, g2, b2); |
| c[3] = rgb565SepTo888(r3, g3, b3); |
| } else { |
| // Convert to 8 bits |
| c[0] = rgb565To888(color0); |
| c[1] = rgb565To888(color1); |
| } |
| } |
| |
| uint32_t* blockRowPtr = blockPtr; |
| for (int y = 0; y < 4; y++, blockRowPtr += stride) { |
| // Don't process rows past the botom |
| if (base_y + y >= height) { |
| break; |
| } |
| |
| int w = min(width - base_x, 4); |
| for (int x = 0; x < w; x++) { |
| int acode = alpha & 0x7; |
| alpha >>= 3; |
| |
| int code = bits & 0x3; |
| bits >>= 2; |
| |
| blockRowPtr[x] = c[code] | (a[acode] << 24); |
| } |
| } |
| } |
| } |
| } |
| |
| /* |
| * Decode a DXT-compressed texture into memory. DXT textures consist of |
| * a series of 4x4 pixel blocks in left-to-right, top-down order. |
| * The number of blocks is given by ceil(width/4)*ceil(height/4). |
| * |
| * 'data' points to the texture data. 'width' and 'height' indicate the |
| * dimensions of the texture. We assume width and height are >= 0 but |
| * do not require them to be powers of 2 or divisible by any factor. |
| * |
| * The output is written to 'surface' with each scanline separated by |
| * 'stride' 2- or 4-byte words. |
| * |
| * 'format' indicates the type of compression and must be one of the following: |
| * |
| * GL_COMPRESSED_RGB_S3TC_DXT1_EXT: |
| * The output is written as 5/6/5 opaque RGB (16 bit words). |
| * 8 bytes are read from 'data' for each block. |
| * |
| * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT |
| * The output is written as 5/5/5/1 RGBA (16 bit words) |
| * 8 bytes are read from 'data' for each block. |
| * |
| * GL_COMPRESSED_RGBA_S3TC_DXT3_EXT |
| * GL_COMPRESSED_RGBA_S3TC_DXT5_EXT |
| * The output is written as 8/8/8/8 ARGB (32 bit words) |
| * 16 bytes are read from 'data' for each block. |
| */ |
| void |
| decodeDXT(const GLvoid *data, int width, int height, |
| void *surface, int stride, int format) |
| { |
| #if TIMING |
| struct timeval start_t, end_t; |
| struct timezone tz; |
| |
| gettimeofday(&start_t, &tz); |
| #endif |
| |
| switch (format) { |
| case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: |
| decodeDXT1(data, width, height, surface, stride, false); |
| break; |
| |
| case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: |
| decodeDXT1(data, width, height, surface, stride, true); |
| break; |
| |
| case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: |
| decodeDXT3(data, width, height, surface, stride); |
| break; |
| |
| case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: |
| decodeDXT5(data, width, height, surface, stride); |
| break; |
| } |
| |
| #if TIMING |
| gettimeofday(&end_t, &tz); |
| long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 + |
| (end_t.tv_usec - start_t.tv_usec); |
| |
| printf("Loaded w=%d h=%d in %ld usec\n", width, height, usec); |
| #endif |
| } |
| |
| } // namespace android |