Add support for DXT3 and DXT5

TEST=none
BUG=none
Review URL: http://codereview.appspot.com/4809068

git-svn-id: https://angleproject.googlecode.com/svn/trunk@721 736b8ea6-26fd-11df-bfd4-992fa37f6226
diff --git a/src/libGLESv2/Texture.cpp b/src/libGLESv2/Texture.cpp
index 0187bcf..e143535 100644
--- a/src/libGLESv2/Texture.cpp
+++ b/src/libGLESv2/Texture.cpp
@@ -49,6 +49,8 @@
       case D3DFMT_L8:
       case D3DFMT_A8L8:
       case D3DFMT_DXT1:
+      case D3DFMT_DXT3:
+      case D3DFMT_DXT5:
         return false;
       case D3DFMT_A8R8G8B8:
       case D3DFMT_X8R8G8B8:
@@ -69,6 +71,14 @@
     {
         return D3DFMT_DXT1;
     }
+    else if (type == GL_COMPRESSED_RGBA_S3TC_DXT3_ANGLE)
+    {
+        return D3DFMT_DXT3;
+    }
+    else if (type == GL_COMPRESSED_RGBA_S3TC_DXT5_ANGLE)
+    {
+        return D3DFMT_DXT5;
+    }
     else if (type == GL_FLOAT)
     {
         return D3DFMT_A32B32G32R32F;
@@ -767,6 +777,132 @@
 }
 
 void Texture::loadCompressedImageData(GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
+                                      int inputPitch, const void *input, size_t outputPitch, void *output) const {
+    switch (getD3DFormat())
+    {
+        case D3DFMT_DXT1:
+          loadDXT1ImageData(xoffset, yoffset, width, height, inputPitch, input, outputPitch, output);
+          break;
+        case D3DFMT_DXT3:
+          loadDXT3ImageData(xoffset, yoffset, width, height, inputPitch, input, outputPitch, output);
+          break;
+        case D3DFMT_DXT5:
+          loadDXT5ImageData(xoffset, yoffset, width, height, inputPitch, input, outputPitch, output);
+          break;
+    }
+}
+
+static void FlipCopyDXT1BlockFull(const unsigned int* source, unsigned int* dest) {
+  // A DXT1 block layout is:
+  // [0-1] color0.
+  // [2-3] color1.
+  // [4-7] color bitmap, 2 bits per pixel.
+  // So each of the 4-7 bytes represents one line, flipping a block is just
+  // flipping those bytes.
+
+  // First 32-bits is two RGB565 colors shared by tile and does not need to be modified.
+  dest[0] = source[0];
+
+  // Second 32-bits contains 4 rows of 4 2-bit interpolants between the colors. All rows should be flipped.
+  dest[1] = (source[1] >> 24) |
+            ((source[1] << 8) & 0x00FF0000) |
+            ((source[1] >> 8) & 0x0000FF00) |
+            (source[1] << 24);
+}
+
+// Flips the first 2 lines of a DXT1 block in the y direction.
+static void FlipCopyDXT1BlockHalf(const unsigned int* source, unsigned int* dest) {
+  // See layout above.
+  dest[0] = source[0];
+  dest[1] = ((source[1] << 8) & 0x0000FF00) |
+            ((source[1] >> 8) & 0x000000FF);
+}
+
+// Flips a full DXT3 block in the y direction.
+static void FlipCopyDXT3BlockFull(const unsigned int* source, unsigned int* dest) {
+  // A DXT3 block layout is:
+  // [0-7]  alpha bitmap, 4 bits per pixel.
+  // [8-15] a DXT1 block.
+
+  // First and Second 32 bits are 4bit per pixel alpha and need to be flipped.
+  dest[0] = (source[1] >> 16) | (source[1] << 16);
+  dest[1] = (source[0] >> 16) | (source[0] << 16);
+
+  // And flip the DXT1 block using the above function.
+  FlipCopyDXT1BlockFull(source + 2, dest + 2);
+}
+
+// Flips the first 2 lines of a DXT3 block in the y direction.
+static void FlipCopyDXT3BlockHalf(const unsigned int* source, unsigned int* dest) {
+  // See layout above.
+  dest[0] = (source[1] >> 16) | (source[1] << 16);
+  FlipCopyDXT1BlockHalf(source + 2, dest + 2);
+}
+
+// Flips a full DXT5 block in the y direction.
+static void FlipCopyDXT5BlockFull(const unsigned int* source, unsigned int* dest) {
+  // A DXT5 block layout is:
+  // [0]    alpha0.
+  // [1]    alpha1.
+  // [2-7]  alpha bitmap, 3 bits per pixel.
+  // [8-15] a DXT1 block.
+
+  // The alpha bitmap doesn't easily map lines to bytes, so we have to
+  // interpret it correctly.  Extracted from
+  // http://www.opengl.org/registry/specs/EXT/texture_compression_s3tc.txt :
+  //
+  //   The 6 "bits" bytes of the block are decoded into one 48-bit integer:
+  //
+  //     bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 +
+  //                   256 * (bits_4 + 256 * bits_5))))
+  //
+  //   bits is a 48-bit unsigned integer, from which a three-bit control code
+  //   is extracted for a texel at location (x,y) in the block using:
+  //
+  //       code(x,y) = bits[3*(4*y+x)+1..3*(4*y+x)+0]
+  //
+  //   where bit 47 is the most significant and bit 0 is the least
+  //   significant bit.
+  const unsigned char* sourceBytes = static_cast<const unsigned char*>(static_cast<const void*>(source));
+  unsigned char* destBytes = static_cast<unsigned char*>(static_cast<void*>(dest));
+  unsigned int line_0_1 = sourceBytes[2] + 256 * (sourceBytes[3] + 256 * sourceBytes[4]);
+  unsigned int line_2_3 = sourceBytes[5] + 256 * (sourceBytes[6] + 256 * sourceBytes[7]);
+  // swap lines 0 and 1 in line_0_1.
+  unsigned int line_1_0 = ((line_0_1 & 0x000fff) << 12) |
+                          ((line_0_1 & 0xfff000) >> 12);
+  // swap lines 2 and 3 in line_2_3.
+  unsigned int line_3_2 = ((line_2_3 & 0x000fff) << 12) |
+                          ((line_2_3 & 0xfff000) >> 12);
+  destBytes[0] = sourceBytes[0];
+  destBytes[1] = sourceBytes[1];
+  destBytes[2] = line_3_2 & 0xff;
+  destBytes[3] = (line_3_2 & 0xff00) >> 8;
+  destBytes[4] = (line_3_2 & 0xff0000) >> 8;
+  destBytes[5] = line_1_0 & 0xff;
+  destBytes[6] = (line_1_0 & 0xff00) >> 8;
+  destBytes[7] = (line_1_0 & 0xff0000) >> 8;
+
+  // And flip the DXT1 block using the above function.
+  FlipCopyDXT1BlockFull(source + 2, dest + 2);
+}
+
+// Flips the first 2 lines of a DXT5 block in the y direction.
+static void FlipCopyDXT5BlockHalf(const unsigned int* source, unsigned int* dest) {
+  // See layout above.
+  const unsigned char* sourceBytes = static_cast<const unsigned char*>(static_cast<const void*>(source));
+  unsigned char* destBytes = static_cast<unsigned char*>(static_cast<void*>(dest));
+  unsigned int line_0_1 = sourceBytes[2] + 256 * (sourceBytes[3] + 256 * sourceBytes[4]);
+  unsigned int line_1_0 = ((line_0_1 & 0x000fff) << 12) |
+                          ((line_0_1 & 0xfff000) >> 12);
+  destBytes[0] = sourceBytes[0];
+  destBytes[1] = sourceBytes[1];
+  destBytes[2] = line_1_0 & 0xff;
+  destBytes[3] = (line_1_0 & 0xff00) >> 8;
+  destBytes[4] = (line_1_0 & 0xff0000) >> 8;
+  FlipCopyDXT1BlockHalf(source + 2, dest + 2);
+}
+
+void Texture::loadDXT1ImageData(GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
                                       int inputPitch, const void *input, size_t outputPitch, void *output) const
 {
     ASSERT(xoffset % 4 == 0);
@@ -778,29 +914,24 @@
     const unsigned int *source = reinterpret_cast<const unsigned int*>(input);
     unsigned int *dest = reinterpret_cast<unsigned int*>(output);
 
+    // Round width up in case it is less than 4.
+    int blocksAcross = (width + 3) / 4;
+    int intsAcross = blocksAcross * 2;
+
     switch (height)
     {
         case 1:
-            // Round width up in case it is 1.
-            for (int x = 0; x < (width + 1) / 2; x += 2)
+            for (int x = 0; x < intsAcross; x += 2)
             {
-                // First 32-bits is two RGB565 colors shared by tile and does not need to be modified.
+                // just copy the block
                 dest[x] = source[x];
-
-                // Second 32-bits contains 4 rows of 4 2-bit interpolants between the colors, the last 3 rows being unused. No flipping should occur.
                 dest[x + 1] = source[x + 1];
             }
             break;
         case 2:
-            // Round width up in case it is 1.
-            for (int x = 0; x < (width + 1) / 2; x += 2)
+            for (int x = 0; x < intsAcross; x += 2)
             {
-                // First 32-bits is two RGB565 colors shared by tile and does not need to be modified.
-                dest[x] = source[x];
-
-                // Second 32-bits contains 4 rows of 4 2-bit interpolants between the colors, the last 2 rows being unused. Only the top 2 rows should be flipped.
-                dest[x + 1] = ((source[x + 1] << 8) & 0x0000FF00) |
-                              ((source[x + 1] >> 8) & 0x000000FF);       
+                FlipCopyDXT1BlockHalf(source + x, dest + x);
             }
             break;
         default:
@@ -810,17 +941,109 @@
                 const unsigned int *source = reinterpret_cast<const unsigned int*>(static_cast<const unsigned char*>(input) + y * inputPitch);
                 unsigned int *dest = reinterpret_cast<unsigned int*>(static_cast<unsigned char*>(output) + (y + yoffset) * outputPitch + xoffset * 8);
 
-                // Round width up in case it is 1.
-                for (int x = 0; x < (width + 1) / 2; x += 2)
+                for (int x = 0; x < intsAcross; x += 2)
                 {
-                    // First 32-bits is two RGB565 colors shared by tile and does not need to be modified.
-                    dest[x] = source[x];
+                    FlipCopyDXT1BlockFull(source + x, dest + x);
+                }
+            }
+            break;
+    }
+}
 
-                    // Second 32-bits contains 4 rows of 4 2-bit interpolants between the colors. All rows should be flipped.
-                    dest[x + 1] = (source[x + 1] >> 24) | 
-                                  ((source[x + 1] << 8) & 0x00FF0000) |
-                                  ((source[x + 1] >> 8) & 0x0000FF00) |
-                                  (source[x + 1] << 24);                    
+void Texture::loadDXT3ImageData(GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
+                                int inputPitch, const void *input, size_t outputPitch, void *output) const
+{
+    ASSERT(xoffset % 4 == 0);
+    ASSERT(yoffset % 4 == 0);
+    ASSERT(width % 4 == 0 || width == 2 || width == 1);
+    ASSERT(inputPitch % 16 == 0);
+    ASSERT(outputPitch % 16 == 0);
+
+    const unsigned int *source = reinterpret_cast<const unsigned int*>(input);
+    unsigned int *dest = reinterpret_cast<unsigned int*>(output);
+
+    // Round width up in case it is less than 4.
+    int blocksAcross = (width + 3) / 4;
+    int intsAcross = blocksAcross * 4;
+
+    switch (height)
+    {
+        case 1:
+            for (int x = 0; x < intsAcross; x += 4)
+            {
+                // just copy the block
+                dest[x] = source[x];
+                dest[x + 1] = source[x + 1];
+                dest[x + 2] = source[x + 2];
+                dest[x + 3] = source[x + 3];
+            }
+            break;
+        case 2:
+            for (int x = 0; x < intsAcross; x += 4)
+            {
+                FlipCopyDXT3BlockHalf(source + x, dest + x);
+            }
+            break;
+        default:
+            ASSERT(height % 4 == 0);
+            for (int y = 0; y < height / 4; ++y)
+            {
+                const unsigned int *source = reinterpret_cast<const unsigned int*>(static_cast<const unsigned char*>(input) + y * inputPitch);
+                unsigned int *dest = reinterpret_cast<unsigned int*>(static_cast<unsigned char*>(output) + (y + yoffset) * outputPitch + xoffset * 16);
+
+                for (int x = 0; x < intsAcross; x += 4)
+                {
+                  FlipCopyDXT3BlockFull(source + x, dest + x);
+                }
+            }
+            break;
+    }
+}
+
+void Texture::loadDXT5ImageData(GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
+                                int inputPitch, const void *input, size_t outputPitch, void *output) const
+{
+    ASSERT(xoffset % 4 == 0);
+    ASSERT(yoffset % 4 == 0);
+    ASSERT(width % 4 == 0 || width == 2 || width == 1);
+    ASSERT(inputPitch % 16 == 0);
+    ASSERT(outputPitch % 16 == 0);
+
+    const unsigned int *source = reinterpret_cast<const unsigned int*>(input);
+    unsigned int *dest = reinterpret_cast<unsigned int*>(output);
+
+    // Round width up in case it is less than 4.
+    int blocksAcross = (width + 3) / 4;
+    int intsAcross = blocksAcross * 4;
+
+    switch (height)
+    {
+        case 1:
+            for (int x = 0; x < intsAcross; x += 4)
+            {
+                // just copy the block
+                dest[x] = source[x];
+                dest[x + 1] = source[x + 1];
+                dest[x + 2] = source[x + 2];
+                dest[x + 3] = source[x + 3];
+            }
+            break;
+        case 2:
+            for (int x = 0; x < intsAcross; x += 4)
+            {
+                FlipCopyDXT5BlockHalf(source + x, dest + x);
+            }
+            break;
+        default:
+            ASSERT(height % 4 == 0);
+            for (int y = 0; y < height; ++y)
+            {
+                const unsigned int *source = reinterpret_cast<const unsigned int*>(static_cast<const unsigned char*>(input) + y * inputPitch);
+                unsigned int *dest = reinterpret_cast<unsigned int*>(static_cast<unsigned char*>(output) + (y + yoffset) * outputPitch + xoffset * 16);
+
+                for (int x = 0; x < intsAcross; x += 4)
+                {
+                    FlipCopyDXT5BlockFull(source + x, dest + x);
                 }
             }
             break;