Optional use of UTF-8 strings in resource bundles

Allows the use of UTF-8 for packing resources instead of the
default of UTF-16 for Java. When strings are extracted from the
ResStringPool, they are converted to UTF-16 and the result is
cached for subsequent calls.

When using aapt to package, add in the "-8" switch to pack the
resources using UTF-8. This will result in the value, key, and
type strings as well as the compiled XML string values taking
significantly less space in the final application package in
most scenarios.

Change-Id: I129483f8b3d3b1c5869dced05cb525e494a6c83a
diff --git a/tools/aapt/StringPool.cpp b/tools/aapt/StringPool.cpp
index 715170a..ec58591 100644
--- a/tools/aapt/StringPool.cpp
+++ b/tools/aapt/StringPool.cpp
@@ -30,8 +30,8 @@
     }
 }
 
-StringPool::StringPool(bool sorted)
-    : mSorted(sorted), mValues(-1), mIdents(-1)
+StringPool::StringPool(bool sorted, bool utf8)
+    : mSorted(sorted), mUTF8(utf8), mValues(-1), mIdents(-1)
 {
 }
 
@@ -165,6 +165,16 @@
     return err == NO_ERROR ? pool : NULL;
 }
 
+#define ENCODE_LENGTH(str, chrsz, strSize) \
+{ \
+    size_t maxMask = 1 << ((chrsz*8)-1); \
+    size_t maxSize = maxMask-1; \
+    if (strSize > maxSize) { \
+        *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
+    } \
+    *str++ = strSize; \
+}
+
 status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
 {
     // Allow appending.  Sorry this is a little wacky.
@@ -213,28 +223,53 @@
         return NO_MEMORY;
     }
 
+    const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(char16_t);
+
     size_t strPos = 0;
     for (i=0; i<STRINGS; i++) {
         entry& ent = mEntries.editItemAt(i);
         const size_t strSize = (ent.value.size());
-        const size_t lenSize = strSize > 0x7fff ? sizeof(uint32_t) : sizeof(uint16_t);
-        const size_t totalSize = lenSize + ((strSize+1)*sizeof(uint16_t));
+        const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
+            charSize*2 : charSize;
+
+        String8 encStr;
+        if (mUTF8) {
+            encStr = String8(ent.value);
+        }
+
+        const size_t encSize = mUTF8 ? encStr.size() : 0;
+        const size_t encLenSize = mUTF8 ?
+            (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
+                charSize*2 : charSize) : 0;
 
         ent.offset = strPos;
-        uint16_t* dat = (uint16_t*)pool->editData(preSize + strPos + totalSize);
+
+        const size_t totalSize = lenSize + encLenSize +
+            ((mUTF8 ? encSize : strSize)+1)*charSize;
+
+        void* dat = (void*)pool->editData(preSize + strPos + totalSize);
         if (dat == NULL) {
             fprintf(stderr, "ERROR: Out of memory for string pool\n");
             return NO_MEMORY;
         }
-        dat += (preSize+strPos)/sizeof(uint16_t);
-        if (lenSize > sizeof(uint16_t)) {
-            *dat = htods(0x8000 | ((strSize>>16)&0x7fff));
-            dat++;
-        }
-        *dat++ = htods(strSize);
-        strcpy16_htod(dat, ent.value);
+        dat = (uint8_t*)dat + preSize + strPos;
+        if (mUTF8) {
+            uint8_t* strings = (uint8_t*)dat;
 
-        strPos += lenSize + (strSize+1)*sizeof(uint16_t);
+            ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
+
+            ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
+
+            strncpy((char*)strings, encStr, encSize+1);
+        } else {
+            uint16_t* strings = (uint16_t*)dat;
+
+            ENCODE_LENGTH(strings, sizeof(uint16_t), strSize)
+
+            strcpy16_htod(strings, ent.value);
+        }
+
+        strPos += totalSize;
     }
 
     // Pad ending string position up to a uint32_t boundary.
@@ -312,6 +347,9 @@
     if (mSorted) {
         header->flags |= htodl(ResStringPool_header::SORTED_FLAG);
     }
+    if (mUTF8) {
+        header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
+    }
     header->stringsStart = htodl(preSize);
     header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);