Optional use of UTF-8 strings in resource bundles

Allows the use of UTF-8 for packing resources instead of the
default of UTF-16 for Java. When strings are extracted from the
ResStringPool, they are converted to UTF-16 and the result is
cached for subsequent calls.

When using aapt to package, add in the "-8" switch to pack the
resources using UTF-8. This will result in the value, key, and
type strings as well as the compiled XML string values taking
significantly less space in the final application package in
most scenarios.

Change-Id: I129483f8b3d3b1c5869dced05cb525e494a6c83a
diff --git a/tools/aapt/Bundle.h b/tools/aapt/Bundle.h
index 1ac13f2..cf70121 100644
--- a/tools/aapt/Bundle.h
+++ b/tools/aapt/Bundle.h
@@ -37,7 +37,7 @@
           mForce(false), mGrayscaleTolerance(0), mMakePackageDirs(false),
           mUpdate(false), mExtending(false),
           mRequireLocalization(false), mPseudolocalize(false),
-          mValues(false),
+          mUTF8(false), mValues(false),
           mCompressionMethod(0), mOutputAPKFile(NULL),
           mAssetSourceDir(NULL), mProguardFile(NULL),
           mAndroidManifestFile(NULL), mPublicOutputFile(NULL),
@@ -76,6 +76,8 @@
     void setRequireLocalization(bool val) { mRequireLocalization = val; }
     bool getPseudolocalize(void) const { return mPseudolocalize; }
     void setPseudolocalize(bool val) { mPseudolocalize = val; }
+    bool getUTF8(void) const { return mUTF8; }
+    void setUTF8(bool val) { mUTF8 = val; }
     bool getValues(void) const { return mValues; }
     void setValues(bool val) { mValues = val; }
     int getCompressionMethod(void) const { return mCompressionMethod; }
@@ -161,6 +163,7 @@
     bool        mExtending;
     bool        mRequireLocalization;
     bool        mPseudolocalize;
+    bool        mUTF8;
     bool        mValues;
     int         mCompressionMethod;
     bool        mJunkPath;
diff --git a/tools/aapt/Command.cpp b/tools/aapt/Command.cpp
index 1a536d6..ff9cc11 100644
--- a/tools/aapt/Command.cpp
+++ b/tools/aapt/Command.cpp
@@ -412,6 +412,7 @@
             }
             tree.restart();
             printXMLBlock(&tree);
+            tree.uninit();
             delete asset;
             asset = NULL;
         }
diff --git a/tools/aapt/Main.cpp b/tools/aapt/Main.cpp
index 98286c0..bd03b74 100644
--- a/tools/aapt/Main.cpp
+++ b/tools/aapt/Main.cpp
@@ -118,6 +118,7 @@
         "   -P  specify where to output public resource definitions\n"
         "   -S  directory in which to find resources.  Multiple directories will be scanned"
         "       and the first match found (left to right) will take precedence."
+        "   -8  Encode string resources in UTF-8.\n"
         "   -0  specifies an additional extension for which such files will not\n"
         "       be stored compressed in the .apk.  An empty string means to not\n"
         "       compress any files at all.\n"
@@ -370,6 +371,9 @@
                     bundle.setCompressionMethod(ZipEntry::kCompressStored);
                 }
                 break;
+            case '8':
+                bundle.setUTF8(true);
+                break;
             case '-':
                 if (strcmp(cp, "-min-sdk-version") == 0) {
                     argc--;
diff --git a/tools/aapt/Resource.cpp b/tools/aapt/Resource.cpp
index fdcada4..d04a873 100644
--- a/tools/aapt/Resource.cpp
+++ b/tools/aapt/Resource.cpp
@@ -613,6 +613,12 @@
 
     NOISY(printf("Found %d included resource packages\n", (int)table.size()));
 
+    // Standard flags for compiled XML and optional UTF-8 encoding
+    int xmlFlags = XML_COMPILE_STANDARD_RESOURCE;
+    if (bundle->getUTF8()) {
+        xmlFlags |= XML_COMPILE_UTF8;
+    }
+
     // --------------------------------------------------------------
     // First, gather all resource information.
     // --------------------------------------------------------------
@@ -763,7 +769,7 @@
         ResourceDirIterator it(layouts, String8("layout"));
         while ((err=it.next()) == NO_ERROR) {
             String8 src = it.getFile()->getPrintableSource();
-            err = compileXmlFile(assets, it.getFile(), &table);
+            err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err == NO_ERROR) {
                 ResXMLTree block;
                 block.setTo(it.getFile()->getData(), it.getFile()->getSize(), true);
@@ -782,7 +788,7 @@
     if (anims != NULL) {
         ResourceDirIterator it(anims, String8("anim"));
         while ((err=it.next()) == NO_ERROR) {
-            err = compileXmlFile(assets, it.getFile(), &table);
+            err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err != NO_ERROR) {
                 hasErrors = true;
             }
@@ -797,7 +803,7 @@
     if (xmls != NULL) {
         ResourceDirIterator it(xmls, String8("xml"));
         while ((err=it.next()) == NO_ERROR) {
-            err = compileXmlFile(assets, it.getFile(), &table);
+            err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err != NO_ERROR) {
                 hasErrors = true;
             }
@@ -819,7 +825,7 @@
     if (colors != NULL) {
         ResourceDirIterator it(colors, String8("color"));
         while ((err=it.next()) == NO_ERROR) {
-          err = compileXmlFile(assets, it.getFile(), &table);
+          err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err != NO_ERROR) {
                 hasErrors = true;
             }
@@ -835,7 +841,7 @@
         ResourceDirIterator it(menus, String8("menu"));
         while ((err=it.next()) == NO_ERROR) {
             String8 src = it.getFile()->getPrintableSource();
-            err = compileXmlFile(assets, it.getFile(), &table);
+            err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err != NO_ERROR) {
                 hasErrors = true;
             }
diff --git a/tools/aapt/ResourceTable.cpp b/tools/aapt/ResourceTable.cpp
index 19b9b01..a9cbd11 100644
--- a/tools/aapt/ResourceTable.cpp
+++ b/tools/aapt/ResourceTable.cpp
@@ -39,6 +39,10 @@
         root->removeWhitespace(false, NULL);
     }
 
+    if ((options&XML_COMPILE_UTF8) != 0) {
+        root->setUTF8(true);
+    }
+
     bool hasErrors = false;
     
     if ((options&XML_COMPILE_ASSIGN_ATTRIBUTE_IDS) != 0) {
@@ -2505,7 +2509,7 @@
 
     // Iterate through all data, collecting all values (strings,
     // references, etc).
-    StringPool valueStrings;
+    StringPool valueStrings = StringPool(false, bundle->getUTF8());
     for (pi=0; pi<N; pi++) {
         sp<Package> p = mOrderedPackages.itemAt(pi);
         if (p->getTypes().size() == 0) {
@@ -2513,8 +2517,8 @@
             continue;
         }
 
-        StringPool typeStrings;
-        StringPool keyStrings;
+        StringPool typeStrings = StringPool(false, bundle->getUTF8());
+        StringPool keyStrings = StringPool(false, bundle->getUTF8());
 
         const size_t N = p->getOrderedTypes().size();
         for (size_t ti=0; ti<N; ti++) {
diff --git a/tools/aapt/ResourceTable.h b/tools/aapt/ResourceTable.h
index caa01b3..cfa75a71 100644
--- a/tools/aapt/ResourceTable.h
+++ b/tools/aapt/ResourceTable.h
@@ -24,6 +24,7 @@
     XML_COMPILE_COMPACT_WHITESPACE = 1<<2,
     XML_COMPILE_STRIP_WHITESPACE = 1<<3,
     XML_COMPILE_STRIP_RAW_VALUES = 1<<4,
+    XML_COMPILE_UTF8 = 1<<5,
     
     XML_COMPILE_STANDARD_RESOURCE =
             XML_COMPILE_STRIP_COMMENTS | XML_COMPILE_ASSIGN_ATTRIBUTE_IDS
diff --git a/tools/aapt/StringPool.cpp b/tools/aapt/StringPool.cpp
index 715170a..ec58591 100644
--- a/tools/aapt/StringPool.cpp
+++ b/tools/aapt/StringPool.cpp
@@ -30,8 +30,8 @@
     }
 }
 
-StringPool::StringPool(bool sorted)
-    : mSorted(sorted), mValues(-1), mIdents(-1)
+StringPool::StringPool(bool sorted, bool utf8)
+    : mSorted(sorted), mUTF8(utf8), mValues(-1), mIdents(-1)
 {
 }
 
@@ -165,6 +165,16 @@
     return err == NO_ERROR ? pool : NULL;
 }
 
+#define ENCODE_LENGTH(str, chrsz, strSize) \
+{ \
+    size_t maxMask = 1 << ((chrsz*8)-1); \
+    size_t maxSize = maxMask-1; \
+    if (strSize > maxSize) { \
+        *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
+    } \
+    *str++ = strSize; \
+}
+
 status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
 {
     // Allow appending.  Sorry this is a little wacky.
@@ -213,28 +223,53 @@
         return NO_MEMORY;
     }
 
+    const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(char16_t);
+
     size_t strPos = 0;
     for (i=0; i<STRINGS; i++) {
         entry& ent = mEntries.editItemAt(i);
         const size_t strSize = (ent.value.size());
-        const size_t lenSize = strSize > 0x7fff ? sizeof(uint32_t) : sizeof(uint16_t);
-        const size_t totalSize = lenSize + ((strSize+1)*sizeof(uint16_t));
+        const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
+            charSize*2 : charSize;
+
+        String8 encStr;
+        if (mUTF8) {
+            encStr = String8(ent.value);
+        }
+
+        const size_t encSize = mUTF8 ? encStr.size() : 0;
+        const size_t encLenSize = mUTF8 ?
+            (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
+                charSize*2 : charSize) : 0;
 
         ent.offset = strPos;
-        uint16_t* dat = (uint16_t*)pool->editData(preSize + strPos + totalSize);
+
+        const size_t totalSize = lenSize + encLenSize +
+            ((mUTF8 ? encSize : strSize)+1)*charSize;
+
+        void* dat = (void*)pool->editData(preSize + strPos + totalSize);
         if (dat == NULL) {
             fprintf(stderr, "ERROR: Out of memory for string pool\n");
             return NO_MEMORY;
         }
-        dat += (preSize+strPos)/sizeof(uint16_t);
-        if (lenSize > sizeof(uint16_t)) {
-            *dat = htods(0x8000 | ((strSize>>16)&0x7fff));
-            dat++;
-        }
-        *dat++ = htods(strSize);
-        strcpy16_htod(dat, ent.value);
+        dat = (uint8_t*)dat + preSize + strPos;
+        if (mUTF8) {
+            uint8_t* strings = (uint8_t*)dat;
 
-        strPos += lenSize + (strSize+1)*sizeof(uint16_t);
+            ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
+
+            ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
+
+            strncpy((char*)strings, encStr, encSize+1);
+        } else {
+            uint16_t* strings = (uint16_t*)dat;
+
+            ENCODE_LENGTH(strings, sizeof(uint16_t), strSize)
+
+            strcpy16_htod(strings, ent.value);
+        }
+
+        strPos += totalSize;
     }
 
     // Pad ending string position up to a uint32_t boundary.
@@ -312,6 +347,9 @@
     if (mSorted) {
         header->flags |= htodl(ResStringPool_header::SORTED_FLAG);
     }
+    if (mUTF8) {
+        header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
+    }
     header->stringsStart = htodl(preSize);
     header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
 
diff --git a/tools/aapt/StringPool.h b/tools/aapt/StringPool.h
index 9082b37..7275259 100644
--- a/tools/aapt/StringPool.h
+++ b/tools/aapt/StringPool.h
@@ -68,8 +68,11 @@
      * lookup with ResStringPool::indexOfString() (O(log n)), at the expense
      * of support for styled string entries (which requires the same string
      * be included multiple times in the pool).
+     *
+     * If 'utf8' is true, strings will be encoded with UTF-8 instead of
+     * left in Java's native UTF-16.
      */
-    explicit StringPool(bool sorted = false);
+    explicit StringPool(bool sorted = false, bool utf8 = false);
 
     /**
      * Add a new string to the pool.  If mergeDuplicates is true, thenif
@@ -123,6 +126,7 @@
 
 private:
     const bool                              mSorted;
+    const bool                              mUTF8;
     // Raw array of unique strings, in some arbitrary order.
     Vector<entry>                           mEntries;
     // Array of indices into mEntries, in the order they were
diff --git a/tools/aapt/XMLNode.cpp b/tools/aapt/XMLNode.cpp
index d4d2a45c..036dde4 100644
--- a/tools/aapt/XMLNode.cpp
+++ b/tools/aapt/XMLNode.cpp
@@ -478,6 +478,7 @@
     , mFilename(filename)
     , mStartLineNumber(0)
     , mEndLineNumber(0)
+    , mUTF8(false)
 {
     if (isNamespace) {
         mNamespacePrefix = s1;
@@ -837,7 +838,7 @@
 status_t XMLNode::flatten(const sp<AaptFile>& dest,
         bool stripComments, bool stripRawValues) const
 {
-    StringPool strings;
+    StringPool strings = StringPool(false, mUTF8);
     Vector<uint32_t> resids;
     
     // First collect just the strings for attribute names that have a
diff --git a/tools/aapt/XMLNode.h b/tools/aapt/XMLNode.h
index a9bea43..dc92fa7 100644
--- a/tools/aapt/XMLNode.h
+++ b/tools/aapt/XMLNode.h
@@ -124,6 +124,8 @@
 
     void removeWhitespace(bool stripAll=true, const char** cDataTags=NULL);
 
+    void setUTF8(bool val) { mUTF8 = val; }
+
     status_t parseValues(const sp<AaptAssets>& assets, ResourceTable* table);
 
     status_t assignResourceIds(const sp<AaptAssets>& assets,
@@ -189,6 +191,9 @@
     String8 mFilename;
     int32_t mStartLineNumber;
     int32_t mEndLineNumber;
+
+    // Encode compiled XML with UTF-8 StringPools?
+    bool mUTF8;
 };
 
 #endif