am e6c0e993: Merge change I129483f8 into eclair-mr2

Merge commit 'e6c0e99334bc2f47e5d36db253ac8f166047c03b' into eclair-mr2-plus-aosp

* commit 'e6c0e99334bc2f47e5d36db253ac8f166047c03b':
  Optional use of UTF-8 strings in resource bundles
diff --git a/include/utils/ResourceTypes.h b/include/utils/ResourceTypes.h
index 49145e8..a845908 100644
--- a/include/utils/ResourceTypes.h
+++ b/include/utils/ResourceTypes.h
@@ -393,7 +393,10 @@
     enum {
         // If set, the string index is sorted by the string values (based
         // on strcmp16()).
-        SORTED_FLAG = 1<<0
+        SORTED_FLAG = 1<<0,
+
+        // String pool is encoded in UTF-8
+        UTF8_FLAG = 1<<8
     };
     uint32_t flags;
 
@@ -456,9 +459,11 @@
     void*                       mOwnedData;
     const ResStringPool_header* mHeader;
     size_t                      mSize;
+    mutable Mutex               mDecodeLock;
     const uint32_t*             mEntries;
     const uint32_t*             mEntryStyles;
-    const char16_t*             mStrings;
+    const void*                 mStrings;
+    char16_t**                  mCache;
     uint32_t                    mStringPoolSize;    // number of uint16_t
     const uint32_t*             mStyles;
     uint32_t                    mStylePoolSize;    // number of uint32_t
diff --git a/include/utils/String16.h b/include/utils/String16.h
index a2d22ee..07a0c11 100644
--- a/include/utils/String16.h
+++ b/include/utils/String16.h
@@ -49,12 +49,17 @@
 // Version of strzcmp16 for comparing strings in different endianness.
 int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2);
 
+// Convert UTF-8 to UTF-16 including surrogate pairs
+void utf8_to_utf16(const uint8_t *src, size_t srcLen, char16_t* dst, const size_t dstLen);
+
 }
 
 // ---------------------------------------------------------------------------
 
 namespace android {
 
+// ---------------------------------------------------------------------------
+
 class String8;
 class TextOutput;
 
diff --git a/include/utils/String8.h b/include/utils/String8.h
index ecc5774..c4b18a4 100644
--- a/include/utils/String8.h
+++ b/include/utils/String8.h
@@ -60,6 +60,11 @@
 /*
  * Returns the UTF-8 length of "src".
  */
+size_t utf8_length_from_utf16(const char16_t *src, size_t src_len);
+
+/*
+ * Returns the UTF-8 length of "src".
+ */
 size_t utf8_length_from_utf32(const char32_t *src, size_t src_len);
 
 /*
@@ -120,6 +125,9 @@
 size_t utf32_to_utf8(const char32_t* src, size_t src_len,
                      char* dst, size_t dst_len);
 
+size_t utf16_to_utf8(const char16_t* src, size_t src_len,
+                     char* dst, size_t dst_len);
+
 }
 
 // ---------------------------------------------------------------------------
diff --git a/libs/utils/ResourceTypes.cpp b/libs/utils/ResourceTypes.cpp
index 450af8d..afca814 100644
--- a/libs/utils/ResourceTypes.cpp
+++ b/libs/utils/ResourceTypes.cpp
@@ -229,12 +229,12 @@
 // --------------------------------------------------------------------
 
 ResStringPool::ResStringPool()
-    : mError(NO_INIT), mOwnedData(NULL)
+    : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
 {
 }
 
 ResStringPool::ResStringPool(const void* data, size_t size, bool copyData)
-    : mError(NO_INIT), mOwnedData(NULL)
+    : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
 {
     setTo(data, size, copyData);
 }
@@ -296,7 +296,17 @@
                     (int)size);
             return (mError=BAD_TYPE);
         }
-        mStrings = (const char16_t*)
+
+        size_t charSize;
+        if (mHeader->flags&ResStringPool_header::UTF8_FLAG) {
+            charSize = sizeof(uint8_t);
+            mCache = (char16_t**)malloc(sizeof(char16_t**)*mHeader->stringCount);
+            memset(mCache, 0, sizeof(char16_t**)*mHeader->stringCount);
+        } else {
+            charSize = sizeof(char16_t);
+        }
+
+        mStrings = (const void*)
             (((const uint8_t*)data)+mHeader->stringsStart);
         if (mHeader->stringsStart >= (mHeader->header.size-sizeof(uint16_t))) {
             LOGW("Bad string block: string pool starts at %d, after total size %d\n",
@@ -305,7 +315,7 @@
         }
         if (mHeader->styleCount == 0) {
             mStringPoolSize =
-                (mHeader->header.size-mHeader->stringsStart)/sizeof(uint16_t);
+                (mHeader->header.size-mHeader->stringsStart)/charSize;
         } else {
             // check invariant: styles follow the strings
             if (mHeader->stylesStart <= mHeader->stringsStart) {
@@ -314,7 +324,7 @@
                 return (mError=BAD_TYPE);
             }
             mStringPoolSize =
-                (mHeader->stylesStart-mHeader->stringsStart)/sizeof(uint16_t);
+                (mHeader->stylesStart-mHeader->stringsStart)/charSize;
         }
 
         // check invariant: stringCount > 0 requires a string pool to exist
@@ -329,13 +339,19 @@
             for (i=0; i<mHeader->stringCount; i++) {
                 e[i] = dtohl(mEntries[i]);
             }
-            char16_t* s = const_cast<char16_t*>(mStrings);
-            for (i=0; i<mStringPoolSize; i++) {
-                s[i] = dtohs(mStrings[i]);
+            if (!(mHeader->flags&ResStringPool_header::UTF8_FLAG)) {
+                const char16_t* strings = (const char16_t*)mStrings;
+                char16_t* s = const_cast<char16_t*>(strings);
+                for (i=0; i<mStringPoolSize; i++) {
+                    s[i] = dtohs(strings[i]);
+                }
             }
         }
 
-        if (mStrings[mStringPoolSize-1] != 0) {
+        if ((mHeader->flags&ResStringPool_header::UTF8_FLAG &&
+                ((uint8_t*)mStrings)[mStringPoolSize-1] != 0) ||
+                (!mHeader->flags&ResStringPool_header::UTF8_FLAG &&
+                ((char16_t*)mStrings)[mStringPoolSize-1] != 0)) {
             LOGW("Bad string block: last string is not 0-terminated\n");
             return (mError=BAD_TYPE);
         }
@@ -410,24 +426,67 @@
         free(mOwnedData);
         mOwnedData = NULL;
     }
+    if (mHeader != NULL && mCache != NULL) {
+        for (size_t x = 0; x < mHeader->stringCount; x++) {
+            if (mCache[x] != NULL) {
+                free(mCache[x]);
+                mCache[x] = NULL;
+            }
+        }
+        free(mCache);
+        mCache = NULL;
+    }
 }
 
+#define DECODE_LENGTH(str, chrsz, len) \
+    len = *(str); \
+    if (*(str)&(1<<(chrsz*8-1))) { \
+        (str)++; \
+        len = (((len)&((1<<(chrsz*8-1))-1))<<(chrsz*8)) + *(str); \
+    } \
+    (str)++;
+
 const uint16_t* ResStringPool::stringAt(size_t idx, size_t* outLen) const
 {
     if (mError == NO_ERROR && idx < mHeader->stringCount) {
-        const uint32_t off = (mEntries[idx]/sizeof(uint16_t));
+        const bool isUTF8 = (mHeader->flags&ResStringPool_header::UTF8_FLAG) != 0;
+        const uint32_t off = mEntries[idx]/(isUTF8?sizeof(char):sizeof(char16_t));
         if (off < (mStringPoolSize-1)) {
-            const char16_t* str = mStrings+off;
-            *outLen = *str;
-            if ((*str)&0x8000) {
-                str++;
-                *outLen = (((*outLen)&0x7fff)<<16) + *str;
-            }
-            if ((uint32_t)(str+1+*outLen-mStrings) < mStringPoolSize) {
-                return str+1;
+            if (!isUTF8) {
+                const char16_t* strings = (char16_t*)mStrings;
+                const char16_t* str = strings+off;
+                DECODE_LENGTH(str, sizeof(char16_t), *outLen)
+                if ((uint32_t)(str+*outLen-strings) < mStringPoolSize) {
+                    return str;
+                } else {
+                    LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
+                            (int)idx, (int)(str+*outLen-strings), (int)mStringPoolSize);
+                }
             } else {
-                LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
-                        (int)idx, (int)(str+1+*outLen-mStrings), (int)mStringPoolSize);
+                const uint8_t* strings = (uint8_t*)mStrings;
+                const uint8_t* str = strings+off;
+                DECODE_LENGTH(str, sizeof(uint8_t), *outLen)
+                size_t encLen;
+                DECODE_LENGTH(str, sizeof(uint8_t), encLen)
+                if ((uint32_t)(str+encLen-strings) < mStringPoolSize) {
+                    AutoMutex lock(mDecodeLock);
+                    if (mCache[idx] != NULL) {
+                        return mCache[idx];
+                    }
+                    char16_t *u16str = (char16_t *)calloc(*outLen+1, sizeof(char16_t));
+                    if (!u16str) {
+                        LOGW("No memory when trying to allocate decode cache for string #%d\n",
+                                (int)idx);
+                        return NULL;
+                    }
+                    const unsigned char *u8src = reinterpret_cast<const unsigned char *>(str);
+                    utf8_to_utf16(u8src, encLen, u16str, *outLen);
+                    mCache[idx] = u16str;
+                    return u16str;
+                } else {
+                    LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
+                            (int)idx, (int)(str+encLen-strings), (int)mStringPoolSize);
+                }
             }
         } else {
             LOGW("Bad string block: string #%d entry is at %d, past end at %d\n",
@@ -466,6 +525,10 @@
 
     size_t len;
 
+    // TODO optimize searching for UTF-8 strings taking into account
+    // the cache fill to determine when to convert the searched-for
+    // string key to UTF-8.
+
     if (mHeader->flags&ResStringPool_header::SORTED_FLAG) {
         // Do a binary search for the string...
         ssize_t l = 0;
@@ -1043,6 +1106,7 @@
 void ResXMLTree::uninit()
 {
     mError = NO_INIT;
+    mStrings.uninit();
     if (mOwnedData) {
         free(mOwnedData);
         mOwnedData = NULL;
diff --git a/libs/utils/String16.cpp b/libs/utils/String16.cpp
index aef67f2..eab7b2b 100644
--- a/libs/utils/String16.cpp
+++ b/libs/utils/String16.cpp
@@ -172,10 +172,6 @@
            : 0);
 }
 
-// ---------------------------------------------------------------------------
-
-namespace android {
-
 static inline size_t
 utf8_char_len(uint8_t ch)
 {
@@ -215,8 +211,38 @@
     //printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result);
 }
 
+void
+utf8_to_utf16(const uint8_t *src, size_t srcLen,
+        char16_t* dst, const size_t dstLen)
+{
+    const uint8_t* const end = src + srcLen;
+    const char16_t* const dstEnd = dst + dstLen;
+    while (src < end && dst < dstEnd) {
+        size_t len = utf8_char_len(*src);
+        uint32_t codepoint = utf8_to_utf32((const uint8_t*)src, len);
+
+        // Convert the UTF32 codepoint to one or more UTF16 codepoints
+        if (codepoint <= 0xFFFF) {
+            // Single UTF16 character
+            *dst++ = (char16_t) codepoint;
+        } else {
+            // Multiple UTF16 characters with surrogates
+            codepoint = codepoint - 0x10000;
+            *dst++ = (char16_t) ((codepoint >> 10) + 0xD800);
+            *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
+        }
+
+        src += len;
+    }
+    if (dst < dstEnd) {
+        *dst = 0;
+    }
+}
+
 // ---------------------------------------------------------------------------
 
+namespace android {
+
 static SharedBuffer* gEmptyStringBuf = NULL;
 static char16_t* gEmptyString = NULL;
 
@@ -260,30 +286,14 @@
         p += utf8len;
     }
     
-    SharedBuffer* buf = SharedBuffer::alloc((chars+1)*sizeof(char16_t));
+    size_t bufSize = (chars+1)*sizeof(char16_t);
+    SharedBuffer* buf = SharedBuffer::alloc(bufSize);
     if (buf) {
         p = in;
         char16_t* str = (char16_t*)buf->data();
-        char16_t* d = str;
-        while (p < end) {
-            size_t len = utf8_char_len(*p);
-            uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, len);
-
-            // Convert the UTF32 codepoint to one or more UTF16 codepoints
-            if (codepoint <= 0xFFFF) {
-                // Single UTF16 character
-                *d++ = (char16_t) codepoint;
-            } else {
-                // Multiple UTF16 characters with surrogates
-                codepoint = codepoint - 0x10000;
-                *d++ = (char16_t) ((codepoint >> 10) + 0xD800);
-                *d++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
-            }
-
-            p += len;
-        }
-        *d = 0;
         
+        utf8_to_utf16((const uint8_t*)p, len, str, bufSize);
+
         //printf("Created UTF-16 string from UTF-8 \"%s\":", in);
         //printHexData(1, str, buf->size(), 16, 1);
         //printf("\n");
diff --git a/libs/utils/String8.cpp b/libs/utils/String8.cpp
index e908ec1..3a34838 100644
--- a/libs/utils/String8.cpp
+++ b/libs/utils/String8.cpp
@@ -208,10 +208,23 @@
     return getEmptyString();
 }
 
-// Note: not dealing with expanding surrogate pairs.
 static char* allocFromUTF16(const char16_t* in, size_t len)
 {
-    return allocFromUTF16OrUTF32<char16_t, size_t>(in, len);
+    if (len == 0) return getEmptyString();
+
+    const size_t bytes = utf8_length_from_utf16(in, len);
+
+    SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
+    LOG_ASSERT(buf, "Unable to allocate shared buffer");
+    if (buf) {
+        char* str = (char*)buf->data();
+
+        utf16_to_utf8(in, len, str, bytes+1);
+
+        return str;
+    }
+
+    return getEmptyString();
 }
 
 static char* allocFromUTF32(const char32_t* in, size_t len)
@@ -762,6 +775,26 @@
     return ret;
 }
 
+size_t utf8_length_from_utf16(const char16_t *src, size_t src_len)
+{
+    if (src == NULL || src_len == 0) {
+        return 0;
+    }
+    size_t ret = 0;
+    const char16_t* const end = src + src_len;
+    while (src < end) {
+        if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
+                && (*++src & 0xFC00) == 0xDC00) {
+            // surrogate pairs are always 4 bytes.
+            ret += 4;
+            src++;
+        } else {
+            ret += android::utf32_to_utf8_bytes((char32_t) *src++);
+        }
+    }
+    return ret;
+}
+
 static int32_t utf32_at_internal(const char* cur, size_t *num_read)
 {
     const char first_char = *cur;
@@ -848,3 +881,33 @@
     }
     return cur - dst;
 }
+
+size_t utf16_to_utf8(const char16_t* src, size_t src_len,
+                     char* dst, size_t dst_len)
+{
+    if (src == NULL || src_len == 0 || dst == NULL || dst_len == 0) {
+        return 0;
+    }
+    const char16_t* cur_utf16 = src;
+    const char16_t* const end_utf16 = src + src_len;
+    char *cur = dst;
+    const char* const end = dst + dst_len;
+    while (cur_utf16 < end_utf16 && cur < end) {
+        char32_t utf32;
+        // surrogate pairs
+        if ((*cur_utf16 & 0xFC00) == 0xD800 && (cur_utf16 + 1) < end_utf16) {
+            utf32 = (*cur_utf16++ - 0xD800) << 10;
+            utf32 |= *cur_utf16++ - 0xDC00;
+            utf32 += 0x10000;
+        } else {
+            utf32 = (char32_t) *cur_utf16++;
+        }
+        size_t len = android::utf32_to_utf8_bytes(utf32);
+        android::utf32_to_utf8((uint8_t*)cur, utf32, len);
+        cur += len;
+    }
+    if (cur < end) {
+        *cur = '\0';
+    }
+    return cur - dst;
+}
diff --git a/tools/aapt/Bundle.h b/tools/aapt/Bundle.h
index 1ac13f2..cf70121 100644
--- a/tools/aapt/Bundle.h
+++ b/tools/aapt/Bundle.h
@@ -37,7 +37,7 @@
           mForce(false), mGrayscaleTolerance(0), mMakePackageDirs(false),
           mUpdate(false), mExtending(false),
           mRequireLocalization(false), mPseudolocalize(false),
-          mValues(false),
+          mUTF8(false), mValues(false),
           mCompressionMethod(0), mOutputAPKFile(NULL),
           mAssetSourceDir(NULL), mProguardFile(NULL),
           mAndroidManifestFile(NULL), mPublicOutputFile(NULL),
@@ -76,6 +76,8 @@
     void setRequireLocalization(bool val) { mRequireLocalization = val; }
     bool getPseudolocalize(void) const { return mPseudolocalize; }
     void setPseudolocalize(bool val) { mPseudolocalize = val; }
+    bool getUTF8(void) const { return mUTF8; }
+    void setUTF8(bool val) { mUTF8 = val; }
     bool getValues(void) const { return mValues; }
     void setValues(bool val) { mValues = val; }
     int getCompressionMethod(void) const { return mCompressionMethod; }
@@ -161,6 +163,7 @@
     bool        mExtending;
     bool        mRequireLocalization;
     bool        mPseudolocalize;
+    bool        mUTF8;
     bool        mValues;
     int         mCompressionMethod;
     bool        mJunkPath;
diff --git a/tools/aapt/Command.cpp b/tools/aapt/Command.cpp
index 4067735..990a7ea 100644
--- a/tools/aapt/Command.cpp
+++ b/tools/aapt/Command.cpp
@@ -412,6 +412,7 @@
             }
             tree.restart();
             printXMLBlock(&tree);
+            tree.uninit();
             delete asset;
             asset = NULL;
         }
diff --git a/tools/aapt/Main.cpp b/tools/aapt/Main.cpp
index 98286c0..bd03b74 100644
--- a/tools/aapt/Main.cpp
+++ b/tools/aapt/Main.cpp
@@ -118,6 +118,7 @@
         "   -P  specify where to output public resource definitions\n"
         "   -S  directory in which to find resources.  Multiple directories will be scanned"
         "       and the first match found (left to right) will take precedence."
+        "   -8  Encode string resources in UTF-8.\n"
         "   -0  specifies an additional extension for which such files will not\n"
         "       be stored compressed in the .apk.  An empty string means to not\n"
         "       compress any files at all.\n"
@@ -370,6 +371,9 @@
                     bundle.setCompressionMethod(ZipEntry::kCompressStored);
                 }
                 break;
+            case '8':
+                bundle.setUTF8(true);
+                break;
             case '-':
                 if (strcmp(cp, "-min-sdk-version") == 0) {
                     argc--;
diff --git a/tools/aapt/Resource.cpp b/tools/aapt/Resource.cpp
index fdcada4..d04a873 100644
--- a/tools/aapt/Resource.cpp
+++ b/tools/aapt/Resource.cpp
@@ -613,6 +613,12 @@
 
     NOISY(printf("Found %d included resource packages\n", (int)table.size()));
 
+    // Standard flags for compiled XML and optional UTF-8 encoding
+    int xmlFlags = XML_COMPILE_STANDARD_RESOURCE;
+    if (bundle->getUTF8()) {
+        xmlFlags |= XML_COMPILE_UTF8;
+    }
+
     // --------------------------------------------------------------
     // First, gather all resource information.
     // --------------------------------------------------------------
@@ -763,7 +769,7 @@
         ResourceDirIterator it(layouts, String8("layout"));
         while ((err=it.next()) == NO_ERROR) {
             String8 src = it.getFile()->getPrintableSource();
-            err = compileXmlFile(assets, it.getFile(), &table);
+            err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err == NO_ERROR) {
                 ResXMLTree block;
                 block.setTo(it.getFile()->getData(), it.getFile()->getSize(), true);
@@ -782,7 +788,7 @@
     if (anims != NULL) {
         ResourceDirIterator it(anims, String8("anim"));
         while ((err=it.next()) == NO_ERROR) {
-            err = compileXmlFile(assets, it.getFile(), &table);
+            err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err != NO_ERROR) {
                 hasErrors = true;
             }
@@ -797,7 +803,7 @@
     if (xmls != NULL) {
         ResourceDirIterator it(xmls, String8("xml"));
         while ((err=it.next()) == NO_ERROR) {
-            err = compileXmlFile(assets, it.getFile(), &table);
+            err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err != NO_ERROR) {
                 hasErrors = true;
             }
@@ -819,7 +825,7 @@
     if (colors != NULL) {
         ResourceDirIterator it(colors, String8("color"));
         while ((err=it.next()) == NO_ERROR) {
-          err = compileXmlFile(assets, it.getFile(), &table);
+          err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err != NO_ERROR) {
                 hasErrors = true;
             }
@@ -835,7 +841,7 @@
         ResourceDirIterator it(menus, String8("menu"));
         while ((err=it.next()) == NO_ERROR) {
             String8 src = it.getFile()->getPrintableSource();
-            err = compileXmlFile(assets, it.getFile(), &table);
+            err = compileXmlFile(assets, it.getFile(), &table, xmlFlags);
             if (err != NO_ERROR) {
                 hasErrors = true;
             }
diff --git a/tools/aapt/ResourceTable.cpp b/tools/aapt/ResourceTable.cpp
index 19b9b01..a9cbd11 100644
--- a/tools/aapt/ResourceTable.cpp
+++ b/tools/aapt/ResourceTable.cpp
@@ -39,6 +39,10 @@
         root->removeWhitespace(false, NULL);
     }
 
+    if ((options&XML_COMPILE_UTF8) != 0) {
+        root->setUTF8(true);
+    }
+
     bool hasErrors = false;
     
     if ((options&XML_COMPILE_ASSIGN_ATTRIBUTE_IDS) != 0) {
@@ -2505,7 +2509,7 @@
 
     // Iterate through all data, collecting all values (strings,
     // references, etc).
-    StringPool valueStrings;
+    StringPool valueStrings = StringPool(false, bundle->getUTF8());
     for (pi=0; pi<N; pi++) {
         sp<Package> p = mOrderedPackages.itemAt(pi);
         if (p->getTypes().size() == 0) {
@@ -2513,8 +2517,8 @@
             continue;
         }
 
-        StringPool typeStrings;
-        StringPool keyStrings;
+        StringPool typeStrings = StringPool(false, bundle->getUTF8());
+        StringPool keyStrings = StringPool(false, bundle->getUTF8());
 
         const size_t N = p->getOrderedTypes().size();
         for (size_t ti=0; ti<N; ti++) {
diff --git a/tools/aapt/ResourceTable.h b/tools/aapt/ResourceTable.h
index caa01b3..cfa75a71 100644
--- a/tools/aapt/ResourceTable.h
+++ b/tools/aapt/ResourceTable.h
@@ -24,6 +24,7 @@
     XML_COMPILE_COMPACT_WHITESPACE = 1<<2,
     XML_COMPILE_STRIP_WHITESPACE = 1<<3,
     XML_COMPILE_STRIP_RAW_VALUES = 1<<4,
+    XML_COMPILE_UTF8 = 1<<5,
     
     XML_COMPILE_STANDARD_RESOURCE =
             XML_COMPILE_STRIP_COMMENTS | XML_COMPILE_ASSIGN_ATTRIBUTE_IDS
diff --git a/tools/aapt/StringPool.cpp b/tools/aapt/StringPool.cpp
index 715170a..ec58591 100644
--- a/tools/aapt/StringPool.cpp
+++ b/tools/aapt/StringPool.cpp
@@ -30,8 +30,8 @@
     }
 }
 
-StringPool::StringPool(bool sorted)
-    : mSorted(sorted), mValues(-1), mIdents(-1)
+StringPool::StringPool(bool sorted, bool utf8)
+    : mSorted(sorted), mUTF8(utf8), mValues(-1), mIdents(-1)
 {
 }
 
@@ -165,6 +165,16 @@
     return err == NO_ERROR ? pool : NULL;
 }
 
+#define ENCODE_LENGTH(str, chrsz, strSize) \
+{ \
+    size_t maxMask = 1 << ((chrsz*8)-1); \
+    size_t maxSize = maxMask-1; \
+    if (strSize > maxSize) { \
+        *str++ = maxMask | ((strSize>>(chrsz*8))&maxSize); \
+    } \
+    *str++ = strSize; \
+}
+
 status_t StringPool::writeStringBlock(const sp<AaptFile>& pool)
 {
     // Allow appending.  Sorry this is a little wacky.
@@ -213,28 +223,53 @@
         return NO_MEMORY;
     }
 
+    const size_t charSize = mUTF8 ? sizeof(uint8_t) : sizeof(char16_t);
+
     size_t strPos = 0;
     for (i=0; i<STRINGS; i++) {
         entry& ent = mEntries.editItemAt(i);
         const size_t strSize = (ent.value.size());
-        const size_t lenSize = strSize > 0x7fff ? sizeof(uint32_t) : sizeof(uint16_t);
-        const size_t totalSize = lenSize + ((strSize+1)*sizeof(uint16_t));
+        const size_t lenSize = strSize > (size_t)(1<<((charSize*8)-1))-1 ?
+            charSize*2 : charSize;
+
+        String8 encStr;
+        if (mUTF8) {
+            encStr = String8(ent.value);
+        }
+
+        const size_t encSize = mUTF8 ? encStr.size() : 0;
+        const size_t encLenSize = mUTF8 ?
+            (encSize > (size_t)(1<<((charSize*8)-1))-1 ?
+                charSize*2 : charSize) : 0;
 
         ent.offset = strPos;
-        uint16_t* dat = (uint16_t*)pool->editData(preSize + strPos + totalSize);
+
+        const size_t totalSize = lenSize + encLenSize +
+            ((mUTF8 ? encSize : strSize)+1)*charSize;
+
+        void* dat = (void*)pool->editData(preSize + strPos + totalSize);
         if (dat == NULL) {
             fprintf(stderr, "ERROR: Out of memory for string pool\n");
             return NO_MEMORY;
         }
-        dat += (preSize+strPos)/sizeof(uint16_t);
-        if (lenSize > sizeof(uint16_t)) {
-            *dat = htods(0x8000 | ((strSize>>16)&0x7fff));
-            dat++;
-        }
-        *dat++ = htods(strSize);
-        strcpy16_htod(dat, ent.value);
+        dat = (uint8_t*)dat + preSize + strPos;
+        if (mUTF8) {
+            uint8_t* strings = (uint8_t*)dat;
 
-        strPos += lenSize + (strSize+1)*sizeof(uint16_t);
+            ENCODE_LENGTH(strings, sizeof(uint8_t), strSize)
+
+            ENCODE_LENGTH(strings, sizeof(uint8_t), encSize)
+
+            strncpy((char*)strings, encStr, encSize+1);
+        } else {
+            uint16_t* strings = (uint16_t*)dat;
+
+            ENCODE_LENGTH(strings, sizeof(uint16_t), strSize)
+
+            strcpy16_htod(strings, ent.value);
+        }
+
+        strPos += totalSize;
     }
 
     // Pad ending string position up to a uint32_t boundary.
@@ -312,6 +347,9 @@
     if (mSorted) {
         header->flags |= htodl(ResStringPool_header::SORTED_FLAG);
     }
+    if (mUTF8) {
+        header->flags |= htodl(ResStringPool_header::UTF8_FLAG);
+    }
     header->stringsStart = htodl(preSize);
     header->stylesStart = htodl(STYLES > 0 ? (preSize+strPos) : 0);
 
diff --git a/tools/aapt/StringPool.h b/tools/aapt/StringPool.h
index 9082b37..7275259 100644
--- a/tools/aapt/StringPool.h
+++ b/tools/aapt/StringPool.h
@@ -68,8 +68,11 @@
      * lookup with ResStringPool::indexOfString() (O(log n)), at the expense
      * of support for styled string entries (which requires the same string
      * be included multiple times in the pool).
+     *
+     * If 'utf8' is true, strings will be encoded with UTF-8 instead of
+     * left in Java's native UTF-16.
      */
-    explicit StringPool(bool sorted = false);
+    explicit StringPool(bool sorted = false, bool utf8 = false);
 
     /**
      * Add a new string to the pool.  If mergeDuplicates is true, thenif
@@ -123,6 +126,7 @@
 
 private:
     const bool                              mSorted;
+    const bool                              mUTF8;
     // Raw array of unique strings, in some arbitrary order.
     Vector<entry>                           mEntries;
     // Array of indices into mEntries, in the order they were
diff --git a/tools/aapt/XMLNode.cpp b/tools/aapt/XMLNode.cpp
index d4d2a45c..036dde4 100644
--- a/tools/aapt/XMLNode.cpp
+++ b/tools/aapt/XMLNode.cpp
@@ -478,6 +478,7 @@
     , mFilename(filename)
     , mStartLineNumber(0)
     , mEndLineNumber(0)
+    , mUTF8(false)
 {
     if (isNamespace) {
         mNamespacePrefix = s1;
@@ -837,7 +838,7 @@
 status_t XMLNode::flatten(const sp<AaptFile>& dest,
         bool stripComments, bool stripRawValues) const
 {
-    StringPool strings;
+    StringPool strings = StringPool(false, mUTF8);
     Vector<uint32_t> resids;
     
     // First collect just the strings for attribute names that have a
diff --git a/tools/aapt/XMLNode.h b/tools/aapt/XMLNode.h
index a9bea43..dc92fa7 100644
--- a/tools/aapt/XMLNode.h
+++ b/tools/aapt/XMLNode.h
@@ -124,6 +124,8 @@
 
     void removeWhitespace(bool stripAll=true, const char** cDataTags=NULL);
 
+    void setUTF8(bool val) { mUTF8 = val; }
+
     status_t parseValues(const sp<AaptAssets>& assets, ResourceTable* table);
 
     status_t assignResourceIds(const sp<AaptAssets>& assets,
@@ -189,6 +191,9 @@
     String8 mFilename;
     int32_t mStartLineNumber;
     int32_t mEndLineNumber;
+
+    // Encode compiled XML with UTF-8 StringPools?
+    bool mUTF8;
 };
 
 #endif