Pseudolocalizer improvements.

Fixes accented pseudolocalization and adds an RTL pseudolocale.

This change makes the following modifications to the pseudolocalization logic:
1) The zz_ZZ pseudolocale was removed;
2) The en_XA pseudolocale was added for pseudo-accented text;
3) The ar_XB pseudolocale was added for pseudo-RTL text;
4) Pseudo-RTL localization functionality was implemented;
5) Text expansion functionality was implemented;
6) Text bracketing was implemented;
7) A couple of issues in the previous implementation were fixed.

Change-Id: I9f7f27bed717e39e82717d15c398decffc8bec3c
Signed-off-by: Anton Krumin <antkrumin@google.com>
diff --git a/tools/aapt/Bundle.h b/tools/aapt/Bundle.h
index 5089b9d..a6f2442 100644
--- a/tools/aapt/Bundle.h
+++ b/tools/aapt/Bundle.h
@@ -42,6 +42,15 @@
 } Command;
 
 /*
+ * Pseudolocalization methods
+ */
+typedef enum PseudolocalizationMethod {
+    NO_PSEUDOLOCALIZATION = 0,
+    PSEUDO_ACCENTED,
+    PSEUDO_BIDI,
+} PseudolocalizationMethod;
+
+/*
  * Bundle of goodies, including everything specified on the command line.
  */
 class Bundle {
@@ -50,12 +59,12 @@
         : mCmd(kCommandUnknown), mVerbose(false), mAndroidList(false),
           mForce(false), mGrayscaleTolerance(0), mMakePackageDirs(false),
           mUpdate(false), mExtending(false),
-          mRequireLocalization(false), mPseudolocalize(false),
+          mRequireLocalization(false), mPseudolocalize(NO_PSEUDOLOCALIZATION),
           mWantUTF16(false), mValues(false),
           mCompressionMethod(0), mJunkPath(false), mOutputAPKFile(NULL),
           mManifestPackageNameOverride(NULL), mInstrumentationPackageNameOverride(NULL),
           mAutoAddOverlay(false), mGenDependencies(false),
-          mAssetSourceDir(NULL), 
+          mAssetSourceDir(NULL),
           mCrunchedOutputDir(NULL), mProguardFile(NULL),
           mAndroidManifestFile(NULL), mPublicOutputFile(NULL),
           mRClassDir(NULL), mResourceIntermediatesDir(NULL), mManifestMinSdkVersion(NULL),
@@ -94,8 +103,8 @@
     void setExtending(bool val) { mExtending = val; }
     bool getRequireLocalization(void) const { return mRequireLocalization; }
     void setRequireLocalization(bool val) { mRequireLocalization = val; }
-    bool getPseudolocalize(void) const { return mPseudolocalize; }
-    void setPseudolocalize(bool val) { mPseudolocalize = val; }
+    short getPseudolocalize(void) const { return mPseudolocalize; }
+    void setPseudolocalize(short val) { mPseudolocalize = val; }
     void setWantUTF16(bool val) { mWantUTF16 = val; }
     bool getValues(void) const { return mValues; }
     void setValues(bool val) { mValues = val; }
@@ -250,7 +259,7 @@
     bool        mUpdate;
     bool        mExtending;
     bool        mRequireLocalization;
-    bool        mPseudolocalize;
+    short       mPseudolocalize;
     bool        mWantUTF16;
     bool        mValues;
     int         mCompressionMethod;
diff --git a/tools/aapt/Command.cpp b/tools/aapt/Command.cpp
index c7cce96..f7de558 100644
--- a/tools/aapt/Command.cpp
+++ b/tools/aapt/Command.cpp
@@ -1885,14 +1885,17 @@
     FILE* fp;
     String8 dependencyFile;
 
-    // -c zz_ZZ means do pseudolocalization
+    // -c en_XA and/or ar_XB means do pseudolocalization
     ResourceFilter filter;
     err = filter.parse(bundle->getConfigurations());
     if (err != NO_ERROR) {
         goto bail;
     }
     if (filter.containsPseudo()) {
-        bundle->setPseudolocalize(true);
+        bundle->setPseudolocalize(bundle->getPseudolocalize() | PSEUDO_ACCENTED);
+    }
+    if (filter.containsPseudoBidi()) {
+        bundle->setPseudolocalize(bundle->getPseudolocalize() | PSEUDO_BIDI);
     }
 
     N = bundle->getFileSpecCount();
diff --git a/tools/aapt/ResourceFilter.cpp b/tools/aapt/ResourceFilter.cpp
index e8a2be4..8ca852e 100644
--- a/tools/aapt/ResourceFilter.cpp
+++ b/tools/aapt/ResourceFilter.cpp
@@ -24,8 +24,10 @@
 
         String8 part(p, q-p);
 
-        if (part == "zz_ZZ") {
-            mContainsPseudo = true;
+        if (part == "en_XA") {
+            mContainsPseudoAccented = true;
+        } else if (part == "ar_XB") {
+            mContainsPseudoBidi = true;
         }
         int axis;
         AxisValue value;
diff --git a/tools/aapt/ResourceFilter.h b/tools/aapt/ResourceFilter.h
index 0d127ba..c57770e 100644
--- a/tools/aapt/ResourceFilter.h
+++ b/tools/aapt/ResourceFilter.h
@@ -16,19 +16,22 @@
 class ResourceFilter
 {
 public:
-    ResourceFilter() : mData(), mContainsPseudo(false) {}
+    ResourceFilter() : mData(), mContainsPseudoAccented(false),
+        mContainsPseudoBidi(false) {}
     status_t parse(const char* arg);
     bool isEmpty() const;
     bool match(int axis, const ResTable_config& config) const;
     bool match(const ResTable_config& config) const;
     const SortedVector<AxisValue>* configsForAxis(int axis) const;
-    inline bool containsPseudo() const { return mContainsPseudo; }
+    inline bool containsPseudo() const { return mContainsPseudoAccented; }
+    inline bool containsPseudoBidi() const { return mContainsPseudoBidi; }
 
 private:
     bool match(int axis, const AxisValue& value) const;
 
     KeyedVector<int,SortedVector<AxisValue> > mData;
-    bool mContainsPseudo;
+    bool mContainsPseudoAccented;
+    bool mContainsPseudoBidi;
 };
 
 #endif
diff --git a/tools/aapt/ResourceTable.cpp b/tools/aapt/ResourceTable.cpp
index 0b1f985..26b5bd6 100644
--- a/tools/aapt/ResourceTable.cpp
+++ b/tools/aapt/ResourceTable.cpp
@@ -25,7 +25,7 @@
     if (root == NULL) {
         return UNKNOWN_ERROR;
     }
-    
+
     return compileXmlFile(assets, root, target, table, options);
 }
 
@@ -577,13 +577,13 @@
                         int32_t curFormat,
                         bool isFormatted,
                         const String16& product,
-                        bool pseudolocalize,
+                        PseudolocalizationMethod pseudolocalize,
                         const bool overwrite,
                         ResourceTable* outTable)
 {
     status_t err;
     const String16 item16("item");
-    
+
     String16 str;
     Vector<StringPool::entry_style_span> spans;
     err = parseStyledString(bundle, in->getPrintableSource().string(),
@@ -672,7 +672,7 @@
                         int32_t curFormat,
                         bool isFormatted,
                         const String16& product,
-                        bool pseudolocalize,
+                        PseudolocalizationMethod pseudolocalize,
                         const bool overwrite,
                         KeyedVector<type_ident_pair_t, bool>* skippedResourceNames,
                         ResourceTable* outTable)
@@ -854,10 +854,16 @@
     ResTable_config curParams(defParams);
 
     ResTable_config pseudoParams(curParams);
-        pseudoParams.language[0] = 'z';
-        pseudoParams.language[1] = 'z';
-        pseudoParams.country[0] = 'Z';
-        pseudoParams.country[1] = 'Z';
+        pseudoParams.language[0] = 'e';
+        pseudoParams.language[1] = 'n';
+        pseudoParams.country[0] = 'X';
+        pseudoParams.country[1] = 'A';
+
+    ResTable_config pseudoBidiParams(curParams);
+        pseudoBidiParams.language[0] = 'a';
+        pseudoBidiParams.language[1] = 'r';
+        pseudoBidiParams.country[0] = 'X';
+        pseudoBidiParams.country[1] = 'B';
 
     while ((code=block.next()) != ResXMLTree::END_DOCUMENT && code != ResXMLTree::BAD_DOCUMENT) {
         if (code == ResXMLTree::START_TAG) {
@@ -1334,6 +1340,7 @@
                                 name,
                                 locale,
                                 SourcePos(in->getPrintableSource(), block.getLineNumber()));
+                        curIsPseudolocalizable = fileIsTranslatable;
                     }
 
                     if (formatted == false16) {
@@ -1389,6 +1396,7 @@
                 curTag = &plurals16;
                 curType = plurals16;
                 curIsBag = true;
+                curIsPseudolocalizable = fileIsTranslatable;
             } else if (strcmp16(block.getElementName(&len), array16.string()) == 0) {
                 curTag = &array16;
                 curType = array16;
@@ -1410,26 +1418,24 @@
             } else if (strcmp16(block.getElementName(&len), string_array16.string()) == 0) {
                 // Check whether these strings need valid formats.
                 // (simplified form of what string16 does above)
+                bool isTranslatable = true;  // default; cleared by translatable="false"
                 size_t n = block.getAttributeCount();
 
                 // Pseudolocalizable by default, unless this string array isn't
                 // translatable.
-                curIsPseudolocalizable = true;
                 for (size_t i = 0; i < n; i++) {
                     size_t length;
                     const uint16_t* attr = block.getAttributeName(i, &length);
-                    if (strcmp16(attr, translatable16.string()) == 0) {
-                        const uint16_t* value = block.getAttributeStringValue(i, &length);
-                        if (strcmp16(value, false16.string()) == 0) {
-                            curIsPseudolocalizable = false;
-                        }
-                    }
-
                     if (strcmp16(attr, formatted16.string()) == 0) {
                         const uint16_t* value = block.getAttributeStringValue(i, &length);
                         if (strcmp16(value, false16.string()) == 0) {
                             curIsFormatted = false;
                         }
+                    } else if (strcmp16(attr, translatable16.string()) == 0) {
+                        const uint16_t* value = block.getAttributeStringValue(i, &length);
+                        if (strcmp16(value, false16.string()) == 0) {
+                            isTranslatable = false;
+                        }
                     }
                 }
 
@@ -1438,6 +1444,7 @@
                 curFormat = ResTable_map::TYPE_REFERENCE|ResTable_map::TYPE_STRING;
                 curIsBag = true;
                 curIsBagReplaceOnOverwrite = true;
+                curIsPseudolocalizable = isTranslatable && fileIsTranslatable;
             } else if (strcmp16(block.getElementName(&len), integer_array16.string()) == 0) {
                 curTag = &integer_array16;
                 curType = array16;
@@ -1559,19 +1566,29 @@
 
                         err = parseAndAddBag(bundle, in, &block, curParams, myPackage, curType,
                                 ident, parentIdent, itemIdent, curFormat, curIsFormatted,
-                                product, false, overwrite, outTable);
+                                product, NO_PSEUDOLOCALIZATION, overwrite, outTable);
                         if (err == NO_ERROR) {
                             if (curIsPseudolocalizable && localeIsDefined(curParams)
-                                    && bundle->getPseudolocalize()) {
+                                    && bundle->getPseudolocalize() > 0) {
                                 // pseudolocalize here
-#if 1
-                                block.setPosition(parserPosition);
-                                err = parseAndAddBag(bundle, in, &block, pseudoParams, myPackage,
-                                        curType, ident, parentIdent, itemIdent, curFormat,
-                                        curIsFormatted, product, true, overwrite, outTable);
-#endif
+                                if ((PSEUDO_ACCENTED & bundle->getPseudolocalize()) ==
+                                   PSEUDO_ACCENTED) {
+                                    block.setPosition(parserPosition);
+                                    err = parseAndAddBag(bundle, in, &block, pseudoParams, myPackage,
+                                            curType, ident, parentIdent, itemIdent, curFormat,
+                                            curIsFormatted, product, PSEUDO_ACCENTED,
+                                            overwrite, outTable);
+                                }
+                                if ((PSEUDO_BIDI & bundle->getPseudolocalize()) ==
+                                   PSEUDO_BIDI) {
+                                    block.setPosition(parserPosition);
+                                    err = parseAndAddBag(bundle, in, &block, pseudoBidiParams, myPackage,
+                                            curType, ident, parentIdent, itemIdent, curFormat,
+                                            curIsFormatted, product, PSEUDO_BIDI,
+                                            overwrite, outTable);
+                                }
                             }
-                        } 
+                        }
                         if (err != NO_ERROR) {
                             hasErrors = localHasErrors = true;
                         }
@@ -1592,20 +1609,31 @@
 
                 err = parseAndAddEntry(bundle, in, &block, curParams, myPackage, curType, ident,
                         *curTag, curIsStyled, curFormat, curIsFormatted,
-                        product, false, overwrite, &skippedResourceNames, outTable);
+                        product, NO_PSEUDOLOCALIZATION, overwrite, &skippedResourceNames, outTable);
 
                 if (err < NO_ERROR) { // Why err < NO_ERROR instead of err != NO_ERROR?
                     hasErrors = localHasErrors = true;
                 }
                 else if (err == NO_ERROR) {
                     if (curIsPseudolocalizable && localeIsDefined(curParams)
-                            && bundle->getPseudolocalize()) {
+                            && bundle->getPseudolocalize() > 0) {
                         // pseudolocalize here
-                        block.setPosition(parserPosition);
-                        err = parseAndAddEntry(bundle, in, &block, pseudoParams, myPackage, curType,
-                                ident, *curTag, curIsStyled, curFormat,
-                                curIsFormatted, product,
-                                true, overwrite, &skippedResourceNames, outTable);
+                        if ((PSEUDO_ACCENTED & bundle->getPseudolocalize()) ==
+                           PSEUDO_ACCENTED) {
+                            block.setPosition(parserPosition);
+                            err = parseAndAddEntry(bundle, in, &block, pseudoParams, myPackage, curType,
+                                    ident, *curTag, curIsStyled, curFormat,
+                                    curIsFormatted, product,
+                                    PSEUDO_ACCENTED, overwrite, &skippedResourceNames, outTable);
+                        }
+                        if ((PSEUDO_BIDI & bundle->getPseudolocalize()) ==
+                           PSEUDO_BIDI) {
+                            block.setPosition(parserPosition);
+                            err = parseAndAddEntry(bundle, in, &block, pseudoBidiParams,
+                                    myPackage, curType, ident, *curTag, curIsStyled, curFormat,
+                                    curIsFormatted, product,
+                                    PSEUDO_BIDI, overwrite, &skippedResourceNames, outTable);
+                        }
                         if (err != NO_ERROR) {
                             hasErrors = localHasErrors = true;
                         }
@@ -2636,8 +2664,8 @@
                     continue;
                 }
 
-                // don't bother with the pseudolocale "zz_ZZ"
-                if (config != "zz_ZZ") {
+                // don't bother with the pseudolocale "en_XA" or "ar_XB"
+                if (config != "en_XA" && config != "ar_XB") {
                     if (configSrcMap.find(config) == configSrcMap.end()) {
                         // okay, no specific localization found.  it's possible that we are
                         // requiring a specific regional localization [e.g. de_DE] but there is an
diff --git a/tools/aapt/XMLNode.cpp b/tools/aapt/XMLNode.cpp
index a663ad5..607d419 100644
--- a/tools/aapt/XMLNode.cpp
+++ b/tools/aapt/XMLNode.cpp
@@ -187,7 +187,7 @@
                            String16* outString,
                            Vector<StringPool::entry_style_span>* outSpans,
                            bool isFormatted,
-                           bool pseudolocalize)
+                           PseudolocalizationMethod pseudolocalize)
 {
     Vector<StringPool::entry_style_span> spanStack;
     String16 curString;
@@ -198,21 +198,30 @@
 
     size_t len;
     ResXMLTree::event_code_t code;
+    // Open the bracket if the accented pseudolocalization method is specified.
+    if (pseudolocalize == PSEUDO_ACCENTED) {
+        curString.append(String16(String8("[")));
+    }
     while ((code=inXml->next()) != ResXMLTree::END_DOCUMENT && code != ResXMLTree::BAD_DOCUMENT) {
-
         if (code == ResXMLTree::TEXT) {
             String16 text(inXml->getText(&len));
             if (firstTime && text.size() > 0) {
                 firstTime = false;
                 if (text.string()[0] == '@') {
                     // If this is a resource reference, don't do the pseudoloc.
-                    pseudolocalize = false;
+                    pseudolocalize = NO_PSEUDOLOCALIZATION;
                 }
             }
-            if (xliffDepth == 0 && pseudolocalize) {
-                std::string orig(String8(text).string());
-                std::string pseudo = pseudolocalize_string(orig);
-                curString.append(String16(String8(pseudo.c_str())));
+            if (xliffDepth == 0 && pseudolocalize > 0) {
+                String16 pseudo;
+                if (pseudolocalize == PSEUDO_ACCENTED) {
+                    pseudo = pseudolocalize_string(text);
+                } else if (pseudolocalize == PSEUDO_BIDI) {
+                    pseudo = pseudobidi_string(text);
+                } else {
+                    pseudo = text;
+                }
+                curString.append(pseudo);
             } else {
                 if (isFormatted && hasSubstitutionErrors(fileName, inXml, text) != NO_ERROR) {
                     return UNKNOWN_ERROR;
@@ -352,6 +361,25 @@
         }
     }
 
+    // Append expansion text and close the bracket if the accented method is specified.
+    if (pseudolocalize == PSEUDO_ACCENTED) {
+        const char16_t* str = outString->string();
+        const char16_t* p = str;
+        const char16_t* e = p + outString->size();
+        int words_cnt = 0;
+        while (p < e) {
+            if (isspace(*p)) {
+                words_cnt++;
+            }
+            p++;
+        }
+        unsigned int length = words_cnt > 3 ? outString->size() :
+            outString->size() / 2;
+        curString.append(String16(String8(" ")));
+        curString.append(pseudo_generate_expansion(length));
+        curString.append(String16(String8("]")));
+    }
+
     if (code == ResXMLTree::BAD_DOCUMENT) {
             SourcePos(String8(fileName), inXml->getLineNumber()).error(
                     "Error parsing XML\n");
diff --git a/tools/aapt/XMLNode.h b/tools/aapt/XMLNode.h
index 05624b7..ccbf9f4 100644
--- a/tools/aapt/XMLNode.h
+++ b/tools/aapt/XMLNode.h
@@ -26,7 +26,7 @@
                            String16* outString,
                            Vector<StringPool::entry_style_span>* outSpans,
                            bool isFormatted,
-                           bool isPseudolocalizable);
+                           PseudolocalizationMethod isPseudolocalizable);
 
 void printXMLBlock(ResXMLTree* block);
 
diff --git a/tools/aapt/pseudolocalize.cpp b/tools/aapt/pseudolocalize.cpp
index 9e50c5a..c02327a 100644
--- a/tools/aapt/pseudolocalize.cpp
+++ b/tools/aapt/pseudolocalize.cpp
@@ -2,89 +2,155 @@
 
 using namespace std;
 
+// Base string from which the expansion text is generated
+static const String16 k_expansion_string = String16("one two three "
+    "four five six seven eight nine ten eleven twelve thirteen "
+    "fourteen fiveteen sixteen seventeen nineteen twenty");
+
+// Special unicode characters to override directionality of the words
+static const String16 k_rlm = String16("\xe2\x80\x8f");
+static const String16 k_rlo = String16("\xE2\x80\xae");
+static const String16 k_pdf = String16("\xE2\x80\xac");
+
+// Placeholder marks
+static const String16 k_placeholder_open = String16("\xc2\xbb");
+static const String16 k_placeholder_close = String16("\xc2\xab");
+
 static const char*
-pseudolocalize_char(char c)
+pseudolocalize_char(const char16_t c)
 {
     switch (c) {
-        case 'a':   return "\xc4\x83";
-        case 'b':   return "\xcf\x84";
-        case 'c':   return "\xc4\x8b";
-        case 'd':   return "\xc4\x8f";
-        case 'e':   return "\xc4\x99";
+        case 'a':   return "\xc3\xa5";
+        case 'b':   return "\xc9\x93";
+        case 'c':   return "\xc3\xa7";
+        case 'd':   return "\xc3\xb0";
+        case 'e':   return "\xc3\xa9";
         case 'f':   return "\xc6\x92";
         case 'g':   return "\xc4\x9d";
-        case 'h':   return "\xd1\x9b";
-        case 'i':   return "\xcf\x8a";
+        case 'h':   return "\xc4\xa5";
+        case 'i':   return "\xc3\xae";
         case 'j':   return "\xc4\xb5";
-        case 'k':   return "\xc4\xb8";
-        case 'l':   return "\xc4\xba";
+        case 'k':   return "\xc4\xb7";
+        case 'l':   return "\xc4\xbc";
         case 'm':   return "\xe1\xb8\xbf";
-        case 'n':   return "\xd0\xb8";
-        case 'o':   return "\xcf\x8c";
-        case 'p':   return "\xcf\x81";
+        case 'n':   return "\xc3\xb1";
+        case 'o':   return "\xc3\xb6";
+        case 'p':   return "\xc3\xbe";
         case 'q':   return "\x51";
-        case 'r':   return "\xd2\x91";
+        case 'r':   return "\xc5\x95";
         case 's':   return "\xc5\xa1";
-        case 't':   return "\xd1\x82";
-        case 'u':   return "\xce\xb0";
+        case 't':   return "\xc5\xa3";
+        case 'u':   return "\xc3\xbb";
         case 'v':   return "\x56";
-        case 'w':   return "\xe1\xba\x85";
+        case 'w':   return "\xc5\xb5";
         case 'x':   return "\xd1\x85";
-        case 'y':   return "\xe1\xbb\xb3";
-        case 'z':   return "\xc5\xba";
+        case 'y':   return "\xc3\xbd";
+        case 'z':   return "\xc5\xbe";
         case 'A':   return "\xc3\x85";
         case 'B':   return "\xce\xb2";
-        case 'C':   return "\xc4\x88";
-        case 'D':   return "\xc4\x90";
-        case 'E':   return "\xd0\x84";
-        case 'F':   return "\xce\x93";
-        case 'G':   return "\xc4\x9e";
-        case 'H':   return "\xc4\xa6";
-        case 'I':   return "\xd0\x87";
-        case 'J':   return "\xc4\xb5";
+        case 'C':   return "\xc3\x87";
+        case 'D':   return "\xc3\x90";
+        case 'E':   return "\xc3\x89";
+        case 'G':   return "\xc4\x9c";
+        case 'H':   return "\xc4\xa4";
+        case 'I':   return "\xc3\x8e";
+        case 'J':   return "\xc4\xb4";
         case 'K':   return "\xc4\xb6";
-        case 'L':   return "\xc5\x81";
+        case 'L':   return "\xc4\xbb";
         case 'M':   return "\xe1\xb8\xbe";
-        case 'N':   return "\xc5\x83";
-        case 'O':   return "\xce\x98";
-        case 'P':   return "\xcf\x81";
+        case 'N':   return "\xc3\x91";
+        case 'O':   return "\xc3\x96";
+        case 'P':   return "\xc3\x9e";
         case 'Q':   return "\x71";
-        case 'R':   return "\xd0\xaf";
-        case 'S':   return "\xc8\x98";
-        case 'T':   return "\xc5\xa6";
-        case 'U':   return "\xc5\xa8";
+        case 'R':   return "\xc5\x94";
+        case 'S':   return "\xc5\xa0";
+        case 'T':   return "\xc5\xa2";
+        case 'U':   return "\xc3\x9b";
         case 'V':   return "\xce\xbd";
-        case 'W':   return "\xe1\xba\x84";
+        case 'W':   return "\xc5\xb4";
         case 'X':   return "\xc3\x97";
-        case 'Y':   return "\xc2\xa5";
+        case 'Y':   return "\xc3\x9d";
         case 'Z':   return "\xc5\xbd";
+        case '!':   return "\xc2\xa1";
+        case '?':   return "\xc2\xbf";
+        case '$':   return "\xe2\x82\xac";
         default:    return NULL;
     }
 }
 
+static const bool
+is_possible_normal_placeholder_end(const char16_t c) {
+    switch (c) {
+        case 's': return true;
+        case 'S': return true;
+        case 'c': return true;
+        case 'C': return true;
+        case 'd': return true;
+        case 'o': return true;
+        case 'x': return true;
+        case 'X': return true;
+        case 'f': return true;
+        case 'e': return true;
+        case 'E': return true;
+        case 'g': return true;
+        case 'G': return true;
+        case 'a': return true;
+        case 'A': return true;
+        case 'b': return true;
+        case 'B': return true;
+        case 'h': return true;
+        case 'H': return true;
+        case '%': return true;
+        case 'n': return true;
+        default:  return false;
+    }
+}
+
+String16
+pseudo_generate_expansion(const unsigned int length) {
+    String16 result = k_expansion_string;
+    const char16_t* s = result.string();
+    if (result.size() < length) {
+        result += String16(" ");
+        result += pseudo_generate_expansion(length - result.size());
+    } else {
+        int ext = 0;
+        // Should contain only whole words, so look for the next space
+        for (unsigned int i = length + 1; i < result.size(); ++i) {
+          ++ext;
+          if (s[i] == ' ') {
+            break;
+          }
+        }
+        result.remove(length + ext, 0);
+    }
+    return result;
+}
+
 /**
  * Converts characters so they look like they've been localized.
  *
  * Note: This leaves escape sequences untouched so they can later be
  * processed by ResTable::collectString in the normal way.
  */
-string
-pseudolocalize_string(const string& source)
+String16
+pseudolocalize_string(const String16& source)
 {
-    const char* s = source.c_str();
-    string result;
-    const size_t I = source.length();
+    const char16_t* s = source.string();
+    String16 result;
+    const size_t I = source.size();
     for (size_t i=0; i<I; i++) {
-        char c = s[i];
+        char16_t c = s[i];
         if (c == '\\') {
+            // Escape syntax, no need to pseudolocalize
             if (i<I-1) {
-                result += '\\';
+                result += String16("\\");
                 i++;
                 c = s[i];
                 switch (c) {
                     case 'u':
                         // this one takes up 5 chars
-                        result += string(s+i, 5);
+                        result += String16(s+i, 5);
                         i += 4;
                         break;
                     case 't':
@@ -96,24 +162,107 @@
                     case '\'':
                     case '\\':
                     default:
-                        result += c;
+                        result.append(&c, 1);
                         break;
                 }
             } else {
-                result += c;
+                result.append(&c, 1);
+            }
+        } else if (c == '%') {
+            // Placeholder syntax, no need to pseudolocalize
+            result += k_placeholder_open;
+            bool end = false;
+            result.append(&c, 1);
+            while (!end && i < I) {
+                ++i;
+                c = s[i];
+                result.append(&c, 1);
+                if (is_possible_normal_placeholder_end(c)) {
+                    end = true;
+                } else if (c == 't') {
+                    ++i;
+                    c = s[i];
+                    result.append(&c, 1);
+                    end = true;
+                }
+            }
+            result += k_placeholder_close;
+        } else if (c == '<' || c == '&') {
+            // html syntax, no need to pseudolocalize
+            bool tag_closed = false;
+            while (!tag_closed && i < I) {
+                if (c == '&') {
+                    String16 escape_text;
+                    escape_text.append(&c, 1);
+                    bool end = false;
+                    size_t htmlCodePos = i;
+                    while (!end && htmlCodePos < I) {
+                        ++htmlCodePos;
+                        c = s[htmlCodePos];
+                        escape_text.append(&c, 1);
+                        // Valid html code
+                        if (c == ';') {
+                            end = true;
+                            i = htmlCodePos;
+                        }
+                        // Wrong html code
+                        else if (!((c == '#' ||
+                                 (c >= 'a' && c <= 'z') ||
+                                 (c >= 'A' && c <= 'Z') ||
+                                 (c >= '0' && c <= '9')))) {
+                            end = true;
+                        }
+                    }
+                    result += escape_text;
+                    if (escape_text != String16("&lt;")) {
+                        tag_closed = true;
+                    }
+                    continue;
+                }
+                if (c == '>') {
+                    tag_closed = true;
+                    result.append(&c, 1);
+                    continue;
+                }
+                result.append(&c, 1);
+                i++;
+                c = s[i];
             }
         } else {
+            // This is plain text that should be pseudolocalized
             const char* p = pseudolocalize_char(c);
             if (p != NULL) {
-                result += p;
+                result += String16(p);
             } else {
-                result += c;
+                result.append(&c, 1);
             }
         }
     }
-
-    //printf("result=\'%s\'\n", result.c_str());
     return result;
 }
 
+String16
+pseudobidi_string(const String16& source)
+{
+    const char16_t* s = source.string();
+    String16 result;
+    result += k_rlm;
+    result += k_rlo;
+    for (size_t i=0; i<source.size(); i++) {
+        char16_t c = s[i];
+        switch(c) {
+            case ' ': result += k_pdf;
+                      result += k_rlm;
+                      result.append(&c, 1);
+                      result += k_rlm;
+                      result += k_rlo;
+                      break;
+            default: result.append(&c, 1);
+                     break;
+        }
+    }
+    result += k_pdf;
+    result += k_rlm;
+    return result;
+}
 
diff --git a/tools/aapt/pseudolocalize.h b/tools/aapt/pseudolocalize.h
index 94cb034..e6ab18e 100644
--- a/tools/aapt/pseudolocalize.h
+++ b/tools/aapt/pseudolocalize.h
@@ -1,9 +1,18 @@
 #ifndef HOST_PSEUDOLOCALIZE_H
 #define HOST_PSEUDOLOCALIZE_H
 
+#include "StringPool.h"
+
 #include <string>
 
-std::string pseudolocalize_string(const std::string& source);
+String16 pseudolocalize_string(const String16& source);
+// Surrounds every word in the sentence with specific characters that make
+// the word directionality RTL.
+String16 pseudobidi_string(const String16& source);
+// Generates an expansion string based on the specified length.
+// The generated string cannot be shorter than length, but it could be slightly
+// longer.
+String16 pseudo_generate_expansion(const unsigned int length);
 
 #endif // HOST_PSEUDOLOCALIZE_H