AAPT2: Fix pseudolocalization (again)

Pseudolocalization didn't properly handle spans in
strings like "<small><small>Hello</small></small>".

The spans would be identical and when doing range checks
only one of them would be updated.

Switched to a more robust way of extracting the relevant
chunks of a styled string. This uses a stack, which is more
in line with the real representation in XML.

Bug: 34088357
Test: make aapt2_tests
Change-Id: Ia4e4501713e688c96a89e26e4e2b1384f4cd3889
diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp
index 8461905..90f713b 100644
--- a/tools/aapt2/ResourceParser.cpp
+++ b/tools/aapt2/ResourceParser.cpp
@@ -155,7 +155,10 @@
     xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string,
     std::vector<UntranslatableSection>* out_untranslatable_sections) {
   // Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply.
-  std::vector<Span> span_stack;
+  // The stack elements refer to the indices in out_style_string->spans.
+  // By first adding to the out_style_string->spans vector, and then using the stack to refer
+  // to this vector, the original order of tags is preserved in cases such as <b><i>hello</b></i>.
+  std::vector<size_t> span_stack;
 
   // Clear the output variables.
   out_raw_string->clear();
@@ -192,7 +195,9 @@
           return false;
         }
 
-        span_stack.push_back(Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())});
+        out_style_string->spans.push_back(
+            Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())});
+        span_stack.push_back(out_style_string->spans.size() - 1);
       } else if (parser->element_namespace() == sXliffNamespaceUri) {
         if (parser->element_name() == "g") {
           if (untranslatable_start_depth) {
@@ -233,9 +238,8 @@
       if (parser->element_namespace().empty()) {
         // This is an HTML tag which we encode as a span. Update the span
         // stack and pop the top entry.
-        Span& top_span = span_stack.back();
+        Span& top_span = out_style_string->spans[span_stack.back()];
         top_span.last_char = builder.Utf16Len() - 1;
-        out_style_string->spans.push_back(std::move(top_span));
         span_stack.pop_back();
       } else if (untranslatable_start_depth == make_value(depth)) {
         // This is the end of an untranslatable section. Use UTF8 indices/lengths.
diff --git a/tools/aapt2/ResourceParser_test.cpp b/tools/aapt2/ResourceParser_test.cpp
index eefa320..8062c2e6 100644
--- a/tools/aapt2/ResourceParser_test.cpp
+++ b/tools/aapt2/ResourceParser_test.cpp
@@ -101,20 +101,24 @@
   // Use a surrogate pair unicode point so that we can verify that the span
   // indices use UTF-16 length and not UTF-8 length.
   std::string input =
-      "<string name=\"foo\">This is my aunt\u2019s <b>string</b></string>";
+      "<string name=\"foo\">This is my aunt\u2019s <b>fickle <small>string</small></b></string>";
   ASSERT_TRUE(TestParse(input));
 
   StyledString* str = test::GetValue<StyledString>(&table_, "string/foo");
   ASSERT_NE(nullptr, str);
 
-  const std::string expected_str = "This is my aunt\u2019s string";
+  const std::string expected_str = "This is my aunt\u2019s fickle string";
   EXPECT_EQ(expected_str, *str->value->str);
-  EXPECT_EQ(1u, str->value->spans.size());
+  EXPECT_EQ(2u, str->value->spans.size());
   EXPECT_TRUE(str->untranslatable_sections.empty());
 
   EXPECT_EQ(std::string("b"), *str->value->spans[0].name);
   EXPECT_EQ(17u, str->value->spans[0].first_char);
-  EXPECT_EQ(23u, str->value->spans[0].last_char);
+  EXPECT_EQ(30u, str->value->spans[0].last_char);
+
+  EXPECT_EQ(std::string("small"), *str->value->spans[1].name);
+  EXPECT_EQ(24u, str->value->spans[1].first_char);
+  EXPECT_EQ(30u, str->value->spans[1].last_char);
 }
 
 TEST_F(ResourceParserTest, ParseStringWithWhitespace) {
diff --git a/tools/aapt2/compile/PseudolocaleGenerator.cpp b/tools/aapt2/compile/PseudolocaleGenerator.cpp
index fad9edd..a031ea4 100644
--- a/tools/aapt2/compile/PseudolocaleGenerator.cpp
+++ b/tools/aapt2/compile/PseudolocaleGenerator.cpp
@@ -22,136 +22,194 @@
 #include "ResourceValues.h"
 #include "ValueVisitor.h"
 #include "compile/Pseudolocalizer.h"
+#include "util/Util.h"
 
 using android::StringPiece;
+using android::StringPiece16;
 
 namespace aapt {
 
-std::unique_ptr<StyledString> PseudolocalizeStyledString(
-    StyledString* string, Pseudolocalizer::Method method, StringPool* pool) {
+// The struct that represents both Span objects and UntranslatableSections.
+struct UnifiedSpan {
+  // Only present for Span objects. If not present, this was an UntranslatableSection.
+  Maybe<std::string> tag;
+
+  // The UTF-16 index into the string where this span starts.
+  uint32_t first_char;
+
+  // The UTF-16 index into the string where this span ends, inclusive.
+  uint32_t last_char;
+};
+
+inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
+  if (left.first_char < right.first_char) {
+    return true;
+  } else if (left.first_char > right.first_char) {
+    return false;
+  } else if (left.last_char < right.last_char) {
+    return true;
+  }
+  return false;
+}
+
+inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
+  return UnifiedSpan{*span.name, span.first_char, span.last_char};
+}
+
+inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
+  return UnifiedSpan{
+      {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
+}
+
+// Merges the Span and UntranslatableSections of this StyledString into a single vector of
+// UnifiedSpans. This will first check that the Spans are sorted in ascending order.
+static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
+  // Ensure the Spans are sorted and converted.
+  std::vector<UnifiedSpan> sorted_spans;
+  sorted_spans.reserve(string.value->spans.size());
+  std::transform(string.value->spans.begin(), string.value->spans.end(),
+                 std::back_inserter(sorted_spans), SpanToUnifiedSpan);
+
+  // Stable sort to ensure tag sequences like "<b><i>" are preserved.
+  std::stable_sort(sorted_spans.begin(), sorted_spans.end());
+
+  // Ensure the UntranslatableSections are sorted and converted.
+  std::vector<UnifiedSpan> sorted_untranslatable_sections;
+  sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
+  std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
+                 std::back_inserter(sorted_untranslatable_sections),
+                 UntranslatableSectionToUnifiedSpan);
+  std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
+
+  std::vector<UnifiedSpan> merged_spans;
+  merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
+  auto span_iter = sorted_spans.begin();
+  auto untranslatable_iter = sorted_untranslatable_sections.begin();
+  while (span_iter != sorted_spans.end() &&
+         untranslatable_iter != sorted_untranslatable_sections.end()) {
+    if (*span_iter < *untranslatable_iter) {
+      merged_spans.push_back(std::move(*span_iter));
+      ++span_iter;
+    } else {
+      merged_spans.push_back(std::move(*untranslatable_iter));
+      ++untranslatable_iter;
+    }
+  }
+
+  while (span_iter != sorted_spans.end()) {
+    merged_spans.push_back(std::move(*span_iter));
+    ++span_iter;
+  }
+
+  while (untranslatable_iter != sorted_untranslatable_sections.end()) {
+    merged_spans.push_back(std::move(*untranslatable_iter));
+    ++untranslatable_iter;
+  }
+  return merged_spans;
+}
+
+std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
+                                                         Pseudolocalizer::Method method,
+                                                         StringPool* pool) {
   Pseudolocalizer localizer(method);
 
-  const StringPiece original_text = *string->value->str;
+  // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
+  // This will effectively subdivide the string into multiple sections that can be individually
+  // pseudolocalized, while keeping the span indices synchronized.
+  std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
+
+  // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
+  // runtime. So we will do all our processing in UTF-16, then convert back.
+  const std::u16string text16 = util::Utf8ToUtf16(*string->value->str);
+
+  // Convenient wrapper around the text that allows us to work with StringPieces.
+  const StringPiece16 text(text16);
+
+  // The new string.
+  std::string new_string = localizer.Start();
+
+  // The stack that keeps track of what nested Span we're in.
+  std::vector<size_t> span_stack;
+
+  // The current position in the original text.
+  uint32_t cursor = 0u;
+
+  // The current position in the new text.
+  uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
+                                             new_string.size(), false);
+
+  // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
+  bool translatable = true;
+  size_t span_idx = 0u;
+  while (span_idx < merged_spans.size() || !span_stack.empty()) {
+    UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
+    UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
+
+    if (span != nullptr) {
+      if (parent_span == nullptr || parent_span->last_char > span->first_char) {
+        // There is no parent, or this span is the child of the parent.
+        // Pseudolocalize all the text until this span.
+        const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
+        cursor += substr.size();
+
+        // Pseudolocalize the substring.
+        std::string new_substr = util::Utf16ToUtf8(substr);
+        if (translatable) {
+          new_substr = localizer.Text(new_substr);
+        }
+        new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
+                                           new_substr.size(), false);
+        new_string += new_substr;
+
+        // Rewrite the first_char.
+        span->first_char = new_cursor;
+        if (!span->tag) {
+          // An untranslatable section has begun!
+          translatable = false;
+        }
+        span_stack.push_back(span_idx);
+        ++span_idx;
+        continue;
+      }
+    }
+
+    if (parent_span != nullptr) {
+      // There is a parent, and either this span is not a child of it, or there are no more spans.
+      // Pop this off the stack.
+      const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
+      cursor += substr.size();
+
+      // Pseudolocalize the substring.
+      std::string new_substr = util::Utf16ToUtf8(substr);
+      if (translatable) {
+        new_substr = localizer.Text(new_substr);
+      }
+      new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
+                                         new_substr.size(), false);
+      new_string += new_substr;
+
+      parent_span->last_char = new_cursor - 1;
+      if (parent_span->tag) {
+        // An end to an untranslatable section.
+        translatable = true;
+      }
+      span_stack.pop_back();
+    }
+  }
+
+  // Finish the pseudolocalization at the end of the string.
+  new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
+  new_string += localizer.End();
 
   StyleString localized;
+  localized.str = std::move(new_string);
 
-  // Copy the spans. We will update their offsets when we localize.
-  localized.spans.reserve(string->value->spans.size());
-  for (const StringPool::Span& span : string->value->spans) {
-    localized.spans.push_back(
-        Span{*span.name, span.first_char, span.last_char});
-  }
-
-  // The ranges are all represented with a single value. This is the start of
-  // one range and end of another.
-  struct Range {
-    size_t start;
-
-    // If set to true, toggles the state of translatability.
-    bool toggle_translatability;
-
-    // Once the new string is localized, these are the pointers to the spans to adjust.
-    // Since this struct represents the start of one range and end of another,
-    // we have the two pointers respectively.
-    uint32_t* update_start;
-    uint32_t* update_end;
-  };
-
-  auto cmp = [](const Range& r, size_t index) -> bool {
-    return r.start < index;
-  };
-
-  // Construct the ranges. The ranges are represented like so: [0, 2, 5, 7]
-  // The ranges are the spaces in between. In this example, with a total string
-  // length of 9, the vector represents: (0,1], (2,4], (5,6], (7,9]
-  //
-  std::vector<Range> ranges;
-  ranges.push_back(Range{0, false, nullptr, nullptr});
-  ranges.push_back(Range{original_text.size() - 1, false, nullptr, nullptr});
-  for (size_t i = 0; i < string->value->spans.size(); i++) {
-    const StringPool::Span& span = string->value->spans[i];
-
-    // Insert or update the Range marker for the start of this span.
-    auto iter =
-        std::lower_bound(ranges.begin(), ranges.end(), span.first_char, cmp);
-    if (iter != ranges.end() && iter->start == span.first_char) {
-      iter->update_start = &localized.spans[i].first_char;
-    } else {
-      ranges.insert(iter, Range{span.first_char, false, &localized.spans[i].first_char, nullptr});
-    }
-
-    // Insert or update the Range marker for the end of this span.
-    iter = std::lower_bound(ranges.begin(), ranges.end(), span.last_char, cmp);
-    if (iter != ranges.end() && iter->start == span.last_char) {
-      iter->update_end = &localized.spans[i].last_char;
-    } else {
-      ranges.insert(iter, Range{span.last_char, false, nullptr, &localized.spans[i].last_char});
+  // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
+  for (UnifiedSpan& span : merged_spans) {
+    if (span.tag) {
+      localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
     }
   }
-
-  // Parts of the string may be untranslatable. Merge those ranges
-  // in as well, so that we have continuous sections of text to
-  // feed into the pseudolocalizer.
-  // We do this by marking the beginning of a range as either toggling
-  // the translatability state or not.
-  for (const UntranslatableSection& section : string->untranslatable_sections) {
-    auto iter = std::lower_bound(ranges.begin(), ranges.end(), section.start, cmp);
-    if (iter != ranges.end() && iter->start == section.start) {
-      // An existing span starts (or ends) here. We just need to mark that
-      // the translatability should toggle here. If translatability was
-      // already being toggled, then that means we have two adjacent ranges of untranslatable
-      // text, so remove the toggle and only toggle at the end of this range,
-      // effectively merging these ranges.
-      iter->toggle_translatability = !iter->toggle_translatability;
-    } else {
-      // Insert a new range that specifies to toggle the translatability.
-      iter = ranges.insert(iter, Range{section.start, true, nullptr, nullptr});
-    }
-
-    // Update/create an end to the untranslatable section.
-    iter = std::lower_bound(iter, ranges.end(), section.end, cmp);
-    if (iter != ranges.end() && iter->start == section.end) {
-      iter->toggle_translatability = true;
-    } else {
-      iter = ranges.insert(iter, Range{section.end, true, nullptr, nullptr});
-    }
-  }
-
-  localized.str += localizer.Start();
-
-  // Iterate over the ranges and localize each section.
-  // The text starts as translatable, and each time a range has toggle_translatability
-  // set to true, we toggle whether to translate or not.
-  // This assumes no untranslatable ranges overlap.
-  bool translatable = true;
-  for (size_t i = 0; i < ranges.size(); i++) {
-    const size_t start = ranges[i].start;
-    size_t len = original_text.size() - start;
-    if (i + 1 < ranges.size()) {
-      len = ranges[i + 1].start - start;
-    }
-
-    if (ranges[i].update_start) {
-      *ranges[i].update_start = localized.str.size();
-    }
-
-    if (ranges[i].update_end) {
-      *ranges[i].update_end = localized.str.size();
-    }
-
-    if (ranges[i].toggle_translatability) {
-      translatable = !translatable;
-    }
-
-    if (translatable) {
-      localized.str += localizer.Text(original_text.substr(start, len));
-    } else {
-      localized.str += original_text.substr(start, len);
-    }
-  }
-
-  localized.str += localizer.End();
-
   return util::make_unique<StyledString>(pool->MakeRef(localized));
 }
 
@@ -175,8 +233,7 @@
         if (sub_visitor.value) {
           localized->values[i] = std::move(sub_visitor.item);
         } else {
-          localized->values[i] =
-              std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
+          localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
         }
       }
     }
@@ -210,8 +267,7 @@
     }
     result += localizer_.End();
 
-    std::unique_ptr<String> localized =
-        util::make_unique<String>(pool_->MakeRef(result));
+    std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
     localized->SetSource(string->GetSource());
     localized->SetWeak(true);
     item = std::move(localized);
@@ -282,14 +338,10 @@
   }
 }
 
-/**
- * A value is pseudolocalizable if it does not define a locale (or is the
- * default locale)
- * and is translatable.
- */
+// A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
+// translatable.
 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
-  const int diff =
-      config_value->config.diff(ConfigDescription::DefaultConfig());
+  const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
   if (diff & ConfigDescription::CONFIG_LOCALE) {
     return false;
   }
@@ -298,19 +350,16 @@
 
 }  // namespace
 
-bool PseudolocaleGenerator::Consume(IAaptContext* context,
-                                    ResourceTable* table) {
+bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
   for (auto& package : table->packages) {
     for (auto& type : package->types) {
       for (auto& entry : type->entries) {
-        std::vector<ResourceConfigValue*> values =
-            entry->FindValuesIf(IsPseudolocalizable);
-
+        std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
         for (ResourceConfigValue* value : values) {
-          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value,
-                                 &table->string_pool, entry.get());
-          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value,
-                                 &table->string_pool, entry.get());
+          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
+                                 entry.get());
+          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
+                                 entry.get());
         }
       }
     }
diff --git a/tools/aapt2/compile/PseudolocaleGenerator_test.cpp b/tools/aapt2/compile/PseudolocaleGenerator_test.cpp
index 4db37db..b08e1da 100644
--- a/tools/aapt2/compile/PseudolocaleGenerator_test.cpp
+++ b/tools/aapt2/compile/PseudolocaleGenerator_test.cpp
@@ -25,7 +25,7 @@
   StringPool pool;
   StyleString original_style;
   original_style.str = "Hello world!";
-  original_style.spans = {Span{"b", 2, 3}, Span{"b", 6, 7}, Span{"i", 1, 10}};
+  original_style.spans = {Span{"i", 1, 10}, Span{"b", 2, 3}, Span{"b", 6, 7}};
 
   std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString(
       util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
@@ -34,22 +34,19 @@
   EXPECT_EQ(original_style.str, *new_string->value->str);
   ASSERT_EQ(original_style.spans.size(), new_string->value->spans.size());
 
-  EXPECT_EQ(std::string("He").size(), new_string->value->spans[0].first_char);
-  EXPECT_EQ(std::string("Hel").size(), new_string->value->spans[0].last_char);
-  EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name);
+  EXPECT_EQ(std::string("i"), *new_string->value->spans[0].name);
+  EXPECT_EQ(std::u16string(u"H").size(), new_string->value->spans[0].first_char);
+  EXPECT_EQ(std::u16string(u"Hello worl").size(), new_string->value->spans[0].last_char);
 
-  EXPECT_EQ(std::string("Hello ").size(),
-            new_string->value->spans[1].first_char);
-  EXPECT_EQ(std::string("Hello w").size(),
-            new_string->value->spans[1].last_char);
   EXPECT_EQ(std::string("b"), *new_string->value->spans[1].name);
+  EXPECT_EQ(std::u16string(u"He").size(), new_string->value->spans[1].first_char);
+  EXPECT_EQ(std::u16string(u"Hel").size(), new_string->value->spans[1].last_char);
 
-  EXPECT_EQ(std::string("H").size(), new_string->value->spans[2].first_char);
-  EXPECT_EQ(std::string("Hello worl").size(),
-            new_string->value->spans[2].last_char);
-  EXPECT_EQ(std::string("i"), *new_string->value->spans[2].name);
+  EXPECT_EQ(std::string("b"), *new_string->value->spans[2].name);
+  EXPECT_EQ(std::u16string(u"Hello ").size(), new_string->value->spans[2].first_char);
+  EXPECT_EQ(std::u16string(u"Hello w").size(), new_string->value->spans[2].last_char);
 
-  original_style.spans.push_back(Span{"em", 0, 11u});
+  original_style.spans.insert(original_style.spans.begin(), Span{"em", 0, 11u});
 
   new_string = PseudolocalizeStyledString(
       util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
@@ -58,23 +55,128 @@
   EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļð¡ one two]"), *new_string->value->str);
   ASSERT_EQ(original_style.spans.size(), new_string->value->spans.size());
 
-  EXPECT_EQ(std::string("[Ĥé").size(), new_string->value->spans[0].first_char);
-  EXPECT_EQ(std::string("[Ĥéļ").size(), new_string->value->spans[0].last_char);
+  EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char);
+  EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵöŕļð").size(), new_string->value->spans[0].last_char);
 
-  EXPECT_EQ(std::string("[Ĥéļļö ").size(),
+  EXPECT_EQ(std::u16string(u"[Ĥ").size(), new_string->value->spans[1].first_char);
+  EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵöŕļ").size(), new_string->value->spans[1].last_char);
+
+  EXPECT_EQ(std::u16string(u"[Ĥé").size(), new_string->value->spans[2].first_char);
+  EXPECT_EQ(std::u16string(u"[Ĥéļ").size(), new_string->value->spans[2].last_char);
+
+  EXPECT_EQ(std::u16string(u"[Ĥéļļö ").size(), new_string->value->spans[3].first_char);
+  EXPECT_EQ(std::u16string(u"[Ĥéļļö ŵ").size(), new_string->value->spans[3].last_char);
+}
+
+TEST(PseudolocaleGeneratorTest, PseudolocalizeAdjacentNestedTags) {
+  StringPool pool;
+  StyleString original_style;
+  original_style.str = "bold";
+  original_style.spans = {Span{"b", 0, 3}, Span{"i", 0, 3}};
+
+  std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString(
+      util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
+      Pseudolocalizer::Method::kAccent, &pool);
+  ASSERT_NE(nullptr, new_string);
+  ASSERT_EQ(2u, new_string->value->spans.size());
+  EXPECT_EQ(std::string("[ɓöļð one]"), *new_string->value->str);
+
+  EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name);
+  EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char);
+  EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[0].last_char);
+
+  EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name);
+  EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[1].first_char);
+  EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[1].last_char);
+}
+
+TEST(PseudolocaleGeneratorTest, PseudolocalizeAdjacentTagsUnsorted) {
+  StringPool pool;
+  StyleString original_style;
+  original_style.str = "bold";
+  original_style.spans = {Span{"i", 2, 3}, Span{"b", 0, 1}};
+
+  std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString(
+      util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
+      Pseudolocalizer::Method::kAccent, &pool);
+  ASSERT_NE(nullptr, new_string);
+  ASSERT_EQ(2u, new_string->value->spans.size());
+  EXPECT_EQ(std::string("[ɓöļð one]"), *new_string->value->str);
+
+  EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name);
+  EXPECT_EQ(std::u16string(u"[").size(), new_string->value->spans[0].first_char);
+  EXPECT_EQ(std::u16string(u"[ɓ").size(), new_string->value->spans[0].last_char);
+
+  EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name);
+  EXPECT_EQ(std::u16string(u"[ɓö").size(), new_string->value->spans[1].first_char);
+  EXPECT_EQ(std::u16string(u"[ɓöļ").size(), new_string->value->spans[1].last_char);
+}
+
+TEST(PseudolocaleGeneratorTest, PseudolocalizeNestedAndAdjacentTags) {
+  StringPool pool;
+  StyleString original_style;
+  original_style.str = "This sentence is not what you think it is at all.";
+  original_style.spans = {Span{"b", 16u, 19u}, Span{"em", 29u, 47u}, Span{"i", 38u, 40u},
+                          Span{"b", 44u, 47u}};
+
+  std::unique_ptr<StyledString> new_string = PseudolocalizeStyledString(
+      util::make_unique<StyledString>(pool.MakeRef(original_style)).get(),
+      Pseudolocalizer::Method::kAccent, &pool);
+  ASSERT_NE(nullptr, new_string);
+  ASSERT_EQ(4u, new_string->value->spans.size());
+  EXPECT_EQ(std::string(
+                "[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļļ. one two three four five six]"),
+            *new_string->value->str);
+
+  EXPECT_EQ(std::string("b"), *new_string->value->spans[0].name);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš").size(), new_string->value->spans[0].first_char);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñö").size(), new_string->value->spans[0].last_char);
+
+  EXPECT_EQ(std::string("em"), *new_string->value->spans[1].name);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû").size(),
             new_string->value->spans[1].first_char);
-  EXPECT_EQ(std::string("[Ĥéļļö ŵ").size(),
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļ").size(),
             new_string->value->spans[1].last_char);
 
-  EXPECT_EQ(std::string("[Ĥ").size(), new_string->value->spans[2].first_char);
-  EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļ").size(),
+  EXPECT_EQ(std::string("i"), *new_string->value->spans[2].name);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ").size(),
+            new_string->value->spans[2].first_char);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ î").size(),
             new_string->value->spans[2].last_char);
 
-  EXPECT_EQ(std::string("[").size(), new_string->value->spans[3].first_char);
-  EXPECT_EQ(std::string("[Ĥéļļö ŵöŕļð").size(),
+  EXPECT_EQ(std::string("b"), *new_string->value->spans[3].name);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ").size(),
+            new_string->value->spans[3].first_char);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šéñţéñçé îš ñöţ ŵĥåţ ýöû ţĥîñķ îţ îš åţ åļ").size(),
             new_string->value->spans[3].last_char);
 }
 
+TEST(PseudolocaleGeneratorTest, PseudolocalizePartsOfString) {
+  StringPool pool;
+  StyleString original_style;
+  original_style.str = "This should NOT be pseudolocalized.";
+  original_style.spans = {Span{"em", 4u, 14u}, Span{"i", 18u, 33u}};
+  std::unique_ptr<StyledString> original_string =
+      util::make_unique<StyledString>(pool.MakeRef(original_style));
+  original_string->untranslatable_sections = {UntranslatableSection{11u, 15u}};
+
+  std::unique_ptr<StyledString> new_string =
+      PseudolocalizeStyledString(original_string.get(), Pseudolocalizer::Method::kAccent, &pool);
+  ASSERT_NE(nullptr, new_string);
+  ASSERT_EQ(2u, new_string->value->spans.size());
+  EXPECT_EQ(std::string("[Ţĥîš šĥöûļð NOT ɓé þšéûðöļöçåļîžéð. one two three four]"),
+            *new_string->value->str);
+
+  EXPECT_EQ(std::string("em"), *new_string->value->spans[0].name);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš").size(), new_string->value->spans[0].first_char);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NO").size(), new_string->value->spans[0].last_char);
+
+  EXPECT_EQ(std::string("i"), *new_string->value->spans[1].name);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NOT ɓé").size(), new_string->value->spans[1].first_char);
+  EXPECT_EQ(std::u16string(u"[Ţĥîš šĥöûļð NOT ɓé þšéûðöļöçåļîžé").size(),
+            new_string->value->spans[1].last_char);
+}
+
 TEST(PseudolocaleGeneratorTest, PseudolocalizeOnlyDefaultConfigs) {
   std::unique_ptr<ResourceTable> table =
       test::ResourceTableBuilder()
@@ -138,7 +240,7 @@
   {
     StyleString original_style;
     original_style.str = "Hello world!";
-    original_style.spans = {Span{"b", 2, 3}, Span{"b", 6, 7}, Span{"i", 1, 10}};
+    original_style.spans = {Span{"i", 1, 10}, Span{"b", 2, 3}, Span{"b", 6, 7}};
 
     auto styled_string =
         util::make_unique<StyledString>(table->string_pool.MakeRef(original_style));