Combine image string char arrays into single array

Having one giant char array shared between all the strings saves memory
since it avoids the 12 bytes of array overhead per image string. Also
added substring finding based on prefixes, strings are added into the
array in reverse sorted length.

Image size goes from 11767808 -> 11100160.

Bug: 17643507

Change-Id: I2a7f177b40d0458d5c50640643d8f16b0030bdce

(cherry picked from commit 23c1d0ca7ab63f4adad88631bddefb769d0dcc2c)
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index b7283a4..6096625 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -72,7 +72,6 @@
     Thread::Current()->TransitionFromSuspendedToRunnable();
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
-    ComputeEagerResolvedStrings();
     Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -265,6 +264,152 @@
   return true;
 }
 
+// Count the number of strings in the heap and put the result in arg as a size_t pointer.
+static void CountStringsCallback(Object* obj, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (obj->GetClass()->IsStringClass()) {
+    ++*reinterpret_cast<size_t*>(arg);
+  }
+}
+
+// Collect all the java.lang.String in the heap and put them in the output strings_ array.
+class StringCollector {
+ public:
+  StringCollector(Handle<mirror::ObjectArray<mirror::String>> strings, size_t index)
+      : strings_(strings), index_(index) {
+  }
+  static void Callback(Object* obj, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    auto* collector = reinterpret_cast<StringCollector*>(arg);
+    if (obj->GetClass()->IsStringClass()) {
+      collector->strings_->SetWithoutChecks<false>(collector->index_++, obj->AsString());
+    }
+  }
+  size_t GetIndex() const {
+    return index_;
+  }
+
+ private:
+  Handle<mirror::ObjectArray<mirror::String>> strings_;
+  size_t index_;
+};
+
+// Compare strings based on length, used for sorting strings by length / reverse length.
+class StringLengthComparator {
+ public:
+  explicit StringLengthComparator(Handle<mirror::ObjectArray<mirror::String>> strings)
+      : strings_(strings) {
+  }
+  bool operator()(size_t a, size_t b) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return strings_->GetWithoutChecks(a)->GetLength() < strings_->GetWithoutChecks(b)->GetLength();
+  }
+
+ private:
+  Handle<mirror::ObjectArray<mirror::String>> strings_;
+};
+
+// If string a is a prefix of b or b is a prefix of a then they are considered equal. This
+// enables us to find prefixes instead of exact matches. Otherwise we do a normal string
+// comparison. The strings compared of the form <position, length> inside of the chars_ array.
+class SubstringComparator {
+ public:
+  explicit SubstringComparator(const std::vector<uint16_t>* const chars) : chars_(chars) {
+  }
+  bool operator()(const std::pair<size_t, size_t>& a, const std::pair<size_t, size_t>& b) {
+    size_t compare_length = std::min(a.second, b.second);
+    const uint16_t* ptr_a = &chars_->at(a.first);
+    const uint16_t* ptr_b = &chars_->at(b.first);
+    for (size_t i = 0; i < compare_length; ++i) {
+      if (ptr_a[i] != ptr_b[i]) {
+        return ptr_a[i] < ptr_b[i];
+      }
+    }
+    return false;
+  }
+
+ private:
+  const std::vector<uint16_t>* const chars_;
+};
+
+void ImageWriter::ProcessStrings() {
+  size_t total_strings = 0;
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  ClassLinker* cl = Runtime::Current()->GetClassLinker();
+  {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    heap->VisitObjects(CountStringsCallback, &total_strings);  // Count the strings.
+  }
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  auto strings = hs.NewHandle(cl->AllocStringArray(self, total_strings));
+  StringCollector string_collector(strings, 0U);
+  {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    // Read strings into the array.
+    heap->VisitObjects(StringCollector::Callback, &string_collector);
+  }
+  // Some strings could have gotten freed if AllocStringArray caused a GC.
+  CHECK_LE(string_collector.GetIndex(), total_strings);
+  total_strings = string_collector.GetIndex();
+  size_t total_length = 0;
+  std::vector<size_t> reverse_sorted_strings;
+  for (size_t i = 0; i < total_strings; ++i) {
+    mirror::String* s = strings->GetWithoutChecks(i);
+    // Look up the string in the array.
+    total_length += s->GetLength();
+    reverse_sorted_strings.push_back(i);
+  }
+  // Sort by reverse length.
+  StringLengthComparator comparator(strings);
+  std::sort(reverse_sorted_strings.rbegin(), reverse_sorted_strings.rend(), comparator);
+  // Deduplicate prefixes and add strings to the char array.
+  std::vector<uint16_t> combined_chars(total_length, 0U);
+  size_t num_chars = 0;
+  // Characters of strings which are non equal prefix of another string (not the same string).
+  // We don't count the savings from equal strings since these would get interned later anyways.
+  size_t prefix_saved_chars = 0;
+  std::set<std::pair<size_t, size_t>, SubstringComparator> existing_strings((
+      SubstringComparator(&combined_chars)));
+  for (size_t i = 0; i < total_strings; ++i) {
+    mirror::String* s = strings->GetWithoutChecks(reverse_sorted_strings[i]);
+    // Add the string to the end of the char array.
+    size_t length = s->GetLength();
+    for (size_t j = 0; j < length; ++j) {
+      combined_chars[num_chars++] = s->CharAt(j);
+    }
+    // Try to see if the string exists as a prefix of an existing string.
+    size_t new_offset = 0;
+    std::pair<size_t, size_t> new_string(num_chars - length, length);
+    auto it = existing_strings.find(new_string);
+    if (it != existing_strings.end()) {
+      for (size_t j = 0; j < length; ++j) {
+        DCHECK_EQ(combined_chars[it->first + j], s->CharAt(j));
+      }
+      // Shares a prefix, set the offset to where the new offset will be.
+      new_offset = it->first;
+      // Remove the added chars.
+      num_chars -= length;
+      if (it->second != length) {
+        prefix_saved_chars += length;
+      }
+    } else {
+      new_offset = new_string.first;
+      existing_strings.insert(new_string);
+    }
+    s->SetOffset(new_offset);
+  }
+  // Allocate and update the char arrays.
+  auto* array = mirror::CharArray::Alloc(self, num_chars);
+  for (size_t i = 0; i < num_chars; ++i) {
+    array->SetWithoutChecks<false>(i, combined_chars[i]);
+  }
+  for (size_t i = 0; i < total_strings; ++i) {
+    strings->GetWithoutChecks(i)->SetArray(array);
+  }
+  VLOG(compiler) << "Total # image strings=" << total_strings << " combined length="
+      << total_length << " prefix saved chars=" << prefix_saved_chars;
+  ComputeEagerResolvedStrings();
+}
+
 void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) {
   if (!obj->GetClass()->IsStringClass()) {
     return;
@@ -293,7 +438,7 @@
   }
 }
 
-void ImageWriter::ComputeEagerResolvedStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ImageWriter::ComputeEagerResolvedStrings() {
   ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
 }
@@ -364,8 +509,7 @@
   return true;
 }
 
-void ImageWriter::CheckNonImageClassesRemoved()
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ImageWriter::CheckNonImageClassesRemoved() {
   if (compiler_driver_.GetImageClasses() != nullptr) {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -587,9 +731,7 @@
                                     compile_pic_);
 }
 
-
-void ImageWriter::CopyAndFixupObjects()
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ImageWriter::CopyAndFixupObjects() {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "ImageWriter");
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // TODO: heap validation can't handle this fix up pass
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index b0cf2b2..1217145 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -26,12 +26,12 @@
 
 #include "base/macros.h"
 #include "driver/compiler_driver.h"
+#include "gc/space/space.h"
 #include "mem_map.h"
 #include "oat_file.h"
 #include "mirror/dex_cache.h"
 #include "os.h"
 #include "safe_map.h"
-#include "gc/space/space.h"
 
 namespace art {
 
@@ -136,13 +136,16 @@
   static void ComputeEagerResolvedStringsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Combine string char arrays.
+  void ProcessStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Remove unwanted classes from various roots.
   void PruneNonImageClasses() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static bool NonImageClassesVisitor(mirror::Class* c, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Verify unwanted classes removed.
-  void CheckNonImageClassesRemoved();
+  void CheckNonImageClassesRemoved() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void CheckNonImageClassesRemovedCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -164,7 +167,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Creates the contiguous image in memory and adjusts pointers.
-  void CopyAndFixupObjects();
+  void CopyAndFixupObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FixupMethod(mirror::ArtMethod* orig, mirror::ArtMethod* copy)
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 64408a6..30b8aa3 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -109,6 +109,14 @@
 
   int32_t CompareTo(String* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Offset is only used during testing so use non-transactional mode.
+    DCHECK_LE(0, new_offset);
+    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset);
+  }
+
+  void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static Class* GetJavaLangString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!java_lang_String_.IsNull());
     return java_lang_String_.Read();
@@ -134,21 +142,12 @@
     SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
   }
 
-  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Offset is only used during testing so use non-transactional mode.
-    DCHECK_LE(0, new_offset);
-    DCHECK_GE(GetLength(), new_offset);
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset);
-  }
-
   static String* Alloc(Thread* self, int32_t utf16_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static String* Alloc(Thread* self, Handle<CharArray> array)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<CharArray> array_;