Merge remote-tracking branch 'goog/androidx-platform-dev' am: 0dc99ac82a am: 37d4fcdbfe

Original change: https://googleplex-android-review.googlesource.com/c/platform/external/icing/+/15157372

Change-Id: Ia0f9c71a5856f8bd7499c59801bc4da2f7493b17
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 9ccd81b..b2b37e8 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -80,23 +80,6 @@
 namespace icing {
 namespace lib {
 
-namespace {
-
-bool IsEmptyBuffer(const char* buffer, int size) {
-  return std::all_of(buffer, buffer + size,
-                     [](const char byte) { return byte == 0; });
-}
-
-// Helper function to get stored proto size from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
-
-// Helper function to get stored proto magic from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
-
-}  // namespace
-
 template <typename ProtoT>
 class FileBackedProtoLog {
  public:
@@ -402,6 +385,28 @@
       const Filesystem* filesystem, const std::string& file_path,
       Crc32 initial_crc, int64_t start, int64_t end);
 
+  static bool IsEmptyBuffer(const char* buffer, int size) {
+    return std::all_of(buffer, buffer + size,
+                       [](const char byte) { return byte == 0; });
+  }
+
+  // Helper function to get stored proto size from the metadata.
+  // Metadata format: 8 bits magic + 24 bits size
+  static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
+
+  // Helper function to get stored proto magic from the metadata.
+  // Metadata format: 8 bits magic + 24 bits size
+  static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
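+  // As an example with illustrative values: metadata 0x5C000010 corresponds
+  // to a magic of 0x5C (GetProtoMagic) and a proto size of 16 (GetProtoSize).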
+
+  // Reads out the metadata of a proto located at file_offset from the file.
+  //
+  // Returns:
+  //   Proto's metadata on success
+  //   OUT_OF_RANGE_ERROR if file_offset exceeds file_size
+  //   INTERNAL_ERROR if the metadata is invalid or any IO errors happen
+  static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
+      MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
+
   // Magic number added in front of every proto. Used when reading out protos
   // as a first check for corruption in each entry in the file. Even if there is
   // a corruption, the best we can do is roll back to our last recovery point
@@ -429,15 +434,6 @@
   ScopedFd fd_;
   const Filesystem* const filesystem_;
   const std::string file_path_;
-
-  // Reads out the metadata of a proto located at file_offset from the file.
-  //
-  // Returns:
-  //   Proto's metadata on success
-  //   OUT_OF_RANGE_ERROR if file_offset exceeds file_size
-  //   INTERNAL_ERROR if the metadata is invalid or any IO errors happen
-  static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
-      MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
   std::unique_ptr<Header> header_;
 };
 
@@ -573,6 +569,7 @@
   ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum,
                          ComputeChecksum(filesystem, file_path, Crc32(),
                                          sizeof(Header), file_size));
+
   // Double check that the log checksum is the same as the one that was
   // persisted last time. If not, we start recovery logic.
   if (header->log_checksum != calculated_log_checksum.Get()) {
diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc
index 766cc64..c09fd5a 100644
--- a/icing/file/file-backed-proto-log_benchmark.cc
+++ b/icing/file/file-backed-proto-log_benchmark.cc
@@ -164,6 +164,46 @@
                               // 16MiB, and we need some extra space for the
                               // rest of the document properties
 
+static void BM_Erase(benchmark::State& state) {
+  const Filesystem filesystem;
+  const std::string file_path = IcingStringUtil::StringPrintf(
+      "%s%s", GetTestTempDir().c_str(), "/proto.log");
+  int max_proto_size = (1 << 24) - 1;  // 16 MiB
+  bool compress = true;
+
+  // Make sure it doesn't already exist.
+  filesystem.DeleteFile(file_path.c_str());
+
+  auto proto_log =
+      FileBackedProtoLog<DocumentProto>::Create(
+          &filesystem, file_path,
+          FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size))
+          .ValueOrDie()
+          .proto_log;
+
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+  std::default_random_engine random;
+  const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random);
+
+  auto document_properties = document.add_properties();
+  document_properties->set_name("string property");
+  document_properties->add_string_values(rand_str);
+
+  for (auto _ : state) {
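+    // Each iteration erases a freshly written proto; the write stays outside
+    // the timed region so that only EraseProto() is measured.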
+    state.PauseTiming();
+    ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+                               proto_log->WriteProto(document));
+    state.ResumeTiming();
+
+    testing::DoNotOptimize(proto_log->EraseProto(write_offset));
+  }
+
+  // Cleanup after ourselves
+  filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Erase);
+
 static void BM_ComputeChecksum(benchmark::State& state) {
   const Filesystem filesystem;
   const std::string file_path = GetTestTempDir() + "/proto.log";
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
index 000ab3d..825b763 100644
--- a/icing/file/portable-file-backed-proto-log.h
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -83,28 +83,6 @@
 namespace icing {
 namespace lib {
 
-namespace {
-
-// Number of bytes we reserve for the heading at the beginning of the proto log.
-// We reserve this so the header can grow without running into the contents of
-// the proto log, triggering an unnecessary migration of the data.
-constexpr int kHeaderReservedBytes = 256;
-
-bool IsEmptyBuffer(const char* buffer, int size) {
-  return std::all_of(buffer, buffer + size,
-                     [](const char byte) { return byte == 0; });
-}
-
-// Helper function to get stored proto size from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
-
-// Helper function to get stored proto magic from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
-
-}  // namespace
-
 template <typename ProtoT>
 class PortableFileBackedProtoLog {
  public:
@@ -135,6 +113,11 @@
         : compress(compress_in), max_proto_size(max_proto_size_in) {}
   };
 
+  // Number of bytes we reserve for the heading at the beginning of the proto
+  // log. We reserve this so the header can grow without running into the
+  // contents of the proto log, triggering an unnecessary migration of the data.
+  static constexpr int kHeaderReservedBytes = 256;
+
   // Header stored at the beginning of the file before the rest of the log
   // contents. Stores metadata on the log.
   class Header {
@@ -541,6 +524,19 @@
   static libtextclassifier3::Status WriteProtoMetadata(
       const Filesystem* filesystem, int fd, int32_t host_order_metadata);
 
+  static bool IsEmptyBuffer(const char* buffer, int size) {
+    return std::all_of(buffer, buffer + size,
+                       [](const char byte) { return byte == 0; });
+  }
+
+  // Helper function to get stored proto size from the metadata.
+  // Metadata format: 8 bits magic + 24 bits size
+  static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
+
+  // Helper function to get stored proto magic from the metadata.
+  // Metadata format: 8 bits magic + 24 bits size
+  static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
+
   // Magic number added in front of every proto. Used when reading out protos
   // as a first check for corruption in each entry in the file. Even if there is
   // a corruption, the best we can do is roll back to our last recovery point
diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc
index b1dfe12..04ccab0 100644
--- a/icing/file/portable-file-backed-proto-log_benchmark.cc
+++ b/icing/file/portable-file-backed-proto-log_benchmark.cc
@@ -163,6 +163,46 @@
     ->Arg(15 * 1024 * 1024);  // We do 15MiB here since our max proto size is
                               // 16MiB, and we need some extra space for the
                               // rest of the document properties
+
+static void BM_Erase(benchmark::State& state) {
+  const Filesystem filesystem;
+  const std::string file_path = IcingStringUtil::StringPrintf(
+      "%s%s", GetTestTempDir().c_str(), "/proto.log");
+  int max_proto_size = (1 << 24) - 1;  // 16 MiB
+  bool compress = true;
+
+  // Make sure it doesn't already exist.
+  filesystem.DeleteFile(file_path.c_str());
+
+  auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+                       &filesystem, file_path,
+                       PortableFileBackedProtoLog<DocumentProto>::Options(
+                           compress, max_proto_size))
+                       .ValueOrDie()
+                       .proto_log;
+
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+  std::default_random_engine random;
+  const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random);
+
+  auto document_properties = document.add_properties();
+  document_properties->set_name("string property");
+  document_properties->add_string_values(rand_str);
+
+  for (auto _ : state) {
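+    // Keep the write outside the timed region so that only EraseProto() is
+    // measured.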
+    state.PauseTiming();
+    ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+                               proto_log->WriteProto(document));
+    state.ResumeTiming();
+
+    testing::DoNotOptimize(proto_log->EraseProto(write_offset));
+  }
+
+  // Cleanup after ourselves
+  filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Erase);
 
 static void BM_ComputeChecksum(benchmark::State& state) {
   const Filesystem filesystem;
diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc
index 69b8a1a..b5fee4b 100644
--- a/icing/file/portable-file-backed-proto-log_test.cc
+++ b/icing/file/portable-file-backed-proto-log_test.cc
@@ -113,7 +113,8 @@
 
   // With no protos written yet, the log should be minimum the size of the
   // reserved header space.
-  ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()), kHeaderReservedBytes);
+  ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()),
+            PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes);
 }
 
 TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) {
@@ -417,8 +418,9 @@
 
     // We still have the corrupted content in our file, we didn't throw
     // everything out.
-    EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
-                Gt(kHeaderReservedBytes));
+    EXPECT_THAT(
+        filesystem_.GetFileSize(file_path_.c_str()),
+        Gt(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
   }
 }
 
@@ -456,9 +458,10 @@
     DocumentProto document =
         DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
     std::string serialized_document = document.SerializeAsString();
-    ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), kHeaderReservedBytes,
-                                   serialized_document.data(),
-                                   serialized_document.size()));
+    ASSERT_TRUE(filesystem_.PWrite(
+        file_path_.c_str(),
+        PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes,
+        serialized_document.data(), serialized_document.size()));
 
     Header header = ReadHeader(filesystem_, file_path_);
 
@@ -484,8 +487,9 @@
     EXPECT_TRUE(create_result.recalculated_checksum);
 
     // We lost everything, file size is back down to the header.
-    EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
-                Eq(kHeaderReservedBytes));
+    EXPECT_THAT(
+        filesystem_.GetFileSize(file_path_.c_str()),
+        Eq(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
 
     // At least the log is no longer dirty.
     Header header = ReadHeader(filesystem_, file_path_);
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index b437724..316b74f 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -577,6 +577,120 @@
 // cap the limit to 1 << 18.
 BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18);
 
+// This is different from BM_RepeatedPut since we're just trying to benchmark
+// one Put call, not thousands of them at once.
+void BM_Put(benchmark::State& state) {
+  // Initialize the filesystem
+  std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+  Filesystem filesystem;
+  DestructibleDirectory ddir(filesystem, test_dir);
+
+  // Create the schema.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+          .Build();
+
+  // Create the index.
+  IcingSearchEngineOptions options;
+  options.set_base_dir(test_dir);
+  options.set_index_merge_size(kIcingFullIndexSize);
+  std::unique_ptr<IcingSearchEngine> icing =
+      std::make_unique<IcingSearchEngine>(options);
+
+  ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+  // Create a document
+  DocumentProto document = DocumentBuilder()
+                               .SetSchema("Message")
+                               .SetNamespace("namespace")
+                               .SetUri("uri")
+                               .Build();
+
+  for (auto s : state) {
+    benchmark::DoNotOptimize(icing->Put(document));
+  }
+}
+BENCHMARK(BM_Put);
+
+void BM_Get(benchmark::State& state) {
+  // Initialize the filesystem
+  std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+  Filesystem filesystem;
+  DestructibleDirectory ddir(filesystem, test_dir);
+
+  // Create the schema.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+          .Build();
+
+  // Create the index.
+  IcingSearchEngineOptions options;
+  options.set_base_dir(test_dir);
+  options.set_index_merge_size(kIcingFullIndexSize);
+  std::unique_ptr<IcingSearchEngine> icing =
+      std::make_unique<IcingSearchEngine>(options);
+
+  ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+  // Create a document
+  DocumentProto document = DocumentBuilder()
+                               .SetSchema("Message")
+                               .SetNamespace("namespace")
+                               .SetUri("uri")
+                               .Build();
+
+  ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+  for (auto s : state) {
+    benchmark::DoNotOptimize(
+        icing->Get("namespace", "uri", GetResultSpecProto::default_instance()));
+  }
+}
+BENCHMARK(BM_Get);
+
+void BM_Delete(benchmark::State& state) {
+  // Initialize the filesystem
+  std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+  Filesystem filesystem;
+  DestructibleDirectory ddir(filesystem, test_dir);
+
+  // Create the schema.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+          .Build();
+
+  // Create the index.
+  IcingSearchEngineOptions options;
+  options.set_base_dir(test_dir);
+  options.set_index_merge_size(kIcingFullIndexSize);
+  std::unique_ptr<IcingSearchEngine> icing =
+      std::make_unique<IcingSearchEngine>(options);
+
+  ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+  // Create a document
+  DocumentProto document = DocumentBuilder()
+                               .SetSchema("Message")
+                               .SetNamespace("namespace")
+                               .SetUri("uri")
+                               .Build();
+
+  ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+  for (auto s : state) {
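+    // Re-insert the document outside the timed region so that every iteration
+    // has a live document to delete.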
+    state.PauseTiming();
+    icing->Put(document);
+    state.ResumeTiming();
+
+    benchmark::DoNotOptimize(icing->Delete("namespace", "uri"));
+  }
+}
+BENCHMARK(BM_Delete);
+
 }  // namespace
 
 }  // namespace lib
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index c1de0f0..752e0e2 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -42,6 +42,7 @@
 #include "icing/schema-builder.h"
 #include "icing/schema/schema-store.h"
 #include "icing/schema/section.h"
+#include "icing/store/document-log-creator.h"
 #include "icing/testing/common-matchers.h"
 #include "icing/testing/fake-clock.h"
 #include "icing/testing/jni-test-helpers.h"
@@ -100,9 +101,26 @@
 constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE =
     StringIndexingConfig_TokenizerType_Code_NONE;
 
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
 constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
 constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN;
 
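+// Test helpers that read and write the portable document log's header directly
+// from disk so tests can inspect and modify it.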
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+    Filesystem filesystem, const std::string& file_path) {
+  PortableFileBackedProtoLog<DocumentWrapper>::Header header;
+  filesystem.PRead(file_path.c_str(), &header,
+                   sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
+                   /*offset=*/0);
+  return header;
+}
+
+void WriteDocumentLogHeader(
+    Filesystem filesystem, const std::string& file_path,
+    PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+  filesystem.Write(file_path.c_str(), &header,
+                   sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
+}
+
 // For mocking purpose, we allow tests to provide a custom Filesystem.
 class TestIcingSearchEngine : public IcingSearchEngine {
  public:
@@ -990,7 +1008,8 @@
               HasSubstr("'Photo' not found"));
 }
 
-TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
+TEST_F(IcingSearchEngineTest,
+       SetSchemaTriggersIndexRestorationAndReturnsOk) {
   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
 
@@ -2074,7 +2093,8 @@
     // Deletes document1
     ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
     const std::string document_log_path =
-        icing_options.base_dir() + "/document_dir/document_log";
+        icing_options.base_dir() + "/document_dir/" +
+        DocumentLogCreator::GetDocumentLogFilename();
     int64_t document_log_size_before =
         filesystem()->GetFileSize(document_log_path.c_str());
     ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
@@ -3438,8 +3458,8 @@
         EqualsProto(expected_get_result_proto));
   }  // This should shut down IcingSearchEngine and persist anything it needs to
 
-  const std::string document_log_file =
-      absl_ports::StrCat(GetDocumentDir(), "/document_log");
+  const std::string document_log_file = absl_ports::StrCat(
+      GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
   const std::string corrupt_data = "1234";
   EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
                                   corrupt_data.data(), corrupt_data.size()));
@@ -5616,15 +5636,16 @@
 
   // 2. Delete the last document from the document log
   {
-    const std::string document_log_file =
-        absl_ports::StrCat(GetDocumentDir(), "/document_log");
+    const std::string document_log_file = absl_ports::StrCat(
+        GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
     filesystem()->DeleteFile(document_log_file.c_str());
-    ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
-                               FileBackedProtoLog<DocumentWrapper>::Create(
-                                   filesystem(), document_log_file.c_str(),
-                                   FileBackedProtoLog<DocumentWrapper>::Options(
-                                       /*compress_in=*/true)));
-    std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log =
+    ICING_ASSERT_OK_AND_ASSIGN(
+        auto create_result,
+        PortableFileBackedProtoLog<DocumentWrapper>::Create(
+            filesystem(), document_log_file.c_str(),
+            PortableFileBackedProtoLog<DocumentWrapper>::Options(
+                /*compress_in=*/true)));
+    std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
         std::move(create_result.proto_log);
 
     document = DocumentBuilder(document).SetUri("fake_type/0").Build();
@@ -5689,15 +5710,16 @@
 
   // 2. Delete the last two documents from the document log.
   {
-    const std::string document_log_file =
-        absl_ports::StrCat(GetDocumentDir(), "/document_log");
+    const std::string document_log_file = absl_ports::StrCat(
+        GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
     filesystem()->DeleteFile(document_log_file.c_str());
-    ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
-                               FileBackedProtoLog<DocumentWrapper>::Create(
-                                   filesystem(), document_log_file.c_str(),
-                                   FileBackedProtoLog<DocumentWrapper>::Options(
-                                       /*compress_in=*/true)));
-    std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log =
+    ICING_ASSERT_OK_AND_ASSIGN(
+        auto create_result,
+        PortableFileBackedProtoLog<DocumentWrapper>::Create(
+            filesystem(), document_log_file.c_str(),
+            PortableFileBackedProtoLog<DocumentWrapper>::Options(
+                /*compress_in=*/true)));
+    std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
         std::move(create_result.proto_log);
 
     document = DocumentBuilder(document).SetUri("fake_type/0").Build();
@@ -5994,8 +6016,8 @@
     // Append a non-checksummed document. This will mess up the checksum of the
     // proto log, forcing it to rewind and later return a DATA_LOSS error.
     const std::string serialized_document = document.SerializeAsString();
-    const std::string document_log_file =
-        absl_ports::StrCat(GetDocumentDir(), "/document_log");
+    const std::string document_log_file = absl_ports::StrCat(
+        GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
 
     int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
     filesystem()->PWrite(document_log_file.c_str(), file_size,
@@ -6045,31 +6067,47 @@
                                 .SetSchema("Message")
                                 .AddStringProperty("body", "message body")
                                 .Build();
+
+  const std::string document_log_file = absl_ports::StrCat(
+      GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+  int64_t corruptible_offset;
+
   {
     // Initialize and put a document.
     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // There's some space at the beginning of the file (e.g. the header and
+    // magic number) that is necessary to initialize the
+    // PortableFileBackedProtoLog. We can't corrupt that region, so we need to
+    // figure out the offset at which documents will be written - which is the
+    // file size right after initialization.
+    corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str());
+
     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
     EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
   }
 
   {
-    // Modify the document log checksum to trigger a complete document log
-    // rewind.
-    const std::string document_log_file =
-        absl_ports::StrCat(GetDocumentDir(), "/document_log");
+    // "Corrupt" the content written in the log. Make the corrupt document
+    // smaller than our original one so we don't accidentally write past our
+    // file.
+    DocumentProto document =
+        DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+    std::string serialized_document = document.SerializeAsString();
+    ASSERT_TRUE(filesystem()->PWrite(
+        document_log_file.c_str(), corruptible_offset,
+        serialized_document.data(), serialized_document.size()));
 
-    FileBackedProtoLog<DocumentWrapper>::Header document_log_header;
-    filesystem()->PRead(document_log_file.c_str(), &document_log_header,
-                        sizeof(FileBackedProtoLog<DocumentWrapper>::Header),
-                        /*offset=*/0);
-    // Set a garbage checksum.
-    document_log_header.log_checksum = 10;
-    document_log_header.header_checksum =
-        document_log_header.CalculateHeaderChecksum();
-    filesystem()->PWrite(document_log_file.c_str(), /*offset=*/0,
-                         &document_log_header,
-                         sizeof(FileBackedProtoLog<DocumentWrapper>::Header));
+    PortableFileBackedProtoLog<DocumentWrapper>::Header header =
+        ReadDocumentLogHeader(*filesystem(), document_log_file);
+
+    // Set dirty bit to true to reflect that something changed in the log.
+    header.SetDirtyFlag(true);
+    header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+    WriteDocumentLogHeader(*filesystem(), document_log_file, header);
   }
 
   {
@@ -7182,6 +7220,177 @@
   EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
 }
 
+// We skip this test case when we're running in a jni_test since the data files
+// will be stored in the android-instrumented storage location, rather than the
+// normal cc_library runfiles directory. To get that storage location, it's
+// recommended to use the TestStorage APIs, which handle the differences across
+// API levels, absolute vs. relative paths, etc. Since that's only accessible on
+// the java-side, and I haven't figured out a way to pass that directory path to
+// this native side yet, we're just going to disable this. The functionality is
+// already well-tested across 4 different emulated OS's so we're not losing much
+// test coverage here.
+#ifndef ICING_JNI_TEST
+// Disable backwards compat test. This test is enabled in google3, but disabled
+// in jetpack/framework because we didn't want to keep the binary testdata files
+// in our repo.
+#define DISABLE_BACKWARDS_COMPAT_TEST
+#ifndef DISABLE_BACKWARDS_COMPAT_TEST
+TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) {
+  // Copy the testdata files into our IcingSearchEngine directory
+  std::string dir_without_portable_log;
+  if (IsAndroidX86()) {
+    dir_without_portable_log = GetTestFilePath(
+        "icing/testdata/not_portable_log/"
+        "icing_search_engine_android_x86");
+  } else if (IsAndroidArm()) {
+    dir_without_portable_log = GetTestFilePath(
+        "icing/testdata/not_portable_log/"
+        "icing_search_engine_android_arm");
+  } else if (IsIosPlatform()) {
+    dir_without_portable_log = GetTestFilePath(
+        "icing/testdata/not_portable_log/"
+        "icing_search_engine_ios");
+  } else {
+    dir_without_portable_log = GetTestFilePath(
+        "icing/testdata/not_portable_log/"
+        "icing_search_engine_linux");
+  }
+
+  // Create dst directory that we'll initialize the IcingSearchEngine over.
+  std::string base_dir = GetTestBaseDir() + "_migrate";
+  ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true);
+  ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true);
+
+  ASSERT_TRUE(filesystem()->CopyDirectory(dir_without_portable_log.c_str(),
+                                          base_dir.c_str(),
+                                          /*recursive=*/true));
+
+  IcingSearchEngineOptions icing_options;
+  icing_options.set_base_dir(base_dir);
+
+  IcingSearchEngine icing(icing_options, GetTestJniCache());
+  InitializeResultProto init_result = icing.Initialize();
+  EXPECT_THAT(init_result.status(), ProtoIsOk());
+  EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+              Eq(InitializeStatsProto::NO_DATA_LOSS));
+  EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+              Eq(InitializeStatsProto::NONE));
+  EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+              Eq(InitializeStatsProto::NONE));
+  EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+              Eq(InitializeStatsProto::NONE));
+
+  // Set up schema, this is the one used to validate documents in the testdata
+  // files. Do not change unless you're also updating the testdata files.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("email")
+                       .AddProperty(
+                           PropertyConfigBuilder()
+                               .SetName("subject")
+                               .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+                               .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(
+                           PropertyConfigBuilder()
+                               .SetName("body")
+                               .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+                               .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+
+  // Make sure our schema is still the same as we expect. If not, there's
+  // definitely no way we're getting the documents back that we expect.
+  GetSchemaResultProto expected_get_schema_result_proto;
+  expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_schema_result_proto.mutable_schema() = schema;
+  ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+
+  // These are the documents that are stored in the testdata files. Do not
+  // change unless you're also updating the testdata files.
+  DocumentProto document1 = DocumentBuilder()
+                                .SetKey("namespace1", "uri1")
+                                .SetSchema("email")
+                                .SetCreationTimestampMs(10)
+                                .AddStringProperty("subject", "foo")
+                                .AddStringProperty("body", "bar")
+                                .Build();
+
+  DocumentProto document2 = DocumentBuilder()
+                                .SetKey("namespace1", "uri2")
+                                .SetSchema("email")
+                                .SetCreationTimestampMs(20)
+                                .SetScore(321)
+                                .AddStringProperty("body", "baz bat")
+                                .Build();
+
+  DocumentProto document3 = DocumentBuilder()
+                                .SetKey("namespace2", "uri1")
+                                .SetSchema("email")
+                                .SetCreationTimestampMs(30)
+                                .SetScore(123)
+                                .AddStringProperty("subject", "phoo")
+                                .Build();
+
+  // Document 1 and 3 were put normally, and document 2 was deleted in our
+  // testdata files.
+  EXPECT_THAT(icing
+                  .Get(document1.namespace_(), document1.uri(),
+                       GetResultSpecProto::default_instance())
+                  .document(),
+              EqualsProto(document1));
+  EXPECT_THAT(icing
+                  .Get(document2.namespace_(), document2.uri(),
+                       GetResultSpecProto::default_instance())
+                  .status(),
+              ProtoStatusIs(StatusProto::NOT_FOUND));
+  EXPECT_THAT(icing
+                  .Get(document3.namespace_(), document3.uri(),
+                       GetResultSpecProto::default_instance())
+                  .document(),
+              EqualsProto(document3));
+
+  // Searching for "foo" should get us document1.
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("foo");
+
+  SearchResultProto expected_document1;
+  expected_document1.mutable_status()->set_code(StatusProto::OK);
+  *expected_document1.mutable_results()->Add()->mutable_document() = document1;
+
+  SearchResultProto actual_results =
+      icing.Search(search_spec, GetDefaultScoringSpec(),
+                   ResultSpecProto::default_instance());
+  EXPECT_THAT(actual_results,
+              EqualsSearchResultIgnoreStatsAndScores(expected_document1));
+
+  // Searching for "baz" would've gotten us document2, except it got deleted.
+  // Make sure that it's cleared from our index too.
+  search_spec.set_query("baz");
+
+  SearchResultProto expected_no_documents;
+  expected_no_documents.mutable_status()->set_code(StatusProto::OK);
+
+  actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                                ResultSpecProto::default_instance());
+  EXPECT_THAT(actual_results,
+              EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
+
+  // Searching for "phoo" should get us document3.
+  search_spec.set_query("phoo");
+
+  SearchResultProto expected_document3;
+  expected_document3.mutable_status()->set_code(StatusProto::OK);
+  *expected_document3.mutable_results()->Add()->mutable_document() = document3;
+
+  actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+                                ResultSpecProto::default_instance());
+  EXPECT_THAT(actual_results,
+              EqualsSearchResultIgnoreStatsAndScores(expected_document3));
+}
+#endif  // DISABLE_BACKWARDS_COMPAT_TEST
+#endif  // !ICING_JNI_TEST
+
 }  // namespace
 }  // namespace lib
 }  // namespace icing
diff --git a/icing/portable/platform.h b/icing/portable/platform.h
index 8712835..150eede 100644
--- a/icing/portable/platform.h
+++ b/icing/portable/platform.h
@@ -34,11 +34,19 @@
   return false;
 }
 
-// Whether the running test is an Android test.
-inline bool IsAndroidPlatform() {
-#if defined(__ANDROID__)
+// Whether we're running on android_x86
+inline bool IsAndroidX86() {
+#if defined(__ANDROID__) && defined(__i386__)
   return true;
-#endif  // defined(__ANDROID__)
+#endif  // defined(__ANDROID__) && defined(__i386__)
+  return false;
+}
+
+// Whether we're running on android_armeabi-v7a
+inline bool IsAndroidArm() {
+#if defined(__ANDROID__) && defined(__arm__)
+  return true;
+#endif  // defined(__ANDROID__) && defined(__arm__)
   return false;
 }
 
diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc
new file mode 100644
index 0000000..a035f93
--- /dev/null
+++ b/icing/store/document-log-creator.cc
@@ -0,0 +1,206 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/document-log-creator.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/logging.h"
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/annotate.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-proto-log.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Base name of the document log file; newer log versions append a suffix.
+constexpr char kDocumentLogFilename[] = "document_log";
+
+std::string DocumentLogFilenameV0() {
+  // Originally only had this one version, no suffix.
+  return kDocumentLogFilename;
+}
+
+std::string DocumentLogFilenameV1() {
+  return absl_ports::StrCat(kDocumentLogFilename, "_v1");
+}
+
+std::string MakeDocumentLogFilenameV0(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0());
+}
+
+std::string MakeDocumentLogFilenameV1(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1());
+}
+
+}  // namespace
+
+std::string DocumentLogCreator::GetDocumentLogFilename() {
+  // This should always return the latest version of the document log in use.
+  // The current latest version is V1.
+  return DocumentLogFilenameV1();
+}
+
+libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
+DocumentLogCreator::Create(const Filesystem* filesystem,
+                           const std::string& base_dir) {
+  bool v0_exists =
+      filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
+  bool regen_derived_files = false;
+
+#ifdef ENABLE_V1_MIGRATION
+  bool v1_exists =
+      filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str());
+
+  if (v0_exists && !v1_exists) {
+    ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir));
+
+    // Need to regenerate derived files since documents may be written to a
+    // different file offset in the log.
+    regen_derived_files = true;
+  } else if (!v1_exists) {
+    // First time initializing a v1 log. There are no existing derived files at
+    // this point, so we should generate some. "regenerate" here also means
+    // "generate for the first time", i.e. we shouldn't expect there to be any
+    // existing derived files.
+    regen_derived_files = true;
+  }
+#else  // !ENABLE_V1_MIGRATION
+  if (v0_exists) {
+    // If migration from v0 to v1 is not enabled, then simply delete the v0 file
+    // and treat this as if it's our first time initializing a v1 log.
+    regen_derived_files = true;
+    filesystem->DeleteFile(MakeDocumentLogFilenameV0(base_dir).c_str());
+  }
+#endif  // ENABLE_V1_MIGRATION
+
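+  // Whether or not a migration happened above, the caller always gets back
+  // the portable v1 log.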
+  ICING_ASSIGN_OR_RETURN(
+      PortableFileBackedProtoLog<DocumentWrapper>::CreateResult
+          log_create_result,
+      PortableFileBackedProtoLog<DocumentWrapper>::Create(
+          filesystem, MakeDocumentLogFilenameV1(base_dir),
+          PortableFileBackedProtoLog<DocumentWrapper>::Options(
+              /*compress_in=*/true)));
+
+  CreateResult create_result = {std::move(log_create_result),
+                                regen_derived_files};
+  return create_result;
+}
+
+libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
+    const Filesystem* filesystem, const std::string& base_dir) {
+  ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
+
+  // Our v0 proto log was non-portable, create it so we can read protos out from
+  // it.
+  auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
+      filesystem, MakeDocumentLogFilenameV0(base_dir),
+      FileBackedProtoLog<DocumentWrapper>::Options(
+          /*compress_in=*/true));
+  if (!v0_create_result_or.ok()) {
+    return absl_ports::Annotate(
+        v0_create_result_or.status(),
+        "Failed to initialize v0 document log while migrating.");
+  }
+  FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result =
+      std::move(v0_create_result_or).ValueOrDie();
+  std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log =
+      std::move(v0_create_result.proto_log);
+
+  // Create a v1 portable proto log that we will write our protos to.
+  auto v1_create_result_or =
+      PortableFileBackedProtoLog<DocumentWrapper>::Create(
+          filesystem, MakeDocumentLogFilenameV1(base_dir),
+          PortableFileBackedProtoLog<DocumentWrapper>::Options(
+              /*compress_in=*/true));
+  if (!v1_create_result_or.ok()) {
+    return absl_ports::Annotate(
+        v1_create_result_or.status(),
+        "Failed to initialize v1 document log while migrating.");
+  }
+  PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result =
+      std::move(v1_create_result_or).ValueOrDie();
+  std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log =
+      std::move(v1_create_result.proto_log);
+
+  // Dummy empty document to be used when copying over deleted documents.
+  DocumentProto empty_document;
+
+  // Start reading out from the old log and putting them in the new log.
+  auto iterator = v0_proto_log->GetIterator();
+  auto iterator_status = iterator.Advance();
+  while (iterator_status.ok()) {
+    libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
+        v0_proto_log->ReadProto(iterator.GetOffset());
+
+    bool deleted_document = false;
+    DocumentWrapper document_wrapper;
+    if (absl_ports::IsNotFound(document_wrapper_or.status())) {
+      // The proto was erased in the v0 log. Write an empty placeholder so
+      // that document ids stay stable; it gets erased again below.
+      *document_wrapper.mutable_document() = empty_document;
+      deleted_document = true;
+    } else if (!document_wrapper_or.ok()) {
+      // Some real error, pass up
+      return document_wrapper_or.status();
+    } else {
+      document_wrapper = std::move(document_wrapper_or).ValueOrDie();
+    }
+
+    auto offset_or = v1_proto_log->WriteProto(document_wrapper);
+    if (!offset_or.ok()) {
+      return absl_ports::Annotate(
+          offset_or.status(),
+          "Failed to write proto to v1 document log while migrating.");
+    }
+
+    // If the original document was deleted, erase the proto we just wrote.
+    // We do this to maintain the document_ids, i.e. we still want document_id 2
+    // to point to a deleted document even though we may not have the document
+    // contents anymore. DocumentStore guarantees that the document_ids don't
+    // change unless an Optimize is triggered.
+    if (deleted_document) {
+      int64_t offset = offset_or.ValueOrDie();
+      auto erased_status = v1_proto_log->EraseProto(offset);
+      if (!erased_status.ok()) {
+        return absl_ports::Annotate(
+            erased_status,
+            "Failed to erase proto in v1 document log while migrating.");
+      }
+    }
+
+    iterator_status = iterator.Advance();
+  }
+
+  // Close out our file log pointers.
+  v0_proto_log.reset();
+  v1_proto_log.reset();
+
+  return libtextclassifier3::Status::OK;
+}
+
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h
new file mode 100644
index 0000000..51cf497
--- /dev/null
+++ b/icing/store/document-log-creator.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DOCUMENT_LOG_CREATOR_H_
+#define ICING_STORE_DOCUMENT_LOG_CREATOR_H_
+
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document_wrapper.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Handles creation of the document log and any underlying migrations that may
+// be necessary.
+class DocumentLogCreator {
+ public:
+  struct CreateResult {
+    // The create result passed up from the PortableFileBackedProtoLog::Create.
+    // Contains the document log.
+    PortableFileBackedProtoLog<DocumentWrapper>::CreateResult log_create_result;
+
+    // Whether the caller needs to also regenerate/generate any derived files
+    // based off of the initialized document log.
+    bool regen_derived_files;
+  };
+
+  // Creates the document log in the base_dir. Will create one if it doesn't
+  // already exist.
+  //
+  // This also handles any potential migrations from old document log versions.
+  // At the end of this call, the most up-to-date log will be returned and will
+  // be usable.
+  //
+  // Returns:
+  //   CreateResult on success.
+  //   INTERNAL on any I/O error.
+  static libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> Create(
+      const Filesystem* filesystem, const std::string& base_dir);
+
+  // Returns the filename of the document log, without any directory prefixes.
+  // Used mainly for testing purposes.
+  static std::string GetDocumentLogFilename();
+
+ private:
+  // Handles migrating a v0 document log (not portable) to a v1 document log
+  // (portable). This will initialize the log in the beginning, and close it
+  // when migration is done. Callers will need to reinitialize the log on their
+  // own.
+  //
+  // Returns:
+  //   OK on success.
+  //   INVALID_ARGUMENT if some invalid option was passed to the document log.
+  //   INTERNAL on I/O error.
+  static libtextclassifier3::Status MigrateFromV0ToV1(
+      const Filesystem* filesystem, const std::string& base_dir);
+};
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_STORE_DOCUMENT_LOG_CREATOR_H_
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 4e63b90..907bace 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -33,6 +33,7 @@
 #include "icing/file/file-backed-vector.h"
 #include "icing/file/filesystem.h"
 #include "icing/file/memory-mapped-file.h"
+#include "icing/file/portable-file-backed-proto-log.h"
 #include "icing/legacy/core/icing-string-util.h"
 #include "icing/proto/document.pb.h"
 #include "icing/proto/document_wrapper.pb.h"
@@ -44,6 +45,7 @@
 #include "icing/store/document-associated-score-data.h"
 #include "icing/store/document-filter-data.h"
 #include "icing/store/document-id.h"
+#include "icing/store/document-log-creator.h"
 #include "icing/store/key-mapper.h"
 #include "icing/store/namespace-id.h"
 #include "icing/store/usage-store.h"
@@ -62,7 +64,6 @@
 
 // Used in DocumentId mapper to mark a document as deleted
 constexpr int64_t kDocDeletedFlag = -1;
-constexpr char kDocumentLogFilename[] = "document_log";
 constexpr char kDocumentIdMapperFilename[] = "document_id_mapper";
 constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
 constexpr char kScoreCacheFilename[] = "score_cache";
@@ -93,10 +94,6 @@
   return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename);
 }
 
-std::string MakeDocumentLogFilename(const std::string& base_dir) {
-  return absl_ports::StrCat(base_dir, "/", kDocumentLogFilename);
-}
-
 std::string MakeScoreCacheFilename(const std::string& base_dir) {
   return absl_ports::StrCat(base_dir, "/", kScoreCacheFilename);
 }
@@ -224,30 +221,36 @@
 libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
     bool force_recovery_and_revalidate_documents,
     InitializeStatsProto* initialize_stats) {
-  auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
-      filesystem_, MakeDocumentLogFilename(base_dir_),
-      FileBackedProtoLog<DocumentWrapper>::Options(
-          /*compress_in=*/true));
+  auto create_result_or = DocumentLogCreator::Create(filesystem_, base_dir_);
+
   // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
   // that can support error logging.
   if (!create_result_or.ok()) {
     ICING_LOG(ERROR) << create_result_or.status().error_message()
-                     << "\nFailed to initialize DocumentLog";
+                     << "\nFailed to initialize DocumentLog.";
     return create_result_or.status();
   }
-  FileBackedProtoLog<DocumentWrapper>::CreateResult create_result =
+  DocumentLogCreator::CreateResult create_result =
       std::move(create_result_or).ValueOrDie();
-  document_log_ = std::move(create_result.proto_log);
 
-  if (force_recovery_and_revalidate_documents ||
-      create_result.has_data_loss()) {
-    if (create_result.has_data_loss() && initialize_stats != nullptr) {
+  document_log_ = std::move(create_result.log_create_result.proto_log);
+
+  if (create_result.regen_derived_files ||
+      force_recovery_and_revalidate_documents ||
+      create_result.log_create_result.has_data_loss()) {
+    // We can't rely on any existing derived files. Recreate them from scratch.
+    // Currently happens if:
+    //   1) This is a new log and we don't have derived files yet.
+    //   2) Client wanted us to force a regeneration.
+    //   3) Log has some data loss, can't rely on existing derived data.
+    if (create_result.log_create_result.has_data_loss() &&
+        initialize_stats != nullptr) {
       ICING_LOG(WARNING)
           << "Data loss in document log, regenerating derived files.";
       initialize_stats->set_document_store_recovery_cause(
           InitializeStatsProto::DATA_LOSS);
 
-      if (create_result.data_loss == DataLoss::PARTIAL) {
+      if (create_result.log_create_result.data_loss == DataLoss::PARTIAL) {
         // Ground truth is partially lost.
         initialize_stats->set_document_store_data_status(
             InitializeStatsProto::PARTIAL_LOSS);
@@ -257,10 +260,16 @@
             InitializeStatsProto::COMPLETE_LOSS);
       }
     }
+
     std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
     libtextclassifier3::Status status =
         RegenerateDerivedFiles(force_recovery_and_revalidate_documents);
-    if (initialize_stats != nullptr) {
+    if (initialize_stats != nullptr &&
+        (force_recovery_and_revalidate_documents ||
+         create_result.log_create_result.has_data_loss())) {
+      // Only consider it a recovery if the client forced a recovery or there
+      // was data loss. Otherwise, this could just be the first time we're
+      // initializing and generating derived files.
       initialize_stats->set_document_store_recovery_latency_ms(
           document_recovery_timer->GetElapsedMilliseconds());
     }
@@ -270,7 +279,7 @@
       return status;
     }
   } else {
-    if (!InitializeDerivedFiles().ok()) {
+    if (!InitializeExistingDerivedFiles().ok()) {
       ICING_VLOG(1)
           << "Couldn't find derived files or failed to initialize them, "
              "regenerating derived files for DocumentStore.";
@@ -296,10 +305,10 @@
     initialize_stats->set_num_documents(document_id_mapper_->num_elements());
   }
 
-  return create_result.data_loss;
+  return create_result.log_create_result.data_loss;
 }
 
-libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
+libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
   if (!HeaderExists()) {
     // Without a header, we don't know if things are consistent between each
     // other so the caller should just regenerate everything from ground
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index b0cd1ce..79d99d4 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -26,6 +26,7 @@
 #include "icing/file/file-backed-proto-log.h"
 #include "icing/file/file-backed-vector.h"
 #include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
 #include "icing/proto/document.pb.h"
 #include "icing/proto/document_wrapper.pb.h"
 #include "icing/proto/logging.pb.h"
@@ -438,7 +439,7 @@
 
   // A log used to store all documents, it serves as a ground truth of doc
   // store. key_mapper_ and document_id_mapper_ can be regenerated from it.
-  std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log_;
+  std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
 
   // Key (namespace + uri) to DocumentId mapping
   std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_;
@@ -495,11 +496,35 @@
       bool force_recovery_and_revalidate_documents,
       InitializeStatsProto* initialize_stats);
 
+  // Initializes a new DocumentStore and sets up any underlying files.
+  //
+  // Returns:
+  //   Data loss status on success, effectively always DataLoss::NONE
+  //   INTERNAL on I/O error
+  libtextclassifier3::StatusOr<DataLoss> InitializeNewStore(
+      InitializeStatsProto* initialize_stats);
+
+  // Initializes a DocumentStore over an existing directory of files.
+  //
+  // stats will be set if non-null
+  //
+  // Returns:
+  //   Data loss status on success
+  //   INTERNAL on I/O error
+  libtextclassifier3::StatusOr<DataLoss> InitializeExistingStore(
+      bool force_recovery_and_revalidate_documents,
+      InitializeStatsProto* initialize_stats);
+
+  libtextclassifier3::StatusOr<DataLoss> MigrateFromV0ToV1(
+      InitializeStatsProto* initialize_stats);
+
   // Creates sub-components and verifies the integrity of each sub-component.
+  // This assumes that the underlying files already exist, and will return
+  // an error if it doesn't find what it's expecting.
   //
   // Returns an error if subcomponents failed to initialize successfully.
   //   INTERNAL_ERROR on IO error
-  libtextclassifier3::Status InitializeDerivedFiles();
+  libtextclassifier3::Status InitializeExistingDerivedFiles();
 
   // Re-generates all files derived from the ground truth: the document log.
   //
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index f68e115..ce608fc 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -168,6 +168,93 @@
 }
 BENCHMARK(BM_DoesDocumentExistBenchmark);
 
+void BM_Put(benchmark::State& state) {
+  Filesystem filesystem;
+  Clock clock;
+
+  std::string directory = GetTestTempDir() + "/icing";
+  DestructibleDirectory ddir(filesystem, directory);
+
+  std::string document_store_dir = directory + "/store";
+  std::unique_ptr<SchemaStore> schema_store =
+      CreateSchemaStore(filesystem, directory, &clock);
+
+  filesystem.CreateDirectoryRecursively(document_store_dir.data());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem, document_store_dir, &clock,
+                            schema_store.get()));
+  std::unique_ptr<DocumentStore> document_store =
+      std::move(create_result.document_store);
+
+  DocumentProto document = CreateDocument("namespace", "uri");
+
+  for (auto s : state) {
+    // It's ok that this is the same document over and over. We'll create a new
+    // document_id for it and still insert the proto into the underlying log.
+    benchmark::DoNotOptimize(document_store->Put(document));
+  }
+}
+BENCHMARK(BM_Put);
+
+void BM_GetSameDocument(benchmark::State& state) {
+  Filesystem filesystem;
+  Clock clock;
+
+  std::string directory = GetTestTempDir() + "/icing";
+  DestructibleDirectory ddir(filesystem, directory);
+
+  std::string document_store_dir = directory + "/store";
+  std::unique_ptr<SchemaStore> schema_store =
+      CreateSchemaStore(filesystem, directory, &clock);
+
+  filesystem.CreateDirectoryRecursively(document_store_dir.data());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem, document_store_dir, &clock,
+                            schema_store.get()));
+  std::unique_ptr<DocumentStore> document_store =
+      std::move(create_result.document_store);
+
+  ICING_ASSERT_OK(document_store->Put(CreateDocument("namespace", "uri")));
+
+  for (auto s : state) {
+    benchmark::DoNotOptimize(document_store->Get("namespace", "uri"));
+  }
+}
+BENCHMARK(BM_GetSameDocument);
+
+void BM_Delete(benchmark::State& state) {
+  Filesystem filesystem;
+  Clock clock;
+
+  std::string directory = GetTestTempDir() + "/icing";
+  DestructibleDirectory ddir(filesystem, directory);
+
+  std::string document_store_dir = directory + "/store";
+  std::unique_ptr<SchemaStore> schema_store =
+      CreateSchemaStore(filesystem, directory, &clock);
+
+  filesystem.CreateDirectoryRecursively(document_store_dir.data());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem, document_store_dir, &clock,
+                            schema_store.get()));
+  std::unique_ptr<DocumentStore> document_store =
+      std::move(create_result.document_store);
+
+  DocumentProto document = CreateDocument("namespace", "uri");
+
+  for (auto s : state) {
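+    // Re-insert the document outside of the timed region so that each
+    // iteration measures only the Delete() call.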
+    state.PauseTiming();
+    ICING_ASSERT_OK(document_store->Put(document));
+    state.ResumeTiming();
+
+    benchmark::DoNotOptimize(document_store->Delete("namespace", "uri"));
+  }
+}
+BENCHMARK(BM_Delete);
+
 }  // namespace
 
 }  // namespace lib
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index ad3b7c4..3ed4c4e 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -15,6 +15,7 @@
 #include "icing/store/document-store.h"
 
 #include <cstdint>
+#include <filesystem>
 #include <limits>
 #include <memory>
 #include <string>
@@ -40,6 +41,7 @@
 #include "icing/store/corpus-id.h"
 #include "icing/store/document-filter-data.h"
 #include "icing/store/document-id.h"
+#include "icing/store/document-log-creator.h"
 #include "icing/store/namespace-id.h"
 #include "icing/testing/common-matchers.h"
 #include "icing/testing/fake-clock.h"
@@ -105,6 +107,22 @@
   return usage_report;
 }
 
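+// Reads the PortableFileBackedProtoLog header from the beginning of the given
+// document log file so tests can inspect and modify it.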
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+    Filesystem filesystem, const std::string& file_path) {
+  PortableFileBackedProtoLog<DocumentWrapper>::Header header;
+  filesystem.PRead(file_path.c_str(), &header,
+                   sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
+                   /*offset=*/0);
+  return header;
+}
+
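+// Writes the given header to the beginning of the document log file.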
+void WriteDocumentLogHeader(
+    Filesystem filesystem, const std::string& file_path,
+    PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+  filesystem.Write(file_path.c_str(), &header,
+                   sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
+}
+
 class DocumentStoreTest : public ::testing::Test {
  protected:
   DocumentStoreTest()
@@ -452,14 +470,18 @@
   // Validates that deleting something non-existing won't append anything to
   // ground truth
   int64_t document_log_size_before = filesystem_.GetFileSize(
-      absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename())
+          .c_str());
 
   EXPECT_THAT(
       document_store->Delete("nonexistent_namespace", "nonexistent_uri"),
       StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
 
   int64_t document_log_size_after = filesystem_.GetFileSize(
-      absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename())
+          .c_str());
   EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
 }
 
@@ -538,13 +560,17 @@
   // Validates that deleting something non-existing won't append anything to
   // ground truth
   int64_t document_log_size_before = filesystem_.GetFileSize(
-      absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename())
+          .c_str());
 
   EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace").status,
               StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
 
   int64_t document_log_size_after = filesystem_.GetFileSize(
-      absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename())
+          .c_str());
   EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
 }
 
@@ -607,7 +633,9 @@
     EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
 
     document_log_size_before = filesystem_.GetFileSize(
-        absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+        absl_ports::StrCat(document_store_dir_, "/",
+                           DocumentLogCreator::GetDocumentLogFilename())
+            .c_str());
   }  // Destructors should update checksum and persist all data to file.
 
   CorruptDocStoreHeaderChecksumFile();
@@ -621,7 +649,9 @@
 
   // Make sure we didn't add anything to the ground truth after we recovered.
   int64_t document_log_size_after = filesystem_.GetFileSize(
-      absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename())
+          .c_str());
   EXPECT_EQ(document_log_size_before, document_log_size_after);
 
   EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
@@ -730,13 +760,17 @@
   // Validates that deleting something non-existing won't append anything to
   // ground truth
   int64_t document_log_size_before = filesystem_.GetFileSize(
-      absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename())
+          .c_str());
 
   EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type").status,
               StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
 
   int64_t document_log_size_after = filesystem_.GetFileSize(
-      absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename())
+          .c_str());
 
   EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
 }
@@ -809,7 +843,9 @@
     EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
 
     document_log_size_before = filesystem_.GetFileSize(
-        absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+        absl_ports::StrCat(document_store_dir_, "/",
+                           DocumentLogCreator::GetDocumentLogFilename())
+            .c_str());
   }  // Destructors should update checksum and persist all data to file.
 
   CorruptDocStoreHeaderChecksumFile();
@@ -823,7 +859,9 @@
 
   // Make sure we didn't add anything to the ground truth after we recovered.
   int64_t document_log_size_after = filesystem_.GetFileSize(
-      absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename())
+          .c_str());
   EXPECT_EQ(document_log_size_before, document_log_size_after);
 
   EXPECT_THAT(document_store->Get(email_document_id),
@@ -901,7 +939,9 @@
                 IsOkAndHolds(EqualsProto(message_document)));
 
     document_log_size_before = filesystem_.GetFileSize(
-        absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+        absl_ports::StrCat(document_store_dir_, "/",
+                           DocumentLogCreator::GetDocumentLogFilename())
+            .c_str());
   }  // Destructors should update checksum and persist all data to file.
 
   CorruptDocStoreHeaderChecksumFile();
@@ -923,7 +963,9 @@
 
   // Make sure we didn't add anything to the ground truth after we recovered.
   int64_t document_log_size_after = filesystem_.GetFileSize(
-      absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename())
+          .c_str());
   EXPECT_EQ(document_log_size_before, document_log_size_after);
 
   EXPECT_THAT(document_store->Get(email_document_id),
@@ -968,7 +1010,9 @@
   ICING_ASSERT_OK(doc_store->Put(document2));
   ICING_ASSERT_OK(doc_store->Put(document3));
 
-  std::string original_document_log = document_store_dir_ + "/document_log";
+  std::string original_document_log = absl_ports::StrCat(
+      document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+
   int64_t original_size =
       filesystem_.GetFileSize(original_document_log.c_str());
 
@@ -979,7 +1023,8 @@
                HasSubstr("directory is the same")));
 
   std::string optimized_dir = document_store_dir_ + "_optimize";
-  std::string optimized_document_log = optimized_dir + "/document_log";
+  std::string optimized_document_log =
+      optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename();
 
   // Validates that the optimized document log has the same size if nothing is
   // deleted
@@ -1067,8 +1112,8 @@
   DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
   const std::string serialized_document = document.SerializeAsString();
 
-  const std::string document_log_file =
-      absl_ports::StrCat(document_store_dir_, "/document_log");
+  const std::string document_log_file = absl_ports::StrCat(
+      document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
   int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
   filesystem_.PWrite(document_log_file.c_str(), file_size,
                      serialized_document.data(), serialized_document.size());
@@ -2919,8 +2964,8 @@
   DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
   const std::string serialized_document = document.SerializeAsString();
 
-  const std::string document_log_file =
-      absl_ports::StrCat(document_store_dir_, "/document_log");
+  const std::string document_log_file = absl_ports::StrCat(
+      document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
   int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
   filesystem_.PWrite(document_log_file.c_str(), file_size,
                      serialized_document.data(), serialized_document.size());
@@ -3043,7 +3088,9 @@
   const std::string serialized_document = document.SerializeAsString();
 
   const std::string document_log_file =
-      absl_ports::StrCat(document_store_dir_, "/document_log");
+      absl_ports::StrCat(document_store_dir_, "/",
+                         DocumentLogCreator::GetDocumentLogFilename());
   int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
   filesystem_.PWrite(document_log_file.c_str(), file_size,
                      serialized_document.data(), serialized_document.size());
@@ -3060,8 +3107,8 @@
 
 TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
   int64_t corruptible_offset;
-  const std::string document_log_file =
-      absl_ports::StrCat(document_store_dir_, "/document_log");
+  const std::string document_log_file = absl_ports::StrCat(
+      document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
   {
     // Can put and delete fine.
     ICING_ASSERT_OK_AND_ASSIGN(
@@ -3088,8 +3135,30 @@
   // "Corrupt" the persisted content written in the log. We can't recover if
   // the persisted data was corrupted.
   std::string corruption = "abc";
-  filesystem_.PWrite(document_log_file.c_str(), /*offset=*/corruptible_offset,
-                     corruption.data(), corruption.size());
+  filesystem_.PWrite(document_log_file.c_str(),
+                     /*offset=*/corruptible_offset, corruption.data(),
+                     corruption.size());
+
+  {
+    // "Corrupt" the content written in the log. Make the corrupt document
+    // smaller than our original one so we don't accidentally write past our
+    // file.
+    DocumentProto document =
+        DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+    std::string serialized_document = document.SerializeAsString();
+    ASSERT_TRUE(filesystem_.PWrite(
+        document_log_file.c_str(), corruptible_offset,
+        serialized_document.data(), serialized_document.size()));
+
+    PortableFileBackedProtoLog<DocumentWrapper>::Header header =
+        ReadDocumentLogHeader(filesystem_, document_log_file);
+
+    // Set dirty bit to true to reflect that something changed in the log.
+    header.SetDirtyFlag(true);
+    header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+    WriteDocumentLogHeader(filesystem_, document_log_file, header);
+  }
 
   // Successfully recover from a data loss issue.
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -3106,8 +3175,8 @@
 // the document store header.
 //
 // This causes a problem now because this cl changes behavior to not consider an
-// InitializeDerivedFiles failure to be a recovery if there is nothing to
-// recover because the doocument store is empty.
+// InitializeExistingDerivedFiles failure to be a recovery if there is nothing
+// to recover because the document store is empty.
 #define DISABLE_BACKWARDS_COMPAT_TEST
 #ifndef DISABLE_BACKWARDS_COMPAT_TEST
 TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
@@ -3667,6 +3736,128 @@
   }
 }
 
+#ifndef DISABLE_BACKWARDS_COMPAT_TEST
+TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
+  // Set up schema.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("email")
+                       .AddProperty(
+                           PropertyConfigBuilder()
+                               .SetName("subject")
+                               .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+                               .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(
+                           PropertyConfigBuilder()
+                               .SetName("body")
+                               .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+                               .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+
+  std::string schema_store_dir = schema_store_dir_ + "_migrate";
+  filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+  filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+
+  ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+
+  // Create dst directory that we'll initialize the DocumentStore over.
+  std::string document_store_dir = document_store_dir_ + "_migrate";
+  ASSERT_THAT(
+      filesystem_.DeleteDirectoryRecursively(document_store_dir.c_str()), true);
+  ASSERT_THAT(
+      filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()), true);
+
+  // Copy the testdata files into our DocumentStore directory
+  std::string document_store_without_portable_log;
+  if (IsAndroidX86()) {
+    document_store_without_portable_log = GetTestFilePath(
+        "icing/testdata/not_portable_log/"
+        "icing_search_engine_android_x86/document_dir");
+  } else if (IsAndroidArm()) {
+    document_store_without_portable_log = GetTestFilePath(
+        "icing/testdata/not_portable_log/"
+        "icing_search_engine_android_arm/document_dir");
+  } else if (IsIosPlatform()) {
+    document_store_without_portable_log = GetTestFilePath(
+        "icing/testdata/not_portable_log/"
+        "icing_search_engine_ios/document_dir");
+  } else {
+    document_store_without_portable_log = GetTestFilePath(
+        "icing/testdata/not_portable_log/"
+        "icing_search_engine_linux/document_dir");
+  }
+
+  ASSERT_TRUE(filesystem_.CopyDirectory(
+      document_store_without_portable_log.c_str(), document_store_dir.c_str(),
+      /*recursive=*/true));
+
+  // Initialize the DocumentStore over our copied files.
+  InitializeStatsProto initialize_stats;
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem_, document_store_dir, &fake_clock_,
+                            schema_store.get(),
+                            /*force_recovery_and_revalidate_documents=*/false,
+                            &initialize_stats));
+  std::unique_ptr<DocumentStore> document_store =
+      std::move(create_result.document_store);
+
+  // These are the documents that are stored in the testdata files. Do not
+  // change unless you're also updating the testdata files.
+  DocumentProto document1 = DocumentBuilder()
+                                .SetKey("namespace1", "uri1")
+                                .SetSchema("email")
+                                .SetCreationTimestampMs(10)
+                                .AddStringProperty("subject", "foo")
+                                .AddStringProperty("body", "bar")
+                                .Build();
+
+  DocumentProto document2 = DocumentBuilder()
+                                .SetKey("namespace1", "uri2")
+                                .SetSchema("email")
+                                .SetCreationTimestampMs(20)
+                                .SetScore(321)
+                                .AddStringProperty("body", "baz bat")
+                                .Build();
+
+  DocumentProto document3 = DocumentBuilder()
+                                .SetKey("namespace2", "uri1")
+                                .SetSchema("email")
+                                .SetCreationTimestampMs(30)
+                                .SetScore(123)
+                                .AddStringProperty("subject", "phoo")
+                                .Build();
+
+  // Check that we didn't lose anything. A migration also doesn't technically
+  // count as a recovery.
+  EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+  EXPECT_FALSE(initialize_stats.has_document_store_recovery_cause());
+
+  // Document 1 and 3 were put normally, and document 2 was deleted in our
+  // testdata files.
+  //
+  // Check by namespace, uri
+  EXPECT_THAT(document_store->Get(document1.namespace_(), document1.uri()),
+              IsOkAndHolds(EqualsProto(document1)));
+  EXPECT_THAT(document_store->Get(document2.namespace_(), document2.uri()),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(document_store->Get(document3.namespace_(), document3.uri()),
+              IsOkAndHolds(EqualsProto(document3)));
+
+  // Check by document_id
+  EXPECT_THAT(document_store->Get(/*document_id=*/0),
+              IsOkAndHolds(EqualsProto(document1)));
+  EXPECT_THAT(document_store->Get(/*document_id=*/1),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(document_store->Get(/*document_id=*/2),
+              IsOkAndHolds(EqualsProto(document3)));
+}
+#endif  // DISABLE_BACKWARDS_COMPAT_TEST
+
 }  // namespace
 
 }  // namespace lib