pw_kvs: Reinit KVS metadata as part of the repair process

- Split out the KVS metadata initialization out of Init to
  a new method InitializeMetadata.
- Split out the fixing errors out of Repair to a new method FixErrors.
- Use InitializeMetadata followed up by FixErrors as the process to
  recover from corruption. This does the most robust integrity/error
  checking, and is the simplest path to an intact and coherent KVS.
- Add KVS error test for recovering after losing all copies of an entry.

Change-Id: Ifca540886738d378428162074ac186e2dc0d3d7c
diff --git a/pw_kvs/key_value_store.cc b/pw_kvs/key_value_store.cc
index 2bc6ddf..55d030d 100644
--- a/pw_kvs/key_value_store.cc
+++ b/pw_kvs/key_value_store.cc
@@ -57,8 +57,6 @@
   initialized_ = InitializationState::kNotInitialized;
   error_detected_ = false;
   last_transaction_id_ = 0;
-  sectors_.Reset();
-  entry_cache_.Reset();
 
   INF("Initializing key value store");
   if (partition_.sector_count() > sectors_.max_size()) {
@@ -80,6 +78,53 @@
     return Status::FAILED_PRECONDITION;
   }
 
+  InitializeMetadata();
+
+  if (!error_detected_) {
+    initialized_ = InitializationState::kReady;
+  } else {
+    if (options_.recovery != ErrorRecovery::kManual) {
+      Status recovery_status = FixErrors();
+
+      if (recovery_status.ok()) {
+        WRN("KVS init: Corruption detected and fully repaired");
+        initialized_ = InitializationState::kReady;
+      } else if (recovery_status == Status::RESOURCE_EXHAUSTED) {
+        WRN("KVS init: Unable to maintain required free sector");
+        initialized_ = InitializationState::kNeedsMaintenance;
+      } else {
+        WRN("KVS init: Corruption detected and unable repair");
+        initialized_ = InitializationState::kNeedsMaintenance;
+      }
+    } else {
+      WRN("KVS init: Corruption detected, no repair attempted due to options");
+      initialized_ = InitializationState::kNeedsMaintenance;
+    }
+  }
+
+  INF("KeyValueStore init complete: active keys %zu, deleted keys %zu, sectors "
+      "%zu, logical sector size %zu bytes",
+      size(),
+      (entry_cache_.total_entries() - size()),
+      sectors_.size(),
+      partition_.sector_size_bytes());
+
+  // Report any corruption was not repaired.
+  if (error_detected_) {
+    WRN("KVS init: Corruption found but not repaired, KVS unavailable until "
+        "successful maintenance.");
+    return Status::DATA_LOSS;
+  }
+
+  return Status::OK;
+}
+
+void KeyValueStore::InitializeMetadata() {
+  const size_t sector_size_bytes = partition_.sector_size_bytes();
+
+  sectors_.Reset();
+  entry_cache_.Reset();
+
   DBG("First pass: Read all entries from all sectors");
   Address sector_address = 0;
 
@@ -93,10 +138,10 @@
     size_t sector_corrupt_bytes = 0;
 
     for (int num_entries_in_sector = 0; true; num_entries_in_sector++) {
-      DBG("Load entry: sector=%" PRIx32 ", entry#=%d, address=%" PRIx32,
-          sector_address,
+      DBG("Load entry: sector=%u, entry#=%d, address=%u",
+          unsigned(sector_address),
           num_entries_in_sector,
-          entry_address);
+          unsigned(entry_address));
 
       if (!sectors_.AddressInSector(sector, entry_address)) {
         DBG("Fell off end of sector; moving to the next sector");
@@ -108,13 +153,10 @@
       if (status == Status::NOT_FOUND) {
         DBG("Hit un-written data in sector; moving to the next sector");
         break;
-      }
-      if (status == Status::DATA_LOSS) {
-        // The entry could not be read, indicating data corruption within the
-        // sector. Try to scan the remainder of the sector for other entries.
-        WRN("KVS init: data loss detected in sector %u at address %zu",
-            sectors_.Index(sector),
-            size_t(entry_address));
+      } else if (!status.ok()) {
+        // The entry could not be read, indicating likely data corruption within
+        // the sector. Try to scan the remainder of the sector for other
+        // entries.
 
         error_detected_ = true;
         corrupt_entries++;
@@ -122,7 +164,7 @@
         status = ScanForEntry(sector,
                               entry_address + Entry::kMinAlignmentBytes,
                               &next_entry_address);
-        if (status == Status::NOT_FOUND) {
+        if (!status.ok()) {
           // No further entries in this sector. Mark the remaining bytes in the
           // sector as corrupt (since we can't reliably know the size of the
           // corrupt entry).
@@ -131,15 +173,7 @@
           break;
         }
 
-        if (!status.ok()) {
-          ERR("Unexpected error in KVS initialization: %s", status.str());
-          return Status::UNKNOWN;
-        }
-
         sector_corrupt_bytes += next_entry_address - entry_address;
-      } else if (!status.ok()) {
-        ERR("Unexpected error in KVS initialization: %s", status.str());
-        return Status::UNKNOWN;
       }
 
       // Entry loaded successfully; so get ready to load the next one.
@@ -218,47 +252,11 @@
     error_detected_ = true;
   }
 
-  if (!error_detected_) {
-    initialized_ = InitializationState::kReady;
-  } else {
-    if (options_.recovery != ErrorRecovery::kManual) {
-      WRN("KVS init: Corruption detected, beginning repair. Found %zu corrupt "
-          "bytes and %d corrupt entries.",
-          total_corrupt_bytes,
-          corrupt_entries);
-      Status recovery_status = Repair();
-
-      if (recovery_status.ok()) {
-        WRN("KVS init: Corruption detected and fully repaired");
-        initialized_ = InitializationState::kReady;
-      } else if (recovery_status == Status::RESOURCE_EXHAUSTED) {
-        WRN("KVS init: Unable to maintain required free sector");
-        initialized_ = InitializationState::kNeedsMaintenance;
-      } else {
-        WRN("KVS init: Corruption detected and unable repair");
-        initialized_ = InitializationState::kNeedsMaintenance;
-      }
-    } else {
-      WRN("KVS init: Corruption detected, no repair attempted due to options");
-      initialized_ = InitializationState::kNeedsMaintenance;
-    }
+  if (total_corrupt_bytes != 0 || corrupt_entries != 0) {
+    WRN("Corruption detected. Found %zu corrupt bytes and %d corrupt entries.",
+        total_corrupt_bytes,
+        corrupt_entries);
   }
-
-  INF("KeyValueStore init complete: active keys %zu, deleted keys %zu, sectors "
-      "%zu, logical sector size %zu bytes",
-      size(),
-      (entry_cache_.total_entries() - size()),
-      sectors_.size(),
-      partition_.sector_size_bytes());
-
-  // Report any corruption was not repaired.
-  if (error_detected_) {
-    WRN("KVS init: Corruption found but not repaired, KVS unavailable until "
-        "successful maintenance.");
-    return Status::DATA_LOSS;
-  }
-
-  return Status::OK;
 }
 
 KeyValueStore::StorageStats KeyValueStore::GetStorageStats() const {
@@ -336,9 +334,9 @@
 Status KeyValueStore::ScanForEntry(const SectorDescriptor& sector,
                                    Address start_address,
                                    Address* next_entry_address) {
-  DBG("Scanning sector %u for entries starting from address %zx",
+  DBG("Scanning sector %u for entries starting from address %u",
       sectors_.Index(sector),
-      size_t(start_address));
+      unsigned(start_address));
 
   // Entries must start at addresses which are aligned on a multiple of
   // Entry::kMinAlignmentBytes. However, that multiple can vary between entries.
@@ -348,9 +346,13 @@
        sectors_.AddressInSector(sector, address);
        address += Entry::kMinAlignmentBytes) {
     uint32_t magic;
-    TRY(partition_.Read(address, as_writable_bytes(span(&magic, 1))));
+    StatusWithSize read_result =
+        partition_.Read(address, as_writable_bytes(span(&magic, 1)));
+    if (!read_result.ok()) {
+      continue;
+    }
     if (formats_.KnownMagic(magic)) {
-      DBG("Found entry magic at address %zx", size_t(address));
+      DBG("Found entry magic at address %u", unsigned(address));
       *next_entry_address = address;
       return Status::OK;
     }
@@ -823,16 +825,16 @@
     return Status::FAILED_PRECONDITION;
   }
 
-  // Do automatic repair, if KVS options allow for it.
-  if (error_detected_ && options_.recovery != ErrorRecovery::kManual) {
-    TRY(Repair());
-  }
-
   DBG("Garbage Collect a single sector");
   for (Address address : reserved_addresses) {
     DBG("   Avoid address %u", unsigned(address));
   }
 
+  // Do automatic repair, if KVS options allow for it.
+  if (error_detected_ && options_.recovery != ErrorRecovery::kManual) {
+    TRY(Repair());
+  }
+
   // Step 1: Find the sector to garbage collect
   SectorDescriptor* sector_to_gc =
       sectors_.FindSectorToGarbageCollect(reserved_addresses);
@@ -848,7 +850,7 @@
 
 Status KeyValueStore::RelocateKeyAddressesInSector(
     SectorDescriptor& sector_to_gc,
-    EntryMetadata& metadata,
+    const EntryMetadata& metadata,
     span<const Address> reserved_addresses) {
   for (FlashPartition::Address& address : metadata.addresses()) {
     if (sectors_.AddressInSector(sector_to_gc, address)) {
@@ -1004,23 +1006,17 @@
   return repair_status;
 }
 
-Status KeyValueStore::Repair() {
-  // Collect and return the first error encountered.
-  Status overall_status = Status::OK;
-
-  DBG("KVS repair");
+Status KeyValueStore::FixErrors() {
+  DBG("Fixing KVS errors");
 
   // Step 1: Garbage collect any sectors marked as corrupt.
-  Status repair_status = RepairCorruptSectors();
-  if (overall_status.ok()) {
-    overall_status = repair_status;
-  }
+  Status overall_status = RepairCorruptSectors();
 
   // Step 2: Make sure there is at least 1 empty sector. This needs to be a
   // seperate check of sectors from step 1, because a found empty sector might
   // get written to by a later GC that fails and does not result in a free
   // sector.
-  repair_status = EnsureFreeSectorExists();
+  Status repair_status = EnsureFreeSectorExists();
   if (overall_status.ok()) {
     overall_status = repair_status;
   }
@@ -1039,6 +1035,17 @@
   return overall_status;
 }
 
+Status KeyValueStore::Repair() {
+  // If errors have been detected, just reinit the KVS metadata. This does a
+  // full deep error check and any needed repairs. Then repair any errors.
+  INF("Starting KVS repair");
+
+  DBG("Reinitialize KVS metadata");
+  InitializeMetadata();
+
+  return FixErrors();
+}
+
 KeyValueStore::Entry KeyValueStore::CreateEntry(Address address,
                                                 string_view key,
                                                 span<const byte> value,