Classify errors and advance URL index according to the error code. In CL https://gerrit.chromium.org/gerrit/39638, we always incremented the URL index irrespective of the error code. That would cause the first URL to be given up too quickly in favor of the second one even for transient errors such as when user closes a lid and reopens after some time. The right behavior in this case is to just count those failures towards the URL and only after repeated failures with no progress should we advance the URL index. This CL implements this logic and completes the multiple URL-related work items outlined in the design doc. BUG=chromium-os:37206 TEST=Tested all uses cases on my ZGB. Added and updated unit tests. Change-Id: Ida0cfbfeb9bfab732144049d1b27e3b8958bc252 Reviewed-on: https://gerrit.chromium.org/gerrit/39885 Commit-Queue: Jay Srinivasan <jaysri@chromium.org> Reviewed-by: Jay Srinivasan <jaysri@chromium.org> Tested-by: Jay Srinivasan <jaysri@chromium.org>

commit: 2b5a0f065187fd19179e3809148dbfc376ada7a0 [log] [tgz]
author: Jay Srinivasan <jaysri@chromium.org> Wed Dec 19 17:25:56 2012 -0800
committer: Gerrit <chrome-bot@google.com> Thu Dec 20 23:01:52 2012 -0800
tree: 3f7728d90f2ad4f34567300e731d5872352b6b6e
parent: 07428b5d247093f80434d742860749c3b9b397f4 [diff]
diff --git a/action_processor.h b/action_processor.h
index 3f8dcee..ba0e512 100644
--- a/action_processor.h
+++ b/action_processor.h

@@ -64,7 +64,13 @@
   kActionCodeDownloadOperationHashMissingError = 38,
   kActionCodeDownloadMetadataSignatureMissingError = 39,
 
-  // Any code above this is sent to both Omaha and UMA as-is.
+  // Note: When adding new error codes, please remember to add the
+  // error into one of the buckets in PayloadState::UpdateFailed method so
+  // that the retries across URLs and the payload back-off mechanism work
+  // correctly for those new error codes.
+
+  // Any code above this is sent to both Omaha and UMA as-is, except
+  // kActionCodeOmahaErrorInHTTPResponse (see error code 2000 for more details).
   // Codes/flags below this line is sent only to Omaha and not to UMA.
 
   // kActionCodeUmaReportedMax is not an error code per se, it's just the count

diff --git a/delta_performer.cc b/delta_performer.cc
index 5412a0f..27ee556 100644
--- a/delta_performer.cc
+++ b/delta_performer.cc

@@ -306,7 +306,7 @@
 
 
 // Wrapper around write. Returns true if all requested bytes
-// were written, or false on any error, reguardless of progress
+// were written, or false on any error, regardless of progress
 // and stores an action exit code in |error|.
 bool DeltaPerformer::Write(const void* bytes, size_t count,
                            ActionExitCode *error) {
@@ -314,6 +314,7 @@
 
   const char* c_bytes = reinterpret_cast<const char*>(bytes);
   buffer_.insert(buffer_.end(), c_bytes, c_bytes + count);
+  system_state_->payload_state()->DownloadProgress(count);
   if (!manifest_valid_) {
     MetadataParseResult result = ParsePayloadMetadata(buffer_,
                                                       &manifest_,
@@ -877,6 +878,14 @@
     utils::HexDumpVector(hash_data);
     return kActionCodeDownloadPayloadPubKeyVerificationError;
   }
+
+  // At this point, we are guaranteed to have downloaded a full payload, i.e
+  // the one whose size matches the size mentioned in Omaha response. If any
+  // errors happen after this, it's likely a problem with the payload itself or
+  // the state of the system and not a problem with the URL or network.  So,
+  // indicate that to the payload state so that AU can back-off appropriately.
+  system_state_->payload_state()->DownloadComplete();
+
   return kActionCodeSuccess;
 }
 

diff --git a/delta_performer_unittest.cc b/delta_performer_unittest.cc
index f5493c2..ba2e934 100644
--- a/delta_performer_unittest.cc
+++ b/delta_performer_unittest.cc

@@ -64,6 +64,10 @@
 
   // The in-memory copy of delta file.
   vector<char> delta;
+
+  // The mock system state object with which we initialize the
+  // delta performer.
+  MockSystemState mock_system_state;
 };
 
 enum SignatureTest {
@@ -492,8 +496,9 @@
       &install_plan.metadata_signature));
   EXPECT_FALSE(install_plan.metadata_signature.empty());
 
-  MockSystemState mock_system_state;
-  *performer = new DeltaPerformer(&prefs, &mock_system_state, &install_plan);
+  *performer = new DeltaPerformer(&prefs,
+                                  &state->mock_system_state,
+                                  &install_plan);
   EXPECT_TRUE(utils::FileExists(kUnittestPublicKeyPath));
   (*performer)->set_public_key_path(kUnittestPublicKeyPath);
 
@@ -572,6 +577,10 @@
     return;
   }
 
+  int expected_times = (expected_result == kActionCodeSuccess) ? 1 : 0;
+  EXPECT_CALL(*(state->mock_system_state.mock_payload_state()),
+              DownloadComplete()).Times(expected_times);
+
   LOG(INFO) << "Verifying payload for expected result "
             << expected_result;
   EXPECT_EQ(expected_result, performer->VerifyPayload(
@@ -754,8 +763,9 @@
 
   // Create the delta performer object.
   PrefsMock prefs;
-  MockSystemState mock_system_state;
-  DeltaPerformer delta_performer(&prefs, &mock_system_state, &install_plan);
+  DeltaPerformer delta_performer(&prefs,
+                                 &state.mock_system_state,
+                                 &install_plan);
 
   // Use the public key corresponding to the private key used above to
   // sign the metadata.
@@ -907,6 +917,25 @@
   EXPECT_FALSE(DeltaPerformer::IsIdempotentOperation(op));
 }
 
+TEST(DeltaPerformerTest, WriteUpdatesPayloadState) {
+  PrefsMock prefs;
+  InstallPlan install_plan;
+  MockSystemState mock_system_state;
+  DeltaPerformer performer(&prefs, &mock_system_state, &install_plan);
+  EXPECT_EQ(0, performer.Open("/dev/null", 0, 0));
+  EXPECT_TRUE(performer.OpenKernel("/dev/null"));
+
+  EXPECT_CALL(*(mock_system_state.mock_payload_state()),
+              DownloadProgress(4)).Times(1);
+  EXPECT_CALL(*(mock_system_state.mock_payload_state()),
+              DownloadProgress(8)).Times(2);
+
+  EXPECT_TRUE(performer.Write("junk", 4));
+  EXPECT_TRUE(performer.Write("morejunk", 8));
+  EXPECT_FALSE(performer.Write("morejunk", 8));
+  EXPECT_LT(performer.Close(), 0);
+}
+
 TEST(DeltaPerformerTest, MissingMandatoryMetadataSizeTest) {
   DoMetadataSizeTest(0, 75456, true);
 }

diff --git a/mock_payload_state.h b/mock_payload_state.h
index bb60e98..9756ac9 100644
--- a/mock_payload_state.h
+++ b/mock_payload_state.h

@@ -6,13 +6,25 @@
 #define CHROMEOS_PLATFORM_UPDATE_ENGINE_MOCK_PAYLOAD_STATE_H__
 
 #include "gmock/gmock.h"
-#include "update_engine/payload_state.h"
+#include "update_engine/omaha_request_action.h"
+#include "update_engine/payload_state_interface.h"
 
 namespace chromeos_update_engine {
 
-class MockPayloadState: public PayloadState {
+class MockPayloadState: public PayloadStateInterface {
  public:
+  bool Initialize(PrefsInterface* prefs) {
+    return true;
+  }
+
+  MOCK_METHOD1(SetResponse, void(const OmahaResponse& response));
+  MOCK_METHOD0(DownloadComplete, void());
+  MOCK_METHOD1(DownloadProgress, void(size_t count));
   MOCK_METHOD1(UpdateFailed, void(ActionExitCode error));
+  MOCK_METHOD0(GetResponse, std::string());
+  MOCK_METHOD0(GetPayloadAttemptNumber, uint32_t());
+  MOCK_METHOD0(GetUrlIndex, uint32_t());
+  MOCK_METHOD0(GetUrlFailureCount, uint32_t());
 };
 
 }  // namespace chromeos_update_engine

diff --git a/mock_system_state.h b/mock_system_state.h
index 2f82345..bb6ee98 100644
--- a/mock_system_state.h
+++ b/mock_system_state.h

@@ -41,7 +41,7 @@
     return prefs_;
   }
 
-  virtual PayloadState* payload_state() {
+  virtual PayloadStateInterface* payload_state() {
     return &mock_payload_state_;
   }
 
@@ -70,7 +70,7 @@
   // These are Mock objects or objects we own.
   MetricsLibraryMock mock_metrics_lib_;
   testing::NiceMock<PrefsMock> mock_prefs_;
-  MockPayloadState mock_payload_state_;
+  testing::NiceMock<MockPayloadState> mock_payload_state_;
 
   // These are pointers to objects which caller can override.
   PrefsInterface* prefs_;

diff --git a/omaha_request_action.cc b/omaha_request_action.cc
index 1a3517b..c98357b 100644
--- a/omaha_request_action.cc
+++ b/omaha_request_action.cc

@@ -29,6 +29,20 @@
 
 namespace chromeos_update_engine {
 
+// List of custom pair tags that we interpret in the Omaha Response:
+static const char* kTagDeadline = "deadline";
+static const char* kTagDisplayVersion = "DisplayVersion";
+static const char* kTagMaxFailureCountPerUrl = "MaxFailureCountPerUrl";
+static const char* kTagMaxDaysToScatter = "MaxDaysToScatter";
+// Deprecated: "ManifestSignatureRsa"
+// Deprecated: "ManifestSize"
+static const char* kTagMetadataSignatureRsa = "MetadataSignatureRsa";
+static const char* kTagMetadataSize = "MetadataSize";
+static const char* kTagMoreInfo = "MoreInfo";
+static const char* kTagNeedsAdmin = "needsadmin";
+static const char* kTagPrompt = "Prompt";
+static const char* kTagSha256 = "sha256";
+
 namespace {
 
 const string kGupdateVersion("ChromeOSUpdateEngine-0.1.0.0");
@@ -560,7 +574,7 @@
     return false;
   }
 
-  output_object->hash = XmlGetProperty(pie_action_node, "sha256");
+  output_object->hash = XmlGetProperty(pie_action_node, kTagSha256);
   if (output_object->hash.empty()) {
     LOG(ERROR) << "Omaha Response has empty sha256 value";
     completer->set_code(kActionCodeOmahaResponseInvalid);
@@ -569,18 +583,22 @@
 
   // Get the optional properties one by one.
   output_object->display_version =
-      XmlGetProperty(pie_action_node, "DisplayVersion");
-  output_object->more_info_url = XmlGetProperty(pie_action_node, "MoreInfo");
+      XmlGetProperty(pie_action_node, kTagDisplayVersion);
+  output_object->more_info_url = XmlGetProperty(pie_action_node, kTagMoreInfo);
   output_object->metadata_size =
-      ParseInt(XmlGetProperty(pie_action_node, "MetadataSize"));
+      ParseInt(XmlGetProperty(pie_action_node, kTagMetadataSize));
   output_object->metadata_signature =
-      XmlGetProperty(pie_action_node, "MetadataSignatureRsa");
+      XmlGetProperty(pie_action_node, kTagMetadataSignatureRsa);
   output_object->needs_admin =
-      XmlGetProperty(pie_action_node, "needsadmin") == "true";
-  output_object->prompt = XmlGetProperty(pie_action_node, "Prompt") == "true";
-  output_object->deadline = XmlGetProperty(pie_action_node, "deadline");
+      XmlGetProperty(pie_action_node, kTagNeedsAdmin) == "true";
+  output_object->prompt = XmlGetProperty(pie_action_node, kTagPrompt) == "true";
+  output_object->deadline = XmlGetProperty(pie_action_node, kTagDeadline);
   output_object->max_days_to_scatter =
-      ParseInt(XmlGetProperty(pie_action_node, "MaxDaysToScatter"));
+      ParseInt(XmlGetProperty(pie_action_node, kTagMaxDaysToScatter));
+
+  string max = XmlGetProperty(pie_action_node, kTagMaxFailureCountPerUrl);
+  if (!base::StringToInt(max, &output_object->max_failure_count_per_url))
+    output_object->max_failure_count_per_url = kDefaultMaxFailureCountPerUrl;
 
   return true;
 }
@@ -674,7 +692,7 @@
   // Update the payload state with the current response. The payload state
   // will automatically reset all stale state if this response is different
   // from what's stored already.
-  PayloadState* payload_state = system_state_->payload_state();
+  PayloadStateInterface* payload_state = system_state_->payload_state();
   payload_state->SetResponse(output_object);
 }
 

diff --git a/omaha_request_action.h b/omaha_request_action.h
index bc9b4fe..ebd3ed0 100644
--- a/omaha_request_action.h
+++ b/omaha_request_action.h

@@ -37,6 +37,7 @@
         size(0),
         metadata_size(0),
         max_days_to_scatter(0),
+        max_failure_count_per_url(0),
         needs_admin(false),
         prompt(false) {}
   // True iff there is an update to be downloaded.
@@ -59,6 +60,10 @@
   off_t size;
   off_t metadata_size;
   int max_days_to_scatter;
+  // The number of URL-related failures to tolerate before moving on to the
+  // next URL in the current pass. This is a configurable value from the
+  // Omaha Response attribute, if ever we need to fine tune the behavior.
+  int max_failure_count_per_url;
   bool needs_admin;
   bool prompt;
 };
@@ -124,6 +129,12 @@
  public:
   static const int kNeverPinged = -1;
   static const int kPingTimeJump = -2;
+  // We choose this value of 10 as a heuristic for a work day in trying
+  // each URL, assuming we check roughly every 45 mins. This is a good time to
+  // wait - neither too long nor too little - so we don't give up the preferred
+  // URLs that appear earlier in list too quickly before moving on to the
+  // fallback ones.
+  static const int kDefaultMaxFailureCountPerUrl = 10;
 
   // These are the possible outcome upon checking whether we satisfied
   // the wall-clock-based-wait.

diff --git a/payload_state.cc b/payload_state.cc
index b153fa7..a44fc92 100644
--- a/payload_state.cc
+++ b/payload_state.cc

@@ -9,6 +9,7 @@
 
 #include "update_engine/omaha_request_action.h"
 #include "update_engine/prefs.h"
+#include "update_engine/utils.h"
 
 using std::string;
 
@@ -41,34 +42,68 @@
   CHECK(prefs);
   prefs_ = prefs;
   LoadResponse();
+  LoadPayloadAttemptNumber();
   LoadUrlIndex();
+  LoadUrlFailureCount();
   LogPayloadState();
   return true;
 }
 
-void PayloadState::LogPayloadState() {
-  LOG(INFO) << "Current Payload State:\n"
-            << "Current Response = \n" << response_
-            << "Current URL Index = " << url_index_;
-}
-
 void PayloadState::SetResponse(const OmahaResponse& omaha_response) {
   CHECK(prefs_);
   num_urls_ = omaha_response.payload_urls.size();
+  max_failure_count_per_url_ = omaha_response.max_failure_count_per_url;
   string new_response = GetFilteredResponse(omaha_response);
   bool has_response_changed = (response_ != new_response);
   response_ = new_response;
   LOG(INFO) << "Stored Response = \n" << response_;
   prefs_->SetString(kPrefsCurrentResponse, response_);
 
+  bool should_reset = false;
   if (has_response_changed) {
     LOG(INFO) << "Resetting all payload state as this is a new response";
+    should_reset = true;
+  } else if (url_index_ >= num_urls_) {
+    LOG(INFO) << "Resetting all payload state as the persisted state "
+              << "seems to have been tampered with";
+    should_reset = true;
+  }
+
+  if (should_reset) {
+    SetPayloadAttemptNumber(0);
     SetUrlIndex(0);
   }
 }
 
+void PayloadState::DownloadComplete() {
+  LOG(INFO) << "Payload downloaded successfully";
+  IncrementPayloadAttemptNumber();
+}
+
+void PayloadState::DownloadProgress(size_t count) {
+  if (count == 0)
+    return;
+
+  // We've received non-zero bytes from a recent download operation.  Since our
+  // URL failure count is meant to penalize a URL only for consecutive
+  // failures, downloading bytes successfully means we should reset the failure
+  // count (as we know at least that the URL is working). In future, we can
+  // design this to be more sophisticated to check for more intelligent failure
+  // patterns, but right now, even 1 byte downloaded will mark the URL to be
+  // good unless it hits 10 (or configured number of) consecutive failures
+  // again.
+
+  if (GetUrlFailureCount() == 0)
+    return;
+
+  LOG(INFO) << "Resetting failure count of Url" << GetUrlIndex()
+            << " to 0 as we received " << count << " bytes successfully";
+  SetUrlFailureCount(0);
+}
+
 void PayloadState::UpdateFailed(ActionExitCode error) {
-  LOG(INFO) << "Updating payload state for error code: " << error;
+  ActionExitCode base_error = utils::GetBaseErrorCode(error);
+  LOG(INFO) << "Updating payload state for error code: " << base_error;
 
   if (!num_urls_) {
     // Since we don't persist num_urls_, it's possible that we get an error in
@@ -79,51 +114,210 @@
     return;
   }
 
-  // chromium-os:37206: Classify the errors and advance the URL index at
-  // different rates for different errors in the next CL. Until then, advance
-  // URL index on every single error.
+  switch (base_error) {
+    // Errors which are good indicators of a problem with a particular URL or
+    // the protocol used in the URL or entities in the communication channel
+    // (e.g. proxies). We should try the next available URL in the next update
+    // check to quickly recover from these errors.
+    case kActionCodePayloadHashMismatchError:
+    case kActionCodePayloadSizeMismatchError:
+    case kActionCodeDownloadPayloadVerificationError:
+    case kActionCodeDownloadPayloadPubKeyVerificationError:
+    case kActionCodeSignedDeltaPayloadExpectedError:
+    case kActionCodeDownloadInvalidMetadataMagicString:
+    case kActionCodeDownloadSignatureMissingInManifest:
+    case kActionCodeDownloadManifestParseError:
+    case kActionCodeDownloadMetadataSignatureError:
+    case kActionCodeDownloadMetadataSignatureVerificationError:
+    case kActionCodeDownloadMetadataSignatureMismatch:
+    case kActionCodeDownloadOperationHashVerificationError:
+    case kActionCodeDownloadOperationExecutionError:
+    case kActionCodeDownloadOperationHashMismatch:
+    case kActionCodeDownloadInvalidMetadataSize:
+    case kActionCodeDownloadInvalidMetadataSignature:
+    case kActionCodeDownloadOperationHashMissingError:
+    case kActionCodeDownloadMetadataSignatureMissingError:
+      IncrementUrlIndex();
+      break;
+
+    // Errors which seem to be just transient network/communication related
+    // failures and do not indicate any inherent problem with the URL itself.
+    // So, we should keep the current URL but just increment the
+    // failure count to give it more chances. This way, while we maximize our
+    // chances of downloading from the URLs that appear earlier in the response
+    // (because download from a local server URL that appears earlier in a
+    // response is preferable than downloading from the next URL which could be
+    // a internet URL and thus could be more expensive).
+    case kActionCodeError:
+    case kActionCodeDownloadTransferError:
+    case kActionCodeDownloadWriteError:
+    case kActionCodeDownloadStateInitializationError:
+    case kActionCodeOmahaErrorInHTTPResponse: // Aggregate code for HTTP errors.
+      IncrementFailureCount();
+      break;
+
+    // Errors which are not specific to a URL and hence shouldn't result in
+    // the URL being penalized. This can happen in two cases:
+    // 1. We haven't started downloading anything: These errors don't cost us
+    // anything in terms of actual payload bytes, so we should just do the
+    // regular retries at the next update check.
+    // 2. We have successfully downloaded the payload: In this case, the
+    // payload attempt number would have been incremented and would take care
+    // of the back-off at the next update check.
+    // In either case, there's no need to update URL index or failure count.
+    case kActionCodeOmahaRequestError:
+    case kActionCodeOmahaResponseHandlerError:
+    case kActionCodePostinstallRunnerError:
+    case kActionCodeFilesystemCopierError:
+    case kActionCodeInstallDeviceOpenError:
+    case kActionCodeKernelDeviceOpenError:
+    case kActionCodeDownloadNewPartitionInfoError:
+    case kActionCodeNewRootfsVerificationError:
+    case kActionCodeNewKernelVerificationError:
+    case kActionCodePostinstallBootedFromFirmwareB:
+    case kActionCodeOmahaRequestEmptyResponseError:
+    case kActionCodeOmahaRequestXMLParseError:
+    case kActionCodeOmahaResponseInvalid:
+    case kActionCodeOmahaUpdateIgnoredPerPolicy:
+    case kActionCodeOmahaUpdateDeferredPerPolicy:
+      LOG(INFO) << "Not incrementing URL index or failure count for this error";
+      break;
+
+    case kActionCodeSuccess:                            // success code
+    case kActionCodeSetBootableFlagError:               // unused
+    case kActionCodeUmaReportedMax:                     // not an error code
+    case kActionCodeOmahaRequestHTTPResponseBase:       // aggregated already
+    case kActionCodeResumedFlag:                        // not an error code
+    case kActionCodeBootModeFlag:                       // not an error code
+    case kActualCodeMask:                               // not an error code
+      // These shouldn't happen. Enumerating these  explicitly here so that we
+      // can let the compiler warn about new error codes that are added to
+      // action_processor.h but not added here.
+      LOG(WARNING) << "Unexpected error code for UpdateFailed";
+      break;
+
+    // Note: Not adding a default here so as to let the compiler warn us of
+    // any new enums that were added in the .h but not listed in this switch.
+  }
+}
+
+void PayloadState::LogPayloadState() {
+  LOG(INFO) << "Current Payload State:\n"
+            << "Current Response = \n" << response_
+            << "\nPayload Attempt Number = " << payload_attempt_number_
+            << "\nCurrent URL Index = " << url_index_
+            << "\nCurrent URL Failure Count = " << url_failure_count_;
+}
+
+void PayloadState::IncrementPayloadAttemptNumber() {
+  LOG(INFO) << "Incrementing the payload attempt number";
+  SetPayloadAttemptNumber(GetPayloadAttemptNumber() + 1);
+
+  // TODO(jaysri): chromium-os:36806: Implement the payload back-off logic
+  // that uses the payload attempt number.
+}
+
+void PayloadState::IncrementUrlIndex() {
   uint32_t next_url_index = GetUrlIndex() + 1;
   if (next_url_index < num_urls_) {
-    LOG(INFO) << "Advancing the URL index for next attempt";
+    LOG(INFO) << "Incrementing the URL index for next attempt";
+    SetUrlIndex(next_url_index);
   } else {
     LOG(INFO) << "Resetting the current URL index (" << GetUrlIndex() << ") to "
               << "0 as we only have " << num_urls_ << " URL(s)";
-    next_url_index = 0;
-
-    // TODO(jaysri): This is the place where we should increment the
-    // payload_attempt_number so that we can back-off appropriately.
+    SetUrlIndex(0);
+    IncrementPayloadAttemptNumber();
   }
-
-  SetUrlIndex(next_url_index);
 }
 
-string PayloadState::LoadResponse() {
+void PayloadState::IncrementFailureCount() {
+  uint32_t next_url_failure_count = GetUrlFailureCount() + 1;
+  if (next_url_failure_count < max_failure_count_per_url_) {
+    LOG(INFO) << "Incrementing the URL failure count";
+    SetUrlFailureCount(next_url_failure_count);
+  } else {
+    LOG(INFO) << "Reached max number of failures for Url" << GetUrlIndex()
+              << ". Trying next available URL";
+    IncrementUrlIndex();
+  }
+}
+
+void PayloadState::LoadResponse() {
   CHECK(prefs_);
   string stored_value;
   if (prefs_->Exists(kPrefsCurrentResponse) &&
       prefs_->GetString(kPrefsCurrentResponse, &stored_value)) {
     response_ = stored_value;
   }
-  return response_;
 }
 
-uint32_t PayloadState::LoadUrlIndex() {
+void PayloadState::LoadPayloadAttemptNumber() {
+  CHECK(prefs_);
+  int64_t stored_value;
+  if (prefs_->Exists(kPrefsPayloadAttemptNumber) &&
+      prefs_->GetInt64(kPrefsPayloadAttemptNumber, &stored_value)) {
+    if (stored_value < 0) {
+      LOG(ERROR) << "Invalid payload attempt number (" << stored_value
+                 << ") in persisted state. Defaulting to 0";
+      stored_value = 0;
+    }
+    payload_attempt_number_ = stored_value;
+  }
+}
+
+void PayloadState::SetPayloadAttemptNumber(uint32_t payload_attempt_number) {
+  CHECK(prefs_);
+  payload_attempt_number_ = payload_attempt_number;
+  LOG(INFO) << "Payload Attempt Number = " << payload_attempt_number_;
+  prefs_->SetInt64(kPrefsPayloadAttemptNumber, payload_attempt_number_);
+}
+
+void PayloadState::LoadUrlIndex() {
   CHECK(prefs_);
   int64_t stored_value;
   if (prefs_->Exists(kPrefsCurrentUrlIndex) &&
       prefs_->GetInt64(kPrefsCurrentUrlIndex, &stored_value)) {
+    if (stored_value < 0) {
+      LOG(ERROR) << "Invalid URL Index (" << stored_value
+                 << ") in persisted state. Defaulting to 0";
+      stored_value = 0;
+    }
     url_index_ = stored_value;
   }
-  return url_index_;
 }
 
 void PayloadState::SetUrlIndex(uint32_t url_index) {
   CHECK(prefs_);
-  // TODO(jaysri): When we implement failure count, make sure to reset
-  // the failure count when url index changes.
   url_index_ = url_index;
   LOG(INFO) << "Current URL Index = " << url_index_;
   prefs_->SetInt64(kPrefsCurrentUrlIndex, url_index_);
+
+  // Everytime we set the URL index, we should also reset its failure count.
+  // Otherwise, the URL will be tried only once, instead of
+  // max_failure_count_per_url times in the next round.
+  SetUrlFailureCount(0);
+}
+
+void PayloadState::LoadUrlFailureCount() {
+  CHECK(prefs_);
+  int64_t stored_value;
+  if (prefs_->Exists(kPrefsCurrentUrlFailureCount) &&
+      prefs_->GetInt64(kPrefsCurrentUrlFailureCount, &stored_value)) {
+    if (stored_value < 0) {
+      LOG(ERROR) << "Invalid URL Failure count (" << stored_value
+                 << ") in persisted state. Defaulting to 0";
+      stored_value = 0;
+    }
+    url_failure_count_ = stored_value;
+  }
+}
+
+void PayloadState::SetUrlFailureCount(uint32_t url_failure_count) {
+  CHECK(prefs_);
+  url_failure_count_ = url_failure_count;
+  LOG(INFO) << "Current URL (Url" << GetUrlIndex()
+            << ")'s Failure Count = " << url_failure_count_;
+  prefs_->SetInt64(kPrefsCurrentUrlFailureCount, url_failure_count_);
 }
 
 }  // namespace chromeos_update_engine

diff --git a/payload_state.h b/payload_state.h
index 6861e70..cc90cf4 100644
--- a/payload_state.h
+++ b/payload_state.h

@@ -5,27 +5,29 @@
 #ifndef CHROMEOS_PLATFORM_UPDATE_ENGINE_PAYLOAD_STATE_H__
 #define CHROMEOS_PLATFORM_UPDATE_ENGINE_PAYLOAD_STATE_H__
 
-#include "base/file_path.h"
-
-#include "update_engine/action_processor.h"
+#include "update_engine/payload_state_interface.h"
 #include "update_engine/prefs_interface.h"
 
 namespace chromeos_update_engine {
 
-// Forward declaration here because we get a circular dependency if
-// we include omaha_request_action.h directly.
-struct OmahaResponse;
-
 // Encapsulates all the payload state required for download. This includes the
 // state necessary for handling multiple URLs in Omaha response, the back-off
 // state, etc. All state is persisted so that we use the most recently saved
 // value when resuming the update_engine process. All state is also cached in
 // memory so that we ensure we always make progress based on last known good
 // state even when there's any issue in reading/writing from the file system.
-class PayloadState {
+class PayloadState : public PayloadStateInterface {
  public:
 
-  PayloadState() : prefs_(NULL), num_urls_(0), url_index_(0) {}
+  PayloadState()
+      : prefs_(NULL),
+        payload_attempt_number_(0),
+        num_urls_(0),
+        url_index_(0),
+        url_failure_count_(0),
+        max_failure_count_per_url_(0) {}
+
+  virtual ~PayloadState() {}
 
   // Initializes a payload state object using |prefs| for storing the
   // persisted state. It also performs the initial loading of all persisted
@@ -34,63 +36,112 @@
   // on this object.
   bool Initialize(PrefsInterface* prefs);
 
-  // Logs the current payload state.
-  void LogPayloadState();
 
-  // Sets the internal payload state based on the given Omaha response. This
-  // response could be the same or different from the one for which we've stored
-  // the internal state. If it's different, then this method resets all the
-  // internal state corresponding to the old response. Since the Omaha response
-  // has a lot of fields that are not related to payload state, it uses only
-  // a subset of the fields in the Omaha response to compare equality.
-  void SetResponse(const OmahaResponse& response);
+  // Implementation of PayloadStateInterface methods.
+  virtual void SetResponse(const OmahaResponse& response);
+  virtual void DownloadComplete();
+  virtual void DownloadProgress(size_t count);
+  virtual void UpdateFailed(ActionExitCode error);
 
-  // Updates the payload state when the current update attempt has failed.
-  void UpdateFailed(ActionExitCode error);
-
-  // Returns the internally stored subset of the response state as a string.
-  // This is logically a private method, but exposed here for unit tests.
-  std::string GetResponse() {
+  virtual inline std::string GetResponse() {
     return response_;
   }
 
-  // Returns the current URL index.
-  uint32_t GetUrlIndex() {
+  virtual inline uint32_t GetPayloadAttemptNumber() {
+    return payload_attempt_number_;
+  }
+
+  virtual inline uint32_t GetUrlIndex() {
     return url_index_;
   }
 
+  virtual inline uint32_t GetUrlFailureCount() {
+    return url_failure_count_;
+  }
+
  private:
-  // Sets the stored response_ value from the currently persisted value for
-  // the response. Returns the same value.
-  std::string LoadResponse();
+  // Logs the current payload state.
+  void LogPayloadState();
 
-  // Sets the url_index_ value from the currently persisted value for
-  // URL index. Returns the same value.
-  uint32_t LoadUrlIndex();
+  // Increments the payload attempt number which governs the back-off behavior
+  // at the time of the next update check.
+  void IncrementPayloadAttemptNumber();
 
-  // Sets the current URL index.
+  // Advances the current URL index to the next available one. If all URLs have
+  // been exhausted during the current payload download attempt (as indicated
+  // by the payload attempt number), then it will increment the payload attempt
+  // number and wrap around again with the first URL in the list.
+  void IncrementUrlIndex();
+
+  // Increments the failure count of the current URL. If the configured max
+  // failure count is reached for this URL, it advances the current URL index
+  // to the next URL and resets the failure count for that URL.
+  void IncrementFailureCount();
+
+  // Initializes the current response from the persisted state.
+  void LoadResponse();
+
+  // Initializes the payload attempt number from the persisted state.
+  void LoadPayloadAttemptNumber();
+
+  // Sets the payload attempt number to the given value. Also persists the
+  // value being set so that we resume from the same value in case of a process
+  // restart.
+  void SetPayloadAttemptNumber(uint32_t payload_attempt_number);
+
+  // Initializes the current URL index from the persisted state.
+  void LoadUrlIndex();
+
+  // Sets the current URL index to the given value. Also persists the value
+  // being set so that we resume from the same value in case of a process
+  // restart.
   void SetUrlIndex(uint32_t url_index);
 
+  // Initializes the current URL's failure count from the persisted stae.
+  void LoadUrlFailureCount();
+
+  // Sets the current URL's failure count to the given value. Also persists the
+  // value being set so that we resume from the same value in case of a process
+  // restart.
+  void SetUrlFailureCount(uint32_t url_failure_count);
+
   // Interface object with which we read/write persisted state. This must
   // be set by calling the Initialize method before calling any other method.
   PrefsInterface* prefs_;
 
-  // Cached value of the latest subset of the Omaha response with which we're
-  // working off currently. This value is persisted so we load it off the next
-  // time when update_engine restarts. The rest of the state in this class will
-  // be cleared when we set a new  response.
+  // This stores a subset of the current response from Omaha.  Each update to
+  // this value is persisted so we resume from the same value in case of a
+  // process restart.
   std::string response_;
 
-  // The number of urls in the current response. Not persisted.
+  // The number of times we've tried to download the payload in full. This is
+  // incremented each time we download the payload in full successsfully or
+  // when we exhaust all failure limits for all URLs and are about to wrap
+  // around back to the first URL.  Each update to this value is persisted so
+  // we resume from the same value in case of a process restart.
+  uint32_t payload_attempt_number_;
+
+  // The number of urls in the current response.  This value is not persisted,
+  // as we will always get a response from Omaha before we need this value.
   uint32_t num_urls_;
 
-  // Cached value of the index of the current URL, to be used in case we are
-  // unable to read from the persisted store for any reason. This type is
-  // different from the one in the accessor methods because PrefsInterface
-  // supports only int64_t but we want to provide a stronger abstraction of
-  // uint32_t.
+  // The index of the current URL.  This type is different from the one in the
+  // accessor methods because PrefsInterface supports only int64_t but we want
+  // to provide a stronger abstraction of uint32_t.  Each update to this value
+  // is persisted so we resume from the same value in case of a process
+  // restart.
   int64_t url_index_;
 
+  // The count of failures encountered in the current attempt to download using
+  // the current URL (specified by url_index_).  Each update to this value is
+  // persisted so we resume from the same value in case of a process restart.
+  int64_t url_failure_count_;
+
+  // The max failure count per url configured in the current response.  This
+  // value is not persisted, as we will always get a response from Omaha before
+  // we need this value.
+  uint32_t max_failure_count_per_url_;
+
   DISALLOW_COPY_AND_ASSIGN(PayloadState);
 };
 

diff --git a/payload_state_interface.h b/payload_state_interface.h
new file mode 100644
index 0000000..031fbec
--- /dev/null
+++ b/payload_state_interface.h

@@ -0,0 +1,65 @@
+// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROMEOS_PLATFORM_UPDATE_ENGINE_PAYLOAD_STATE_INTERFACE_H__
+#define CHROMEOS_PLATFORM_UPDATE_ENGINE_PAYLOAD_STATE_INTERFACE_H__
+
+#include <string>
+
+#include "update_engine/action_processor.h"
+
+namespace chromeos_update_engine {
+
+// Forward declaration here because we get a circular dependency if
+// we include omaha_request_action.h directly.
+struct OmahaResponse;
+
+// Describes the methods that need to be implemented by the PayloadState class.
+// This interface has been carved out to support mocking of the PayloadState
+// object.
+class PayloadStateInterface {
+ public:
+  // Sets the internal payload state based on the given Omaha response. This
+  // response could be the same or different from the one for which we've stored
+  // the internal state. If it's different, then this method resets all the
+  // internal state corresponding to the old response. Since the Omaha response
+  // has a lot of fields that are not related to payload state, it uses only
+  // a subset of the fields in the Omaha response to compare equality.
+  virtual void SetResponse(const OmahaResponse& response) = 0;
+
+  // This method should be called whenever we have completed downloading all
+  // the bytes of a payload and have verified that its size and hash match the
+  // expected values. We use this notificaiton to increment the payload attempt
+  // number so that the throttle the next attempt to download the same payload
+  // (in case there's an error in subsequent steps such as post-install)
+  // appropriately.
+  virtual void DownloadComplete() = 0;
+
+  // This method should be called whenever we receive new bytes from the
+  // network for the current payload. We use this notification to reset the
+  // failure count for a given URL since receipt of some bytes means we are
+  // able to make forward progress with the current URL.
+  virtual void DownloadProgress(size_t count) = 0;
+
+  // This method should be called whenever an update attempt fails with the
+  // given error code. We use this notification to update the payload state
+  // depending on the type of the error that happened.
+  virtual void UpdateFailed(ActionExitCode error) = 0;
+
+  // Returns the currently stored response.
+  virtual std::string GetResponse() = 0;
+
+  // Returns the payload attempt number.
+  virtual uint32_t GetPayloadAttemptNumber() = 0;
+
+  // Returns the current URL index.
+  virtual uint32_t GetUrlIndex() = 0;
+
+  // Returns the current URL's failure count.
+  virtual uint32_t GetUrlFailureCount() = 0;
+ };
+
+}  // namespace chromeos_update_engine
+
+#endif  // CHROMEOS_PLATFORM_UPDATE_ENGINE_PAYLOAD_STATE_INTERFACE_H__

diff --git a/payload_state_unittest.cc b/payload_state_unittest.cc
index bfca903..f1d586a 100644
--- a/payload_state_unittest.cc
+++ b/payload_state_unittest.cc

@@ -4,8 +4,10 @@
 
 #include <glib.h>
 
-#include "gtest/gtest.h"
+#include "base/stringprintf.h"
 #include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
 #include "update_engine/omaha_request_action.h"
 #include "update_engine/payload_state.h"
 #include "update_engine/prefs_mock.h"
@@ -15,16 +17,55 @@
 using std::string;
 using testing::_;
 using testing::NiceMock;
+using testing::Return;
 using testing::SetArgumentPointee;
 
 namespace chromeos_update_engine {
 
+static void SetupPayloadStateWith2Urls(string hash,
+                                       PayloadState* payload_state,
+                                       OmahaResponse* response) {
+  response->payload_urls.clear();
+  response->payload_urls.push_back("http://test");
+  response->payload_urls.push_back("https://test");
+  response->size = 523456789;
+  response->hash = hash;
+  response->metadata_size = 558123;
+  response->metadata_signature = "metasign";
+  response->max_failure_count_per_url = 3;
+  payload_state->SetResponse(*response);
+  string stored_response = payload_state->GetResponse();
+  string expected_response = StringPrintf(
+      "NumURLs = 2\n"
+      "Url0 = http://test\n"
+      "Url1 = https://test\n"
+      "Payload Size = 523456789\n"
+      "Payload Sha256 Hash = %s\n"
+      "Metadata Size = 558123\n"
+      "Metadata Signature = metasign\n",
+      hash.c_str());
+  EXPECT_EQ(expected_response, stored_response);
+}
+
 class PayloadStateTest : public ::testing::Test { };
 
+TEST(PayloadStateTest, DidYouAddANewActionExitCode) {
+  if (kActionCodeUmaReportedMax != 40) {
+    LOG(ERROR) << "The following failure is intentional. If you added a new "
+               << "ActionExitCode enum value, make sure to add it to the "
+               << "PayloadState::UpdateFailed method and then update this test "
+               << "to the new value of kActionCodeUmaReportedMax, which is "
+               << kActionCodeUmaReportedMax;
+    EXPECT_FALSE("Please see the log line above");
+  }
+}
+
 TEST(PayloadStateTest, SetResponseWorksWithEmptyResponse) {
   OmahaResponse response;
   NiceMock<PrefsMock> prefs;
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 0));
   EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 0));
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlFailureCount, 0));
   PayloadState payload_state;
   EXPECT_TRUE(payload_state.Initialize(&prefs));
   payload_state.SetResponse(response);
@@ -36,6 +77,7 @@
                              "Metadata Signature = \n";
   EXPECT_EQ(expected_response, stored_response);
   EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
 }
 
 TEST(PayloadStateTest, SetResponseWorksWithSingleUrl) {
@@ -46,7 +88,9 @@
   response.metadata_size = 58123;
   response.metadata_signature = "msign";
   NiceMock<PrefsMock> prefs;
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 0));
   EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 0));
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlFailureCount, 0));
   PayloadState payload_state;
   EXPECT_TRUE(payload_state.Initialize(&prefs));
   payload_state.SetResponse(response);
@@ -59,6 +103,7 @@
                              "Metadata Signature = msign\n";
   EXPECT_EQ(expected_response, stored_response);
   EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
 }
 
 TEST(PayloadStateTest, SetResponseWorksWithMultipleUrls) {
@@ -70,7 +115,9 @@
   response.metadata_size = 558123;
   response.metadata_signature = "metasign";
   NiceMock<PrefsMock> prefs;
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 0));
   EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 0));
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlFailureCount, 0));
   PayloadState payload_state;
   EXPECT_TRUE(payload_state.Initialize(&prefs));
   payload_state.SetResponse(response);
@@ -84,32 +131,31 @@
                              "Metadata Signature = metasign\n";
   EXPECT_EQ(expected_response, stored_response);
   EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
 }
 
 TEST(PayloadStateTest, CanAdvanceUrlIndexCorrectly) {
   OmahaResponse response;
-  // For a variation, don't set the metadata signature, as it's optional.
-  response.payload_urls.push_back("http://set.url.index");
-  response.payload_urls.push_back("https://set.url.index");
-  response.size = 523456789;
-  response.hash = "rhash";
-
   NiceMock<PrefsMock> prefs;
-  // Should be called 4 times, one for each UpdateFailed call plus one
-  // for SetResponse.
-  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, _)).Times(4);
   PayloadState payload_state;
+
+  // Payload attempt should start with 0 and then advance to 1.
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 0)).Times(1);
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 1)).Times(1);
+
+  // Url index should go from 0 to 1 twice.
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 0)).Times(2);
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 1)).Times(2);
+
+  // Failure count should be called each times url index is set, so that's
+  // 4 times for this test.
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlFailureCount, 0)).Times(4);
+
   EXPECT_TRUE(payload_state.Initialize(&prefs));
-  payload_state.SetResponse(response);
-  string stored_response = payload_state.GetResponse();
-  string expected_response = "NumURLs = 2\n"
-                             "Url0 = http://set.url.index\n"
-                             "Url1 = https://set.url.index\n"
-                             "Payload Size = 523456789\n"
-                             "Payload Sha256 Hash = rhash\n"
-                             "Metadata Size = 0\n"
-                             "Metadata Signature = \n";
-  EXPECT_EQ(expected_response, stored_response);
+
+  // This does a SetResponse which causes all the states to be set to 0 for
+  // the first time.
+  SetupPayloadStateWith2Urls("Hash1235", &payload_state, &response);
   EXPECT_EQ(0, payload_state.GetUrlIndex());
 
   // Verify that on the first error, the URL index advances to 1.
@@ -128,27 +174,13 @@
 
 TEST(PayloadStateTest, NewResponseResetsPayloadState) {
   OmahaResponse response;
-  response.payload_urls.push_back("http://reset.url.index");
-  response.payload_urls.push_back("https://reset.url.index");
-  response.size = 523456789;
-  response.hash = "rhash";
-  response.metadata_size = 558123;
-  response.metadata_signature = "metasign";
   NiceMock<PrefsMock> prefs;
-  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 0)).Times(2);
-  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 1)).Times(1);
   PayloadState payload_state;
+
   EXPECT_TRUE(payload_state.Initialize(&prefs));
-  payload_state.SetResponse(response);
-  string stored_response = payload_state.GetResponse();
-  string expected_response = "NumURLs = 2\n"
-                             "Url0 = http://reset.url.index\n"
-                             "Url1 = https://reset.url.index\n"
-                             "Payload Size = 523456789\n"
-                             "Payload Sha256 Hash = rhash\n"
-                             "Metadata Size = 558123\n"
-                             "Metadata Signature = metasign\n";
-  EXPECT_EQ(expected_response, stored_response);
+
+  // Set the first response.
+  SetupPayloadStateWith2Urls("Hash5823", &payload_state, &response);
 
   // Advance the URL index to 1 by faking an error.
   ActionExitCode error = kActionCodeDownloadMetadataSignatureMismatch;
@@ -156,21 +188,157 @@
   EXPECT_EQ(1, payload_state.GetUrlIndex());
 
   // Now, slightly change the response and set it again.
-  response.hash = "newhash";
-  payload_state.SetResponse(response);
-  stored_response = payload_state.GetResponse();
-  expected_response = "NumURLs = 2\n"
-                      "Url0 = http://reset.url.index\n"
-                      "Url1 = https://reset.url.index\n"
-                      "Payload Size = 523456789\n"
-                      "Payload Sha256 Hash = newhash\n"
-                      "Metadata Size = 558123\n"
-                      "Metadata Signature = metasign\n";
-  // Verify the new response.
-  EXPECT_EQ(expected_response, stored_response);
+  SetupPayloadStateWith2Urls("Hash8225", &payload_state, &response);
 
   // Make sure the url index was reset to 0 because of the new response.
   EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
 }
 
+TEST(PayloadStateTest, AllCountersGetUpdatedProperlyOnErrorCodesAndEvents) {
+  OmahaResponse response;
+  PayloadState payload_state;
+  NiceMock<PrefsMock> prefs;
+
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 0)).Times(2);
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 1)).Times(1);
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 2)).Times(1);
+
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 0)).Times(4);
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 1)).Times(2);
+
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlFailureCount, 0)).Times(7);
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlFailureCount, 1)).Times(2);
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlFailureCount, 2)).Times(1);
+
+  EXPECT_TRUE(payload_state.Initialize(&prefs));
+
+  SetupPayloadStateWith2Urls("Hash5873", &payload_state, &response);
+
+  // This should advance the URL index.
+  payload_state.UpdateFailed(kActionCodeDownloadMetadataSignatureMismatch);
+  EXPECT_EQ(0, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(1, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
+
+  // This should advance the failure count only.
+  payload_state.UpdateFailed(kActionCodeDownloadTransferError);
+  EXPECT_EQ(0, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(1, payload_state.GetUrlIndex());
+  EXPECT_EQ(1, payload_state.GetUrlFailureCount());
+
+  // This should advance the failure count only.
+  payload_state.UpdateFailed(kActionCodeDownloadTransferError);
+  EXPECT_EQ(0, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(1, payload_state.GetUrlIndex());
+  EXPECT_EQ(2, payload_state.GetUrlFailureCount());
+
+  // This should advance the URL index as we've reached the
+  // max failure count and reset the failure count for the new URL index.
+  // This should also wrap around the URL index and thus cause the payload
+  // attempt number to be incremented.
+  payload_state.UpdateFailed(kActionCodeDownloadTransferError);
+  EXPECT_EQ(1, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
+
+  // This should advance the URL index.
+  payload_state.UpdateFailed(kActionCodePayloadHashMismatchError);
+  EXPECT_EQ(1, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(1, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
+
+  // This should advance the URL index and payload attempt number due to
+  // wrap-around of URL index.
+  payload_state.UpdateFailed(kActionCodeDownloadMetadataSignatureMissingError);
+  EXPECT_EQ(2, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
+
+  // This HTTP error code should only increase the failure count.
+  payload_state.UpdateFailed(static_cast<ActionExitCode>(
+      kActionCodeOmahaRequestHTTPResponseBase + 404));
+  EXPECT_EQ(2, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(1, payload_state.GetUrlFailureCount());
+
+  // And that failure count should be reset when we download some bytes
+  // afterwards.
+  payload_state.DownloadProgress(100);
+  EXPECT_EQ(2, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
+
+  // Now, slightly change the response and set it again.
+  SetupPayloadStateWith2Urls("Hash8532", &payload_state, &response);
+
+  // Make sure the url index was reset to 0 because of the new response.
+  EXPECT_EQ(0, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
+}
+
+TEST(PayloadStateTest, PayloadAttemptNumberIncreasesOnSuccessfulDownload) {
+  OmahaResponse response;
+  PayloadState payload_state;
+  NiceMock<PrefsMock> prefs;
+
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 0)).Times(1);
+  EXPECT_CALL(prefs, SetInt64(kPrefsPayloadAttemptNumber, 1)).Times(1);
+
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlIndex, 0)).Times(1);
+  EXPECT_CALL(prefs, SetInt64(kPrefsCurrentUrlFailureCount, 0)).Times(1);
+
+  EXPECT_TRUE(payload_state.Initialize(&prefs));
+
+  SetupPayloadStateWith2Urls("Hash8593", &payload_state, &response);
+
+  // This should just advance the payload attempt number;
+  EXPECT_EQ(0, payload_state.GetPayloadAttemptNumber());
+  payload_state.DownloadComplete();
+  EXPECT_EQ(1, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
+}
+
+TEST(PayloadStateTest, SetResponseResetsInvalidUrlIndex) {
+  OmahaResponse response;
+  PayloadState payload_state;
+  NiceMock<PrefsMock> prefs;
+
+  EXPECT_TRUE(payload_state.Initialize(&prefs));
+  SetupPayloadStateWith2Urls("Hash4427", &payload_state, &response);
+
+  // Generate enough events to advance URL index, failure count and
+  // payload attempt number all to 1.
+  payload_state.DownloadComplete();
+  payload_state.UpdateFailed(kActionCodeDownloadMetadataSignatureMismatch);
+  payload_state.UpdateFailed(kActionCodeDownloadTransferError);
+  EXPECT_EQ(1, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(1, payload_state.GetUrlIndex());
+  EXPECT_EQ(1, payload_state.GetUrlFailureCount());
+
+  // Now, simulate a corrupted url index on persisted store which gets
+  // loaded when update_engine restarts. Using a different prefs object
+  // so as to not bother accounting for the uninteresting calls above.
+  NiceMock<PrefsMock> prefs2;
+  EXPECT_CALL(prefs2, Exists(_)).WillRepeatedly(Return(true));
+  EXPECT_CALL(prefs2, GetInt64(kPrefsPayloadAttemptNumber, _));
+  EXPECT_CALL(prefs2, GetInt64(kPrefsCurrentUrlIndex, _))
+      .WillOnce(DoAll(SetArgumentPointee<1>(2), Return(true)));
+  EXPECT_CALL(prefs2, GetInt64(kPrefsCurrentUrlFailureCount, _));
+
+  // Note: This will be a different payload object, but the response should
+  // have the same hash as before so as to not trivially reset because the
+  // response was different. We want to specifically test that even if the
+  // response is same, we should reset the state if we find it corrupted.
+  EXPECT_TRUE(payload_state.Initialize(&prefs2));
+  SetupPayloadStateWith2Urls("Hash4427", &payload_state, &response);
+
+  // Make sure all counters get reset to 0 because of the corrupted URL index
+  // we supplied above.
+  EXPECT_EQ(0, payload_state.GetPayloadAttemptNumber());
+  EXPECT_EQ(0, payload_state.GetUrlIndex());
+  EXPECT_EQ(0, payload_state.GetUrlFailureCount());
+}
 }

diff --git a/prefs.cc b/prefs.cc
index 400056e..03a122a 100644
--- a/prefs.cc
+++ b/prefs.cc

@@ -37,8 +37,10 @@
 const char kPrefsWallClockWaitPeriod[] = "wall-clock-wait-period";
 const char kPrefsUpdateFirstSeenAt[] = "update-first-seen-at";
 
+const char kPrefsPayloadAttemptNumber[] = "payload-attempt-number";
 const char kPrefsCurrentResponse[] = "current-omaha-response";
 const char kPrefsCurrentUrlIndex[] = "current-url-index";
+const char kPrefsCurrentUrlFailureCount[] = "current-url-failure-count";
 
 bool Prefs::Init(const FilePath& prefs_dir) {
   prefs_dir_ = prefs_dir;

diff --git a/prefs_interface.h b/prefs_interface.h
index cc7c4d1..7bc56ca 100644
--- a/prefs_interface.h
+++ b/prefs_interface.h

@@ -27,8 +27,10 @@
 extern const char kPrefsUpdateCheckCount[];
 extern const char kPrefsWallClockWaitPeriod[];
 extern const char kPrefsUpdateFirstSeenAt[];
+extern const char kPrefsPayloadAttemptNumber[];
 extern const char kPrefsCurrentResponse[];
 extern const char kPrefsCurrentUrlIndex[];
+extern const char kPrefsCurrentUrlFailureCount[];
 
 // The prefs interface allows access to a persistent preferences
 // store. The two reasons for providing this as an interface are

diff --git a/system_state.cc b/system_state.cc
index d13e18c..78d5036 100644
--- a/system_state.cc
+++ b/system_state.cc

@@ -34,29 +34,4 @@
   return file_util::PathExists(FilePath(kOOBECompletedMarker));
 }
 
-void RealSystemState::set_device_policy(
-    const policy::DevicePolicy* device_policy) {
-  device_policy_ = device_policy;
-}
-
-const policy::DevicePolicy* RealSystemState::device_policy() const {
-  return device_policy_;
-}
-
-ConnectionManager* RealSystemState::connection_manager() {
-  return &connection_manager_;
-}
-
-MetricsLibraryInterface* RealSystemState::metrics_lib() {
-  return &metrics_lib_;
-}
-
-PrefsInterface* RealSystemState::prefs() {
-  return &prefs_;
-}
-
-PayloadState* RealSystemState::payload_state() {
-  return &payload_state_;
-}
-
 }  // namespace chromeos_update_engine

diff --git a/system_state.h b/system_state.h
index 04ad382..4dee1eb 100644
--- a/system_state.h
+++ b/system_state.h

@@ -44,8 +44,8 @@
   // Gets the interface object for persisted store.
   virtual PrefsInterface* prefs() = 0;
 
-  // Gets the URL State object.
-  virtual PayloadState* payload_state() = 0;
+  // Gets the interface for the payload state object.
+  virtual PayloadStateInterface* payload_state() = 0;
 };
 
 // A real implementation of the SystemStateInterface which is
@@ -58,16 +58,30 @@
 
   virtual bool IsOOBEComplete();
 
-  virtual void set_device_policy(const policy::DevicePolicy* device_policy);
-  virtual const policy::DevicePolicy* device_policy() const;
+  virtual inline void set_device_policy(
+      const policy::DevicePolicy* device_policy) {
+    device_policy_ = device_policy;
+  }
 
-  virtual ConnectionManager* connection_manager();
+  virtual inline const policy::DevicePolicy* device_policy() const {
+    return device_policy_;
+  }
 
-  virtual MetricsLibraryInterface* metrics_lib();
+  virtual inline ConnectionManager* connection_manager() {
+    return &connection_manager_;
+  }
 
-  virtual PrefsInterface* prefs();
+  virtual inline MetricsLibraryInterface* metrics_lib() {
+    return &metrics_lib_;
+  }
 
-  virtual PayloadState* payload_state();
+  virtual inline PrefsInterface* prefs() {
+    return &prefs_;
+  }
+
+  virtual inline PayloadStateInterface* payload_state() {
+    return &payload_state_;
+  }
 
   // Initializs this concrete object. Other methods should be invoked only
   // if the object has been initialized successfully.

diff --git a/update_attempter.cc b/update_attempter.cc
index d799c33..a0ee020 100644
--- a/update_attempter.cc
+++ b/update_attempter.cc

@@ -150,7 +150,6 @@
                              bool interactive,
                              bool is_test_mode,
                              bool is_user_initiated) {
-  LOG(INFO) << "Update called";
   chrome_proxy_resolver_.Init();
   fake_update_success_ = false;
   if (status_ == UPDATE_STATUS_UPDATED_NEED_REBOOT) {
@@ -184,7 +183,6 @@
   // before any update processing starts.
   start_action_processor_ = true;
   UpdateBootFlags();
-  LOG(INFO) << "Update finished";
 }
 
 bool UpdateAttempter::CalculateUpdateParams(const string& app_version,

diff --git a/update_attempter_unittest.cc b/update_attempter_unittest.cc
index 23e60fe..aa6e94f 100644
--- a/update_attempter_unittest.cc
+++ b/update_attempter_unittest.cc

@@ -252,16 +252,12 @@
                                      OmahaRequestAction::StaticType())))
       .Times(1);
   EXPECT_CALL(*processor_, StartProcessing()).Times(1);
-  OmahaResponse response;
-  response.payload_urls.push_back("http://url");
-  response.payload_urls.push_back("https://url");
-  mock_system_state_.mock_payload_state()->SetResponse(response);
   ActionExitCode err = kActionCodeError;
+  EXPECT_CALL(*mock_system_state_.mock_payload_state(), UpdateFailed(err));
   attempter_.error_event_.reset(new OmahaEvent(OmahaEvent::kTypeUpdateComplete,
                                                OmahaEvent::kResultError,
                                                err));
   attempter_.ScheduleErrorEventAction();
-  EXPECT_EQ(1, mock_system_state_.mock_payload_state()->GetUrlIndex());
   EXPECT_EQ(UPDATE_STATUS_REPORTING_ERROR_EVENT, attempter_.status());
 }
 

diff --git a/utils.cc b/utils.cc
index b4763e4..5a1d812 100644
--- a/utils.cc
+++ b/utils.cc

@@ -709,26 +709,34 @@
                       exp_time.second);
 }
 
-void SendErrorCodeToUma(MetricsLibraryInterface* metrics_lib,
-                        ActionExitCode code)
-{
-  string metric = utils::IsNormalBootMode() ? "Installer.NormalErrorCodes" :
-                                              "Installer.DevModeErrorCodes";
-
+ActionExitCode GetBaseErrorCode(ActionExitCode code) {
   // Ignore the higher order bits in the code by applying the mask as
   // we want the enumerations to be in the small contiguous range
   // with values less than kActionCodeUmaReportedMax.
-  int reported_code = code & kActualCodeMask;
+  ActionExitCode base_code = static_cast<ActionExitCode>(
+      code & kActualCodeMask);
 
-  // Make additional adjustments required for UMA.
-  // TODO(jaysri): Move this logic to UeErrorCode.cc when we
-  // fix BUG 34369.
-  if (reported_code >= kActionCodeOmahaRequestHTTPResponseBase) {
+  // Make additional adjustments required for UMA and error classification.
+  // TODO(jaysri): Move this logic to UeErrorCode.cc when we fix
+  // chromium-os:34369.
+  if (base_code >= kActionCodeOmahaRequestHTTPResponseBase) {
     // Since we want to keep the enums to a small value, aggregate all HTTP
-    // errors into this one bucket for UMA purposes.
-    reported_code = kActionCodeOmahaErrorInHTTPResponse;
+    // errors into this one bucket for UMA and error classification purposes.
+    base_code = kActionCodeOmahaErrorInHTTPResponse;
   }
 
+  return base_code;
+}
+
+
+
+void SendErrorCodeToUma(MetricsLibraryInterface* metrics_lib,
+                        ActionExitCode code) {
+  string metric = utils::IsNormalBootMode() ? "Installer.NormalErrorCodes" :
+                                              "Installer.DevModeErrorCodes";
+
+  ActionExitCode reported_code = GetBaseErrorCode(code);
+
   LOG(INFO) << "Sending error code " << reported_code
             << " to UMA metric: " << metric;
   metrics_lib->SendEnumToUMA(metric, reported_code, kActionCodeUmaReportedMax);

diff --git a/utils.h b/utils.h
index ffc2f39..c2cd21d 100644
--- a/utils.h
+++ b/utils.h

@@ -279,9 +279,16 @@
 // when applicable.
 std::string FormatTimeDelta(base::TimeDelta delta);
 
+// This method transforms the given error code to be suitable for UMA and
+// for error classification purposes by removing the higher order bits and
+// aggregating error codes beyond the enum range, etc. This method is
+// idempotent, i.e. if called with a value previously returned by this method,
+// it'll return the same value again.
+ActionExitCode GetBaseErrorCode(ActionExitCode code);
+
 // Sends the error code to the appropriate bucket in UMA using the metrics_lib
-// interface. This method also massages the error code to be suitable for UMA
-// purposes.
+// interface. This method uses GetBaseErrorCode to process the given code and
+// report only the base error code.
 void SendErrorCodeToUma(MetricsLibraryInterface* metrics_lib,
                         ActionExitCode code);
 }  // namespace utils
commit	2b5a0f065187fd19179e3809148dbfc376ada7a0	[log] [tgz]
author	Jay Srinivasan <jaysri@chromium.org>	Wed Dec 19 17:25:56 2012 -0800
committer	Gerrit <chrome-bot@google.com>	Thu Dec 20 23:01:52 2012 -0800
tree	3f7728d90f2ad4f34567300e731d5872352b6b6e
parent	07428b5d247093f80434d742860749c3b9b397f4 [diff]