blob: 9c4a7e456b4b093909c872df3468103a8720e3ee [file] [log] [blame]
Jay Srinivasan6f6ea002012-12-14 11:26:28 -08001// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "update_engine/payload_state.h"
6
Jay Srinivasan08262882012-12-28 19:29:43 -08007#include <algorithm>
8
Jay Srinivasan6f6ea002012-12-14 11:26:28 -08009#include <base/logging.h>
10#include <base/stringprintf.h>
11
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080012#include "update_engine/prefs.h"
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080013#include "update_engine/utils.h"
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080014
Jay Srinivasan08262882012-12-28 19:29:43 -080015using base::Time;
16using base::TimeDelta;
17using std::min;
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080018using std::string;
19
20namespace chromeos_update_engine {
21
// Upper bound, in days, on the exponential payload backoff interval.
static const uint32_t kMaxBackoffDays = 16;

// Width, in minutes, of the window used to randomize retry attempts after the
// backoff: 12 hours in total, i.e. +/- 6 hours around the nominal expiry.
static const uint32_t kMaxBackoffFuzzMinutes = 12 * 60;
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080027
28bool PayloadState::Initialize(PrefsInterface* prefs) {
29 CHECK(prefs);
30 prefs_ = prefs;
Jay Srinivasan08262882012-12-28 19:29:43 -080031 LoadResponseSignature();
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080032 LoadPayloadAttemptNumber();
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080033 LoadUrlIndex();
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080034 LoadUrlFailureCount();
Jay Srinivasan08262882012-12-28 19:29:43 -080035 LoadBackoffExpiryTime();
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080036 return true;
37}
38
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080039void PayloadState::SetResponse(const OmahaResponse& omaha_response) {
Jay Srinivasan08262882012-12-28 19:29:43 -080040 // Always store the latest response.
41 response_ = omaha_response;
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080042
Jay Srinivasan08262882012-12-28 19:29:43 -080043 // Check if the "signature" of this response (i.e. the fields we care about)
44 // has changed.
45 string new_response_signature = CalculateResponseSignature();
46 bool has_response_changed = (response_signature_ != new_response_signature);
47
48 // If the response has changed, we should persist the new signature and
49 // clear away all the existing state.
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080050 if (has_response_changed) {
Jay Srinivasan08262882012-12-28 19:29:43 -080051 LOG(INFO) << "Resetting all persisted state as this is a new response";
52 SetResponseSignature(new_response_signature);
53 ResetPersistedState();
54 return;
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080055 }
56
Jay Srinivasan08262882012-12-28 19:29:43 -080057 // This is the earliest point at which we can validate whether the URL index
58 // we loaded from the persisted state is a valid value. If the response
59 // hasn't changed but the URL index is invalid, it's indicative of some
60 // tampering of the persisted state.
61 if (url_index_ >= GetNumUrls()) {
62 LOG(INFO) << "Resetting all payload state as the url index seems to have "
63 "been tampered with";
64 ResetPersistedState();
65 return;
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080066 }
67}
68
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080069void PayloadState::DownloadComplete() {
70 LOG(INFO) << "Payload downloaded successfully";
71 IncrementPayloadAttemptNumber();
72}
73
74void PayloadState::DownloadProgress(size_t count) {
75 if (count == 0)
76 return;
77
78 // We've received non-zero bytes from a recent download operation. Since our
79 // URL failure count is meant to penalize a URL only for consecutive
80 // failures, downloading bytes successfully means we should reset the failure
81 // count (as we know at least that the URL is working). In future, we can
82 // design this to be more sophisticated to check for more intelligent failure
83 // patterns, but right now, even 1 byte downloaded will mark the URL to be
84 // good unless it hits 10 (or configured number of) consecutive failures
85 // again.
86
87 if (GetUrlFailureCount() == 0)
88 return;
89
90 LOG(INFO) << "Resetting failure count of Url" << GetUrlIndex()
91 << " to 0 as we received " << count << " bytes successfully";
92 SetUrlFailureCount(0);
93}
94
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080095void PayloadState::UpdateFailed(ActionExitCode error) {
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080096 ActionExitCode base_error = utils::GetBaseErrorCode(error);
Jay Srinivasan55f50c22013-01-10 19:24:35 -080097 LOG(INFO) << "Updating payload state for error code: " << base_error
98 << " (" << utils::CodeToString(base_error) << ")";
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080099
Jay Srinivasan08262882012-12-28 19:29:43 -0800100 if (GetNumUrls() == 0) {
101 // This means we got this error even before we got a valid Omaha response.
102 // So we should not advance the url_index_ in such cases.
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800103 LOG(INFO) << "Ignoring failures until we get a valid Omaha response.";
104 return;
105 }
106
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800107 switch (base_error) {
108 // Errors which are good indicators of a problem with a particular URL or
109 // the protocol used in the URL or entities in the communication channel
110 // (e.g. proxies). We should try the next available URL in the next update
111 // check to quickly recover from these errors.
112 case kActionCodePayloadHashMismatchError:
113 case kActionCodePayloadSizeMismatchError:
114 case kActionCodeDownloadPayloadVerificationError:
115 case kActionCodeDownloadPayloadPubKeyVerificationError:
116 case kActionCodeSignedDeltaPayloadExpectedError:
117 case kActionCodeDownloadInvalidMetadataMagicString:
118 case kActionCodeDownloadSignatureMissingInManifest:
119 case kActionCodeDownloadManifestParseError:
120 case kActionCodeDownloadMetadataSignatureError:
121 case kActionCodeDownloadMetadataSignatureVerificationError:
122 case kActionCodeDownloadMetadataSignatureMismatch:
123 case kActionCodeDownloadOperationHashVerificationError:
124 case kActionCodeDownloadOperationExecutionError:
125 case kActionCodeDownloadOperationHashMismatch:
126 case kActionCodeDownloadInvalidMetadataSize:
127 case kActionCodeDownloadInvalidMetadataSignature:
128 case kActionCodeDownloadOperationHashMissingError:
129 case kActionCodeDownloadMetadataSignatureMissingError:
130 IncrementUrlIndex();
131 break;
132
133 // Errors which seem to be just transient network/communication related
134 // failures and do not indicate any inherent problem with the URL itself.
135 // So, we should keep the current URL but just increment the
136 // failure count to give it more chances. This way, while we maximize our
137 // chances of downloading from the URLs that appear earlier in the response
138 // (because download from a local server URL that appears earlier in a
139 // response is preferable than downloading from the next URL which could be
140 // a internet URL and thus could be more expensive).
141 case kActionCodeError:
142 case kActionCodeDownloadTransferError:
143 case kActionCodeDownloadWriteError:
144 case kActionCodeDownloadStateInitializationError:
145 case kActionCodeOmahaErrorInHTTPResponse: // Aggregate code for HTTP errors.
146 IncrementFailureCount();
147 break;
148
149 // Errors which are not specific to a URL and hence shouldn't result in
150 // the URL being penalized. This can happen in two cases:
151 // 1. We haven't started downloading anything: These errors don't cost us
152 // anything in terms of actual payload bytes, so we should just do the
153 // regular retries at the next update check.
154 // 2. We have successfully downloaded the payload: In this case, the
155 // payload attempt number would have been incremented and would take care
Jay Srinivasan08262882012-12-28 19:29:43 -0800156 // of the backoff at the next update check.
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800157 // In either case, there's no need to update URL index or failure count.
158 case kActionCodeOmahaRequestError:
159 case kActionCodeOmahaResponseHandlerError:
160 case kActionCodePostinstallRunnerError:
161 case kActionCodeFilesystemCopierError:
162 case kActionCodeInstallDeviceOpenError:
163 case kActionCodeKernelDeviceOpenError:
164 case kActionCodeDownloadNewPartitionInfoError:
165 case kActionCodeNewRootfsVerificationError:
166 case kActionCodeNewKernelVerificationError:
167 case kActionCodePostinstallBootedFromFirmwareB:
168 case kActionCodeOmahaRequestEmptyResponseError:
169 case kActionCodeOmahaRequestXMLParseError:
170 case kActionCodeOmahaResponseInvalid:
171 case kActionCodeOmahaUpdateIgnoredPerPolicy:
172 case kActionCodeOmahaUpdateDeferredPerPolicy:
Jay Srinivasan08262882012-12-28 19:29:43 -0800173 case kActionCodeOmahaUpdateDeferredForBackoff:
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800174 LOG(INFO) << "Not incrementing URL index or failure count for this error";
175 break;
176
177 case kActionCodeSuccess: // success code
178 case kActionCodeSetBootableFlagError: // unused
179 case kActionCodeUmaReportedMax: // not an error code
180 case kActionCodeOmahaRequestHTTPResponseBase: // aggregated already
Jay Srinivasan55f50c22013-01-10 19:24:35 -0800181 case kActionCodeDevModeFlag: // not an error code
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800182 case kActionCodeResumedFlag: // not an error code
Jay Srinivasan55f50c22013-01-10 19:24:35 -0800183 case kActionCodeTestImageFlag: // not an error code
184 case kActionCodeTestOmahaUrlFlag: // not an error code
185 case kSpecialFlags: // not an error code
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800186 // These shouldn't happen. Enumerating these explicitly here so that we
187 // can let the compiler warn about new error codes that are added to
188 // action_processor.h but not added here.
189 LOG(WARNING) << "Unexpected error code for UpdateFailed";
190 break;
191
192 // Note: Not adding a default here so as to let the compiler warn us of
193 // any new enums that were added in the .h but not listed in this switch.
194 }
195}
196
Jay Srinivasan08262882012-12-28 19:29:43 -0800197bool PayloadState::ShouldBackoffDownload() {
198 if (response_.disable_payload_backoff) {
199 LOG(INFO) << "Payload backoff logic is disabled. "
200 "Can proceed with the download";
201 return false;
202 }
203
204 if (response_.is_delta_payload) {
205 // If delta payloads fail, we want to fallback quickly to full payloads as
206 // they are more likely to succeed. Exponential backoffs would greatly
207 // slow down the fallback to full payloads. So we don't backoff for delta
208 // payloads.
209 LOG(INFO) << "No backoffs for delta payloads. "
210 << "Can proceed with the download";
211 return false;
212 }
213
214 if (!utils::IsOfficialBuild()) {
215 // Backoffs are needed only for official builds. We do not want any delays
216 // or update failures due to backoffs during testing or development.
217 LOG(INFO) << "No backoffs for test/dev images. "
218 << "Can proceed with the download";
219 return false;
220 }
221
222 if (backoff_expiry_time_.is_null()) {
223 LOG(INFO) << "No backoff expiry time has been set. "
224 << "Can proceed with the download";
225 return false;
226 }
227
228 if (backoff_expiry_time_ < Time::Now()) {
229 LOG(INFO) << "The backoff expiry time ("
230 << utils::ToString(backoff_expiry_time_)
231 << ") has elapsed. Can proceed with the download";
232 return false;
233 }
234
235 LOG(INFO) << "Cannot proceed with downloads as we need to backoff until "
236 << utils::ToString(backoff_expiry_time_);
237 return true;
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800238}
239
240void PayloadState::IncrementPayloadAttemptNumber() {
Jay Srinivasan08262882012-12-28 19:29:43 -0800241 if (response_.is_delta_payload) {
242 LOG(INFO) << "Not incrementing payload attempt number for delta payloads";
243 return;
244 }
245
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800246 LOG(INFO) << "Incrementing the payload attempt number";
247 SetPayloadAttemptNumber(GetPayloadAttemptNumber() + 1);
Jay Srinivasan08262882012-12-28 19:29:43 -0800248 UpdateBackoffExpiryTime();
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800249}
250
251void PayloadState::IncrementUrlIndex() {
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800252 uint32_t next_url_index = GetUrlIndex() + 1;
Jay Srinivasan08262882012-12-28 19:29:43 -0800253 if (next_url_index < GetNumUrls()) {
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800254 LOG(INFO) << "Incrementing the URL index for next attempt";
255 SetUrlIndex(next_url_index);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800256 } else {
257 LOG(INFO) << "Resetting the current URL index (" << GetUrlIndex() << ") to "
Jay Srinivasan08262882012-12-28 19:29:43 -0800258 << "0 as we only have " << GetNumUrls() << " URL(s)";
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800259 SetUrlIndex(0);
260 IncrementPayloadAttemptNumber();
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800261 }
Jay Srinivasan08262882012-12-28 19:29:43 -0800262
263 // Whenever we update the URL index, we should also clear the URL failure
264 // count so we can start over fresh for the new URL.
265 SetUrlFailureCount(0);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800266}
267
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800268void PayloadState::IncrementFailureCount() {
269 uint32_t next_url_failure_count = GetUrlFailureCount() + 1;
Jay Srinivasan08262882012-12-28 19:29:43 -0800270 if (next_url_failure_count < response_.max_failure_count_per_url) {
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800271 LOG(INFO) << "Incrementing the URL failure count";
272 SetUrlFailureCount(next_url_failure_count);
273 } else {
274 LOG(INFO) << "Reached max number of failures for Url" << GetUrlIndex()
275 << ". Trying next available URL";
276 IncrementUrlIndex();
277 }
278}
279
Jay Srinivasan08262882012-12-28 19:29:43 -0800280void PayloadState::UpdateBackoffExpiryTime() {
281 if (response_.disable_payload_backoff) {
282 LOG(INFO) << "Resetting backoff expiry time as payload backoff is disabled";
283 SetBackoffExpiryTime(Time());
284 return;
285 }
286
287 if (GetPayloadAttemptNumber() == 0) {
288 SetBackoffExpiryTime(Time());
289 return;
290 }
291
292 // Since we're doing left-shift below, make sure we don't shift more
293 // than this. E.g. if uint32_t is 4-bytes, don't left-shift more than 30 bits,
294 // since we don't expect value of kMaxBackoffDays to be more than 100 anyway.
295 uint32_t num_days = 1; // the value to be shifted.
296 const uint32_t kMaxShifts = (sizeof(num_days) * 8) - 2;
297
298 // Normal backoff days is 2 raised to (payload_attempt_number - 1).
299 // E.g. if payload_attempt_number is over 30, limit power to 30.
300 uint32_t power = min(GetPayloadAttemptNumber() - 1, kMaxShifts);
301
302 // The number of days is the minimum of 2 raised to (payload_attempt_number
303 // - 1) or kMaxBackoffDays.
304 num_days = min(num_days << power, kMaxBackoffDays);
305
306 // We don't want all retries to happen exactly at the same time when
307 // retrying after backoff. So add some random minutes to fuzz.
308 int fuzz_minutes = utils::FuzzInt(0, kMaxBackoffFuzzMinutes);
309 TimeDelta next_backoff_interval = TimeDelta::FromDays(num_days) +
310 TimeDelta::FromMinutes(fuzz_minutes);
311 LOG(INFO) << "Incrementing the backoff expiry time by "
312 << utils::FormatTimeDelta(next_backoff_interval);
313 SetBackoffExpiryTime(Time::Now() + next_backoff_interval);
314}
315
316void PayloadState::ResetPersistedState() {
317 SetPayloadAttemptNumber(0);
318 SetUrlIndex(0);
319 SetUrlFailureCount(0);
320 UpdateBackoffExpiryTime(); // This will reset the backoff expiry time.
321}
322
323string PayloadState::CalculateResponseSignature() {
324 string response_sign = StringPrintf("NumURLs = %d\n",
325 response_.payload_urls.size());
326
327 for (size_t i = 0; i < response_.payload_urls.size(); i++)
328 response_sign += StringPrintf("Url%d = %s\n",
329 i, response_.payload_urls[i].c_str());
330
331 response_sign += StringPrintf("Payload Size = %llu\n"
332 "Payload Sha256 Hash = %s\n"
333 "Metadata Size = %llu\n"
334 "Metadata Signature = %s\n"
335 "Is Delta Payload = %d\n"
336 "Max Failure Count Per Url = %d\n"
337 "Disable Payload Backoff = %d\n",
338 response_.size,
339 response_.hash.c_str(),
340 response_.metadata_size,
341 response_.metadata_signature.c_str(),
342 response_.is_delta_payload,
343 response_.max_failure_count_per_url,
344 response_.disable_payload_backoff);
345 return response_sign;
346}
347
348void PayloadState::LoadResponseSignature() {
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800349 CHECK(prefs_);
350 string stored_value;
Jay Srinivasan08262882012-12-28 19:29:43 -0800351 if (prefs_->Exists(kPrefsCurrentResponseSignature) &&
352 prefs_->GetString(kPrefsCurrentResponseSignature, &stored_value)) {
353 SetResponseSignature(stored_value);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800354 }
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800355}
356
Jay Srinivasan08262882012-12-28 19:29:43 -0800357void PayloadState::SetResponseSignature(string response_signature) {
358 CHECK(prefs_);
359 response_signature_ = response_signature;
360 LOG(INFO) << "Current Response Signature = \n" << response_signature_;
361 prefs_->SetString(kPrefsCurrentResponseSignature, response_signature_);
362}
363
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800364void PayloadState::LoadPayloadAttemptNumber() {
365 CHECK(prefs_);
366 int64_t stored_value;
367 if (prefs_->Exists(kPrefsPayloadAttemptNumber) &&
368 prefs_->GetInt64(kPrefsPayloadAttemptNumber, &stored_value)) {
369 if (stored_value < 0) {
370 LOG(ERROR) << "Invalid payload attempt number (" << stored_value
371 << ") in persisted state. Defaulting to 0";
372 stored_value = 0;
373 }
Jay Srinivasan08262882012-12-28 19:29:43 -0800374 SetPayloadAttemptNumber(stored_value);
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800375 }
376}
377
378void PayloadState::SetPayloadAttemptNumber(uint32_t payload_attempt_number) {
379 CHECK(prefs_);
380 payload_attempt_number_ = payload_attempt_number;
381 LOG(INFO) << "Payload Attempt Number = " << payload_attempt_number_;
382 prefs_->SetInt64(kPrefsPayloadAttemptNumber, payload_attempt_number_);
383}
384
385void PayloadState::LoadUrlIndex() {
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800386 CHECK(prefs_);
387 int64_t stored_value;
388 if (prefs_->Exists(kPrefsCurrentUrlIndex) &&
389 prefs_->GetInt64(kPrefsCurrentUrlIndex, &stored_value)) {
Jay Srinivasan08262882012-12-28 19:29:43 -0800390 // We only check for basic sanity value here. Detailed check will be
391 // done in SetResponse once the first response comes in.
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800392 if (stored_value < 0) {
393 LOG(ERROR) << "Invalid URL Index (" << stored_value
394 << ") in persisted state. Defaulting to 0";
395 stored_value = 0;
396 }
Jay Srinivasan08262882012-12-28 19:29:43 -0800397 SetUrlIndex(stored_value);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800398 }
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800399}
400
401void PayloadState::SetUrlIndex(uint32_t url_index) {
402 CHECK(prefs_);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800403 url_index_ = url_index;
404 LOG(INFO) << "Current URL Index = " << url_index_;
405 prefs_->SetInt64(kPrefsCurrentUrlIndex, url_index_);
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800406}
407
408void PayloadState::LoadUrlFailureCount() {
409 CHECK(prefs_);
410 int64_t stored_value;
411 if (prefs_->Exists(kPrefsCurrentUrlFailureCount) &&
412 prefs_->GetInt64(kPrefsCurrentUrlFailureCount, &stored_value)) {
413 if (stored_value < 0) {
414 LOG(ERROR) << "Invalid URL Failure count (" << stored_value
415 << ") in persisted state. Defaulting to 0";
416 stored_value = 0;
417 }
Jay Srinivasan08262882012-12-28 19:29:43 -0800418 SetUrlFailureCount(stored_value);
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800419 }
420}
421
422void PayloadState::SetUrlFailureCount(uint32_t url_failure_count) {
423 CHECK(prefs_);
424 url_failure_count_ = url_failure_count;
425 LOG(INFO) << "Current URL (Url" << GetUrlIndex()
426 << ")'s Failure Count = " << url_failure_count_;
427 prefs_->SetInt64(kPrefsCurrentUrlFailureCount, url_failure_count_);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800428}
429
Jay Srinivasan08262882012-12-28 19:29:43 -0800430void PayloadState::LoadBackoffExpiryTime() {
431 CHECK(prefs_);
432 int64_t stored_value;
433 if (!prefs_->Exists(kPrefsBackoffExpiryTime))
434 return;
435
436 if (!prefs_->GetInt64(kPrefsBackoffExpiryTime, &stored_value))
437 return;
438
439 Time stored_time = Time::FromInternalValue(stored_value);
440 if (stored_time > Time::Now() + TimeDelta::FromDays(kMaxBackoffDays)) {
441 LOG(ERROR) << "Invalid backoff expiry time ("
442 << utils::ToString(stored_time)
443 << ") in persisted state. Resetting.";
444 stored_time = Time();
445 }
446 SetBackoffExpiryTime(stored_time);
447}
448
449void PayloadState::SetBackoffExpiryTime(const Time& new_time) {
450 CHECK(prefs_);
451 backoff_expiry_time_ = new_time;
452 LOG(INFO) << "Backoff Expiry Time = "
453 << utils::ToString(backoff_expiry_time_);
454 prefs_->SetInt64(kPrefsBackoffExpiryTime,
455 backoff_expiry_time_.ToInternalValue());
456}
457
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800458} // namespace chromeos_update_engine