blob: 26a3a3faecde89b7d52f650d822f4c4850eaf970 [file] [log] [blame]
Jay Srinivasan6f6ea002012-12-14 11:26:28 -08001// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "update_engine/payload_state.h"
6
Jay Srinivasan08262882012-12-28 19:29:43 -08007#include <algorithm>
8
Jay Srinivasan6f6ea002012-12-14 11:26:28 -08009#include <base/logging.h>
10#include <base/stringprintf.h>
11
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080012#include "update_engine/prefs.h"
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080013#include "update_engine/utils.h"
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080014
Jay Srinivasan08262882012-12-28 19:29:43 -080015using base::Time;
16using base::TimeDelta;
17using std::min;
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080018using std::string;
19
20namespace chromeos_update_engine {
21
// Upper bound, in days, on the exponential backoff between payload attempts.
static const uint32_t kMaxBackoffDays = 16;

// Width, in minutes, of the randomization window applied to the retry time
// once the backoff has been computed (12 hours in total, i.e. +/- 6 hours).
static const uint32_t kMaxBackoffFuzzMinutes = 12 * 60;
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080027
28bool PayloadState::Initialize(PrefsInterface* prefs) {
29 CHECK(prefs);
30 prefs_ = prefs;
Jay Srinivasan08262882012-12-28 19:29:43 -080031 LoadResponseSignature();
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080032 LoadPayloadAttemptNumber();
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080033 LoadUrlIndex();
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080034 LoadUrlFailureCount();
Jay Srinivasan08262882012-12-28 19:29:43 -080035 LoadBackoffExpiryTime();
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080036 return true;
37}
38
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080039void PayloadState::SetResponse(const OmahaResponse& omaha_response) {
Jay Srinivasan08262882012-12-28 19:29:43 -080040 // Always store the latest response.
41 response_ = omaha_response;
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080042
Jay Srinivasan08262882012-12-28 19:29:43 -080043 // Check if the "signature" of this response (i.e. the fields we care about)
44 // has changed.
45 string new_response_signature = CalculateResponseSignature();
46 bool has_response_changed = (response_signature_ != new_response_signature);
47
48 // If the response has changed, we should persist the new signature and
49 // clear away all the existing state.
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080050 if (has_response_changed) {
Jay Srinivasan08262882012-12-28 19:29:43 -080051 LOG(INFO) << "Resetting all persisted state as this is a new response";
52 SetResponseSignature(new_response_signature);
53 ResetPersistedState();
54 return;
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080055 }
56
Jay Srinivasan08262882012-12-28 19:29:43 -080057 // This is the earliest point at which we can validate whether the URL index
58 // we loaded from the persisted state is a valid value. If the response
59 // hasn't changed but the URL index is invalid, it's indicative of some
60 // tampering of the persisted state.
61 if (url_index_ >= GetNumUrls()) {
62 LOG(INFO) << "Resetting all payload state as the url index seems to have "
63 "been tampered with";
64 ResetPersistedState();
65 return;
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080066 }
67}
68
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080069void PayloadState::DownloadComplete() {
70 LOG(INFO) << "Payload downloaded successfully";
71 IncrementPayloadAttemptNumber();
72}
73
74void PayloadState::DownloadProgress(size_t count) {
75 if (count == 0)
76 return;
77
78 // We've received non-zero bytes from a recent download operation. Since our
79 // URL failure count is meant to penalize a URL only for consecutive
80 // failures, downloading bytes successfully means we should reset the failure
81 // count (as we know at least that the URL is working). In future, we can
82 // design this to be more sophisticated to check for more intelligent failure
83 // patterns, but right now, even 1 byte downloaded will mark the URL to be
84 // good unless it hits 10 (or configured number of) consecutive failures
85 // again.
86
87 if (GetUrlFailureCount() == 0)
88 return;
89
90 LOG(INFO) << "Resetting failure count of Url" << GetUrlIndex()
91 << " to 0 as we received " << count << " bytes successfully";
92 SetUrlFailureCount(0);
93}
94
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080095void PayloadState::UpdateFailed(ActionExitCode error) {
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -080096 ActionExitCode base_error = utils::GetBaseErrorCode(error);
97 LOG(INFO) << "Updating payload state for error code: " << base_error;
Jay Srinivasan6f6ea002012-12-14 11:26:28 -080098
Jay Srinivasan08262882012-12-28 19:29:43 -080099 if (GetNumUrls() == 0) {
100 // This means we got this error even before we got a valid Omaha response.
101 // So we should not advance the url_index_ in such cases.
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800102 LOG(INFO) << "Ignoring failures until we get a valid Omaha response.";
103 return;
104 }
105
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800106 switch (base_error) {
107 // Errors which are good indicators of a problem with a particular URL or
108 // the protocol used in the URL or entities in the communication channel
109 // (e.g. proxies). We should try the next available URL in the next update
110 // check to quickly recover from these errors.
111 case kActionCodePayloadHashMismatchError:
112 case kActionCodePayloadSizeMismatchError:
113 case kActionCodeDownloadPayloadVerificationError:
114 case kActionCodeDownloadPayloadPubKeyVerificationError:
115 case kActionCodeSignedDeltaPayloadExpectedError:
116 case kActionCodeDownloadInvalidMetadataMagicString:
117 case kActionCodeDownloadSignatureMissingInManifest:
118 case kActionCodeDownloadManifestParseError:
119 case kActionCodeDownloadMetadataSignatureError:
120 case kActionCodeDownloadMetadataSignatureVerificationError:
121 case kActionCodeDownloadMetadataSignatureMismatch:
122 case kActionCodeDownloadOperationHashVerificationError:
123 case kActionCodeDownloadOperationExecutionError:
124 case kActionCodeDownloadOperationHashMismatch:
125 case kActionCodeDownloadInvalidMetadataSize:
126 case kActionCodeDownloadInvalidMetadataSignature:
127 case kActionCodeDownloadOperationHashMissingError:
128 case kActionCodeDownloadMetadataSignatureMissingError:
129 IncrementUrlIndex();
130 break;
131
132 // Errors which seem to be just transient network/communication related
133 // failures and do not indicate any inherent problem with the URL itself.
134 // So, we should keep the current URL but just increment the
135 // failure count to give it more chances. This way, while we maximize our
136 // chances of downloading from the URLs that appear earlier in the response
137 // (because download from a local server URL that appears earlier in a
138 // response is preferable than downloading from the next URL which could be
139 // a internet URL and thus could be more expensive).
140 case kActionCodeError:
141 case kActionCodeDownloadTransferError:
142 case kActionCodeDownloadWriteError:
143 case kActionCodeDownloadStateInitializationError:
144 case kActionCodeOmahaErrorInHTTPResponse: // Aggregate code for HTTP errors.
145 IncrementFailureCount();
146 break;
147
148 // Errors which are not specific to a URL and hence shouldn't result in
149 // the URL being penalized. This can happen in two cases:
150 // 1. We haven't started downloading anything: These errors don't cost us
151 // anything in terms of actual payload bytes, so we should just do the
152 // regular retries at the next update check.
153 // 2. We have successfully downloaded the payload: In this case, the
154 // payload attempt number would have been incremented and would take care
Jay Srinivasan08262882012-12-28 19:29:43 -0800155 // of the backoff at the next update check.
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800156 // In either case, there's no need to update URL index or failure count.
157 case kActionCodeOmahaRequestError:
158 case kActionCodeOmahaResponseHandlerError:
159 case kActionCodePostinstallRunnerError:
160 case kActionCodeFilesystemCopierError:
161 case kActionCodeInstallDeviceOpenError:
162 case kActionCodeKernelDeviceOpenError:
163 case kActionCodeDownloadNewPartitionInfoError:
164 case kActionCodeNewRootfsVerificationError:
165 case kActionCodeNewKernelVerificationError:
166 case kActionCodePostinstallBootedFromFirmwareB:
167 case kActionCodeOmahaRequestEmptyResponseError:
168 case kActionCodeOmahaRequestXMLParseError:
169 case kActionCodeOmahaResponseInvalid:
170 case kActionCodeOmahaUpdateIgnoredPerPolicy:
171 case kActionCodeOmahaUpdateDeferredPerPolicy:
Jay Srinivasan08262882012-12-28 19:29:43 -0800172 case kActionCodeOmahaUpdateDeferredForBackoff:
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800173 LOG(INFO) << "Not incrementing URL index or failure count for this error";
174 break;
175
176 case kActionCodeSuccess: // success code
177 case kActionCodeSetBootableFlagError: // unused
178 case kActionCodeUmaReportedMax: // not an error code
179 case kActionCodeOmahaRequestHTTPResponseBase: // aggregated already
180 case kActionCodeResumedFlag: // not an error code
181 case kActionCodeBootModeFlag: // not an error code
182 case kActualCodeMask: // not an error code
183 // These shouldn't happen. Enumerating these explicitly here so that we
184 // can let the compiler warn about new error codes that are added to
185 // action_processor.h but not added here.
186 LOG(WARNING) << "Unexpected error code for UpdateFailed";
187 break;
188
189 // Note: Not adding a default here so as to let the compiler warn us of
190 // any new enums that were added in the .h but not listed in this switch.
191 }
192}
193
Jay Srinivasan08262882012-12-28 19:29:43 -0800194bool PayloadState::ShouldBackoffDownload() {
195 if (response_.disable_payload_backoff) {
196 LOG(INFO) << "Payload backoff logic is disabled. "
197 "Can proceed with the download";
198 return false;
199 }
200
201 if (response_.is_delta_payload) {
202 // If delta payloads fail, we want to fallback quickly to full payloads as
203 // they are more likely to succeed. Exponential backoffs would greatly
204 // slow down the fallback to full payloads. So we don't backoff for delta
205 // payloads.
206 LOG(INFO) << "No backoffs for delta payloads. "
207 << "Can proceed with the download";
208 return false;
209 }
210
211 if (!utils::IsOfficialBuild()) {
212 // Backoffs are needed only for official builds. We do not want any delays
213 // or update failures due to backoffs during testing or development.
214 LOG(INFO) << "No backoffs for test/dev images. "
215 << "Can proceed with the download";
216 return false;
217 }
218
219 if (backoff_expiry_time_.is_null()) {
220 LOG(INFO) << "No backoff expiry time has been set. "
221 << "Can proceed with the download";
222 return false;
223 }
224
225 if (backoff_expiry_time_ < Time::Now()) {
226 LOG(INFO) << "The backoff expiry time ("
227 << utils::ToString(backoff_expiry_time_)
228 << ") has elapsed. Can proceed with the download";
229 return false;
230 }
231
232 LOG(INFO) << "Cannot proceed with downloads as we need to backoff until "
233 << utils::ToString(backoff_expiry_time_);
234 return true;
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800235}
236
237void PayloadState::IncrementPayloadAttemptNumber() {
Jay Srinivasan08262882012-12-28 19:29:43 -0800238 if (response_.is_delta_payload) {
239 LOG(INFO) << "Not incrementing payload attempt number for delta payloads";
240 return;
241 }
242
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800243 LOG(INFO) << "Incrementing the payload attempt number";
244 SetPayloadAttemptNumber(GetPayloadAttemptNumber() + 1);
Jay Srinivasan08262882012-12-28 19:29:43 -0800245 UpdateBackoffExpiryTime();
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800246}
247
248void PayloadState::IncrementUrlIndex() {
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800249 uint32_t next_url_index = GetUrlIndex() + 1;
Jay Srinivasan08262882012-12-28 19:29:43 -0800250 if (next_url_index < GetNumUrls()) {
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800251 LOG(INFO) << "Incrementing the URL index for next attempt";
252 SetUrlIndex(next_url_index);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800253 } else {
254 LOG(INFO) << "Resetting the current URL index (" << GetUrlIndex() << ") to "
Jay Srinivasan08262882012-12-28 19:29:43 -0800255 << "0 as we only have " << GetNumUrls() << " URL(s)";
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800256 SetUrlIndex(0);
257 IncrementPayloadAttemptNumber();
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800258 }
Jay Srinivasan08262882012-12-28 19:29:43 -0800259
260 // Whenever we update the URL index, we should also clear the URL failure
261 // count so we can start over fresh for the new URL.
262 SetUrlFailureCount(0);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800263}
264
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800265void PayloadState::IncrementFailureCount() {
266 uint32_t next_url_failure_count = GetUrlFailureCount() + 1;
Jay Srinivasan08262882012-12-28 19:29:43 -0800267 if (next_url_failure_count < response_.max_failure_count_per_url) {
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800268 LOG(INFO) << "Incrementing the URL failure count";
269 SetUrlFailureCount(next_url_failure_count);
270 } else {
271 LOG(INFO) << "Reached max number of failures for Url" << GetUrlIndex()
272 << ". Trying next available URL";
273 IncrementUrlIndex();
274 }
275}
276
Jay Srinivasan08262882012-12-28 19:29:43 -0800277void PayloadState::UpdateBackoffExpiryTime() {
278 if (response_.disable_payload_backoff) {
279 LOG(INFO) << "Resetting backoff expiry time as payload backoff is disabled";
280 SetBackoffExpiryTime(Time());
281 return;
282 }
283
284 if (GetPayloadAttemptNumber() == 0) {
285 SetBackoffExpiryTime(Time());
286 return;
287 }
288
289 // Since we're doing left-shift below, make sure we don't shift more
290 // than this. E.g. if uint32_t is 4-bytes, don't left-shift more than 30 bits,
291 // since we don't expect value of kMaxBackoffDays to be more than 100 anyway.
292 uint32_t num_days = 1; // the value to be shifted.
293 const uint32_t kMaxShifts = (sizeof(num_days) * 8) - 2;
294
295 // Normal backoff days is 2 raised to (payload_attempt_number - 1).
296 // E.g. if payload_attempt_number is over 30, limit power to 30.
297 uint32_t power = min(GetPayloadAttemptNumber() - 1, kMaxShifts);
298
299 // The number of days is the minimum of 2 raised to (payload_attempt_number
300 // - 1) or kMaxBackoffDays.
301 num_days = min(num_days << power, kMaxBackoffDays);
302
303 // We don't want all retries to happen exactly at the same time when
304 // retrying after backoff. So add some random minutes to fuzz.
305 int fuzz_minutes = utils::FuzzInt(0, kMaxBackoffFuzzMinutes);
306 TimeDelta next_backoff_interval = TimeDelta::FromDays(num_days) +
307 TimeDelta::FromMinutes(fuzz_minutes);
308 LOG(INFO) << "Incrementing the backoff expiry time by "
309 << utils::FormatTimeDelta(next_backoff_interval);
310 SetBackoffExpiryTime(Time::Now() + next_backoff_interval);
311}
312
313void PayloadState::ResetPersistedState() {
314 SetPayloadAttemptNumber(0);
315 SetUrlIndex(0);
316 SetUrlFailureCount(0);
317 UpdateBackoffExpiryTime(); // This will reset the backoff expiry time.
318}
319
320string PayloadState::CalculateResponseSignature() {
321 string response_sign = StringPrintf("NumURLs = %d\n",
322 response_.payload_urls.size());
323
324 for (size_t i = 0; i < response_.payload_urls.size(); i++)
325 response_sign += StringPrintf("Url%d = %s\n",
326 i, response_.payload_urls[i].c_str());
327
328 response_sign += StringPrintf("Payload Size = %llu\n"
329 "Payload Sha256 Hash = %s\n"
330 "Metadata Size = %llu\n"
331 "Metadata Signature = %s\n"
332 "Is Delta Payload = %d\n"
333 "Max Failure Count Per Url = %d\n"
334 "Disable Payload Backoff = %d\n",
335 response_.size,
336 response_.hash.c_str(),
337 response_.metadata_size,
338 response_.metadata_signature.c_str(),
339 response_.is_delta_payload,
340 response_.max_failure_count_per_url,
341 response_.disable_payload_backoff);
342 return response_sign;
343}
344
345void PayloadState::LoadResponseSignature() {
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800346 CHECK(prefs_);
347 string stored_value;
Jay Srinivasan08262882012-12-28 19:29:43 -0800348 if (prefs_->Exists(kPrefsCurrentResponseSignature) &&
349 prefs_->GetString(kPrefsCurrentResponseSignature, &stored_value)) {
350 SetResponseSignature(stored_value);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800351 }
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800352}
353
Jay Srinivasan08262882012-12-28 19:29:43 -0800354void PayloadState::SetResponseSignature(string response_signature) {
355 CHECK(prefs_);
356 response_signature_ = response_signature;
357 LOG(INFO) << "Current Response Signature = \n" << response_signature_;
358 prefs_->SetString(kPrefsCurrentResponseSignature, response_signature_);
359}
360
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800361void PayloadState::LoadPayloadAttemptNumber() {
362 CHECK(prefs_);
363 int64_t stored_value;
364 if (prefs_->Exists(kPrefsPayloadAttemptNumber) &&
365 prefs_->GetInt64(kPrefsPayloadAttemptNumber, &stored_value)) {
366 if (stored_value < 0) {
367 LOG(ERROR) << "Invalid payload attempt number (" << stored_value
368 << ") in persisted state. Defaulting to 0";
369 stored_value = 0;
370 }
Jay Srinivasan08262882012-12-28 19:29:43 -0800371 SetPayloadAttemptNumber(stored_value);
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800372 }
373}
374
375void PayloadState::SetPayloadAttemptNumber(uint32_t payload_attempt_number) {
376 CHECK(prefs_);
377 payload_attempt_number_ = payload_attempt_number;
378 LOG(INFO) << "Payload Attempt Number = " << payload_attempt_number_;
379 prefs_->SetInt64(kPrefsPayloadAttemptNumber, payload_attempt_number_);
380}
381
382void PayloadState::LoadUrlIndex() {
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800383 CHECK(prefs_);
384 int64_t stored_value;
385 if (prefs_->Exists(kPrefsCurrentUrlIndex) &&
386 prefs_->GetInt64(kPrefsCurrentUrlIndex, &stored_value)) {
Jay Srinivasan08262882012-12-28 19:29:43 -0800387 // We only check for basic sanity value here. Detailed check will be
388 // done in SetResponse once the first response comes in.
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800389 if (stored_value < 0) {
390 LOG(ERROR) << "Invalid URL Index (" << stored_value
391 << ") in persisted state. Defaulting to 0";
392 stored_value = 0;
393 }
Jay Srinivasan08262882012-12-28 19:29:43 -0800394 SetUrlIndex(stored_value);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800395 }
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800396}
397
398void PayloadState::SetUrlIndex(uint32_t url_index) {
399 CHECK(prefs_);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800400 url_index_ = url_index;
401 LOG(INFO) << "Current URL Index = " << url_index_;
402 prefs_->SetInt64(kPrefsCurrentUrlIndex, url_index_);
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800403}
404
405void PayloadState::LoadUrlFailureCount() {
406 CHECK(prefs_);
407 int64_t stored_value;
408 if (prefs_->Exists(kPrefsCurrentUrlFailureCount) &&
409 prefs_->GetInt64(kPrefsCurrentUrlFailureCount, &stored_value)) {
410 if (stored_value < 0) {
411 LOG(ERROR) << "Invalid URL Failure count (" << stored_value
412 << ") in persisted state. Defaulting to 0";
413 stored_value = 0;
414 }
Jay Srinivasan08262882012-12-28 19:29:43 -0800415 SetUrlFailureCount(stored_value);
Jay Srinivasan2b5a0f02012-12-19 17:25:56 -0800416 }
417}
418
419void PayloadState::SetUrlFailureCount(uint32_t url_failure_count) {
420 CHECK(prefs_);
421 url_failure_count_ = url_failure_count;
422 LOG(INFO) << "Current URL (Url" << GetUrlIndex()
423 << ")'s Failure Count = " << url_failure_count_;
424 prefs_->SetInt64(kPrefsCurrentUrlFailureCount, url_failure_count_);
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800425}
426
Jay Srinivasan08262882012-12-28 19:29:43 -0800427void PayloadState::LoadBackoffExpiryTime() {
428 CHECK(prefs_);
429 int64_t stored_value;
430 if (!prefs_->Exists(kPrefsBackoffExpiryTime))
431 return;
432
433 if (!prefs_->GetInt64(kPrefsBackoffExpiryTime, &stored_value))
434 return;
435
436 Time stored_time = Time::FromInternalValue(stored_value);
437 if (stored_time > Time::Now() + TimeDelta::FromDays(kMaxBackoffDays)) {
438 LOG(ERROR) << "Invalid backoff expiry time ("
439 << utils::ToString(stored_time)
440 << ") in persisted state. Resetting.";
441 stored_time = Time();
442 }
443 SetBackoffExpiryTime(stored_time);
444}
445
446void PayloadState::SetBackoffExpiryTime(const Time& new_time) {
447 CHECK(prefs_);
448 backoff_expiry_time_ = new_time;
449 LOG(INFO) << "Backoff Expiry Time = "
450 << utils::ToString(backoff_expiry_time_);
451 prefs_->SetInt64(kPrefsBackoffExpiryTime,
452 backoff_expiry_time_.ToInternalValue());
453}
454
Jay Srinivasan6f6ea002012-12-14 11:26:28 -0800455} // namespace chromeos_update_engine