blob: 9f84c3bdd1643d2970cb71fe097e5477c4864a3a [file] [log] [blame]
//
// Copyright (C) 2020 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
16#include "update_engine/cleanup_previous_update_action.h"
17
Yifan Hong4ed0e692020-02-25 14:51:42 -080018#include <chrono> // NOLINT(build/c++11) -- for merge times
Yifan Hong120c3162020-02-19 17:19:49 -080019#include <functional>
20#include <string>
Yifan Hong58397102020-03-19 19:25:38 -070021#include <type_traits>
Yifan Hong120c3162020-02-19 17:19:49 -080022
23#include <android-base/properties.h>
24#include <base/bind.h>
25
Yifan Hong4ed0e692020-02-25 14:51:42 -080026#ifndef __ANDROID_RECOVERY__
27#include <statslog.h>
28#endif
29
Yifan Hong120c3162020-02-19 17:19:49 -080030#include "update_engine/common/utils.h"
31#include "update_engine/payload_consumer/delta_performer.h"
32
Howard Chen23279922020-04-17 11:22:13 +080033using android::base::GetBoolProperty;
Yifan Hong120c3162020-02-19 17:19:49 -080034using android::snapshot::SnapshotManager;
Yifan Hong4ed0e692020-02-25 14:51:42 -080035using android::snapshot::SnapshotMergeStats;
Yifan Hong120c3162020-02-19 17:19:49 -080036using android::snapshot::UpdateState;
37using brillo::MessageLoop;
38
39constexpr char kBootCompletedProp[] = "sys.boot_completed";
40// Interval to check sys.boot_completed.
41constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
42// Interval to check IBootControl::isSlotMarkedSuccessful
43constexpr auto kCheckSlotMarkedSuccessfulInterval =
44 base::TimeDelta::FromSeconds(2);
45// Interval to call SnapshotManager::ProcessUpdateState
46constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
47
Yifan Hong832b6172020-03-16 12:31:16 -070048#ifdef __ANDROID_RECOVERY__
49static constexpr bool kIsRecovery = true;
50#else
51static constexpr bool kIsRecovery = false;
52#endif
53
Yifan Hong120c3162020-02-19 17:19:49 -080054namespace chromeos_update_engine {
55
56CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
57 PrefsInterface* prefs,
58 BootControlInterface* boot_control,
59 android::snapshot::SnapshotManager* snapshot,
60 CleanupPreviousUpdateActionDelegateInterface* delegate)
61 : prefs_(prefs),
62 boot_control_(boot_control),
63 snapshot_(snapshot),
64 delegate_(delegate),
65 running_(false),
66 cancel_failed_(false),
Yifan Hong4ed0e692020-02-25 14:51:42 -080067 last_percentage_(0),
68 merge_stats_(SnapshotMergeStats::GetInstance(*snapshot)) {}
Yifan Hong120c3162020-02-19 17:19:49 -080069
70void CleanupPreviousUpdateAction::PerformAction() {
71 ResumeAction();
72}
73
74void CleanupPreviousUpdateAction::TerminateProcessing() {
75 SuspendAction();
76}
77
78void CleanupPreviousUpdateAction::ResumeAction() {
79 CHECK(prefs_);
80 CHECK(boot_control_);
81
82 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
83 running_ = true;
84 StartActionInternal();
85}
86
87void CleanupPreviousUpdateAction::SuspendAction() {
88 LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction";
89 running_ = false;
90}
91
92void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
93 running_ = false;
Yifan Hong4ed0e692020-02-25 14:51:42 -080094 ReportMergeStats();
Yifan Hong832b6172020-03-16 12:31:16 -070095 metadata_device_ = nullptr;
Yifan Hong120c3162020-02-19 17:19:49 -080096}
97
98std::string CleanupPreviousUpdateAction::Type() const {
99 return StaticType();
100}
101
102std::string CleanupPreviousUpdateAction::StaticType() {
103 return "CleanupPreviousUpdateAction";
104}
105
106void CleanupPreviousUpdateAction::StartActionInternal() {
107 // Do nothing on non-VAB device.
108 if (!boot_control_->GetDynamicPartitionControl()
109 ->GetVirtualAbFeatureFlag()
110 .IsEnabled()) {
111 processor_->ActionComplete(this, ErrorCode::kSuccess);
112 return;
113 }
114 // SnapshotManager is only available on VAB devices.
115 CHECK(snapshot_);
116 WaitBootCompletedOrSchedule();
117}
118
119void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
120 TEST_AND_RETURN(running_);
121 MessageLoop::current()->PostDelayedTask(
122 FROM_HERE,
123 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
124 base::Unretained(this)),
125 kCheckBootCompletedInterval);
126}
127
128void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
129 TEST_AND_RETURN(running_);
Yifan Hong832b6172020-03-16 12:31:16 -0700130 if (!kIsRecovery &&
131 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
Yifan Hong120c3162020-02-19 17:19:49 -0800132 // repeat
133 ScheduleWaitBootCompleted();
134 return;
135 }
136
137 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
138 CheckSlotMarkedSuccessfulOrSchedule();
139}
140
141void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
142 TEST_AND_RETURN(running_);
143 MessageLoop::current()->PostDelayedTask(
144 FROM_HERE,
145 base::Bind(
146 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
147 base::Unretained(this)),
148 kCheckSlotMarkedSuccessfulInterval);
149}
150
151void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
152 TEST_AND_RETURN(running_);
Yifan Hong832b6172020-03-16 12:31:16 -0700153 if (!kIsRecovery &&
154 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
Yifan Hong120c3162020-02-19 17:19:49 -0800155 ScheduleWaitMarkBootSuccessful();
Kelvin Zhang28d49bc2021-04-02 13:19:40 -0400156 return;
Yifan Hong120c3162020-02-19 17:19:49 -0800157 }
Yifan Hong832b6172020-03-16 12:31:16 -0700158
159 if (metadata_device_ == nullptr) {
160 metadata_device_ = snapshot_->EnsureMetadataMounted();
161 }
162
163 if (metadata_device_ == nullptr) {
164 LOG(ERROR) << "Failed to mount /metadata.";
Yifan Hong700e6b02020-04-03 11:31:50 -0700165 // If metadata is erased but not formatted, it is possible to not mount
166 // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
167 processor_->ActionComplete(
168 this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
Yifan Hong832b6172020-03-16 12:31:16 -0700169 return;
170 }
171
Yifan Hong58397102020-03-19 19:25:38 -0700172 if (kIsRecovery) {
173 auto snapshots_created =
174 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
175 switch (snapshots_created) {
176 case android::snapshot::CreateResult::CREATED: {
177 // If previous update has not finished merging, snapshots exists and are
178 // created here so that ProcessUpdateState can proceed.
179 LOG(INFO) << "Snapshot devices are created";
180 break;
181 }
182 case android::snapshot::CreateResult::NOT_CREATED: {
183 // If there is no previous update, no snapshot devices are created and
184 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
185 // considered an error.
186 LOG(INFO) << "Snapshot devices are not created";
187 break;
188 }
189 case android::snapshot::CreateResult::ERROR:
190 default: {
191 LOG(ERROR)
192 << "Failed to create snapshot devices (CreateResult = "
193 << static_cast<
194 std::underlying_type_t<android::snapshot::CreateResult>>(
195 snapshots_created);
196 processor_->ActionComplete(this, ErrorCode::kError);
197 return;
198 }
199 }
200 }
201
Yifan Hong4ed0e692020-02-25 14:51:42 -0800202 if (!merge_stats_->Start()) {
203 // Not an error because CleanupPreviousUpdateAction may be paused and
204 // resumed while kernel continues merging snapshots in the background.
205 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
206 }
Yifan Hong120c3162020-02-19 17:19:49 -0800207 LOG(INFO) << "Waiting for any previous merge request to complete. "
208 << "This can take up to several minutes.";
209 WaitForMergeOrSchedule();
210}
211
212void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
213 TEST_AND_RETURN(running_);
214 MessageLoop::current()->PostDelayedTask(
215 FROM_HERE,
216 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
217 base::Unretained(this)),
218 kWaitForMergeInterval);
219}
220
221void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
222 TEST_AND_RETURN(running_);
223 auto state = snapshot_->ProcessUpdateState(
224 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
225 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
Yifan Hong4ed0e692020-02-25 14:51:42 -0800226 merge_stats_->set_state(state);
Yifan Hong120c3162020-02-19 17:19:49 -0800227
Yifan Hong120c3162020-02-19 17:19:49 -0800228 switch (state) {
229 case UpdateState::None: {
230 LOG(INFO) << "Can't find any snapshot to merge.";
Yifan Hong48fe4252020-03-05 21:02:36 -0800231 ErrorCode error_code = ErrorCode::kSuccess;
232 if (!snapshot_->CancelUpdate()) {
233 error_code = ErrorCode::kError;
234 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
235 }
236 processor_->ActionComplete(this, error_code);
Yifan Hong120c3162020-02-19 17:19:49 -0800237 return;
238 }
239
240 case UpdateState::Initiated: {
241 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
242 processor_->ActionComplete(this, ErrorCode::kSuccess);
243 return;
244 }
245
246 case UpdateState::Unverified: {
247 InitiateMergeAndWait();
248 return;
249 }
250
251 case UpdateState::Merging: {
252 ScheduleWaitForMerge();
253 return;
254 }
255
256 case UpdateState::MergeNeedsReboot: {
257 LOG(ERROR) << "Need reboot to finish merging.";
258 processor_->ActionComplete(this, ErrorCode::kError);
259 return;
260 }
261
262 case UpdateState::MergeCompleted: {
263 LOG(INFO) << "Merge finished with state MergeCompleted.";
264 processor_->ActionComplete(this, ErrorCode::kSuccess);
265 return;
266 }
267
268 case UpdateState::MergeFailed: {
269 LOG(ERROR) << "Merge failed. Device may be corrupted.";
270 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
271 return;
272 }
273
274 case UpdateState::Cancelled: {
275 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
276 // not deleted to avoid inconsistency.
277 // Nothing can be done here; just try next time.
278 ErrorCode error_code =
279 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
280 processor_->ActionComplete(this, error_code);
281 return;
282 }
283
284 default: {
285 // Protobuf has some reserved enum values, so a default case is needed.
286 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
287 << static_cast<int32_t>(state);
288 }
289 }
290}
291
292bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
293 double percentage = 0.0;
294 snapshot_->GetUpdateState(&percentage);
295 if (delegate_) {
296 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
297 delegate_->OnCleanupProgressUpdate(percentage / 100);
298 }
299
300 // Log if percentage increments by at least 1.
301 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
302 last_percentage_ = percentage;
303 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
304 }
305
306 // Do not continue to wait for merge. Instead, let ProcessUpdateState
307 // return Merging directly so that we can ScheduleWaitForMerge() in
308 // MessageLoop.
309 return false;
310}
311
312bool CleanupPreviousUpdateAction::BeforeCancel() {
313 if (DeltaPerformer::ResetUpdateProgress(
314 prefs_,
315 false /* quick */,
316 false /* skip dynamic partitions metadata*/)) {
317 return true;
318 }
319
320 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
321 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
322
323 std::string val;
324 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
325 if (val.empty()) {
326 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
327 << " is empty, assuming successful cleanup";
328 return true;
329 }
330 LOG(WARNING)
331 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
332 << ", not deleting snapshots even though UpdateState is Cancelled.";
333 cancel_failed_ = true;
334 return false;
335}
336
337void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
338 TEST_AND_RETURN(running_);
339 LOG(INFO) << "Attempting to initiate merge.";
Howard Chen23279922020-04-17 11:22:13 +0800340 // suspend the VAB merge when running a DSU
341 if (GetBoolProperty("ro.gsid.image_running", false)) {
342 LOG(WARNING) << "Suspend the VAB merge when running a DSU.";
343 processor_->ActionComplete(this, ErrorCode::kError);
344 return;
345 }
Yifan Hong120c3162020-02-19 17:19:49 -0800346
Alessio Balsini4ed05122020-05-26 22:17:03 +0100347 uint64_t cow_file_size;
348 if (snapshot_->InitiateMerge(&cow_file_size)) {
349 merge_stats_->set_cow_file_size(cow_file_size);
Yifan Hong120c3162020-02-19 17:19:49 -0800350 WaitForMergeOrSchedule();
351 return;
352 }
353
354 LOG(WARNING) << "InitiateMerge failed.";
355 auto state = snapshot_->GetUpdateState();
Yifan Hong4ed0e692020-02-25 14:51:42 -0800356 merge_stats_->set_state(state);
Yifan Hong120c3162020-02-19 17:19:49 -0800357 if (state == UpdateState::Unverified) {
358 // We are stuck at unverified state. This can happen if the update has
359 // been applied, but it has not even been attempted yet (in libsnapshot,
360 // rollback indicator does not exist); for example, if update_engine
361 // restarts before the device reboots, then this state may be reached.
362 // Nothing should be done here.
363 LOG(WARNING) << "InitiateMerge leaves the device at "
364 << "UpdateState::Unverified. (Did update_engine "
365 << "restarted?)";
366 processor_->ActionComplete(this, ErrorCode::kSuccess);
367 return;
368 }
369
370 // State does seems to be advanced.
371 // It is possibly racy. For example, on a userdebug build, the user may
372 // manually initiate a merge with snapshotctl between last time
373 // update_engine checks UpdateState. Hence, just call
374 // WaitForMergeOrSchedule one more time.
375 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
376 << android::snapshot::UpdateState_Name(state)
377 << ", try to wait for merge again.";
378 WaitForMergeOrSchedule();
379 return;
380}
381
Yifan Hong4ed0e692020-02-25 14:51:42 -0800382void CleanupPreviousUpdateAction::ReportMergeStats() {
383 auto result = merge_stats_->Finish();
384 if (result == nullptr) {
385 LOG(WARNING) << "Not reporting merge stats because "
386 "SnapshotMergeStats::Finish failed.";
387 return;
388 }
389
390#ifdef __ANDROID_RECOVERY__
391 LOG(INFO) << "Skip reporting merge stats in recovery.";
392#else
393 const auto& report = result->report();
394
395 if (report.state() == UpdateState::None ||
396 report.state() == UpdateState::Initiated ||
397 report.state() == UpdateState::Unverified) {
398 LOG(INFO) << "Not reporting merge stats because state is "
399 << android::snapshot::UpdateState_Name(report.state());
400 return;
401 }
402
403 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
404 result->merge_time());
Alessio Balsini4ed05122020-05-26 22:17:03 +0100405
406 bool vab_retrofit = boot_control_->GetDynamicPartitionControl()
407 ->GetVirtualAbFeatureFlag()
408 .IsRetrofit();
409
Yifan Hong4ed0e692020-02-25 14:51:42 -0800410 LOG(INFO) << "Reporting merge stats: "
411 << android::snapshot::UpdateState_Name(report.state()) << " in "
412 << passed_ms.count() << "ms (resumed " << report.resume_count()
Alessio Balsini4ed05122020-05-26 22:17:03 +0100413 << " times), using " << report.cow_file_size()
414 << " bytes of COW image.";
Yifan Hong0c4ed292020-03-02 15:45:14 -0800415 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
416 static_cast<int32_t>(report.state()),
417 static_cast<int64_t>(passed_ms.count()),
Alessio Balsini4ed05122020-05-26 22:17:03 +0100418 static_cast<int32_t>(report.resume_count()),
419 vab_retrofit,
420 static_cast<int64_t>(report.cow_file_size()));
Yifan Hong4ed0e692020-02-25 14:51:42 -0800421#endif
422}
423
Yifan Hong120c3162020-02-19 17:19:49 -0800424} // namespace chromeos_update_engine