blob: b464422e9e3da11d1f415f01aef889b82748d233 [file] [log] [blame]
Zimuzo6efba542018-11-29 12:47:58 +00001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
Zimuzo1a9aac72019-04-24 14:07:24 +010019import static android.service.watchdog.ExplicitHealthCheckService.PackageConfig;
20
Zimuzoe5009cd2019-01-23 18:11:58 +000021import static java.lang.annotation.RetentionPolicy.SOURCE;
22
23import android.annotation.IntDef;
Zimuzo3eee4382019-01-08 20:42:39 +000024import android.annotation.Nullable;
Zimuzo6efba542018-11-29 12:47:58 +000025import android.content.Context;
Zimuzoef65fb82019-02-28 10:44:29 +000026import android.content.pm.PackageManager;
Zimuzo972e1cd2019-01-28 16:30:01 +000027import android.content.pm.VersionedPackage;
Roshan Pius7e6f5f52019-06-27 14:14:40 -070028import android.net.ConnectivityModuleConnector;
Zimuzo6efba542018-11-29 12:47:58 +000029import android.os.Environment;
30import android.os.Handler;
Zimuzo6efba542018-11-29 12:47:58 +000031import android.os.Looper;
Gavin Corkeryaa57ef32019-12-17 19:02:54 +000032import android.os.Process;
Gavin Corkeryf9b3fd42019-12-10 17:18:54 +000033import android.os.SystemProperties;
Zimuzodef38be2019-04-05 17:24:29 +010034import android.provider.DeviceConfig;
Zimuzo6efba542018-11-29 12:47:58 +000035import android.text.TextUtils;
36import android.util.ArrayMap;
Zimuzocaa435e2019-03-20 11:16:06 +000037import android.util.ArraySet;
Zimuzo6efba542018-11-29 12:47:58 +000038import android.util.AtomicFile;
JW Wang88525f72019-09-11 14:34:43 +080039import android.util.LongArrayQueue;
Gavin Corkeryaa57ef32019-12-17 19:02:54 +000040import android.util.MathUtils;
Zimuzo6efba542018-11-29 12:47:58 +000041import android.util.Slog;
42import android.util.Xml;
43
44import com.android.internal.annotations.GuardedBy;
Zimuzo3eee4382019-01-08 20:42:39 +000045import com.android.internal.annotations.VisibleForTesting;
Zimuzocfaed762019-01-03 21:13:01 +000046import com.android.internal.os.BackgroundThread;
Zimuzo6efba542018-11-29 12:47:58 +000047import com.android.internal.util.FastXmlSerializer;
Gavin Corkeryefb3ff12019-12-02 18:15:24 +000048import com.android.internal.util.IndentingPrintWriter;
Zimuzo6efba542018-11-29 12:47:58 +000049import com.android.internal.util.XmlUtils;
50
51import libcore.io.IoUtils;
52
53import org.xmlpull.v1.XmlPullParser;
54import org.xmlpull.v1.XmlPullParserException;
55import org.xmlpull.v1.XmlSerializer;
56
57import java.io.File;
58import java.io.FileNotFoundException;
59import java.io.FileOutputStream;
60import java.io.IOException;
61import java.io.InputStream;
Zimuzoe5009cd2019-01-23 18:11:58 +000062import java.lang.annotation.Retention;
Gavin Corkeryf305f4d2019-11-27 15:46:29 +000063import java.lang.annotation.RetentionPolicy;
Zimuzo6efba542018-11-29 12:47:58 +000064import java.nio.charset.StandardCharsets;
65import java.util.ArrayList;
Zimuzo3eee4382019-01-08 20:42:39 +000066import java.util.Collections;
Zimuzo6efba542018-11-29 12:47:58 +000067import java.util.Iterator;
68import java.util.List;
Zimuzo6997f3c2019-04-05 17:06:25 +010069import java.util.Map;
Zimuzo3eee4382019-01-08 20:42:39 +000070import java.util.Set;
Zimuzodef38be2019-04-05 17:24:29 +010071import java.util.concurrent.TimeUnit;
Zimuzo6efba542018-11-29 12:47:58 +000072
73/**
74 * Monitors the health of packages on the system and notifies interested observers when packages
Zimuzoe5009cd2019-01-23 18:11:58 +000075 * fail. On failure, the registered observer with the least user impacting mitigation will
76 * be notified.
Zimuzo6efba542018-11-29 12:47:58 +000077 */
78public class PackageWatchdog {
// Log tag used for all Slog output from this class.
private static final String TAG = "PackageWatchdog";

// Property names for the tunable watchdog settings.
// NOTE(review): presumably DeviceConfig keys (DeviceConfig is imported), but the lookup code
// is outside this part of the file -- confirm.
static final String PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS =
        "watchdog_trigger_failure_duration_millis";
static final String PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT =
        "watchdog_trigger_failure_count";
static final String PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED =
        "watchdog_explicit_health_check_enabled";

// TODO: make the following values configurable via DeviceConfig
// Delay between two consecutive polls for a native crash report.
private static final long NATIVE_CRASH_POLLING_INTERVAL_MILLIS =
        TimeUnit.SECONDS.toMillis(30);
// Initial poll budget; the constructor copies it into mNumberOfNativeCrashPollsRemaining.
private static final long NUMBER_OF_NATIVE_CRASH_POLLS = 10;
92
93
// Failure categories passed to onPackageFailure and forwarded to observers.
// NATIVE_CRASH and EXPLICIT_HEALTH_CHECK are mitigated immediately by onPackageFailure;
// the remaining reasons go through per-observer failure tracking first.
public static final int FAILURE_REASON_UNKNOWN = 0;
public static final int FAILURE_REASON_NATIVE_CRASH = 1;
public static final int FAILURE_REASON_EXPLICIT_HEALTH_CHECK = 2;
public static final int FAILURE_REASON_APP_CRASH = 3;
public static final int FAILURE_REASON_APP_NOT_RESPONDING = 4;

/** Marks an {@code int} that must hold one of the {@code FAILURE_REASON_*} constants. */
@IntDef(prefix = { "FAILURE_REASON_" }, value = {
        FAILURE_REASON_UNKNOWN,
        FAILURE_REASON_NATIVE_CRASH,
        FAILURE_REASON_EXPLICIT_HEALTH_CHECK,
        FAILURE_REASON_APP_CRASH,
        FAILURE_REASON_APP_NOT_RESPONDING
})
@Retention(RetentionPolicy.SOURCE)
public @interface FailureReasons {}
109
// Duration to count package failures before it resets to 0
@VisibleForTesting
static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS =
        (int) TimeUnit.MINUTES.toMillis(1);
// Number of package failures within the duration above before we notify observers
@VisibleForTesting
static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5;
// Monitoring duration substituted by startObservingHealth when the caller passes a
// duration smaller than 1 ms.
@VisibleForTesting
static final long DEFAULT_OBSERVING_DURATION_MS = TimeUnit.DAYS.toMillis(2);
// Whether explicit health checks are enabled or not
private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true;

// Count and window handed to the BootThreshold built in the constructor; noteBoot()
// consults that threshold to detect a boot loop.
@VisibleForTesting
static final int DEFAULT_BOOT_LOOP_TRIGGER_COUNT = 5;
static final long DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS = TimeUnit.MINUTES.toMillis(10);
// NOTE(review): presumably the system properties BootThreshold uses to persist its
// counter and window start; their use is outside this part of the file -- confirm.
private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
127
// Remaining native-crash polls. Initialised from NUMBER_OF_NATIVE_CRASH_POLLS in the
// constructor and decremented by checkAndMitigateNativeCrashes, which is documented to
// run only on mShortTaskHandler.
private long mNumberOfNativeCrashPollsRemaining;

// Version number and element/attribute names of the persisted XML policy file.
// NOTE(review): the reader/writer code is outside this part of the file; the meanings
// are inferred from the constant names -- confirm.
private static final int DB_VERSION = 1;
private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog";
private static final String TAG_PACKAGE = "package";
private static final String TAG_OBSERVER = "observer";
private static final String ATTR_VERSION = "version";
private static final String ATTR_NAME = "name";
private static final String ATTR_DURATION = "duration";
private static final String ATTR_EXPLICIT_HEALTH_CHECK_DURATION = "health-check-duration";
private static final String ATTR_PASSED_HEALTH_CHECK = "passed-health-check";
Zimuzo6efba542018-11-29 12:47:58 +0000139
// Singleton instance. Returned by getInstance() and assigned inside the constructor.
@GuardedBy("PackageWatchdog.class")
private static PackageWatchdog sPackageWatchdog;

// Lock protecting the @GuardedBy("mLock") state below.
private final Object mLock = new Object();
// System server context
private final Context mContext;
// Handler to run short running tasks
private final Handler mShortTaskHandler;
// Handler for processing IO and long running tasks
private final Handler mLongTaskHandler;
// Contains (observer-name -> observer-handle) that have ever been registered from
// previous boots. Observers with all packages expired are periodically pruned.
// It is saved to disk on system shutdown and repopulated on startup so it survives reboots.
@GuardedBy("mLock")
private final ArrayMap<String, ObserverInternal> mAllObservers = new ArrayMap<>();
// File containing the XML data of monitored packages /data/system/package-watchdog.xml
private final AtomicFile mPolicyFile;
// Controller that receives the health check callbacks and the synced package requests.
private final ExplicitHealthCheckController mHealthCheckController;
private final ConnectivityModuleConnector mConnectivityModuleConnector;
// Pre-built Runnables so the same instance can be posted to, and removed from, a Handler
// (see syncRequestsAsync and writeNow).
private final Runnable mSyncRequests = this::syncRequests;
private final Runnable mSyncStateWithScheduledReason = this::syncStateWithScheduledReason;
private final Runnable mSaveToFile = this::saveToFile;
// Injectable uptime source; production code passes android.os.SystemClock::uptimeMillis.
private final SystemClock mSystemClock;
// Boot-loop detector consulted by noteBoot().
private final BootThreshold mBootThreshold;
// Set to true by onPackagesReady(); syncRequests() sends nothing to the controller before.
@GuardedBy("mLock")
private boolean mIsPackagesReady;
// Flag to control whether explicit health checks are supported or not
@GuardedBy("mLock")
private boolean mIsHealthCheckEnabled = DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED;
@GuardedBy("mLock")
private int mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS;
@GuardedBy("mLock")
private int mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT;
// SystemClock#uptimeMillis when we last executed #syncState
// 0 if no prune is scheduled.
@GuardedBy("mLock")
private long mUptimeAtLastStateSync;
Zimuzo6efba542018-11-29 12:47:58 +0000177
/**
 * Source of uptime for the watchdog, injectable through the {@code @VisibleForTesting}
 * constructor. The private constructor binds it to
 * {@code android.os.SystemClock::uptimeMillis}.
 */
@FunctionalInterface
@VisibleForTesting
interface SystemClock {
    // TODO: Add elapsedRealtime to this interface
    /** Returns the current uptime in milliseconds. */
    long uptimeMillis();
}
184
/**
 * Builds the production instance by delegating to the injectable constructor with the
 * default dependencies: the policy file {@code system/package-watchdog.xml} under
 * {@link Environment#getDataDirectory()}, a short-task handler bound to the calling
 * thread's looper, the shared {@link BackgroundThread} handler for long tasks, and the
 * real uptime clock.
 */
private PackageWatchdog(Context context) {
    // Needs to be constructed inline
    this(context, new AtomicFile(
                    new File(new File(Environment.getDataDirectory(), "system"),
                            "package-watchdog.xml")),
            new Handler(Looper.myLooper()), BackgroundThread.getHandler(),
            new ExplicitHealthCheckController(context),
            ConnectivityModuleConnector.getInstance(),
            android.os.SystemClock::uptimeMillis);
}
195
/**
 * Creates a PackageWatchdog that allows injecting dependencies.
 *
 * <p>Besides storing the dependencies, this resets the native-crash poll budget, creates
 * the boot-loop threshold from the default count and window, calls {@code loadFromFile()},
 * and finally stores the new instance in {@code sPackageWatchdog}.
 */
@VisibleForTesting
PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler,
        Handler longTaskHandler, ExplicitHealthCheckController controller,
        ConnectivityModuleConnector connectivityModuleConnector, SystemClock clock) {
    mContext = context;
    mPolicyFile = policyFile;
    mShortTaskHandler = shortTaskHandler;
    mLongTaskHandler = longTaskHandler;
    mHealthCheckController = controller;
    mConnectivityModuleConnector = connectivityModuleConnector;
    mSystemClock = clock;
    mNumberOfNativeCrashPollsRemaining = NUMBER_OF_NATIVE_CRASH_POLLS;
    mBootThreshold = new BootThreshold(DEFAULT_BOOT_LOOP_TRIGGER_COUNT,
            DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS);
    loadFromFile();
    // getInstance() relies on this assignment: it discards the result of `new`.
    // NOTE(review): sPackageWatchdog is @GuardedBy("PackageWatchdog.class"), but callers
    // other than getInstance() reach this line without that lock -- confirm intended.
    sPackageWatchdog = this;
}
216
Zimuzo6efba542018-11-29 12:47:58 +0000217 /** Creates or gets singleton instance of PackageWatchdog. */
Zimuzocfaed762019-01-03 21:13:01 +0000218 public static PackageWatchdog getInstance(Context context) {
219 synchronized (PackageWatchdog.class) {
220 if (sPackageWatchdog == null) {
Gavin Corkery69395652019-12-12 19:06:47 +0000221 new PackageWatchdog(context);
Zimuzocfaed762019-01-03 21:13:01 +0000222 }
223 return sPackageWatchdog;
Zimuzo6efba542018-11-29 12:47:58 +0000224 }
Zimuzo6efba542018-11-29 12:47:58 +0000225 }
226
227 /**
Zimuzocaa435e2019-03-20 11:16:06 +0000228 * Called during boot to notify when packages are ready on the device so we can start
229 * binding.
230 */
231 public void onPackagesReady() {
232 synchronized (mLock) {
233 mIsPackagesReady = true;
Zimuzocb148b22019-04-01 18:54:17 +0100234 mHealthCheckController.setCallbacks(packageName -> onHealthCheckPassed(packageName),
235 packages -> onSupportedPackages(packages),
236 () -> syncRequestsAsync());
Zimuzodef38be2019-04-05 17:24:29 +0100237 setPropertyChangedListenerLocked();
238 updateConfigs();
Roshan Pius7e6f5f52019-06-27 14:14:40 -0700239 registerConnectivityModuleHealthListener();
Zimuzocaa435e2019-03-20 11:16:06 +0000240 }
241 }
242
243 /**
Gavin Corkery84e99022019-11-20 15:40:45 +0000244 * Registers {@code observer} to listen for package failures. Add a new ObserverInternal for
245 * this observer if it does not already exist.
Zimuzo6efba542018-11-29 12:47:58 +0000246 *
247 * <p>Observers are expected to call this on boot. It does not specify any packages but
248 * it will resume observing any packages requested from a previous boot.
249 */
250 public void registerHealthObserver(PackageHealthObserver observer) {
251 synchronized (mLock) {
Zimuzo3eee4382019-01-08 20:42:39 +0000252 ObserverInternal internalObserver = mAllObservers.get(observer.getName());
253 if (internalObserver != null) {
JW Wangcc44fc62019-09-10 14:26:18 +0800254 internalObserver.registeredObserver = observer;
Gavin Corkery84e99022019-11-20 15:40:45 +0000255 } else {
256 internalObserver = new ObserverInternal(observer.getName(), new ArrayList<>());
257 internalObserver.registeredObserver = observer;
258 mAllObservers.put(observer.getName(), internalObserver);
259 syncState("added new observer");
Zimuzo3eee4382019-01-08 20:42:39 +0000260 }
Zimuzo6efba542018-11-29 12:47:58 +0000261 }
262 }
263
264 /**
265 * Starts observing the health of the {@code packages} for {@code observer} and notifies
266 * {@code observer} of any package failures within the monitoring duration.
267 *
Zimuzocaa435e2019-03-20 11:16:06 +0000268 * <p>If monitoring a package supporting explicit health check, at the end of the monitoring
269 * duration if {@link #onHealthCheckPassed} was never called,
Zimuzo9284e742019-02-22 12:09:28 +0000270 * {@link PackageHealthObserver#execute} will be called as if the package failed.
271 *
Zimuzo6efba542018-11-29 12:47:58 +0000272 * <p>If {@code observer} is already monitoring a package in {@code packageNames},
Zimuzo9284e742019-02-22 12:09:28 +0000273 * the monitoring window of that package will be reset to {@code durationMs} and the health
Zimuzocaa435e2019-03-20 11:16:06 +0000274 * check state will be reset to a default depending on if the package is contained in
275 * {@link mPackagesWithExplicitHealthCheckEnabled}.
Zimuzo6efba542018-11-29 12:47:58 +0000276 *
JW Wange0f2f3d2019-09-10 15:06:43 +0800277 * <p>If {@code packageNames} is empty, this will be a no-op.
278 *
279 * <p>If {@code durationMs} is less than 1, a default monitoring duration
280 * {@link #DEFAULT_OBSERVING_DURATION_MS} will be used.
Zimuzo6efba542018-11-29 12:47:58 +0000281 */
Zimuzocb148b22019-04-01 18:54:17 +0100282 public void startObservingHealth(PackageHealthObserver observer, List<String> packageNames,
Zimuzocaa435e2019-03-20 11:16:06 +0000283 long durationMs) {
Zimuzocb148b22019-04-01 18:54:17 +0100284 if (packageNames.isEmpty()) {
285 Slog.wtf(TAG, "No packages to observe, " + observer.getName());
Zimuzocaa435e2019-03-20 11:16:06 +0000286 return;
287 }
Zimuzocb148b22019-04-01 18:54:17 +0100288 if (durationMs < 1) {
JW Wange0f2f3d2019-09-10 15:06:43 +0800289 Slog.wtf(TAG, "Invalid duration " + durationMs + "ms for observer "
Zimuzocb148b22019-04-01 18:54:17 +0100290 + observer.getName() + ". Not observing packages " + packageNames);
JW Wange0f2f3d2019-09-10 15:06:43 +0800291 durationMs = DEFAULT_OBSERVING_DURATION_MS;
Zimuzocaa435e2019-03-20 11:16:06 +0000292 }
Zimuzocaa435e2019-03-20 11:16:06 +0000293
Zimuzo6efba542018-11-29 12:47:58 +0000294 List<MonitoredPackage> packages = new ArrayList<>();
Zimuzocfaed762019-01-03 21:13:01 +0000295 for (int i = 0; i < packageNames.size(); i++) {
Zimuzo7c6c28f2019-04-05 12:03:10 +0100296 // Health checks not available yet so health check state will start INACTIVE
JW Wang5f6719f2019-11-28 10:38:53 +0800297 MonitoredPackage pkg = newMonitoredPackage(packageNames.get(i), durationMs, false);
298 if (pkg != null) {
299 packages.add(pkg);
300 }
301 }
302
303 if (packages.isEmpty()) {
304 return;
Zimuzo6efba542018-11-29 12:47:58 +0000305 }
Zimuzocb148b22019-04-01 18:54:17 +0100306
Zimuzo6997f3c2019-04-05 17:06:25 +0100307 // Sync before we add the new packages to the observers. This will #pruneObservers,
308 // causing any elapsed time to be deducted from all existing packages before we add new
309 // packages. This maintains the invariant that the elapsed time for ALL (new and existing)
310 // packages is the same.
311 syncState("observing new packages");
312
Zimuzo6efba542018-11-29 12:47:58 +0000313 synchronized (mLock) {
314 ObserverInternal oldObserver = mAllObservers.get(observer.getName());
315 if (oldObserver == null) {
Zimuzocaa435e2019-03-20 11:16:06 +0000316 Slog.d(TAG, observer.getName() + " started monitoring health "
317 + "of packages " + packageNames);
Zimuzo6efba542018-11-29 12:47:58 +0000318 mAllObservers.put(observer.getName(),
319 new ObserverInternal(observer.getName(), packages));
320 } else {
Zimuzocaa435e2019-03-20 11:16:06 +0000321 Slog.d(TAG, observer.getName() + " added the following "
322 + "packages to monitor " + packageNames);
Zimuzo6997f3c2019-04-05 17:06:25 +0100323 oldObserver.updatePackagesLocked(packages);
Zimuzo6efba542018-11-29 12:47:58 +0000324 }
325 }
Zimuzo6997f3c2019-04-05 17:06:25 +0100326
327 // Register observer in case not already registered
Zimuzo6efba542018-11-29 12:47:58 +0000328 registerHealthObserver(observer);
Zimuzo6997f3c2019-04-05 17:06:25 +0100329
330 // Sync after we add the new packages to the observers. We may have received packges
331 // requiring an earlier schedule than we are currently scheduled for.
332 syncState("updated observers");
Zimuzo6efba542018-11-29 12:47:58 +0000333 }
334
335 /**
336 * Unregisters {@code observer} from listening to package failure.
337 * Additionally, this stops observing any packages that may have previously been observed
338 * even from a previous boot.
339 */
340 public void unregisterHealthObserver(PackageHealthObserver observer) {
341 synchronized (mLock) {
342 mAllObservers.remove(observer.getName());
Zimuzo6efba542018-11-29 12:47:58 +0000343 }
Zimuzo6997f3c2019-04-05 17:06:25 +0100344 syncState("unregistering observer: " + observer.getName());
Zimuzo6efba542018-11-29 12:47:58 +0000345 }
346
Zimuzo3eee4382019-01-08 20:42:39 +0000347 /**
Gavin Corkeryf305f4d2019-11-27 15:46:29 +0000348 * Called when a process fails due to a crash, ANR or explicit health check.
Zimuzo6efba542018-11-29 12:47:58 +0000349 *
Zimuzoe5009cd2019-01-23 18:11:58 +0000350 * <p>For each package contained in the process, one registered observer with the least user
351 * impact will be notified for mitigation.
Zimuzo6efba542018-11-29 12:47:58 +0000352 *
353 * <p>This method could be called frequently if there is a severe problem on the device.
354 */
Gavin Corkeryf305f4d2019-11-27 15:46:29 +0000355 public void onPackageFailure(List<VersionedPackage> packages,
356 @FailureReasons int failureReason) {
Zimuzocaa435e2019-03-20 11:16:06 +0000357 mLongTaskHandler.post(() -> {
Zimuzoe5009cd2019-01-23 18:11:58 +0000358 synchronized (mLock) {
359 if (mAllObservers.isEmpty()) {
360 return;
361 }
Gavin Corkeryc69ea402019-12-12 13:42:55 +0000362 boolean requiresImmediateAction = (failureReason == FAILURE_REASON_NATIVE_CRASH
363 || failureReason == FAILURE_REASON_EXPLICIT_HEALTH_CHECK);
364 if (requiresImmediateAction) {
Gavin Corkeryf9b3fd42019-12-10 17:18:54 +0000365 handleFailureImmediately(packages, failureReason);
366 } else {
367 for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
368 VersionedPackage versionedPackage = packages.get(pIndex);
369 // Observer that will receive failure for versionedPackage
370 PackageHealthObserver currentObserverToNotify = null;
371 int currentObserverImpact = Integer.MAX_VALUE;
Zimuzoe5009cd2019-01-23 18:11:58 +0000372
Gavin Corkeryf9b3fd42019-12-10 17:18:54 +0000373 // Find observer with least user impact
374 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
375 ObserverInternal observer = mAllObservers.valueAt(oIndex);
376 PackageHealthObserver registeredObserver = observer.registeredObserver;
377 if (registeredObserver != null
378 && observer.onPackageFailureLocked(
379 versionedPackage.getPackageName())) {
380 int impact = registeredObserver.onHealthCheckFailed(
381 versionedPackage, failureReason);
382 if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
383 && impact < currentObserverImpact) {
384 currentObserverToNotify = registeredObserver;
385 currentObserverImpact = impact;
386 }
Zimuzoe5009cd2019-01-23 18:11:58 +0000387 }
388 }
Zimuzoe5009cd2019-01-23 18:11:58 +0000389
Gavin Corkeryf9b3fd42019-12-10 17:18:54 +0000390 // Execute action with least user impact
391 if (currentObserverToNotify != null) {
392 currentObserverToNotify.execute(versionedPackage, failureReason);
393 }
Zimuzo6efba542018-11-29 12:47:58 +0000394 }
Zimuzo3eee4382019-01-08 20:42:39 +0000395 }
Zimuzocfaed762019-01-03 21:13:01 +0000396 }
Zimuzoe5009cd2019-01-23 18:11:58 +0000397 });
Zimuzo6efba542018-11-29 12:47:58 +0000398 }
399
Gavin Corkeryf9b3fd42019-12-10 17:18:54 +0000400 /**
Gavin Corkeryc69ea402019-12-12 13:42:55 +0000401 * For native crashes or explicit health check failures, call directly into each observer to
402 * mitigate the error without going through failure threshold logic.
Gavin Corkeryf9b3fd42019-12-10 17:18:54 +0000403 */
404 private void handleFailureImmediately(List<VersionedPackage> packages,
405 @FailureReasons int failureReason) {
406 VersionedPackage failingPackage = packages.size() > 0 ? packages.get(0) : null;
407 PackageHealthObserver currentObserverToNotify = null;
408 int currentObserverImpact = Integer.MAX_VALUE;
409 for (ObserverInternal observer: mAllObservers.values()) {
410 PackageHealthObserver registeredObserver = observer.registeredObserver;
411 if (registeredObserver != null) {
412 int impact = registeredObserver.onHealthCheckFailed(
413 failingPackage, failureReason);
414 if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
415 && impact < currentObserverImpact) {
416 currentObserverToNotify = registeredObserver;
417 currentObserverImpact = impact;
418 }
419 }
420 }
421 if (currentObserverToNotify != null) {
422 currentObserverToNotify.execute(failingPackage, failureReason);
423 }
424 }
425
Gavin Corkeryaa57ef32019-12-17 19:02:54 +0000426 /**
427 * Called when the system server boots. If the system server is detected to be in a boot loop,
428 * query each observer and perform the mitigation action with the lowest user impact.
429 */
430 public void noteBoot() {
431 synchronized (mLock) {
432 if (mBootThreshold.incrementAndTest()) {
433 mBootThreshold.reset();
434 PackageHealthObserver currentObserverToNotify = null;
435 int currentObserverImpact = Integer.MAX_VALUE;
436 for (int i = 0; i < mAllObservers.size(); i++) {
437 final ObserverInternal observer = mAllObservers.valueAt(i);
438 PackageHealthObserver registeredObserver = observer.registeredObserver;
439 if (registeredObserver != null) {
440 int impact = registeredObserver.onBootLoop();
441 if (impact != PackageHealthObserverImpact.USER_IMPACT_NONE
442 && impact < currentObserverImpact) {
443 currentObserverToNotify = registeredObserver;
444 currentObserverImpact = impact;
445 }
446 }
447 }
448 if (currentObserverToNotify != null) {
449 currentObserverToNotify.executeBootLoopMitigation();
450 }
451 }
452 }
453 }
454
Zimuzo7c6c28f2019-04-05 12:03:10 +0100455 // TODO(b/120598832): Optimize write? Maybe only write a separate smaller file? Also
456 // avoid holding lock?
Zimuzo6efba542018-11-29 12:47:58 +0000457 // This currently adds about 7ms extra to shutdown thread
458 /** Writes the package information to file during shutdown. */
459 public void writeNow() {
Zimuzo7c6c28f2019-04-05 12:03:10 +0100460 synchronized (mLock) {
Zimuzo6997f3c2019-04-05 17:06:25 +0100461 // Must only run synchronous tasks as this runs on the ShutdownThread and no other
462 // thread is guaranteed to run during shutdown.
Zimuzo7c6c28f2019-04-05 12:03:10 +0100463 if (!mAllObservers.isEmpty()) {
jwwang1b85cce2019-08-27 15:39:19 +0800464 mLongTaskHandler.removeCallbacks(mSaveToFile);
Zimuzo6997f3c2019-04-05 17:06:25 +0100465 pruneObserversLocked();
Zimuzo7c6c28f2019-04-05 12:03:10 +0100466 saveToFile();
467 Slog.i(TAG, "Last write to update package durations");
468 }
Zimuzo6efba542018-11-29 12:47:58 +0000469 }
470 }
471
Zimuzocaa435e2019-03-20 11:16:06 +0000472 /**
473 * Enables or disables explicit health checks.
474 * <p> If explicit health checks are enabled, the health check service is started.
475 * <p> If explicit health checks are disabled, pending explicit health check requests are
476 * passed and the health check service is stopped.
477 */
shafik82962902019-04-24 10:31:54 +0100478 private void setExplicitHealthCheckEnabled(boolean enabled) {
Zimuzocaa435e2019-03-20 11:16:06 +0000479 synchronized (mLock) {
480 mIsHealthCheckEnabled = enabled;
481 mHealthCheckController.setEnabled(enabled);
Zimuzo6997f3c2019-04-05 17:06:25 +0100482 // Prune to update internal state whenever health check is enabled/disabled
483 syncState("health check state " + (enabled ? "enabled" : "disabled"));
Zimuzocaa435e2019-03-20 11:16:06 +0000484 }
485 }
486
Gavin Corkeryf9b3fd42019-12-10 17:18:54 +0000487 /**
488 * This method should be only called on mShortTaskHandler, since it modifies
489 * {@link #mNumberOfNativeCrashPollsRemaining}.
490 */
491 private void checkAndMitigateNativeCrashes() {
492 mNumberOfNativeCrashPollsRemaining--;
493 // Check if native watchdog reported a crash
494 if ("1".equals(SystemProperties.get("sys.init.updatable_crashing"))) {
495 // We rollback everything available when crash is unattributable
496 onPackageFailure(Collections.EMPTY_LIST, FAILURE_REASON_NATIVE_CRASH);
497 // we stop polling after an attempt to execute rollback, regardless of whether the
498 // attempt succeeds or not
499 } else {
500 if (mNumberOfNativeCrashPollsRemaining > 0) {
501 mShortTaskHandler.postDelayed(() -> checkAndMitigateNativeCrashes(),
502 NATIVE_CRASH_POLLING_INTERVAL_MILLIS);
503 }
504 }
505 }
506
507 /**
508 * Since this method can eventually trigger a rollback, it should be called
509 * only once boot has completed {@code onBootCompleted} and not earlier, because the install
510 * session must be entirely completed before we try to rollback.
511 */
512 public void scheduleCheckAndMitigateNativeCrashes() {
513 Slog.i(TAG, "Scheduling " + mNumberOfNativeCrashPollsRemaining + " polls to check "
514 + "and mitigate native crashes");
515 mShortTaskHandler.post(()->checkAndMitigateNativeCrashes());
516 }
517
/**
 * Possible severity values of the user impact of a {@link PackageHealthObserver#execute}.
 *
 * <p>The watchdog picks the observer reporting the smallest value other than
 * {@link #USER_IMPACT_NONE}, so a lower number means a less disruptive mitigation.
 */
@Retention(SOURCE)
@IntDef(value = {PackageHealthObserverImpact.USER_IMPACT_NONE,
                 PackageHealthObserverImpact.USER_IMPACT_LOW,
                 PackageHealthObserverImpact.USER_IMPACT_MEDIUM,
                 PackageHealthObserverImpact.USER_IMPACT_HIGH})
public @interface PackageHealthObserverImpact {
    /** No action to take. */
    int USER_IMPACT_NONE = 0;
    /** Action has low user impact, user of a device will barely notice. */
    int USER_IMPACT_LOW = 1;
    /** Action has medium user impact, user of a device will likely notice. */
    int USER_IMPACT_MEDIUM = 3;
    /** Action has high user impact, a last resort, user of a device will be very frustrated. */
    int USER_IMPACT_HIGH = 5;
}
534
/** Register instances of this interface to receive notifications on package failure. */
public interface PackageHealthObserver {
    /**
     * Called when health check fails for the {@code versionedPackage}.
     *
     * <p>The watchdog compares the values returned by all registered observers and calls
     * {@link #execute} on the one with the lowest impact other than
     * {@link PackageHealthObserverImpact#USER_IMPACT_NONE}.
     *
     * @param versionedPackage the package that is failing. This may be null if a native
     *                          service is crashing.
     * @param failureReason the type of failure that is occurring.
     *
     * @return any one of {@link PackageHealthObserverImpact} to express the impact
     * to the user on {@link #execute}
     */
    @PackageHealthObserverImpact int onHealthCheckFailed(
            @Nullable VersionedPackage versionedPackage,
            @FailureReasons int failureReason);

    /**
     * Executes mitigation for {@link #onHealthCheckFailed}.
     *
     * @param versionedPackage the package that is failing. This may be null if a native
     *                          service is crashing.
     * @param failureReason the type of failure that is occurring.
     * @return {@code true} if action was executed successfully, {@code false} otherwise
     */
    boolean execute(@Nullable VersionedPackage versionedPackage,
            @FailureReasons int failureReason);


    /**
     * Called when the system server has booted several times within a window of time, defined
     * by {@link #mBootThreshold}
     *
     * @return the impact of this observer's boot loop mitigation; the default reports
     * {@link PackageHealthObserverImpact#USER_IMPACT_NONE}, so the observer is never chosen
     */
    default @PackageHealthObserverImpact int onBootLoop() {
        return PackageHealthObserverImpact.USER_IMPACT_NONE;
    }

    /**
     * Executes mitigation for {@link #onBootLoop}
     *
     * @return {@code false} by default
     */
    default boolean executeBootLoopMitigation() {
        return false;
    }

    // TODO(b/120598832): Ensure uniqueness?
    /**
     * Identifier for the observer, should not change across device updates otherwise the
     * watchdog may drop observing packages with the old name.
     */
    String getName();

    /**
     * An observer will not be pruned if this is set, even if the observer is not explicitly
     * monitoring any packages.
     *
     * @return {@code false} by default
     */
    default boolean isPersistent() {
        return false;
    }

    /**
     * Returns {@code true} if this observer wishes to observe the given package, {@code false}
     * otherwise
     *
     * <p> A persistent observer may choose to start observing certain failing packages, even if
     * it has not explicitly asked to watch the package with {@link #startObservingHealth}.
     *
     * @param packageName name of the package the watchdog is asking about
     */
    default boolean mayObservePackage(String packageName) {
        return false;
    }
}
605
Zimuzodef38be2019-04-05 17:24:29 +0100606 long getTriggerFailureCount() {
607 synchronized (mLock) {
608 return mTriggerFailureCount;
609 }
610 }
611
Zimuzocaa435e2019-03-20 11:16:06 +0000612 /**
Zimuzocb148b22019-04-01 18:54:17 +0100613 * Serializes and syncs health check requests with the {@link ExplicitHealthCheckController}.
614 */
615 private void syncRequestsAsync() {
jwwang1b85cce2019-08-27 15:39:19 +0800616 mShortTaskHandler.removeCallbacks(mSyncRequests);
617 mShortTaskHandler.post(mSyncRequests);
Zimuzocb148b22019-04-01 18:54:17 +0100618 }
619
620 /**
621 * Syncs health check requests with the {@link ExplicitHealthCheckController}.
622 * Calls to this must be serialized.
623 *
624 * @see #syncRequestsAsync
625 */
626 private void syncRequests() {
627 Set<String> packages = null;
628 synchronized (mLock) {
629 if (mIsPackagesReady) {
630 packages = getPackagesPendingHealthChecksLocked();
631 } // else, we will sync requests when packages become ready
632 }
633
634 // Call outside lock to avoid holding lock when calling into the controller.
635 if (packages != null) {
Zimuzo6997f3c2019-04-05 17:06:25 +0100636 Slog.i(TAG, "Syncing health check requests for packages: " + packages);
Zimuzocb148b22019-04-01 18:54:17 +0100637 mHealthCheckController.syncRequests(packages);
638 }
639 }
640
641 /**
Zimuzocaa435e2019-03-20 11:16:06 +0000642 * Updates the observers monitoring {@code packageName} that explicit health check has passed.
643 *
644 * <p> This update is strictly for registered observers at the time of the call
645 * Observers that register after this signal will have no knowledge of prior signals and will
646 * effectively behave as if the explicit health check hasn't passed for {@code packageName}.
647 *
648 * <p> {@code packageName} can still be considered failed if reported by
Zimuzo6997f3c2019-04-05 17:06:25 +0100649 * {@link #onPackageFailureLocked} before the package expires.
Zimuzocaa435e2019-03-20 11:16:06 +0000650 *
651 * <p> Triggered by components outside the system server when they are fully functional after an
652 * update.
653 */
654 private void onHealthCheckPassed(String packageName) {
655 Slog.i(TAG, "Health check passed for package: " + packageName);
Zimuzo6997f3c2019-04-05 17:06:25 +0100656 boolean isStateChanged = false;
657
Zimuzocaa435e2019-03-20 11:16:06 +0000658 synchronized (mLock) {
659 for (int observerIdx = 0; observerIdx < mAllObservers.size(); observerIdx++) {
660 ObserverInternal observer = mAllObservers.valueAt(observerIdx);
JW Wangcc44fc62019-09-10 14:26:18 +0800661 MonitoredPackage monitoredPackage = observer.packages.get(packageName);
Zimuzo6997f3c2019-04-05 17:06:25 +0100662
663 if (monitoredPackage != null) {
664 int oldState = monitoredPackage.getHealthCheckStateLocked();
665 int newState = monitoredPackage.tryPassHealthCheckLocked();
666 isStateChanged |= oldState != newState;
Zimuzocaa435e2019-03-20 11:16:06 +0000667 }
668 }
669 }
Zimuzocb148b22019-04-01 18:54:17 +0100670
Zimuzo6997f3c2019-04-05 17:06:25 +0100671 if (isStateChanged) {
672 syncState("health check passed for " + packageName);
Zimuzocaa435e2019-03-20 11:16:06 +0000673 }
674 }
675
Zimuzo1a9aac72019-04-24 14:07:24 +0100676 private void onSupportedPackages(List<PackageConfig> supportedPackages) {
Zimuzo6997f3c2019-04-05 17:06:25 +0100677 boolean isStateChanged = false;
678
679 Map<String, Long> supportedPackageTimeouts = new ArrayMap<>();
Zimuzo1a9aac72019-04-24 14:07:24 +0100680 Iterator<PackageConfig> it = supportedPackages.iterator();
Zimuzo6997f3c2019-04-05 17:06:25 +0100681 while (it.hasNext()) {
Zimuzo1a9aac72019-04-24 14:07:24 +0100682 PackageConfig info = it.next();
Zimuzo6997f3c2019-04-05 17:06:25 +0100683 supportedPackageTimeouts.put(info.getPackageName(), info.getHealthCheckTimeoutMillis());
684 }
Zimuzocb148b22019-04-01 18:54:17 +0100685
686 synchronized (mLock) {
Zimuzo7c6c28f2019-04-05 12:03:10 +0100687 Slog.d(TAG, "Received supported packages " + supportedPackages);
Zimuzocb148b22019-04-01 18:54:17 +0100688 Iterator<ObserverInternal> oit = mAllObservers.values().iterator();
689 while (oit.hasNext()) {
JW Wangcc44fc62019-09-10 14:26:18 +0800690 Iterator<MonitoredPackage> pit = oit.next().packages.values().iterator();
Zimuzocb148b22019-04-01 18:54:17 +0100691 while (pit.hasNext()) {
692 MonitoredPackage monitoredPackage = pit.next();
Zimuzo6997f3c2019-04-05 17:06:25 +0100693 String packageName = monitoredPackage.getName();
694 int oldState = monitoredPackage.getHealthCheckStateLocked();
695 int newState;
Zimuzo7c6c28f2019-04-05 12:03:10 +0100696
Zimuzo6997f3c2019-04-05 17:06:25 +0100697 if (supportedPackageTimeouts.containsKey(packageName)) {
698 // Supported packages become ACTIVE if currently INACTIVE
699 newState = monitoredPackage.setHealthCheckActiveLocked(
700 supportedPackageTimeouts.get(packageName));
Zimuzo7c6c28f2019-04-05 12:03:10 +0100701 } else {
Zimuzo6997f3c2019-04-05 17:06:25 +0100702 // Unsupported packages are marked as PASSED unless already FAILED
703 newState = monitoredPackage.tryPassHealthCheckLocked();
Zimuzocb148b22019-04-01 18:54:17 +0100704 }
Zimuzo6997f3c2019-04-05 17:06:25 +0100705 isStateChanged |= oldState != newState;
Zimuzocb148b22019-04-01 18:54:17 +0100706 }
707 }
708 }
709
Zimuzo6997f3c2019-04-05 17:06:25 +0100710 if (isStateChanged) {
711 syncState("updated health check supported packages " + supportedPackages);
Zimuzo7c6c28f2019-04-05 12:03:10 +0100712 }
Zimuzocb148b22019-04-01 18:54:17 +0100713 }
714
Zimuzo6997f3c2019-04-05 17:06:25 +0100715 @GuardedBy("mLock")
Zimuzocb148b22019-04-01 18:54:17 +0100716 private Set<String> getPackagesPendingHealthChecksLocked() {
717 Slog.d(TAG, "Getting all observed packages pending health checks");
718 Set<String> packages = new ArraySet<>();
719 Iterator<ObserverInternal> oit = mAllObservers.values().iterator();
720 while (oit.hasNext()) {
721 ObserverInternal observer = oit.next();
722 Iterator<MonitoredPackage> pit =
JW Wangcc44fc62019-09-10 14:26:18 +0800723 observer.packages.values().iterator();
Zimuzocb148b22019-04-01 18:54:17 +0100724 while (pit.hasNext()) {
725 MonitoredPackage monitoredPackage = pit.next();
Zimuzo6997f3c2019-04-05 17:06:25 +0100726 String packageName = monitoredPackage.getName();
Zimuzod3d2ada2019-05-07 17:48:02 +0100727 if (monitoredPackage.isPendingHealthChecksLocked()) {
Zimuzocb148b22019-04-01 18:54:17 +0100728 packages.add(packageName);
729 }
730 }
731 }
732 return packages;
733 }
734
Zimuzo6997f3c2019-04-05 17:06:25 +0100735 /**
736 * Syncs the state of the observers.
737 *
738 * <p> Prunes all observers, saves new state to disk, syncs health check requests with the
739 * health check service and schedules the next state sync.
740 */
741 private void syncState(String reason) {
Zimuzo6efba542018-11-29 12:47:58 +0000742 synchronized (mLock) {
Zimuzo6997f3c2019-04-05 17:06:25 +0100743 Slog.i(TAG, "Syncing state, reason: " + reason);
744 pruneObserversLocked();
745
746 saveToFileAsync();
747 syncRequestsAsync();
748
749 // Done syncing state, schedule the next state sync
750 scheduleNextSyncStateLocked();
751 }
752 }
753
754 private void syncStateWithScheduledReason() {
755 syncState("scheduled");
756 }
757
758 @GuardedBy("mLock")
759 private void scheduleNextSyncStateLocked() {
760 long durationMs = getNextStateSyncMillisLocked();
jwwang1b85cce2019-08-27 15:39:19 +0800761 mShortTaskHandler.removeCallbacks(mSyncStateWithScheduledReason);
Zimuzo6997f3c2019-04-05 17:06:25 +0100762 if (durationMs == Long.MAX_VALUE) {
763 Slog.i(TAG, "Cancelling state sync, nothing to sync");
764 mUptimeAtLastStateSync = 0;
765 } else {
766 Slog.i(TAG, "Scheduling next state sync in " + durationMs + "ms");
JW Wang2d085672019-09-02 03:28:31 +0000767 mUptimeAtLastStateSync = mSystemClock.uptimeMillis();
jwwang1b85cce2019-08-27 15:39:19 +0800768 mShortTaskHandler.postDelayed(mSyncStateWithScheduledReason, durationMs);
Zimuzo6efba542018-11-29 12:47:58 +0000769 }
770 }
771
772 /**
Zimuzo6997f3c2019-04-05 17:06:25 +0100773 * Returns the next duration in millis to sync the watchdog state.
Zimuzo7c6c28f2019-04-05 12:03:10 +0100774 *
Zimuzo6efba542018-11-29 12:47:58 +0000775 * @returns Long#MAX_VALUE if there are no observed packages.
776 */
Zimuzo6997f3c2019-04-05 17:06:25 +0100777 @GuardedBy("mLock")
778 private long getNextStateSyncMillisLocked() {
Zimuzo6efba542018-11-29 12:47:58 +0000779 long shortestDurationMs = Long.MAX_VALUE;
Zimuzocfaed762019-01-03 21:13:01 +0000780 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
JW Wangcc44fc62019-09-10 14:26:18 +0800781 ArrayMap<String, MonitoredPackage> packages = mAllObservers.valueAt(oIndex).packages;
Zimuzocfaed762019-01-03 21:13:01 +0000782 for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
Zimuzo7c6c28f2019-04-05 12:03:10 +0100783 MonitoredPackage mp = packages.valueAt(pIndex);
Zimuzo6997f3c2019-04-05 17:06:25 +0100784 long duration = mp.getShortestScheduleDurationMsLocked();
Zimuzocfaed762019-01-03 21:13:01 +0000785 if (duration < shortestDurationMs) {
786 shortestDurationMs = duration;
Zimuzo6efba542018-11-29 12:47:58 +0000787 }
788 }
789 }
Zimuzo6efba542018-11-29 12:47:58 +0000790 return shortestDurationMs;
791 }
792
793 /**
Zimuzo6997f3c2019-04-05 17:06:25 +0100794 * Removes {@code elapsedMs} milliseconds from all durations on monitored packages
795 * and updates other internal state.
Zimuzo6efba542018-11-29 12:47:58 +0000796 */
Zimuzo6997f3c2019-04-05 17:06:25 +0100797 @GuardedBy("mLock")
798 private void pruneObserversLocked() {
799 long elapsedMs = mUptimeAtLastStateSync == 0
JW Wang2d085672019-09-02 03:28:31 +0000800 ? 0 : mSystemClock.uptimeMillis() - mUptimeAtLastStateSync;
Zimuzo6997f3c2019-04-05 17:06:25 +0100801 if (elapsedMs <= 0) {
802 Slog.i(TAG, "Not pruning observers, elapsed time: " + elapsedMs + "ms");
Zimuzo6efba542018-11-29 12:47:58 +0000803 return;
804 }
Zimuzo6997f3c2019-04-05 17:06:25 +0100805
806 Slog.i(TAG, "Removing " + elapsedMs + "ms from all packages on all observers");
807 Iterator<ObserverInternal> it = mAllObservers.values().iterator();
808 while (it.hasNext()) {
809 ObserverInternal observer = it.next();
810 Set<MonitoredPackage> failedPackages =
811 observer.prunePackagesLocked(elapsedMs);
812 if (!failedPackages.isEmpty()) {
813 onHealthCheckFailed(observer, failedPackages);
814 }
Gavin Corkery84e99022019-11-20 15:40:45 +0000815 if (observer.packages.isEmpty() && (observer.registeredObserver == null
816 || !observer.registeredObserver.isPersistent())) {
JW Wangcc44fc62019-09-10 14:26:18 +0800817 Slog.i(TAG, "Discarding observer " + observer.name + ". All packages expired");
Zimuzo6997f3c2019-04-05 17:06:25 +0100818 it.remove();
Zimuzo6efba542018-11-29 12:47:58 +0000819 }
820 }
Zimuzo6efba542018-11-29 12:47:58 +0000821 }
822
Zimuzocaa435e2019-03-20 11:16:06 +0000823 private void onHealthCheckFailed(ObserverInternal observer,
Zimuzo7c6c28f2019-04-05 12:03:10 +0100824 Set<MonitoredPackage> failedPackages) {
Zimuzocaa435e2019-03-20 11:16:06 +0000825 mLongTaskHandler.post(() -> {
Zimuzoef65fb82019-02-28 10:44:29 +0000826 synchronized (mLock) {
JW Wangcc44fc62019-09-10 14:26:18 +0800827 PackageHealthObserver registeredObserver = observer.registeredObserver;
Zimuzoef65fb82019-02-28 10:44:29 +0000828 if (registeredObserver != null) {
Zimuzo7c6c28f2019-04-05 12:03:10 +0100829 Iterator<MonitoredPackage> it = failedPackages.iterator();
830 while (it.hasNext()) {
JW Wang5d9ccae2019-11-28 10:48:41 +0800831 VersionedPackage versionedPkg = it.next().mPackage;
832 Slog.i(TAG, "Explicit health check failed for package " + versionedPkg);
Gavin Corkeryf305f4d2019-11-27 15:46:29 +0000833 registeredObserver.execute(versionedPkg,
834 PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK);
Zimuzoef65fb82019-02-28 10:44:29 +0000835 }
836 }
837 }
838 });
839 }
840
Remi NGUYEN VAN6a135102019-05-29 18:30:27 +0900841 @Nullable
842 private VersionedPackage getVersionedPackage(String packageName) {
843 final PackageManager pm = mContext.getPackageManager();
JW Wang5f6719f2019-11-28 10:38:53 +0800844 if (pm == null || TextUtils.isEmpty(packageName)) {
Remi NGUYEN VAN6a135102019-05-29 18:30:27 +0900845 return null;
846 }
847 try {
848 final long versionCode = pm.getPackageInfo(
849 packageName, 0 /* flags */).getLongVersionCode();
850 return new VersionedPackage(packageName, versionCode);
851 } catch (PackageManager.NameNotFoundException e) {
852 return null;
853 }
854 }
855
Zimuzo6efba542018-11-29 12:47:58 +0000856 /**
857 * Loads mAllObservers from file.
858 *
859 * <p>Note that this is <b>not</b> thread safe and should only called be called
860 * from the constructor.
861 */
862 private void loadFromFile() {
863 InputStream infile = null;
864 mAllObservers.clear();
865 try {
866 infile = mPolicyFile.openRead();
867 final XmlPullParser parser = Xml.newPullParser();
868 parser.setInput(infile, StandardCharsets.UTF_8.name());
869 XmlUtils.beginDocument(parser, TAG_PACKAGE_WATCHDOG);
870 int outerDepth = parser.getDepth();
871 while (XmlUtils.nextElementWithin(parser, outerDepth)) {
Zimuzodef38be2019-04-05 17:24:29 +0100872 ObserverInternal observer = ObserverInternal.read(parser, this);
Zimuzo6efba542018-11-29 12:47:58 +0000873 if (observer != null) {
JW Wangcc44fc62019-09-10 14:26:18 +0800874 mAllObservers.put(observer.name, observer);
Zimuzo6efba542018-11-29 12:47:58 +0000875 }
876 }
877 } catch (FileNotFoundException e) {
878 // Nothing to monitor
Zimuzocfaed762019-01-03 21:13:01 +0000879 } catch (IOException | NumberFormatException | XmlPullParserException e) {
Zimuzocaa435e2019-03-20 11:16:06 +0000880 Slog.wtf(TAG, "Unable to read monitored packages, deleting file", e);
Zimuzocfaed762019-01-03 21:13:01 +0000881 mPolicyFile.delete();
Zimuzo6efba542018-11-29 12:47:58 +0000882 } finally {
883 IoUtils.closeQuietly(infile);
884 }
885 }
886
Matt Pape11010c22019-05-06 12:27:55 -0700887 /** Adds a {@link DeviceConfig#OnPropertiesChangedListener}. */
Zimuzodef38be2019-04-05 17:24:29 +0100888 private void setPropertyChangedListenerLocked() {
Matt Pape11010c22019-05-06 12:27:55 -0700889 DeviceConfig.addOnPropertiesChangedListener(
Zimuzodef38be2019-04-05 17:24:29 +0100890 DeviceConfig.NAMESPACE_ROLLBACK,
891 mContext.getMainExecutor(),
Matt Pape11010c22019-05-06 12:27:55 -0700892 (properties) -> {
893 if (!DeviceConfig.NAMESPACE_ROLLBACK.equals(properties.getNamespace())) {
Zimuzodef38be2019-04-05 17:24:29 +0100894 return;
895 }
896 updateConfigs();
897 });
898 }
899
900 /**
901 * Health check is enabled or disabled after reading the flags
902 * from DeviceConfig.
903 */
904 private void updateConfigs() {
905 synchronized (mLock) {
906 mTriggerFailureCount = DeviceConfig.getInt(
907 DeviceConfig.NAMESPACE_ROLLBACK,
908 PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT,
909 DEFAULT_TRIGGER_FAILURE_COUNT);
910 if (mTriggerFailureCount <= 0) {
911 mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT;
912 }
913
914 mTriggerFailureDurationMs = DeviceConfig.getInt(
915 DeviceConfig.NAMESPACE_ROLLBACK,
916 PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS,
917 DEFAULT_TRIGGER_FAILURE_DURATION_MS);
918 if (mTriggerFailureDurationMs <= 0) {
JW Wangdfa52682019-09-11 15:16:49 +0800919 mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS;
Zimuzodef38be2019-04-05 17:24:29 +0100920 }
921
922 setExplicitHealthCheckEnabled(DeviceConfig.getBoolean(
923 DeviceConfig.NAMESPACE_ROLLBACK,
924 PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED,
925 DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED));
926 }
927 }
928
Roshan Pius7e6f5f52019-06-27 14:14:40 -0700929 private void registerConnectivityModuleHealthListener() {
Remi NGUYEN VAN6a135102019-05-29 18:30:27 +0900930 // TODO: have an internal method to trigger a rollback by reporting high severity errors,
931 // and rely on ActivityManager to inform the watchdog of severe network stack crashes
932 // instead of having this listener in parallel.
Roshan Pius7e6f5f52019-06-27 14:14:40 -0700933 mConnectivityModuleConnector.registerHealthListener(
Remi NGUYEN VAN6a135102019-05-29 18:30:27 +0900934 packageName -> {
935 final VersionedPackage pkg = getVersionedPackage(packageName);
936 if (pkg == null) {
937 Slog.wtf(TAG, "NetworkStack failed but could not find its package");
938 return;
939 }
Remi NGUYEN VAN6a135102019-05-29 18:30:27 +0900940 final List<VersionedPackage> pkgList = Collections.singletonList(pkg);
Gavin Corkeryc69ea402019-12-12 13:42:55 +0000941 onPackageFailure(pkgList, FAILURE_REASON_EXPLICIT_HEALTH_CHECK);
Remi NGUYEN VAN6a135102019-05-29 18:30:27 +0900942 });
943 }
944
Zimuzo6efba542018-11-29 12:47:58 +0000945 /**
Zimuzocfaed762019-01-03 21:13:01 +0000946 * Persists mAllObservers to file. Threshold information is ignored.
Zimuzo6efba542018-11-29 12:47:58 +0000947 */
948 private boolean saveToFile() {
Zimuzo6997f3c2019-04-05 17:06:25 +0100949 Slog.i(TAG, "Saving observer state to file");
Zimuzocfaed762019-01-03 21:13:01 +0000950 synchronized (mLock) {
951 FileOutputStream stream;
952 try {
953 stream = mPolicyFile.startWrite();
954 } catch (IOException e) {
955 Slog.w(TAG, "Cannot update monitored packages", e);
956 return false;
Zimuzo6efba542018-11-29 12:47:58 +0000957 }
Zimuzocfaed762019-01-03 21:13:01 +0000958
959 try {
960 XmlSerializer out = new FastXmlSerializer();
961 out.setOutput(stream, StandardCharsets.UTF_8.name());
962 out.startDocument(null, true);
963 out.startTag(null, TAG_PACKAGE_WATCHDOG);
964 out.attribute(null, ATTR_VERSION, Integer.toString(DB_VERSION));
965 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) {
Zimuzo6997f3c2019-04-05 17:06:25 +0100966 mAllObservers.valueAt(oIndex).writeLocked(out);
Zimuzocfaed762019-01-03 21:13:01 +0000967 }
968 out.endTag(null, TAG_PACKAGE_WATCHDOG);
969 out.endDocument();
970 mPolicyFile.finishWrite(stream);
971 return true;
972 } catch (IOException e) {
973 Slog.w(TAG, "Failed to save monitored packages, restoring backup", e);
974 mPolicyFile.failWrite(stream);
975 return false;
976 } finally {
977 IoUtils.closeQuietly(stream);
978 }
Zimuzo6efba542018-11-29 12:47:58 +0000979 }
980 }
981
Zimuzocfaed762019-01-03 21:13:01 +0000982 private void saveToFileAsync() {
jwwang1b85cce2019-08-27 15:39:19 +0800983 if (!mLongTaskHandler.hasCallbacks(mSaveToFile)) {
984 mLongTaskHandler.post(mSaveToFile);
Zimuzo7c6c28f2019-04-05 12:03:10 +0100985 }
Zimuzo6efba542018-11-29 12:47:58 +0000986 }
987
Gavin Corkeryefb3ff12019-12-02 18:15:24 +0000988 /** Dump status of every observer in mAllObservers. */
989 public void dump(IndentingPrintWriter pw) {
990 pw.println("Package Watchdog status");
991 pw.increaseIndent();
992 synchronized (mLock) {
993 for (String observerName : mAllObservers.keySet()) {
994 pw.println("Observer name: " + observerName);
995 pw.increaseIndent();
996 ObserverInternal observerInternal = mAllObservers.get(observerName);
997 observerInternal.dump(pw);
998 pw.decreaseIndent();
999 }
1000 }
1001 }
1002
Zimuzo6efba542018-11-29 12:47:58 +00001003 /**
1004 * Represents an observer monitoring a set of packages along with the failure thresholds for
1005 * each package.
Zimuzo7c6c28f2019-04-05 12:03:10 +01001006 *
1007 * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing
1008 * instances of this class.
Zimuzo6efba542018-11-29 12:47:58 +00001009 */
Zimuzo7c6c28f2019-04-05 12:03:10 +01001010 private static class ObserverInternal {
JW Wangcc44fc62019-09-10 14:26:18 +08001011 public final String name;
Zimuzo7c6c28f2019-04-05 12:03:10 +01001012 @GuardedBy("mLock")
JW Wangcc44fc62019-09-10 14:26:18 +08001013 public final ArrayMap<String, MonitoredPackage> packages = new ArrayMap<>();
Zimuzo3eee4382019-01-08 20:42:39 +00001014 @Nullable
Zimuzo7c6c28f2019-04-05 12:03:10 +01001015 @GuardedBy("mLock")
JW Wangcc44fc62019-09-10 14:26:18 +08001016 public PackageHealthObserver registeredObserver;
Zimuzo6efba542018-11-29 12:47:58 +00001017
1018 ObserverInternal(String name, List<MonitoredPackage> packages) {
JW Wangcc44fc62019-09-10 14:26:18 +08001019 this.name = name;
Zimuzo6997f3c2019-04-05 17:06:25 +01001020 updatePackagesLocked(packages);
Zimuzo6efba542018-11-29 12:47:58 +00001021 }
1022
1023 /**
Zimuzo7c6c28f2019-04-05 12:03:10 +01001024 * Writes important {@link MonitoredPackage} details for this observer to file.
1025 * Does not persist any package failure thresholds.
Zimuzo6efba542018-11-29 12:47:58 +00001026 */
Zimuzo7c6c28f2019-04-05 12:03:10 +01001027 @GuardedBy("mLock")
Zimuzo6997f3c2019-04-05 17:06:25 +01001028 public boolean writeLocked(XmlSerializer out) {
Zimuzo6efba542018-11-29 12:47:58 +00001029 try {
1030 out.startTag(null, TAG_OBSERVER);
JW Wangcc44fc62019-09-10 14:26:18 +08001031 out.attribute(null, ATTR_NAME, name);
1032 for (int i = 0; i < packages.size(); i++) {
1033 MonitoredPackage p = packages.valueAt(i);
Zimuzo6997f3c2019-04-05 17:06:25 +01001034 p.writeLocked(out);
Zimuzo6efba542018-11-29 12:47:58 +00001035 }
1036 out.endTag(null, TAG_OBSERVER);
1037 return true;
1038 } catch (IOException e) {
1039 Slog.w(TAG, "Cannot save observer", e);
1040 return false;
1041 }
1042 }
1043
Zimuzo7c6c28f2019-04-05 12:03:10 +01001044 @GuardedBy("mLock")
Zimuzo6997f3c2019-04-05 17:06:25 +01001045 public void updatePackagesLocked(List<MonitoredPackage> packages) {
Zimuzo7c6c28f2019-04-05 12:03:10 +01001046 for (int pIndex = 0; pIndex < packages.size(); pIndex++) {
1047 MonitoredPackage p = packages.get(pIndex);
JW Wangcb9074a2019-11-27 18:41:00 +08001048 this.packages.put(p.getName(), p);
Zimuzo6efba542018-11-29 12:47:58 +00001049 }
1050 }
1051
1052 /**
1053 * Reduces the monitoring durations of all packages observed by this observer by
Zimuzo7c6c28f2019-04-05 12:03:10 +01001054 * {@code elapsedMs}. If any duration is less than 0, the package is removed from
1055 * observation. If any health check duration is less than 0, the health check result
1056 * is evaluated.
Zimuzo6efba542018-11-29 12:47:58 +00001057 *
Zimuzo6997f3c2019-04-05 17:06:25 +01001058 * @return a {@link Set} of packages that were removed from the observer without explicit
Zimuzoef65fb82019-02-28 10:44:29 +00001059 * health check passing, or an empty list if no package expired for which an explicit health
1060 * check was still pending
Zimuzo6efba542018-11-29 12:47:58 +00001061 */
Zimuzo7c6c28f2019-04-05 12:03:10 +01001062 @GuardedBy("mLock")
Zimuzo6997f3c2019-04-05 17:06:25 +01001063 private Set<MonitoredPackage> prunePackagesLocked(long elapsedMs) {
Zimuzo7c6c28f2019-04-05 12:03:10 +01001064 Set<MonitoredPackage> failedPackages = new ArraySet<>();
JW Wangcc44fc62019-09-10 14:26:18 +08001065 Iterator<MonitoredPackage> it = packages.values().iterator();
Zimuzo7c6c28f2019-04-05 12:03:10 +01001066 while (it.hasNext()) {
1067 MonitoredPackage p = it.next();
Zimuzo6997f3c2019-04-05 17:06:25 +01001068 int oldState = p.getHealthCheckStateLocked();
1069 int newState = p.handleElapsedTimeLocked(elapsedMs);
JW Wang15fb8832019-09-06 15:50:43 +08001070 if (oldState != HealthCheckState.FAILED
1071 && newState == HealthCheckState.FAILED) {
JW Wangcb9074a2019-11-27 18:41:00 +08001072 Slog.i(TAG, "Package " + p.getName() + " failed health check");
Zimuzo6997f3c2019-04-05 17:06:25 +01001073 failedPackages.add(p);
Zimuzo6efba542018-11-29 12:47:58 +00001074 }
Zimuzo6997f3c2019-04-05 17:06:25 +01001075 if (p.isExpiredLocked()) {
Zimuzo7c6c28f2019-04-05 12:03:10 +01001076 it.remove();
1077 }
Zimuzo6efba542018-11-29 12:47:58 +00001078 }
Zimuzo7c6c28f2019-04-05 12:03:10 +01001079 return failedPackages;
Zimuzo6efba542018-11-29 12:47:58 +00001080 }
1081
1082 /**
1083 * Increments failure counts of {@code packageName}.
1084 * @returns {@code true} if failure threshold is exceeded, {@code false} otherwise
1085 */
Zimuzo7c6c28f2019-04-05 12:03:10 +01001086 @GuardedBy("mLock")
Zimuzo6997f3c2019-04-05 17:06:25 +01001087 public boolean onPackageFailureLocked(String packageName) {
Gavin Corkerybfb96ce2019-12-11 16:39:25 +00001088 if (packages.get(packageName) == null && registeredObserver.isPersistent()
1089 && registeredObserver.mayObservePackage(packageName)) {
1090 packages.put(packageName, sPackageWatchdog.newMonitoredPackage(
1091 packageName, DEFAULT_OBSERVING_DURATION_MS, false));
1092 }
JW Wangcc44fc62019-09-10 14:26:18 +08001093 MonitoredPackage p = packages.get(packageName);
Zimuzo7c6c28f2019-04-05 12:03:10 +01001094 if (p != null) {
Zimuzo6997f3c2019-04-05 17:06:25 +01001095 return p.onFailureLocked();
Zimuzo6efba542018-11-29 12:47:58 +00001096 }
Zimuzo7c6c28f2019-04-05 12:03:10 +01001097 return false;
Zimuzo6efba542018-11-29 12:47:58 +00001098 }
1099
1100 /**
1101 * Returns one ObserverInternal from the {@code parser} and advances its state.
1102 *
1103 * <p>Note that this method is <b>not</b> thread safe. It should only be called from
1104 * #loadFromFile which in turn is only called on construction of the
1105 * singleton PackageWatchdog.
1106 **/
Zimuzodef38be2019-04-05 17:24:29 +01001107 public static ObserverInternal read(XmlPullParser parser, PackageWatchdog watchdog) {
Zimuzo6efba542018-11-29 12:47:58 +00001108 String observerName = null;
1109 if (TAG_OBSERVER.equals(parser.getName())) {
1110 observerName = parser.getAttributeValue(null, ATTR_NAME);
1111 if (TextUtils.isEmpty(observerName)) {
Zimuzo9284e742019-02-22 12:09:28 +00001112 Slog.wtf(TAG, "Unable to read observer name");
Zimuzo6efba542018-11-29 12:47:58 +00001113 return null;
1114 }
1115 }
1116 List<MonitoredPackage> packages = new ArrayList<>();
1117 int innerDepth = parser.getDepth();
1118 try {
1119 while (XmlUtils.nextElementWithin(parser, innerDepth)) {
1120 if (TAG_PACKAGE.equals(parser.getName())) {
Zimuzo9284e742019-02-22 12:09:28 +00001121 try {
1122 String packageName = parser.getAttributeValue(null, ATTR_NAME);
1123 long duration = Long.parseLong(
1124 parser.getAttributeValue(null, ATTR_DURATION));
Zimuzo7c6c28f2019-04-05 12:03:10 +01001125 long healthCheckDuration = Long.parseLong(
1126 parser.getAttributeValue(null,
1127 ATTR_EXPLICIT_HEALTH_CHECK_DURATION));
Zimuzo9284e742019-02-22 12:09:28 +00001128 boolean hasPassedHealthCheck = Boolean.parseBoolean(
1129 parser.getAttributeValue(null, ATTR_PASSED_HEALTH_CHECK));
JW Wang5f6719f2019-11-28 10:38:53 +08001130 MonitoredPackage pkg = watchdog.newMonitoredPackage(packageName,
1131 duration, healthCheckDuration, hasPassedHealthCheck);
1132 if (pkg != null) {
1133 packages.add(pkg);
Zimuzo9284e742019-02-22 12:09:28 +00001134 }
1135 } catch (NumberFormatException e) {
1136 Slog.wtf(TAG, "Skipping package for observer " + observerName, e);
1137 continue;
Zimuzo6efba542018-11-29 12:47:58 +00001138 }
1139 }
1140 }
Zimuzo9284e742019-02-22 12:09:28 +00001141 } catch (XmlPullParserException | IOException e) {
1142 Slog.wtf(TAG, "Unable to read observer " + observerName, e);
Zimuzo6efba542018-11-29 12:47:58 +00001143 return null;
1144 }
1145 if (packages.isEmpty()) {
1146 return null;
1147 }
1148 return new ObserverInternal(observerName, packages);
1149 }
Gavin Corkeryefb3ff12019-12-02 18:15:24 +00001150
1151 /** Dumps information about this observer and the packages it watches. */
1152 public void dump(IndentingPrintWriter pw) {
1153 boolean isPersistent = registeredObserver != null && registeredObserver.isPersistent();
1154 pw.println("Persistent: " + isPersistent);
1155 for (String packageName : packages.keySet()) {
1156 MonitoredPackage p = packages.get(packageName);
1157 pw.println(packageName + ": ");
1158 pw.increaseIndent();
1159 pw.println("# Failures: " + p.mFailureHistory.size());
1160 pw.println("Monitoring duration remaining: " + p.mDurationMs + "ms");
1161 pw.println("Explicit health check duration: " + p.mHealthCheckDurationMs + "ms");
1162 pw.println("Health check state: " + p.toString(p.mHealthCheckState));
1163 pw.decreaseIndent();
1164 }
1165 }
Zimuzo6efba542018-11-29 12:47:58 +00001166 }
1167
JW Wang15fb8832019-09-06 15:50:43 +08001168 @Retention(SOURCE)
1169 @IntDef(value = {
1170 HealthCheckState.ACTIVE,
1171 HealthCheckState.INACTIVE,
1172 HealthCheckState.PASSED,
1173 HealthCheckState.FAILED})
1174 public @interface HealthCheckState {
1175 // The package has not passed health check but has requested a health check
1176 int ACTIVE = 0;
1177 // The package has not passed health check and has not requested a health check
1178 int INACTIVE = 1;
1179 // The package has passed health check
1180 int PASSED = 2;
1181 // The package has failed health check
1182 int FAILED = 3;
1183 }
1184
JW Wang5f6719f2019-11-28 10:38:53 +08001185 MonitoredPackage newMonitoredPackage(
1186 String name, long durationMs, boolean hasPassedHealthCheck) {
1187 return newMonitoredPackage(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck);
1188 }
1189
1190 MonitoredPackage newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs,
1191 boolean hasPassedHealthCheck) {
1192 VersionedPackage pkg = getVersionedPackage(name);
1193 if (pkg == null) {
1194 return null;
1195 }
1196 return new MonitoredPackage(pkg, durationMs, healthCheckDurationMs, hasPassedHealthCheck);
1197 }
1198
Zimuzo7c6c28f2019-04-05 12:03:10 +01001199 /**
Zimuzo6997f3c2019-04-05 17:06:25 +01001200 * Represents a package and its health check state along with the time
1201 * it should be monitored for.
Zimuzo7c6c28f2019-04-05 12:03:10 +01001202 *
1203 * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing
1204 * instances of this class.
1205 */
Zimuzodef38be2019-04-05 17:24:29 +01001206 class MonitoredPackage {
JW Wangcb9074a2019-11-27 18:41:00 +08001207 private final VersionedPackage mPackage;
JW Wang88525f72019-09-11 14:34:43 +08001208 // Times when package failures happen sorted in ascending order
1209 @GuardedBy("mLock")
1210 private final LongArrayQueue mFailureHistory = new LongArrayQueue();
Zimuzo6997f3c2019-04-05 17:06:25 +01001211 // One of STATE_[ACTIVE|INACTIVE|PASSED|FAILED]. Updated on construction and after
1212 // methods that could change the health check state: handleElapsedTimeLocked and
1213 // tryPassHealthCheckLocked
JW Wang15fb8832019-09-06 15:50:43 +08001214 private int mHealthCheckState = HealthCheckState.INACTIVE;
Zimuzo6997f3c2019-04-05 17:06:25 +01001215 // Whether an explicit health check has passed.
1216 // This value in addition with mHealthCheckDurationMs determines the health check state
1217 // of the package, see #getHealthCheckStateLocked
Zimuzo7c6c28f2019-04-05 12:03:10 +01001218 @GuardedBy("mLock")
Zimuzo6997f3c2019-04-05 17:06:25 +01001219 private boolean mHasPassedHealthCheck;
1220 // System uptime duration to monitor package.
Zimuzo7c6c28f2019-04-05 12:03:10 +01001221 @GuardedBy("mLock")
Zimuzo6997f3c2019-04-05 17:06:25 +01001222 private long mDurationMs;
Zimuzo7c6c28f2019-04-05 12:03:10 +01001223 // System uptime duration to check the result of an explicit health check
1224 // Initially, MAX_VALUE until we get a value from the health check service
1225 // and request health checks.
Zimuzo6997f3c2019-04-05 17:06:25 +01001226 // This value in addition with mHasPassedHealthCheck determines the health check state
1227 // of the package, see #getHealthCheckStateLocked
Zimuzo7c6c28f2019-04-05 12:03:10 +01001228 @GuardedBy("mLock")
Zimuzo6997f3c2019-04-05 17:06:25 +01001229 private long mHealthCheckDurationMs = Long.MAX_VALUE;
Zimuzo6efba542018-11-29 12:47:58 +00001230
/**
 * Stores the package together with its monitoring duration, health check duration and
 * health check result, then computes the initial health check state from those values.
 */
private MonitoredPackage(VersionedPackage pkg, long durationMs,
        long healthCheckDurationMs, boolean hasPassedHealthCheck) {
    this.mPackage = pkg;
    this.mHasPassedHealthCheck = hasPassedHealthCheck;
    this.mHealthCheckDurationMs = healthCheckDurationMs;
    this.mDurationMs = durationMs;
    // All inputs of the state computation are assigned at this point.
    updateHealthCheckStateLocked();
}
1239
1240 /** Writes the salient fields to disk using {@code out}. */
1241 @GuardedBy("mLock")
1242 public void writeLocked(XmlSerializer out) throws IOException {
1243 out.startTag(null, TAG_PACKAGE);
JW Wangcb9074a2019-11-27 18:41:00 +08001244 out.attribute(null, ATTR_NAME, getName());
Zimuzo6997f3c2019-04-05 17:06:25 +01001245 out.attribute(null, ATTR_DURATION, String.valueOf(mDurationMs));
1246 out.attribute(null, ATTR_EXPLICIT_HEALTH_CHECK_DURATION,
1247 String.valueOf(mHealthCheckDurationMs));
1248 out.attribute(null, ATTR_PASSED_HEALTH_CHECK,
1249 String.valueOf(mHasPassedHealthCheck));
1250 out.endTag(null, TAG_PACKAGE);
Zimuzo6efba542018-11-29 12:47:58 +00001251 }
1252
1253 /**
1254 * Increment package failures or resets failure count depending on the last package failure.
1255 *
1256 * @return {@code true} if failure count exceeds a threshold, {@code false} otherwise
1257 */
Zimuzo7c6c28f2019-04-05 12:03:10 +01001258 @GuardedBy("mLock")
Zimuzo6997f3c2019-04-05 17:06:25 +01001259 public boolean onFailureLocked() {
JW Wang88525f72019-09-11 14:34:43 +08001260 // Sliding window algorithm: find out if there exists a window containing failures >=
1261 // mTriggerFailureCount.
JW Wang2d085672019-09-02 03:28:31 +00001262 final long now = mSystemClock.uptimeMillis();
JW Wang88525f72019-09-11 14:34:43 +08001263 mFailureHistory.addLast(now);
1264 while (now - mFailureHistory.peekFirst() > mTriggerFailureDurationMs) {
1265 // Prune values falling out of the window
1266 mFailureHistory.removeFirst();
Zimuzo6efba542018-11-29 12:47:58 +00001267 }
JW Wang88525f72019-09-11 14:34:43 +08001268 boolean failed = mFailureHistory.size() >= mTriggerFailureCount;
Zimuzoe5009cd2019-01-23 18:11:58 +00001269 if (failed) {
JW Wang88525f72019-09-11 14:34:43 +08001270 mFailureHistory.clear();
Zimuzoe5009cd2019-01-23 18:11:58 +00001271 }
1272 return failed;
Zimuzo6efba542018-11-29 12:47:58 +00001273 }
Zimuzo7c6c28f2019-04-05 12:03:10 +01001274
1275 /**
Zimuzo6997f3c2019-04-05 17:06:25 +01001276 * Sets the initial health check duration.
1277 *
1278 * @return the new health check state
1279 */
1280 @GuardedBy("mLock")
1281 public int setHealthCheckActiveLocked(long initialHealthCheckDurationMs) {
1282 if (initialHealthCheckDurationMs <= 0) {
1283 Slog.wtf(TAG, "Cannot set non-positive health check duration "
JW Wangcb9074a2019-11-27 18:41:00 +08001284 + initialHealthCheckDurationMs + "ms for package " + getName()
Zimuzo6997f3c2019-04-05 17:06:25 +01001285 + ". Using total duration " + mDurationMs + "ms instead");
1286 initialHealthCheckDurationMs = mDurationMs;
1287 }
JW Wang15fb8832019-09-06 15:50:43 +08001288 if (mHealthCheckState == HealthCheckState.INACTIVE) {
Zimuzo6997f3c2019-04-05 17:06:25 +01001289 // Transitions to ACTIVE
1290 mHealthCheckDurationMs = initialHealthCheckDurationMs;
1291 }
1292 return updateHealthCheckStateLocked();
1293 }
1294
1295 /**
1296 * Updates the monitoring durations of the package.
1297 *
1298 * @return the new health check state
1299 */
1300 @GuardedBy("mLock")
1301 public int handleElapsedTimeLocked(long elapsedMs) {
1302 if (elapsedMs <= 0) {
JW Wangcb9074a2019-11-27 18:41:00 +08001303 Slog.w(TAG, "Cannot handle non-positive elapsed time for package " + getName());
Zimuzo6997f3c2019-04-05 17:06:25 +01001304 return mHealthCheckState;
1305 }
1306 // Transitions to FAILED if now <= 0 and health check not passed
1307 mDurationMs -= elapsedMs;
JW Wang15fb8832019-09-06 15:50:43 +08001308 if (mHealthCheckState == HealthCheckState.ACTIVE) {
Zimuzo6997f3c2019-04-05 17:06:25 +01001309 // We only update health check durations if we have #setHealthCheckActiveLocked
1310 // This ensures we don't leave the INACTIVE state for an unexpected elapsed time
1311 // Transitions to FAILED if now <= 0 and health check not passed
1312 mHealthCheckDurationMs -= elapsedMs;
1313 }
1314 return updateHealthCheckStateLocked();
1315 }
1316
1317 /**
JW Wang15fb8832019-09-06 15:50:43 +08001318 * Marks the health check as passed and transitions to {@link HealthCheckState.PASSED}
1319 * if not yet {@link HealthCheckState.FAILED}.
Zimuzo6997f3c2019-04-05 17:06:25 +01001320 *
JW Wang15fb8832019-09-06 15:50:43 +08001321 * @return the new {@link HealthCheckState health check state}
Zimuzo6997f3c2019-04-05 17:06:25 +01001322 */
1323 @GuardedBy("mLock")
JW Wang15fb8832019-09-06 15:50:43 +08001324 @HealthCheckState
Zimuzo6997f3c2019-04-05 17:06:25 +01001325 public int tryPassHealthCheckLocked() {
JW Wang15fb8832019-09-06 15:50:43 +08001326 if (mHealthCheckState != HealthCheckState.FAILED) {
Zimuzo6997f3c2019-04-05 17:06:25 +01001327 // FAILED is a final state so only pass if we haven't failed
1328 // Transition to PASSED
1329 mHasPassedHealthCheck = true;
1330 }
1331 return updateHealthCheckStateLocked();
1332 }
1333
1334 /** Returns the monitored package name. */
1335 private String getName() {
JW Wangcb9074a2019-11-27 18:41:00 +08001336 return mPackage.getPackageName();
Zimuzo6997f3c2019-04-05 17:06:25 +01001337 }
1338
Zimuzo6997f3c2019-04-05 17:06:25 +01001339 /**
JW Wang15fb8832019-09-06 15:50:43 +08001340 * Returns the current {@link HealthCheckState health check state}.
Zimuzo7c6c28f2019-04-05 12:03:10 +01001341 */
1342 @GuardedBy("mLock")
JW Wang15fb8832019-09-06 15:50:43 +08001343 @HealthCheckState
Zimuzo6997f3c2019-04-05 17:06:25 +01001344 public int getHealthCheckStateLocked() {
1345 return mHealthCheckState;
1346 }
1347
1348 /**
1349 * Returns the shortest duration before the package should be scheduled for a prune.
1350 *
1351 * @return the duration or {@link Long#MAX_VALUE} if the package should not be scheduled
1352 */
1353 @GuardedBy("mLock")
1354 public long getShortestScheduleDurationMsLocked() {
Zimuzod3d2ada2019-05-07 17:48:02 +01001355 // Consider health check duration only if #isPendingHealthChecksLocked is true
1356 return Math.min(toPositive(mDurationMs),
1357 isPendingHealthChecksLocked()
1358 ? toPositive(mHealthCheckDurationMs) : Long.MAX_VALUE);
Zimuzo6997f3c2019-04-05 17:06:25 +01001359 }
1360
1361 /**
1362 * Returns {@code true} if the total duration left to monitor the package is less than or
1363 * equal to 0 {@code false} otherwise.
1364 */
1365 @GuardedBy("mLock")
1366 public boolean isExpiredLocked() {
1367 return mDurationMs <= 0;
1368 }
1369
1370 /**
Zimuzod3d2ada2019-05-07 17:48:02 +01001371 * Returns {@code true} if the package, {@link #getName} is expecting health check results
1372 * {@code false} otherwise.
1373 */
1374 @GuardedBy("mLock")
1375 public boolean isPendingHealthChecksLocked() {
JW Wang15fb8832019-09-06 15:50:43 +08001376 return mHealthCheckState == HealthCheckState.ACTIVE
1377 || mHealthCheckState == HealthCheckState.INACTIVE;
Zimuzod3d2ada2019-05-07 17:48:02 +01001378 }
1379
1380 /**
Zimuzo6997f3c2019-04-05 17:06:25 +01001381 * Updates the health check state based on {@link #mHasPassedHealthCheck}
1382 * and {@link #mHealthCheckDurationMs}.
1383 *
JW Wang15fb8832019-09-06 15:50:43 +08001384 * @return the new {@link HealthCheckState health check state}
Zimuzo6997f3c2019-04-05 17:06:25 +01001385 */
1386 @GuardedBy("mLock")
JW Wang15fb8832019-09-06 15:50:43 +08001387 @HealthCheckState
Zimuzo6997f3c2019-04-05 17:06:25 +01001388 private int updateHealthCheckStateLocked() {
1389 int oldState = mHealthCheckState;
Zimuzo7c6c28f2019-04-05 12:03:10 +01001390 if (mHasPassedHealthCheck) {
Zimuzo6997f3c2019-04-05 17:06:25 +01001391 // Set final state first to avoid ambiguity
JW Wang15fb8832019-09-06 15:50:43 +08001392 mHealthCheckState = HealthCheckState.PASSED;
Zimuzo6997f3c2019-04-05 17:06:25 +01001393 } else if (mHealthCheckDurationMs <= 0 || mDurationMs <= 0) {
1394 // Set final state first to avoid ambiguity
JW Wang15fb8832019-09-06 15:50:43 +08001395 mHealthCheckState = HealthCheckState.FAILED;
Zimuzo7c6c28f2019-04-05 12:03:10 +01001396 } else if (mHealthCheckDurationMs == Long.MAX_VALUE) {
JW Wang15fb8832019-09-06 15:50:43 +08001397 mHealthCheckState = HealthCheckState.INACTIVE;
Zimuzo7c6c28f2019-04-05 12:03:10 +01001398 } else {
JW Wang15fb8832019-09-06 15:50:43 +08001399 mHealthCheckState = HealthCheckState.ACTIVE;
Zimuzo7c6c28f2019-04-05 12:03:10 +01001400 }
JW Wangcb9074a2019-11-27 18:41:00 +08001401 Slog.i(TAG, "Updated health check state for package " + getName() + ": "
Zimuzo6997f3c2019-04-05 17:06:25 +01001402 + toString(oldState) + " -> " + toString(mHealthCheckState));
1403 return mHealthCheckState;
1404 }
1405
1406 /** Returns a {@link String} representation of the current health check state. */
JW Wang15fb8832019-09-06 15:50:43 +08001407 private String toString(@HealthCheckState int state) {
Zimuzo6997f3c2019-04-05 17:06:25 +01001408 switch (state) {
JW Wang15fb8832019-09-06 15:50:43 +08001409 case HealthCheckState.ACTIVE:
Zimuzo6997f3c2019-04-05 17:06:25 +01001410 return "ACTIVE";
JW Wang15fb8832019-09-06 15:50:43 +08001411 case HealthCheckState.INACTIVE:
Zimuzo6997f3c2019-04-05 17:06:25 +01001412 return "INACTIVE";
JW Wang15fb8832019-09-06 15:50:43 +08001413 case HealthCheckState.PASSED:
Zimuzo6997f3c2019-04-05 17:06:25 +01001414 return "PASSED";
JW Wang15fb8832019-09-06 15:50:43 +08001415 case HealthCheckState.FAILED:
Zimuzo6997f3c2019-04-05 17:06:25 +01001416 return "FAILED";
1417 default:
1418 return "UNKNOWN";
1419 }
1420 }
1421
1422 /** Returns {@code value} if it is greater than 0 or {@link Long#MAX_VALUE} otherwise. */
Zimuzodef38be2019-04-05 17:24:29 +01001423 private long toPositive(long value) {
Zimuzo6997f3c2019-04-05 17:06:25 +01001424 return value > 0 ? value : Long.MAX_VALUE;
Zimuzo7c6c28f2019-04-05 12:03:10 +01001425 }
Zimuzo6efba542018-11-29 12:47:58 +00001426 }
Gavin Corkeryaa57ef32019-12-17 19:02:54 +00001427
1428 /**
1429 * Handles the thresholding logic for system server boots.
1430 */
1431 static class BootThreshold {
1432
1433 private final int mBootTriggerCount;
1434 private final long mTriggerWindow;
1435
1436 BootThreshold(int bootTriggerCount, long triggerWindow) {
1437 this.mBootTriggerCount = bootTriggerCount;
1438 this.mTriggerWindow = triggerWindow;
1439 }
1440
1441 public void reset() {
1442 setStart(0);
1443 setCount(0);
1444 }
1445
1446 private int getCount() {
1447 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
1448 }
1449
1450 private void setCount(int count) {
1451 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
1452 }
1453
1454 public long getStart() {
1455 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
1456 }
1457
1458 public void setStart(long start) {
1459 final long now = android.os.SystemClock.elapsedRealtime();
1460 final long newStart = MathUtils.constrain(start, 0, now);
1461 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(newStart));
1462 }
1463
1464 /** Increments the boot counter, and returns whether the device is bootlooping. */
1465 public boolean incrementAndTest() {
1466 final long now = android.os.SystemClock.elapsedRealtime();
1467 if (now - getStart() < 0) {
1468 Slog.e(TAG, "Window was less than zero. Resetting start to current time.");
1469 setStart(now);
1470 }
1471 final long window = now - getStart();
1472 if (window >= mTriggerWindow) {
1473 setCount(1);
1474 setStart(now);
1475 return false;
1476 } else {
1477 int count = getCount() + 1;
1478 setCount(count);
1479 EventLogTags.writeRescueNote(Process.ROOT_UID, count, window);
1480 return count >= mBootTriggerCount;
1481 }
1482 }
1483
1484 }
Zimuzo6efba542018-11-29 12:47:58 +00001485}