blob: fb1a962c0ef3738ce0de6c199fb4af0f7c073497 [file] [log] [blame]
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -07001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
Todd Kennedy7c4c55d2017-11-02 10:01:39 -070019import static com.android.server.pm.PackageManagerServiceUtils.logCriticalInfo;
20
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070021import android.content.ContentResolver;
22import android.content.Context;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070023import android.os.Build;
Jeff Sharkey82311462017-04-02 23:42:17 -060024import android.os.Environment;
Jeff Sharkey1bec4482017-02-23 12:40:54 -070025import android.os.FileUtils;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070026import android.os.RecoverySystem;
27import android.os.SystemClock;
28import android.os.SystemProperties;
29import android.os.UserHandle;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070030import android.provider.Settings;
31import android.text.format.DateUtils;
32import android.util.ExceptionUtils;
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070033import android.util.Log;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070034import android.util.MathUtils;
35import android.util.Slog;
36import android.util.SparseArray;
Christian Brunschenf86039e2018-12-21 12:26:14 +000037import android.util.StatsLog;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070038
bpetrivsa7101952019-02-07 16:01:24 +000039import com.android.internal.annotations.VisibleForTesting;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070040import com.android.internal.util.ArrayUtils;
bpetrivs93075f42019-02-28 12:08:12 +000041import com.android.server.am.SettingsToPropertiesMapper;
bpetrivs62f15982019-02-13 17:18:16 +000042import com.android.server.utils.FlagNamespaceUtils;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070043
Jeff Sharkey1bec4482017-02-23 12:40:54 -070044import java.io.File;
bpetrivs93075f42019-02-28 12:08:12 +000045import java.util.Arrays;
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070046
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070047/**
48 * Utilities to help rescue the system from crash loops. Callers are expected to
49 * report boot events and persistent app crashes, and if they happen frequently
50 * enough this class will slowly escalate through several rescue operations
51 * before finally rebooting and prompting the user if they want to wipe data as
52 * a last resort.
53 *
54 * @hide
55 */
56public class RescueParty {
bpetrivsa7101952019-02-07 16:01:24 +000057 @VisibleForTesting
58 static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
59 @VisibleForTesting
60 static final int TRIGGER_COUNT = 5;
61 @VisibleForTesting
62 static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
63 @VisibleForTesting
64 static final int LEVEL_NONE = 0;
65 @VisibleForTesting
66 static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
67 @VisibleForTesting
68 static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
69 @VisibleForTesting
70 static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
71 @VisibleForTesting
72 static final int LEVEL_FACTORY_RESET = 4;
73 @VisibleForTesting
74 static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
bpetrivsd62daaf2019-04-30 11:20:09 +010075 /**
76 * The boot trigger window size must always be greater than Watchdog's deadlock timeout
77 * {@link Watchdog#DEFAULT_TIMEOUT}.
78 */
bpetrivsa7101952019-02-07 16:01:24 +000079 @VisibleForTesting
bpetrivsd62daaf2019-04-30 11:20:09 +010080 static final long BOOT_TRIGGER_WINDOW_MILLIS = 600 * DateUtils.SECOND_IN_MILLIS;
bpetrivsa7101952019-02-07 16:01:24 +000081 @VisibleForTesting
82 static final long PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS = 30 * DateUtils.SECOND_IN_MILLIS;
83 @VisibleForTesting
84 static final String TAG = "RescueParty";
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070085
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070086 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070087 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
Jeff Sharkey9d640952017-06-26 19:57:16 -060088 private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070089
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070090 /** Threshold for boot loops */
91 private static final Threshold sBoot = new BootThreshold();
92 /** Threshold for app crash loops */
93 private static SparseArray<Threshold> sApps = new SparseArray<>();
94
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070095 private static boolean isDisabled() {
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070096 // Check if we're explicitly enabled for testing
Jeff Sharkey82311462017-04-02 23:42:17 -060097 if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070098 return false;
99 }
100
Jeff Sharkeycdee83a2017-01-26 15:29:16 -0700101 // We're disabled on all engineering devices
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700102 if (Build.IS_ENG) {
103 Slog.v(TAG, "Disabled because of eng build");
104 return true;
105 }
Jeff Sharkeycdee83a2017-01-26 15:29:16 -0700106
107 // We're disabled on userdebug devices connected over USB, since that's
108 // a decent signal that someone is actively trying to debug the device,
109 // or that it's in a lab environment.
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700110 if (Build.IS_USERDEBUG && isUsbActive()) {
111 Slog.v(TAG, "Disabled because of active USB connection");
112 return true;
Jeff Sharkeycdee83a2017-01-26 15:29:16 -0700113 }
114
115 // One last-ditch check
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700116 if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
117 Slog.v(TAG, "Disabled because of manual property");
118 return true;
119 }
120
121 return false;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700122 }
123
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700124 /**
125 * Take note of a boot event. If we notice too many of these events
126 * happening in rapid succession, we'll send out a rescue party.
127 */
128 public static void noteBoot(Context context) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700129 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700130 if (sBoot.incrementAndTest()) {
131 sBoot.reset();
132 incrementRescueLevel(sBoot.uid);
133 executeRescueLevel(context);
134 }
135 }
136
137 /**
bpetrivs0254ff62019-03-01 11:50:45 +0000138 * Take note of a persistent app or apex module crash. If we notice too many of these
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700139 * events happening in rapid succession, we'll send out a rescue party.
140 */
bpetrivs0254ff62019-03-01 11:50:45 +0000141 public static void noteAppCrash(Context context, int uid) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700142 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700143 Threshold t = sApps.get(uid);
144 if (t == null) {
145 t = new AppThreshold(uid);
146 sApps.put(uid, t);
147 }
148 if (t.incrementAndTest()) {
149 t.reset();
150 incrementRescueLevel(t.uid);
151 executeRescueLevel(context);
152 }
153 }
154
155 /**
156 * Check if we're currently attempting to reboot for a factory reset.
157 */
158 public static boolean isAttemptingFactoryReset() {
159 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
160 }
161
162 /**
bpetrivsa7101952019-02-07 16:01:24 +0000163 * Called when {@code SettingsProvider} has been published, which is a good
164 * opportunity to reset any settings depending on our rescue level.
165 */
166 public static void onSettingsProviderPublished(Context context) {
bpetrivs93075f42019-02-28 12:08:12 +0000167 handleNativeRescuePartyResets();
bpetrivsa7101952019-02-07 16:01:24 +0000168 executeRescueLevel(context);
169 }
170
171 @VisibleForTesting
172 static void resetAllThresholds() {
173 sBoot.reset();
174
175 for (int i = 0; i < sApps.size(); i++) {
176 Threshold appThreshold = sApps.get(sApps.keyAt(i));
177 appThreshold.reset();
178 }
179 }
180
181 @VisibleForTesting
182 static long getElapsedRealtime() {
183 return SystemClock.elapsedRealtime();
184 }
185
bpetrivs93075f42019-02-28 12:08:12 +0000186 private static void handleNativeRescuePartyResets() {
187 if (SettingsToPropertiesMapper.isNativeFlagsResetPerformed()) {
188 FlagNamespaceUtils.resetDeviceConfig(Settings.RESET_MODE_TRUSTED_DEFAULTS,
189 Arrays.asList(SettingsToPropertiesMapper.getResetNativeCategories()));
190 }
191 }
192
bpetrivsa7101952019-02-07 16:01:24 +0000193 /**
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700194 * Escalate to the next rescue level. After incrementing the level you'll
195 * probably want to call {@link #executeRescueLevel(Context)}.
196 */
197 private static void incrementRescueLevel(int triggerUid) {
198 final int level = MathUtils.constrain(
199 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
200 LEVEL_NONE, LEVEL_FACTORY_RESET);
201 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
202
203 EventLogTags.writeRescueLevel(level, triggerUid);
Todd Kennedy7c4c55d2017-11-02 10:01:39 -0700204 logCriticalInfo(Log.WARN, "Incremented rescue level to "
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700205 + levelToString(level) + " triggered by UID " + triggerUid);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700206 }
207
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700208 private static void executeRescueLevel(Context context) {
209 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
210 if (level == LEVEL_NONE) return;
211
212 Slog.w(TAG, "Attempting rescue level " + levelToString(level));
213 try {
214 executeRescueLevelInternal(context, level);
215 EventLogTags.writeRescueSuccess(level);
Todd Kennedy7c4c55d2017-11-02 10:01:39 -0700216 logCriticalInfo(Log.DEBUG,
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700217 "Finished rescue level " + levelToString(level));
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700218 } catch (Throwable t) {
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700219 final String msg = ExceptionUtils.getCompleteMessage(t);
220 EventLogTags.writeRescueFailure(level, msg);
Todd Kennedy7c4c55d2017-11-02 10:01:39 -0700221 logCriticalInfo(Log.ERROR,
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700222 "Failed rescue level " + levelToString(level) + ": " + msg);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700223 }
224 }
225
226 private static void executeRescueLevelInternal(Context context, int level) throws Exception {
Christian Brunschenf86039e2018-12-21 12:26:14 +0000227 StatsLog.write(StatsLog.RESCUE_PARTY_RESET_REPORTED, level);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700228 switch (level) {
229 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
230 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
231 break;
232 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
233 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
234 break;
235 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
236 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
237 break;
238 case LEVEL_FACTORY_RESET:
239 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
240 break;
241 }
bpetrivs62f15982019-02-13 17:18:16 +0000242 FlagNamespaceUtils.addToKnownResetNamespaces(
243 FlagNamespaceUtils.NAMESPACE_NO_PACKAGE);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700244 }
245
246 private static void resetAllSettings(Context context, int mode) throws Exception {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700247 // Try our best to reset all settings possible, and once finished
248 // rethrow any exception that we encountered
249 Exception res = null;
250 final ContentResolver resolver = context.getContentResolver();
251 try {
bpetrivs62f15982019-02-13 17:18:16 +0000252 FlagNamespaceUtils.resetDeviceConfig(mode);
253 } catch (Exception e) {
254 res = new RuntimeException("Failed to reset config settings", e);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700255 }
bpetrivs0254ff62019-03-01 11:50:45 +0000256 try {
257 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
258 } catch (Exception e) {
259 res = new RuntimeException("Failed to reset global settings", e);
260 }
Jeff Sharkey82311462017-04-02 23:42:17 -0600261 for (int userId : getAllUserIds()) {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700262 try {
263 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
bpetrivs62f15982019-02-13 17:18:16 +0000264 } catch (Exception e) {
265 res = new RuntimeException("Failed to reset secure settings for " + userId, e);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700266 }
267 }
268 if (res != null) {
269 throw res;
270 }
271 }
272
273 /**
274 * Threshold that can be triggered if a number of events occur within a
275 * window of time.
276 */
277 private abstract static class Threshold {
278 public abstract int getCount();
279 public abstract void setCount(int count);
280 public abstract long getStart();
281 public abstract void setStart(long start);
282
283 private final int uid;
284 private final int triggerCount;
285 private final long triggerWindow;
286
287 public Threshold(int uid, int triggerCount, long triggerWindow) {
288 this.uid = uid;
289 this.triggerCount = triggerCount;
290 this.triggerWindow = triggerWindow;
291 }
292
293 public void reset() {
294 setCount(0);
295 setStart(0);
296 }
297
298 /**
299 * @return if this threshold has been triggered
300 */
301 public boolean incrementAndTest() {
bpetrivsa7101952019-02-07 16:01:24 +0000302 final long now = getElapsedRealtime();
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700303 final long window = now - getStart();
304 if (window > triggerWindow) {
305 setCount(1);
306 setStart(now);
307 return false;
308 } else {
309 int count = getCount() + 1;
310 setCount(count);
311 EventLogTags.writeRescueNote(uid, count, window);
312 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
313 + (window / 1000) + " sec");
314 return (count >= triggerCount);
315 }
316 }
317 }
318
319 /**
320 * Specialization of {@link Threshold} for monitoring boot events. It stores
321 * counters in system properties for robustness.
322 */
323 private static class BootThreshold extends Threshold {
324 public BootThreshold() {
bpetrivsa7101952019-02-07 16:01:24 +0000325 // We're interested in TRIGGER_COUNT events in any
326 // BOOT_TRIGGER_WINDOW_MILLIS second period; this window is super relaxed because
327 // booting can take a long time if forced to dexopt things.
328 super(android.os.Process.ROOT_UID, TRIGGER_COUNT, BOOT_TRIGGER_WINDOW_MILLIS);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700329 }
330
331 @Override
332 public int getCount() {
333 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
334 }
335
336 @Override
337 public void setCount(int count) {
338 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
339 }
340
341 @Override
342 public long getStart() {
343 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
344 }
345
346 @Override
347 public void setStart(long start) {
348 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
349 }
350 }
351
352 /**
353 * Specialization of {@link Threshold} for monitoring app crashes. It stores
354 * counters in memory.
355 */
356 private static class AppThreshold extends Threshold {
357 private int count;
358 private long start;
359
360 public AppThreshold(int uid) {
bpetrivsa7101952019-02-07 16:01:24 +0000361 // We're interested in TRIGGER_COUNT events in any
362 // PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS second period; apps crash pretty quickly
363 // so we can keep a tight leash on them.
364 super(uid, TRIGGER_COUNT, PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700365 }
366
367 @Override public int getCount() { return count; }
368 @Override public void setCount(int count) { this.count = count; }
369 @Override public long getStart() { return start; }
370 @Override public void setStart(long start) { this.start = start; }
371 }
372
Jeff Sharkey82311462017-04-02 23:42:17 -0600373 private static int[] getAllUserIds() {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700374 int[] userIds = { UserHandle.USER_SYSTEM };
375 try {
Jeff Sharkey82311462017-04-02 23:42:17 -0600376 for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) {
377 try {
378 final int userId = Integer.parseInt(file.getName());
379 if (userId != UserHandle.USER_SYSTEM) {
380 userIds = ArrayUtils.appendInt(userIds, userId);
381 }
382 } catch (NumberFormatException ignored) {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700383 }
384 }
385 } catch (Throwable t) {
386 Slog.w(TAG, "Trouble discovering users", t);
387 }
388 return userIds;
389 }
390
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700391 /**
392 * Hacky test to check if the device has an active USB connection, which is
Jeff Sharkey1bec4482017-02-23 12:40:54 -0700393 * a good proxy for someone doing local development work.
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700394 */
395 private static boolean isUsbActive() {
Jeff Sharkey9d640952017-06-26 19:57:16 -0600396 if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) {
397 Slog.v(TAG, "Assuming virtual device is connected over USB");
398 return true;
399 }
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700400 try {
Jeff Sharkey1bec4482017-02-23 12:40:54 -0700401 final String state = FileUtils
402 .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
403 return "CONFIGURED".equals(state.trim());
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700404 } catch (Throwable t) {
405 Slog.w(TAG, "Failed to determine if device was on USB", t);
406 return false;
407 }
408 }
409
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700410 private static String levelToString(int level) {
411 switch (level) {
412 case LEVEL_NONE: return "NONE";
413 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
414 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
415 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
416 case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
417 default: return Integer.toString(level);
418 }
419 }
420}