/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.server;
18
19import android.content.ContentResolver;
20import android.content.Context;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070021import android.os.Build;
Jeff Sharkey82311462017-04-02 23:42:17 -060022import android.os.Environment;
Jeff Sharkey1bec4482017-02-23 12:40:54 -070023import android.os.FileUtils;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070024import android.os.RecoverySystem;
25import android.os.SystemClock;
26import android.os.SystemProperties;
27import android.os.UserHandle;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070028import android.provider.Settings;
29import android.text.format.DateUtils;
30import android.util.ExceptionUtils;
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070031import android.util.Log;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070032import android.util.MathUtils;
33import android.util.Slog;
34import android.util.SparseArray;
35
36import com.android.internal.util.ArrayUtils;
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070037import com.android.server.pm.PackageManagerService;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070038
Jeff Sharkey1bec4482017-02-23 12:40:54 -070039import java.io.File;
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070040
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070041/**
42 * Utilities to help rescue the system from crash loops. Callers are expected to
43 * report boot events and persistent app crashes, and if they happen frequently
44 * enough this class will slowly escalate through several rescue operations
45 * before finally rebooting and prompting the user if they want to wipe data as
46 * a last resort.
47 *
48 * @hide
49 */
50public class RescueParty {
51 private static final String TAG = "RescueParty";
52
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070053 private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070054 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070055 private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
56 private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
57 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
Jeff Sharkey9d640952017-06-26 19:57:16 -060058 private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070059
60 private static final int LEVEL_NONE = 0;
61 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
62 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
63 private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
64 private static final int LEVEL_FACTORY_RESET = 4;
65
66 /** Threshold for boot loops */
67 private static final Threshold sBoot = new BootThreshold();
68 /** Threshold for app crash loops */
69 private static SparseArray<Threshold> sApps = new SparseArray<>();
70
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070071 private static boolean isDisabled() {
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070072 // Check if we're explicitly enabled for testing
Jeff Sharkey82311462017-04-02 23:42:17 -060073 if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070074 return false;
75 }
76
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070077 // We're disabled on all engineering devices
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070078 if (Build.IS_ENG) {
79 Slog.v(TAG, "Disabled because of eng build");
80 return true;
81 }
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070082
83 // We're disabled on userdebug devices connected over USB, since that's
84 // a decent signal that someone is actively trying to debug the device,
85 // or that it's in a lab environment.
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070086 if (Build.IS_USERDEBUG && isUsbActive()) {
87 Slog.v(TAG, "Disabled because of active USB connection");
88 return true;
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070089 }
90
91 // One last-ditch check
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070092 if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
93 Slog.v(TAG, "Disabled because of manual property");
94 return true;
95 }
96
97 return false;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070098 }
99
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700100 /**
101 * Take note of a boot event. If we notice too many of these events
102 * happening in rapid succession, we'll send out a rescue party.
103 */
104 public static void noteBoot(Context context) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700105 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700106 if (sBoot.incrementAndTest()) {
107 sBoot.reset();
108 incrementRescueLevel(sBoot.uid);
109 executeRescueLevel(context);
110 }
111 }
112
113 /**
114 * Take note of a persistent app crash. If we notice too many of these
115 * events happening in rapid succession, we'll send out a rescue party.
116 */
117 public static void notePersistentAppCrash(Context context, int uid) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700118 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700119 Threshold t = sApps.get(uid);
120 if (t == null) {
121 t = new AppThreshold(uid);
122 sApps.put(uid, t);
123 }
124 if (t.incrementAndTest()) {
125 t.reset();
126 incrementRescueLevel(t.uid);
127 executeRescueLevel(context);
128 }
129 }
130
131 /**
132 * Check if we're currently attempting to reboot for a factory reset.
133 */
134 public static boolean isAttemptingFactoryReset() {
135 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
136 }
137
138 /**
139 * Escalate to the next rescue level. After incrementing the level you'll
140 * probably want to call {@link #executeRescueLevel(Context)}.
141 */
142 private static void incrementRescueLevel(int triggerUid) {
143 final int level = MathUtils.constrain(
144 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
145 LEVEL_NONE, LEVEL_FACTORY_RESET);
146 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
147
148 EventLogTags.writeRescueLevel(level, triggerUid);
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700149 PackageManagerService.logCriticalInfo(Log.WARN, "Incremented rescue level to "
150 + levelToString(level) + " triggered by UID " + triggerUid);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700151 }
152
153 /**
154 * Called when {@code SettingsProvider} has been published, which is a good
155 * opportunity to reset any settings depending on our rescue level.
156 */
157 public static void onSettingsProviderPublished(Context context) {
158 executeRescueLevel(context);
159 }
160
161 private static void executeRescueLevel(Context context) {
162 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
163 if (level == LEVEL_NONE) return;
164
165 Slog.w(TAG, "Attempting rescue level " + levelToString(level));
166 try {
167 executeRescueLevelInternal(context, level);
168 EventLogTags.writeRescueSuccess(level);
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700169 PackageManagerService.logCriticalInfo(Log.DEBUG,
170 "Finished rescue level " + levelToString(level));
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700171 } catch (Throwable t) {
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700172 final String msg = ExceptionUtils.getCompleteMessage(t);
173 EventLogTags.writeRescueFailure(level, msg);
174 PackageManagerService.logCriticalInfo(Log.ERROR,
175 "Failed rescue level " + levelToString(level) + ": " + msg);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700176 }
177 }
178
179 private static void executeRescueLevelInternal(Context context, int level) throws Exception {
180 switch (level) {
181 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
182 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
183 break;
184 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
185 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
186 break;
187 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
188 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
189 break;
190 case LEVEL_FACTORY_RESET:
191 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
192 break;
193 }
194 }
195
196 private static void resetAllSettings(Context context, int mode) throws Exception {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700197 // Try our best to reset all settings possible, and once finished
198 // rethrow any exception that we encountered
199 Exception res = null;
200 final ContentResolver resolver = context.getContentResolver();
201 try {
202 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700203 } catch (Throwable t) {
204 res = new RuntimeException("Failed to reset global settings", t);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700205 }
Jeff Sharkey82311462017-04-02 23:42:17 -0600206 for (int userId : getAllUserIds()) {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700207 try {
208 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700209 } catch (Throwable t) {
210 res = new RuntimeException("Failed to reset secure settings for " + userId, t);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700211 }
212 }
213 if (res != null) {
214 throw res;
215 }
216 }
217
218 /**
219 * Threshold that can be triggered if a number of events occur within a
220 * window of time.
221 */
222 private abstract static class Threshold {
223 public abstract int getCount();
224 public abstract void setCount(int count);
225 public abstract long getStart();
226 public abstract void setStart(long start);
227
228 private final int uid;
229 private final int triggerCount;
230 private final long triggerWindow;
231
232 public Threshold(int uid, int triggerCount, long triggerWindow) {
233 this.uid = uid;
234 this.triggerCount = triggerCount;
235 this.triggerWindow = triggerWindow;
236 }
237
238 public void reset() {
239 setCount(0);
240 setStart(0);
241 }
242
243 /**
244 * @return if this threshold has been triggered
245 */
246 public boolean incrementAndTest() {
247 final long now = SystemClock.elapsedRealtime();
248 final long window = now - getStart();
249 if (window > triggerWindow) {
250 setCount(1);
251 setStart(now);
252 return false;
253 } else {
254 int count = getCount() + 1;
255 setCount(count);
256 EventLogTags.writeRescueNote(uid, count, window);
257 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
258 + (window / 1000) + " sec");
259 return (count >= triggerCount);
260 }
261 }
262 }
263
264 /**
265 * Specialization of {@link Threshold} for monitoring boot events. It stores
266 * counters in system properties for robustness.
267 */
268 private static class BootThreshold extends Threshold {
269 public BootThreshold() {
270 // We're interested in 5 events in any 300 second period; this
271 // window is super relaxed because booting can take a long time if
272 // forced to dexopt things.
273 super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
274 }
275
276 @Override
277 public int getCount() {
278 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
279 }
280
281 @Override
282 public void setCount(int count) {
283 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
284 }
285
286 @Override
287 public long getStart() {
288 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
289 }
290
291 @Override
292 public void setStart(long start) {
293 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
294 }
295 }
296
297 /**
298 * Specialization of {@link Threshold} for monitoring app crashes. It stores
299 * counters in memory.
300 */
301 private static class AppThreshold extends Threshold {
302 private int count;
303 private long start;
304
305 public AppThreshold(int uid) {
306 // We're interested in 5 events in any 30 second period; apps crash
307 // pretty quickly so we can keep a tight leash on them.
308 super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
309 }
310
311 @Override public int getCount() { return count; }
312 @Override public void setCount(int count) { this.count = count; }
313 @Override public long getStart() { return start; }
314 @Override public void setStart(long start) { this.start = start; }
315 }
316
Jeff Sharkey82311462017-04-02 23:42:17 -0600317 private static int[] getAllUserIds() {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700318 int[] userIds = { UserHandle.USER_SYSTEM };
319 try {
Jeff Sharkey82311462017-04-02 23:42:17 -0600320 for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) {
321 try {
322 final int userId = Integer.parseInt(file.getName());
323 if (userId != UserHandle.USER_SYSTEM) {
324 userIds = ArrayUtils.appendInt(userIds, userId);
325 }
326 } catch (NumberFormatException ignored) {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700327 }
328 }
329 } catch (Throwable t) {
330 Slog.w(TAG, "Trouble discovering users", t);
331 }
332 return userIds;
333 }
334
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700335 /**
336 * Hacky test to check if the device has an active USB connection, which is
Jeff Sharkey1bec4482017-02-23 12:40:54 -0700337 * a good proxy for someone doing local development work.
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700338 */
339 private static boolean isUsbActive() {
Jeff Sharkey9d640952017-06-26 19:57:16 -0600340 if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) {
341 Slog.v(TAG, "Assuming virtual device is connected over USB");
342 return true;
343 }
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700344 try {
Jeff Sharkey1bec4482017-02-23 12:40:54 -0700345 final String state = FileUtils
346 .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
347 return "CONFIGURED".equals(state.trim());
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700348 } catch (Throwable t) {
349 Slog.w(TAG, "Failed to determine if device was on USB", t);
350 return false;
351 }
352 }
353
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700354 private static String levelToString(int level) {
355 switch (level) {
356 case LEVEL_NONE: return "NONE";
357 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
358 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
359 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
360 case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
361 default: return Integer.toString(level);
362 }
363 }
364}