blob: 3b36c3c0d2bc4675f83581094a88c436f8d12a65 [file] [log] [blame]
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.server;
18
19import android.content.ContentResolver;
20import android.content.Context;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070021import android.os.Build;
Jeff Sharkey82311462017-04-02 23:42:17 -060022import android.os.Environment;
Jeff Sharkey1bec4482017-02-23 12:40:54 -070023import android.os.FileUtils;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070024import android.os.RecoverySystem;
25import android.os.SystemClock;
26import android.os.SystemProperties;
27import android.os.UserHandle;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070028import android.provider.Settings;
29import android.text.format.DateUtils;
30import android.util.ExceptionUtils;
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070031import android.util.Log;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070032import android.util.MathUtils;
33import android.util.Slog;
34import android.util.SparseArray;
35
36import com.android.internal.util.ArrayUtils;
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070037import com.android.server.pm.PackageManagerService;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070038
Jeff Sharkey1bec4482017-02-23 12:40:54 -070039import java.io.File;
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070040
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070041/**
42 * Utilities to help rescue the system from crash loops. Callers are expected to
43 * report boot events and persistent app crashes, and if they happen frequently
44 * enough this class will slowly escalate through several rescue operations
45 * before finally rebooting and prompting the user if they want to wipe data as
46 * a last resort.
47 *
48 * @hide
49 */
50public class RescueParty {
51 private static final String TAG = "RescueParty";
52
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070053 private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070054 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070055 private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
56 private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
57 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
58
59 private static final int LEVEL_NONE = 0;
60 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
61 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
62 private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
63 private static final int LEVEL_FACTORY_RESET = 4;
64
65 /** Threshold for boot loops */
66 private static final Threshold sBoot = new BootThreshold();
67 /** Threshold for app crash loops */
68 private static SparseArray<Threshold> sApps = new SparseArray<>();
69
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070070 private static boolean isDisabled() {
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070071 // Check if we're explicitly enabled for testing
Jeff Sharkey82311462017-04-02 23:42:17 -060072 if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
Jeff Sharkeybc9caa12017-03-11 20:38:21 -070073 return false;
74 }
75
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070076 // We're disabled on all engineering devices
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070077 if (Build.IS_ENG) {
78 Slog.v(TAG, "Disabled because of eng build");
79 return true;
80 }
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070081
82 // We're disabled on userdebug devices connected over USB, since that's
83 // a decent signal that someone is actively trying to debug the device,
84 // or that it's in a lab environment.
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070085 if (Build.IS_USERDEBUG && isUsbActive()) {
86 Slog.v(TAG, "Disabled because of active USB connection");
87 return true;
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070088 }
89
90 // One last-ditch check
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070091 if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
92 Slog.v(TAG, "Disabled because of manual property");
93 return true;
94 }
95
96 return false;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070097 }
98
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070099 /**
100 * Take note of a boot event. If we notice too many of these events
101 * happening in rapid succession, we'll send out a rescue party.
102 */
103 public static void noteBoot(Context context) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700104 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700105 if (sBoot.incrementAndTest()) {
106 sBoot.reset();
107 incrementRescueLevel(sBoot.uid);
108 executeRescueLevel(context);
109 }
110 }
111
112 /**
113 * Take note of a persistent app crash. If we notice too many of these
114 * events happening in rapid succession, we'll send out a rescue party.
115 */
116 public static void notePersistentAppCrash(Context context, int uid) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700117 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700118 Threshold t = sApps.get(uid);
119 if (t == null) {
120 t = new AppThreshold(uid);
121 sApps.put(uid, t);
122 }
123 if (t.incrementAndTest()) {
124 t.reset();
125 incrementRescueLevel(t.uid);
126 executeRescueLevel(context);
127 }
128 }
129
130 /**
131 * Check if we're currently attempting to reboot for a factory reset.
132 */
133 public static boolean isAttemptingFactoryReset() {
134 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
135 }
136
137 /**
138 * Escalate to the next rescue level. After incrementing the level you'll
139 * probably want to call {@link #executeRescueLevel(Context)}.
140 */
141 private static void incrementRescueLevel(int triggerUid) {
142 final int level = MathUtils.constrain(
143 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
144 LEVEL_NONE, LEVEL_FACTORY_RESET);
145 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
146
147 EventLogTags.writeRescueLevel(level, triggerUid);
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700148 PackageManagerService.logCriticalInfo(Log.WARN, "Incremented rescue level to "
149 + levelToString(level) + " triggered by UID " + triggerUid);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700150 }
151
152 /**
153 * Called when {@code SettingsProvider} has been published, which is a good
154 * opportunity to reset any settings depending on our rescue level.
155 */
156 public static void onSettingsProviderPublished(Context context) {
157 executeRescueLevel(context);
158 }
159
160 private static void executeRescueLevel(Context context) {
161 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
162 if (level == LEVEL_NONE) return;
163
164 Slog.w(TAG, "Attempting rescue level " + levelToString(level));
165 try {
166 executeRescueLevelInternal(context, level);
167 EventLogTags.writeRescueSuccess(level);
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700168 PackageManagerService.logCriticalInfo(Log.DEBUG,
169 "Finished rescue level " + levelToString(level));
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700170 } catch (Throwable t) {
Jeff Sharkeybc9caa12017-03-11 20:38:21 -0700171 final String msg = ExceptionUtils.getCompleteMessage(t);
172 EventLogTags.writeRescueFailure(level, msg);
173 PackageManagerService.logCriticalInfo(Log.ERROR,
174 "Failed rescue level " + levelToString(level) + ": " + msg);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700175 }
176 }
177
178 private static void executeRescueLevelInternal(Context context, int level) throws Exception {
179 switch (level) {
180 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
181 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
182 break;
183 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
184 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
185 break;
186 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
187 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
188 break;
189 case LEVEL_FACTORY_RESET:
190 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
191 break;
192 }
193 }
194
195 private static void resetAllSettings(Context context, int mode) throws Exception {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700196 // Try our best to reset all settings possible, and once finished
197 // rethrow any exception that we encountered
198 Exception res = null;
199 final ContentResolver resolver = context.getContentResolver();
200 try {
201 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700202 } catch (Throwable t) {
203 res = new RuntimeException("Failed to reset global settings", t);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700204 }
Jeff Sharkey82311462017-04-02 23:42:17 -0600205 for (int userId : getAllUserIds()) {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700206 try {
207 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700208 } catch (Throwable t) {
209 res = new RuntimeException("Failed to reset secure settings for " + userId, t);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700210 }
211 }
212 if (res != null) {
213 throw res;
214 }
215 }
216
217 /**
218 * Threshold that can be triggered if a number of events occur within a
219 * window of time.
220 */
221 private abstract static class Threshold {
222 public abstract int getCount();
223 public abstract void setCount(int count);
224 public abstract long getStart();
225 public abstract void setStart(long start);
226
227 private final int uid;
228 private final int triggerCount;
229 private final long triggerWindow;
230
231 public Threshold(int uid, int triggerCount, long triggerWindow) {
232 this.uid = uid;
233 this.triggerCount = triggerCount;
234 this.triggerWindow = triggerWindow;
235 }
236
237 public void reset() {
238 setCount(0);
239 setStart(0);
240 }
241
242 /**
243 * @return if this threshold has been triggered
244 */
245 public boolean incrementAndTest() {
246 final long now = SystemClock.elapsedRealtime();
247 final long window = now - getStart();
248 if (window > triggerWindow) {
249 setCount(1);
250 setStart(now);
251 return false;
252 } else {
253 int count = getCount() + 1;
254 setCount(count);
255 EventLogTags.writeRescueNote(uid, count, window);
256 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
257 + (window / 1000) + " sec");
258 return (count >= triggerCount);
259 }
260 }
261 }
262
263 /**
264 * Specialization of {@link Threshold} for monitoring boot events. It stores
265 * counters in system properties for robustness.
266 */
267 private static class BootThreshold extends Threshold {
268 public BootThreshold() {
269 // We're interested in 5 events in any 300 second period; this
270 // window is super relaxed because booting can take a long time if
271 // forced to dexopt things.
272 super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
273 }
274
275 @Override
276 public int getCount() {
277 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
278 }
279
280 @Override
281 public void setCount(int count) {
282 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
283 }
284
285 @Override
286 public long getStart() {
287 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
288 }
289
290 @Override
291 public void setStart(long start) {
292 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
293 }
294 }
295
296 /**
297 * Specialization of {@link Threshold} for monitoring app crashes. It stores
298 * counters in memory.
299 */
300 private static class AppThreshold extends Threshold {
301 private int count;
302 private long start;
303
304 public AppThreshold(int uid) {
305 // We're interested in 5 events in any 30 second period; apps crash
306 // pretty quickly so we can keep a tight leash on them.
307 super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
308 }
309
310 @Override public int getCount() { return count; }
311 @Override public void setCount(int count) { this.count = count; }
312 @Override public long getStart() { return start; }
313 @Override public void setStart(long start) { this.start = start; }
314 }
315
Jeff Sharkey82311462017-04-02 23:42:17 -0600316 private static int[] getAllUserIds() {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700317 int[] userIds = { UserHandle.USER_SYSTEM };
318 try {
Jeff Sharkey82311462017-04-02 23:42:17 -0600319 for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) {
320 try {
321 final int userId = Integer.parseInt(file.getName());
322 if (userId != UserHandle.USER_SYSTEM) {
323 userIds = ArrayUtils.appendInt(userIds, userId);
324 }
325 } catch (NumberFormatException ignored) {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700326 }
327 }
328 } catch (Throwable t) {
329 Slog.w(TAG, "Trouble discovering users", t);
330 }
331 return userIds;
332 }
333
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700334 /**
335 * Hacky test to check if the device has an active USB connection, which is
Jeff Sharkey1bec4482017-02-23 12:40:54 -0700336 * a good proxy for someone doing local development work.
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700337 */
338 private static boolean isUsbActive() {
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700339 try {
Jeff Sharkey1bec4482017-02-23 12:40:54 -0700340 final String state = FileUtils
341 .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
342 return "CONFIGURED".equals(state.trim());
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700343 } catch (Throwable t) {
344 Slog.w(TAG, "Failed to determine if device was on USB", t);
345 return false;
346 }
347 }
348
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700349 private static String levelToString(int level) {
350 switch (level) {
351 case LEVEL_NONE: return "NONE";
352 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
353 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
354 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
355 case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
356 default: return Integer.toString(level);
357 }
358 }
359}