blob: f078acfd27a1ed8dae62f8b18a020893e85cebb9 [file] [log] [blame]
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.server;
18
19import android.content.ContentResolver;
20import android.content.Context;
21import android.content.pm.UserInfo;
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070022import android.os.BatteryManager;
23import android.os.BatteryManagerInternal;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070024import android.os.Build;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070025import android.os.RecoverySystem;
26import android.os.SystemClock;
27import android.os.SystemProperties;
28import android.os.UserHandle;
29import android.os.UserManager;
30import android.provider.Settings;
31import android.text.format.DateUtils;
32import android.util.ExceptionUtils;
33import android.util.MathUtils;
34import android.util.Slog;
35import android.util.SparseArray;
36
37import com.android.internal.util.ArrayUtils;
38
39/**
40 * Utilities to help rescue the system from crash loops. Callers are expected to
41 * report boot events and persistent app crashes, and if they happen frequently
42 * enough this class will slowly escalate through several rescue operations
43 * before finally rebooting and prompting the user if they want to wipe data as
44 * a last resort.
45 *
46 * @hide
47 */
48public class RescueParty {
49 private static final String TAG = "RescueParty";
50
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070051 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070052 private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
53 private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
54 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
55
56 private static final int LEVEL_NONE = 0;
57 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
58 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
59 private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
60 private static final int LEVEL_FACTORY_RESET = 4;
61
Jeff Sharkeyd67ab922017-01-24 15:26:19 -070062 private static final boolean DISABLE_RESET_SETTINGS = true;
63
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070064 /** Threshold for boot loops */
65 private static final Threshold sBoot = new BootThreshold();
66 /** Threshold for app crash loops */
67 private static SparseArray<Threshold> sApps = new SparseArray<>();
68
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070069 private static boolean isDisabled() {
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070070 // We're disabled on all engineering devices
71 if (Build.IS_ENG) return true;
72
73 // We're disabled on userdebug devices connected over USB, since that's
74 // a decent signal that someone is actively trying to debug the device,
75 // or that it's in a lab environment.
76 if (Build.IS_USERDEBUG) {
77 try {
78 if (LocalServices.getService(BatteryManagerInternal.class)
79 .getPlugType() == BatteryManager.BATTERY_PLUGGED_USB) {
80 return true;
81 } else {
82 }
83 } catch (Throwable ignored) {
84 }
85 }
86
87 // One last-ditch check
88 return SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false);
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070089 }
90
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070091 /**
92 * Take note of a boot event. If we notice too many of these events
93 * happening in rapid succession, we'll send out a rescue party.
94 */
95 public static void noteBoot(Context context) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070096 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070097 if (sBoot.incrementAndTest()) {
98 sBoot.reset();
99 incrementRescueLevel(sBoot.uid);
100 executeRescueLevel(context);
101 }
102 }
103
104 /**
105 * Take note of a persistent app crash. If we notice too many of these
106 * events happening in rapid succession, we'll send out a rescue party.
107 */
108 public static void notePersistentAppCrash(Context context, int uid) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700109 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700110 Threshold t = sApps.get(uid);
111 if (t == null) {
112 t = new AppThreshold(uid);
113 sApps.put(uid, t);
114 }
115 if (t.incrementAndTest()) {
116 t.reset();
117 incrementRescueLevel(t.uid);
118 executeRescueLevel(context);
119 }
120 }
121
122 /**
123 * Check if we're currently attempting to reboot for a factory reset.
124 */
125 public static boolean isAttemptingFactoryReset() {
126 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
127 }
128
129 /**
130 * Escalate to the next rescue level. After incrementing the level you'll
131 * probably want to call {@link #executeRescueLevel(Context)}.
132 */
133 private static void incrementRescueLevel(int triggerUid) {
134 final int level = MathUtils.constrain(
135 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
136 LEVEL_NONE, LEVEL_FACTORY_RESET);
137 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
138
139 EventLogTags.writeRescueLevel(level, triggerUid);
140 Slog.w(TAG, "Incremented rescue level to " + levelToString(level));
141 }
142
143 /**
144 * Called when {@code SettingsProvider} has been published, which is a good
145 * opportunity to reset any settings depending on our rescue level.
146 */
147 public static void onSettingsProviderPublished(Context context) {
148 executeRescueLevel(context);
149 }
150
151 private static void executeRescueLevel(Context context) {
152 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
153 if (level == LEVEL_NONE) return;
154
155 Slog.w(TAG, "Attempting rescue level " + levelToString(level));
156 try {
157 executeRescueLevelInternal(context, level);
158 EventLogTags.writeRescueSuccess(level);
159 Slog.d(TAG, "Finished rescue level " + levelToString(level));
160 } catch (Throwable t) {
161 EventLogTags.writeRescueFailure(level, ExceptionUtils.getCompleteMessage(t));
162 Slog.e(TAG, "Failed rescue level " + levelToString(level), t);
163 }
164 }
165
166 private static void executeRescueLevelInternal(Context context, int level) throws Exception {
167 switch (level) {
168 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
169 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
170 break;
171 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
172 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
173 break;
174 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
175 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
176 break;
177 case LEVEL_FACTORY_RESET:
178 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
179 break;
180 }
181 }
182
183 private static void resetAllSettings(Context context, int mode) throws Exception {
Jeff Sharkeyd67ab922017-01-24 15:26:19 -0700184 if (DISABLE_RESET_SETTINGS) return;
185
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700186 // Try our best to reset all settings possible, and once finished
187 // rethrow any exception that we encountered
188 Exception res = null;
189 final ContentResolver resolver = context.getContentResolver();
190 try {
191 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
192 } catch (Exception e) {
193 res = new RuntimeException("Failed to reset global settings", e);
194 }
195 for (int userId : getAllUserIds(context)) {
196 try {
197 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
198 } catch (Exception e) {
199 res = new RuntimeException("Failed to reset secure settings for " + userId, e);
200 }
201 }
202 if (res != null) {
203 throw res;
204 }
205 }
206
207 /**
208 * Threshold that can be triggered if a number of events occur within a
209 * window of time.
210 */
211 private abstract static class Threshold {
212 public abstract int getCount();
213 public abstract void setCount(int count);
214 public abstract long getStart();
215 public abstract void setStart(long start);
216
217 private final int uid;
218 private final int triggerCount;
219 private final long triggerWindow;
220
221 public Threshold(int uid, int triggerCount, long triggerWindow) {
222 this.uid = uid;
223 this.triggerCount = triggerCount;
224 this.triggerWindow = triggerWindow;
225 }
226
227 public void reset() {
228 setCount(0);
229 setStart(0);
230 }
231
232 /**
233 * @return if this threshold has been triggered
234 */
235 public boolean incrementAndTest() {
236 final long now = SystemClock.elapsedRealtime();
237 final long window = now - getStart();
238 if (window > triggerWindow) {
239 setCount(1);
240 setStart(now);
241 return false;
242 } else {
243 int count = getCount() + 1;
244 setCount(count);
245 EventLogTags.writeRescueNote(uid, count, window);
246 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
247 + (window / 1000) + " sec");
248 return (count >= triggerCount);
249 }
250 }
251 }
252
253 /**
254 * Specialization of {@link Threshold} for monitoring boot events. It stores
255 * counters in system properties for robustness.
256 */
257 private static class BootThreshold extends Threshold {
258 public BootThreshold() {
259 // We're interested in 5 events in any 300 second period; this
260 // window is super relaxed because booting can take a long time if
261 // forced to dexopt things.
262 super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
263 }
264
265 @Override
266 public int getCount() {
267 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
268 }
269
270 @Override
271 public void setCount(int count) {
272 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
273 }
274
275 @Override
276 public long getStart() {
277 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
278 }
279
280 @Override
281 public void setStart(long start) {
282 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
283 }
284 }
285
286 /**
287 * Specialization of {@link Threshold} for monitoring app crashes. It stores
288 * counters in memory.
289 */
290 private static class AppThreshold extends Threshold {
291 private int count;
292 private long start;
293
294 public AppThreshold(int uid) {
295 // We're interested in 5 events in any 30 second period; apps crash
296 // pretty quickly so we can keep a tight leash on them.
297 super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
298 }
299
300 @Override public int getCount() { return count; }
301 @Override public void setCount(int count) { this.count = count; }
302 @Override public long getStart() { return start; }
303 @Override public void setStart(long start) { this.start = start; }
304 }
305
306 private static int[] getAllUserIds(Context context) {
307 int[] userIds = { UserHandle.USER_SYSTEM };
308 try {
309 final UserManager um = context.getSystemService(UserManager.class);
310 for (UserInfo user : um.getUsers()) {
311 if (user.id != UserHandle.USER_SYSTEM) {
312 userIds = ArrayUtils.appendInt(userIds, user.id);
313 }
314 }
315 } catch (Throwable t) {
316 Slog.w(TAG, "Trouble discovering users", t);
317 }
318 return userIds;
319 }
320
321 private static String levelToString(int level) {
322 switch (level) {
323 case LEVEL_NONE: return "NONE";
324 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
325 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
326 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
327 case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
328 default: return Integer.toString(level);
329 }
330 }
331}