blob: 480b08a77cf7964804aa5a0b8c1e0932a70400a9 [file] [log] [blame]
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.server;
18
19import android.content.ContentResolver;
20import android.content.Context;
21import android.content.pm.UserInfo;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070022import android.os.Build;
Jeff Sharkey1bec4482017-02-23 12:40:54 -070023import android.os.FileUtils;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070024import android.os.RecoverySystem;
25import android.os.SystemClock;
26import android.os.SystemProperties;
27import android.os.UserHandle;
28import android.os.UserManager;
29import android.provider.Settings;
30import android.text.format.DateUtils;
31import android.util.ExceptionUtils;
32import android.util.MathUtils;
33import android.util.Slog;
34import android.util.SparseArray;
35
36import com.android.internal.util.ArrayUtils;
37
Jeff Sharkey1bec4482017-02-23 12:40:54 -070038import java.io.File;
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070039
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070040/**
41 * Utilities to help rescue the system from crash loops. Callers are expected to
42 * report boot events and persistent app crashes, and if they happen frequently
43 * enough this class will slowly escalate through several rescue operations
44 * before finally rebooting and prompting the user if they want to wipe data as
45 * a last resort.
46 *
47 * @hide
48 */
49public class RescueParty {
50 private static final String TAG = "RescueParty";
51
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070052 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070053 private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
54 private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
55 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
56
57 private static final int LEVEL_NONE = 0;
58 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
59 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
60 private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
61 private static final int LEVEL_FACTORY_RESET = 4;
62
63 /** Threshold for boot loops */
64 private static final Threshold sBoot = new BootThreshold();
65 /** Threshold for app crash loops */
66 private static SparseArray<Threshold> sApps = new SparseArray<>();
67
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070068 private static boolean isDisabled() {
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070069 // We're disabled on all engineering devices
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070070 if (Build.IS_ENG) {
71 Slog.v(TAG, "Disabled because of eng build");
72 return true;
73 }
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070074
75 // We're disabled on userdebug devices connected over USB, since that's
76 // a decent signal that someone is actively trying to debug the device,
77 // or that it's in a lab environment.
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070078 if (Build.IS_USERDEBUG && isUsbActive()) {
79 Slog.v(TAG, "Disabled because of active USB connection");
80 return true;
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070081 }
82
83 // One last-ditch check
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070084 if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
85 Slog.v(TAG, "Disabled because of manual property");
86 return true;
87 }
88
89 return false;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070090 }
91
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070092 /**
93 * Take note of a boot event. If we notice too many of these events
94 * happening in rapid succession, we'll send out a rescue party.
95 */
96 public static void noteBoot(Context context) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070097 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070098 if (sBoot.incrementAndTest()) {
99 sBoot.reset();
100 incrementRescueLevel(sBoot.uid);
101 executeRescueLevel(context);
102 }
103 }
104
105 /**
106 * Take note of a persistent app crash. If we notice too many of these
107 * events happening in rapid succession, we'll send out a rescue party.
108 */
109 public static void notePersistentAppCrash(Context context, int uid) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700110 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700111 Threshold t = sApps.get(uid);
112 if (t == null) {
113 t = new AppThreshold(uid);
114 sApps.put(uid, t);
115 }
116 if (t.incrementAndTest()) {
117 t.reset();
118 incrementRescueLevel(t.uid);
119 executeRescueLevel(context);
120 }
121 }
122
123 /**
124 * Check if we're currently attempting to reboot for a factory reset.
125 */
126 public static boolean isAttemptingFactoryReset() {
127 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
128 }
129
130 /**
131 * Escalate to the next rescue level. After incrementing the level you'll
132 * probably want to call {@link #executeRescueLevel(Context)}.
133 */
134 private static void incrementRescueLevel(int triggerUid) {
135 final int level = MathUtils.constrain(
136 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
137 LEVEL_NONE, LEVEL_FACTORY_RESET);
138 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
139
140 EventLogTags.writeRescueLevel(level, triggerUid);
141 Slog.w(TAG, "Incremented rescue level to " + levelToString(level));
142 }
143
144 /**
145 * Called when {@code SettingsProvider} has been published, which is a good
146 * opportunity to reset any settings depending on our rescue level.
147 */
148 public static void onSettingsProviderPublished(Context context) {
149 executeRescueLevel(context);
150 }
151
152 private static void executeRescueLevel(Context context) {
153 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
154 if (level == LEVEL_NONE) return;
155
156 Slog.w(TAG, "Attempting rescue level " + levelToString(level));
157 try {
158 executeRescueLevelInternal(context, level);
159 EventLogTags.writeRescueSuccess(level);
160 Slog.d(TAG, "Finished rescue level " + levelToString(level));
161 } catch (Throwable t) {
162 EventLogTags.writeRescueFailure(level, ExceptionUtils.getCompleteMessage(t));
163 Slog.e(TAG, "Failed rescue level " + levelToString(level), t);
164 }
165 }
166
167 private static void executeRescueLevelInternal(Context context, int level) throws Exception {
168 switch (level) {
169 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
170 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
171 break;
172 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
173 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
174 break;
175 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
176 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
177 break;
178 case LEVEL_FACTORY_RESET:
179 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
180 break;
181 }
182 }
183
184 private static void resetAllSettings(Context context, int mode) throws Exception {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700185 // Try our best to reset all settings possible, and once finished
186 // rethrow any exception that we encountered
187 Exception res = null;
188 final ContentResolver resolver = context.getContentResolver();
189 try {
190 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700191 } catch (Throwable t) {
192 res = new RuntimeException("Failed to reset global settings", t);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700193 }
194 for (int userId : getAllUserIds(context)) {
195 try {
196 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700197 } catch (Throwable t) {
198 res = new RuntimeException("Failed to reset secure settings for " + userId, t);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700199 }
200 }
201 if (res != null) {
202 throw res;
203 }
204 }
205
206 /**
207 * Threshold that can be triggered if a number of events occur within a
208 * window of time.
209 */
210 private abstract static class Threshold {
211 public abstract int getCount();
212 public abstract void setCount(int count);
213 public abstract long getStart();
214 public abstract void setStart(long start);
215
216 private final int uid;
217 private final int triggerCount;
218 private final long triggerWindow;
219
220 public Threshold(int uid, int triggerCount, long triggerWindow) {
221 this.uid = uid;
222 this.triggerCount = triggerCount;
223 this.triggerWindow = triggerWindow;
224 }
225
226 public void reset() {
227 setCount(0);
228 setStart(0);
229 }
230
231 /**
232 * @return if this threshold has been triggered
233 */
234 public boolean incrementAndTest() {
235 final long now = SystemClock.elapsedRealtime();
236 final long window = now - getStart();
237 if (window > triggerWindow) {
238 setCount(1);
239 setStart(now);
240 return false;
241 } else {
242 int count = getCount() + 1;
243 setCount(count);
244 EventLogTags.writeRescueNote(uid, count, window);
245 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
246 + (window / 1000) + " sec");
247 return (count >= triggerCount);
248 }
249 }
250 }
251
252 /**
253 * Specialization of {@link Threshold} for monitoring boot events. It stores
254 * counters in system properties for robustness.
255 */
256 private static class BootThreshold extends Threshold {
257 public BootThreshold() {
258 // We're interested in 5 events in any 300 second period; this
259 // window is super relaxed because booting can take a long time if
260 // forced to dexopt things.
261 super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
262 }
263
264 @Override
265 public int getCount() {
266 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
267 }
268
269 @Override
270 public void setCount(int count) {
271 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
272 }
273
274 @Override
275 public long getStart() {
276 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
277 }
278
279 @Override
280 public void setStart(long start) {
281 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
282 }
283 }
284
285 /**
286 * Specialization of {@link Threshold} for monitoring app crashes. It stores
287 * counters in memory.
288 */
289 private static class AppThreshold extends Threshold {
290 private int count;
291 private long start;
292
293 public AppThreshold(int uid) {
294 // We're interested in 5 events in any 30 second period; apps crash
295 // pretty quickly so we can keep a tight leash on them.
296 super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
297 }
298
299 @Override public int getCount() { return count; }
300 @Override public void setCount(int count) { this.count = count; }
301 @Override public long getStart() { return start; }
302 @Override public void setStart(long start) { this.start = start; }
303 }
304
305 private static int[] getAllUserIds(Context context) {
306 int[] userIds = { UserHandle.USER_SYSTEM };
307 try {
308 final UserManager um = context.getSystemService(UserManager.class);
309 for (UserInfo user : um.getUsers()) {
310 if (user.id != UserHandle.USER_SYSTEM) {
311 userIds = ArrayUtils.appendInt(userIds, user.id);
312 }
313 }
314 } catch (Throwable t) {
315 Slog.w(TAG, "Trouble discovering users", t);
316 }
317 return userIds;
318 }
319
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700320 /**
321 * Hacky test to check if the device has an active USB connection, which is
Jeff Sharkey1bec4482017-02-23 12:40:54 -0700322 * a good proxy for someone doing local development work.
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700323 */
324 private static boolean isUsbActive() {
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700325 try {
Jeff Sharkey1bec4482017-02-23 12:40:54 -0700326 final String state = FileUtils
327 .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
328 return "CONFIGURED".equals(state.trim());
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700329 } catch (Throwable t) {
330 Slog.w(TAG, "Failed to determine if device was on USB", t);
331 return false;
332 }
333 }
334
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700335 private static String levelToString(int level) {
336 switch (level) {
337 case LEVEL_NONE: return "NONE";
338 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
339 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
340 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
341 case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
342 default: return Integer.toString(level);
343 }
344 }
345}