blob: 33351ffc0f77ab4dc7b5d9fc03db9dad02fcba6a [file] [log] [blame]
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -07001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
19import android.content.ContentResolver;
20import android.content.Context;
21import android.content.pm.UserInfo;
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070022import android.os.BatteryManager;
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070023import android.os.BatteryProperties;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070024import android.os.Build;
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070025import android.os.IBatteryPropertiesListener;
26import android.os.IBatteryPropertiesRegistrar;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070027import android.os.RecoverySystem;
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070028import android.os.ServiceManager;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070029import android.os.SystemClock;
30import android.os.SystemProperties;
31import android.os.UserHandle;
32import android.os.UserManager;
33import android.provider.Settings;
34import android.text.format.DateUtils;
35import android.util.ExceptionUtils;
36import android.util.MathUtils;
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070037import android.util.MutableBoolean;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070038import android.util.Slog;
39import android.util.SparseArray;
40
41import com.android.internal.util.ArrayUtils;
42
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070043import java.util.concurrent.CountDownLatch;
44import java.util.concurrent.TimeUnit;
45
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070046/**
47 * Utilities to help rescue the system from crash loops. Callers are expected to
48 * report boot events and persistent app crashes, and if they happen frequently
49 * enough this class will slowly escalate through several rescue operations
50 * before finally rebooting and prompting the user if they want to wipe data as
51 * a last resort.
52 *
53 * @hide
54 */
55public class RescueParty {
56 private static final String TAG = "RescueParty";
57
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070058 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070059 private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
60 private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
61 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
62
63 private static final int LEVEL_NONE = 0;
64 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
65 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
66 private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
67 private static final int LEVEL_FACTORY_RESET = 4;
68
69 /** Threshold for boot loops */
70 private static final Threshold sBoot = new BootThreshold();
71 /** Threshold for app crash loops */
72 private static SparseArray<Threshold> sApps = new SparseArray<>();
73
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070074 private static boolean isDisabled() {
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070075 // We're disabled on all engineering devices
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070076 if (Build.IS_ENG) {
77 Slog.v(TAG, "Disabled because of eng build");
78 return true;
79 }
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070080
81 // We're disabled on userdebug devices connected over USB, since that's
82 // a decent signal that someone is actively trying to debug the device,
83 // or that it's in a lab environment.
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070084 if (Build.IS_USERDEBUG && isUsbActive()) {
85 Slog.v(TAG, "Disabled because of active USB connection");
86 return true;
Jeff Sharkeycdee83a2017-01-26 15:29:16 -070087 }
88
89 // One last-ditch check
Jeff Sharkeyd9574c72017-02-20 10:45:06 -070090 if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
91 Slog.v(TAG, "Disabled because of manual property");
92 return true;
93 }
94
95 return false;
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -070096 }
97
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -070098 /**
99 * Take note of a boot event. If we notice too many of these events
100 * happening in rapid succession, we'll send out a rescue party.
101 */
102 public static void noteBoot(Context context) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700103 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700104 if (sBoot.incrementAndTest()) {
105 sBoot.reset();
106 incrementRescueLevel(sBoot.uid);
107 executeRescueLevel(context);
108 }
109 }
110
111 /**
112 * Take note of a persistent app crash. If we notice too many of these
113 * events happening in rapid succession, we'll send out a rescue party.
114 */
115 public static void notePersistentAppCrash(Context context, int uid) {
Jeff Sharkey9f1fc2d2017-01-24 11:05:16 -0700116 if (isDisabled()) return;
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700117 Threshold t = sApps.get(uid);
118 if (t == null) {
119 t = new AppThreshold(uid);
120 sApps.put(uid, t);
121 }
122 if (t.incrementAndTest()) {
123 t.reset();
124 incrementRescueLevel(t.uid);
125 executeRescueLevel(context);
126 }
127 }
128
129 /**
130 * Check if we're currently attempting to reboot for a factory reset.
131 */
132 public static boolean isAttemptingFactoryReset() {
133 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
134 }
135
136 /**
137 * Escalate to the next rescue level. After incrementing the level you'll
138 * probably want to call {@link #executeRescueLevel(Context)}.
139 */
140 private static void incrementRescueLevel(int triggerUid) {
141 final int level = MathUtils.constrain(
142 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
143 LEVEL_NONE, LEVEL_FACTORY_RESET);
144 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
145
146 EventLogTags.writeRescueLevel(level, triggerUid);
147 Slog.w(TAG, "Incremented rescue level to " + levelToString(level));
148 }
149
150 /**
151 * Called when {@code SettingsProvider} has been published, which is a good
152 * opportunity to reset any settings depending on our rescue level.
153 */
154 public static void onSettingsProviderPublished(Context context) {
155 executeRescueLevel(context);
156 }
157
158 private static void executeRescueLevel(Context context) {
159 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
160 if (level == LEVEL_NONE) return;
161
162 Slog.w(TAG, "Attempting rescue level " + levelToString(level));
163 try {
164 executeRescueLevelInternal(context, level);
165 EventLogTags.writeRescueSuccess(level);
166 Slog.d(TAG, "Finished rescue level " + levelToString(level));
167 } catch (Throwable t) {
168 EventLogTags.writeRescueFailure(level, ExceptionUtils.getCompleteMessage(t));
169 Slog.e(TAG, "Failed rescue level " + levelToString(level), t);
170 }
171 }
172
173 private static void executeRescueLevelInternal(Context context, int level) throws Exception {
174 switch (level) {
175 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
176 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
177 break;
178 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
179 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
180 break;
181 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
182 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
183 break;
184 case LEVEL_FACTORY_RESET:
185 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
186 break;
187 }
188 }
189
190 private static void resetAllSettings(Context context, int mode) throws Exception {
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700191 // Try our best to reset all settings possible, and once finished
192 // rethrow any exception that we encountered
193 Exception res = null;
194 final ContentResolver resolver = context.getContentResolver();
195 try {
196 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700197 } catch (Throwable t) {
198 res = new RuntimeException("Failed to reset global settings", t);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700199 }
200 for (int userId : getAllUserIds(context)) {
201 try {
202 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700203 } catch (Throwable t) {
204 res = new RuntimeException("Failed to reset secure settings for " + userId, t);
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700205 }
206 }
207 if (res != null) {
208 throw res;
209 }
210 }
211
212 /**
213 * Threshold that can be triggered if a number of events occur within a
214 * window of time.
215 */
216 private abstract static class Threshold {
217 public abstract int getCount();
218 public abstract void setCount(int count);
219 public abstract long getStart();
220 public abstract void setStart(long start);
221
222 private final int uid;
223 private final int triggerCount;
224 private final long triggerWindow;
225
226 public Threshold(int uid, int triggerCount, long triggerWindow) {
227 this.uid = uid;
228 this.triggerCount = triggerCount;
229 this.triggerWindow = triggerWindow;
230 }
231
232 public void reset() {
233 setCount(0);
234 setStart(0);
235 }
236
237 /**
238 * @return if this threshold has been triggered
239 */
240 public boolean incrementAndTest() {
241 final long now = SystemClock.elapsedRealtime();
242 final long window = now - getStart();
243 if (window > triggerWindow) {
244 setCount(1);
245 setStart(now);
246 return false;
247 } else {
248 int count = getCount() + 1;
249 setCount(count);
250 EventLogTags.writeRescueNote(uid, count, window);
251 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
252 + (window / 1000) + " sec");
253 return (count >= triggerCount);
254 }
255 }
256 }
257
258 /**
259 * Specialization of {@link Threshold} for monitoring boot events. It stores
260 * counters in system properties for robustness.
261 */
262 private static class BootThreshold extends Threshold {
263 public BootThreshold() {
264 // We're interested in 5 events in any 300 second period; this
265 // window is super relaxed because booting can take a long time if
266 // forced to dexopt things.
267 super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
268 }
269
270 @Override
271 public int getCount() {
272 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
273 }
274
275 @Override
276 public void setCount(int count) {
277 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
278 }
279
280 @Override
281 public long getStart() {
282 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
283 }
284
285 @Override
286 public void setStart(long start) {
287 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
288 }
289 }
290
291 /**
292 * Specialization of {@link Threshold} for monitoring app crashes. It stores
293 * counters in memory.
294 */
295 private static class AppThreshold extends Threshold {
296 private int count;
297 private long start;
298
299 public AppThreshold(int uid) {
300 // We're interested in 5 events in any 30 second period; apps crash
301 // pretty quickly so we can keep a tight leash on them.
302 super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
303 }
304
305 @Override public int getCount() { return count; }
306 @Override public void setCount(int count) { this.count = count; }
307 @Override public long getStart() { return start; }
308 @Override public void setStart(long start) { this.start = start; }
309 }
310
311 private static int[] getAllUserIds(Context context) {
312 int[] userIds = { UserHandle.USER_SYSTEM };
313 try {
314 final UserManager um = context.getSystemService(UserManager.class);
315 for (UserInfo user : um.getUsers()) {
316 if (user.id != UserHandle.USER_SYSTEM) {
317 userIds = ArrayUtils.appendInt(userIds, user.id);
318 }
319 }
320 } catch (Throwable t) {
321 Slog.w(TAG, "Trouble discovering users", t);
322 }
323 return userIds;
324 }
325
Jeff Sharkeyd9574c72017-02-20 10:45:06 -0700326 /**
327 * Hacky test to check if the device has an active USB connection, which is
328 * a good proxy for someone doing local development work. It uses a low
329 * level call since we may not have started {@link BatteryManager} yet.
330 */
331 private static boolean isUsbActive() {
332 final MutableBoolean res = new MutableBoolean(false);
333 final CountDownLatch latch = new CountDownLatch(1);
334 final IBatteryPropertiesListener listener = new IBatteryPropertiesListener.Stub() {
335 @Override
336 public void batteryPropertiesChanged(BatteryProperties props) {
337 res.value = props.chargerUsbOnline;
338 latch.countDown();
339 }
340 };
341
342 try {
343 final IBatteryPropertiesRegistrar bpr = IBatteryPropertiesRegistrar.Stub
344 .asInterface(ServiceManager.getService("batteryproperties"));
345 bpr.registerListener(listener);
346 try {
347 latch.await(5, TimeUnit.SECONDS);
348 } finally {
349 bpr.unregisterListener(listener);
350 }
351 return res.value;
352 } catch (Throwable t) {
353 Slog.w(TAG, "Failed to determine if device was on USB", t);
354 return false;
355 }
356 }
357
Jeff Sharkeyfe6f85c2017-01-20 10:42:57 -0700358 private static String levelToString(int level) {
359 switch (level) {
360 case LEVEL_NONE: return "NONE";
361 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
362 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
363 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
364 case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
365 default: return Integer.toString(level);
366 }
367 }
368}