blob: 06dc9188d1ee39d0a101d43a10db4d23ce52398c [file] [log] [blame]
Zimuzo6efba542018-11-29 12:47:58 +00001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
19import android.content.Context;
20import android.os.Environment;
21import android.os.Handler;
22import android.os.HandlerThread;
23import android.os.Looper;
24import android.os.Message;
25import android.os.Process;
26import android.os.SystemClock;
27import android.text.TextUtils;
28import android.util.ArrayMap;
29import android.util.AtomicFile;
30import android.util.Log;
31import android.util.Slog;
32import android.util.Xml;
33
34import com.android.internal.annotations.GuardedBy;
35import com.android.internal.util.FastXmlSerializer;
36import com.android.internal.util.XmlUtils;
37
38import libcore.io.IoUtils;
39
40import org.xmlpull.v1.XmlPullParser;
41import org.xmlpull.v1.XmlPullParserException;
42import org.xmlpull.v1.XmlSerializer;
43
44import java.io.File;
45import java.io.FileNotFoundException;
46import java.io.FileOutputStream;
47import java.io.IOException;
48import java.io.InputStream;
49import java.nio.charset.StandardCharsets;
50import java.util.ArrayList;
51import java.util.Iterator;
52import java.util.List;
53import java.util.Map;
54import java.util.concurrent.TimeUnit;
55
56/**
57 * Monitors the health of packages on the system and notifies interested observers when packages
58 * fail. All registered observers will be notified until an observer takes a mitigation action.
59 */
60public class PackageWatchdog {
61 private static final String TAG = "PackageWatchdog";
62 // Duration to count package failures before it resets to 0
63 private static final int TRIGGER_DURATION_MS = 60000;
64 // Number of package failures within the duration above before we notify observers
65 private static final int TRIGGER_FAILURE_COUNT = 5;
66 private static final int DB_VERSION = 1;
67 private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog";
68 private static final String TAG_PACKAGE = "package";
69 private static final String TAG_OBSERVER = "observer";
70 private static final String ATTR_VERSION = "version";
71 private static final String ATTR_NAME = "name";
72 private static final String ATTR_DURATION = "duration";
73 private static final int MESSAGE_SAVE_FILE = 1;
74
75 private static PackageWatchdog sPackageWatchdog;
76
77 private final Object mLock = new Object();
78 // System server context
79 private final Context mContext;
80 // Handler to run package cleanup runnables
81 private final Handler mTimerHandler;
82 private final HandlerThread mIoThread = new HandlerThread("package_watchdog_io",
83 Process.THREAD_PRIORITY_BACKGROUND);
84 private final Handler mIoHandler;
85 // Maps observer names to package observers that have been registered since the last boot
86 @GuardedBy("mLock")
87 final Map<String, PackageHealthObserver> mRegisteredObservers = new ArrayMap<>();
88 // Maps observer names to internal observers (registered or not) loaded from file
89 @GuardedBy("mLock")
90 final Map<String, ObserverInternal> mAllObservers = new ArrayMap<>();
91 // /data/system/ directory
92 private final File mSystemDir = new File(Environment.getDataDirectory(), "system");
93 // File containing the XML data of monitored packages
94 private final AtomicFile mPolicyFile =
95 new AtomicFile(new File(mSystemDir, "package-watchdog.xml"));
96 // Runnable to prune monitored packages that have expired
97 private final Runnable mPackageCleanup;
98 // Last SystemClock#uptimeMillis a package clean up was executed.
99 // 0 if mPackageCleanup not running.
100 private long mUptimeAtLastRescheduleMs;
101 // Duration a package cleanup was last scheduled for.
102 // 0 if mPackageCleanup not running.
103 private long mDurationAtLastReschedule;
104
105 private PackageWatchdog(Context context) {
106 mContext = context;
107 mTimerHandler = new Handler(Looper.myLooper());
108 mIoThread.start();
109 mIoHandler = new IoHandler(mIoThread.getLooper());
110 mPackageCleanup = this::rescheduleCleanup;
111 loadFromFile();
112 }
113
114 /** Creates or gets singleton instance of PackageWatchdog. */
115 public static synchronized PackageWatchdog getInstance(Context context) {
116 if (sPackageWatchdog == null) {
117 sPackageWatchdog = new PackageWatchdog(context);
118 }
119 return sPackageWatchdog;
120 }
121
122 /**
123 * Registers {@code observer} to listen for package failures
124 *
125 * <p>Observers are expected to call this on boot. It does not specify any packages but
126 * it will resume observing any packages requested from a previous boot.
127 */
128 public void registerHealthObserver(PackageHealthObserver observer) {
129 synchronized (mLock) {
130 mRegisteredObservers.put(observer.getName(), observer);
131 if (mDurationAtLastReschedule == 0) {
132 // Nothing running, schedule
133 rescheduleCleanup();
134 }
135 }
136 }
137
138 /**
139 * Starts observing the health of the {@code packages} for {@code observer} and notifies
140 * {@code observer} of any package failures within the monitoring duration.
141 *
142 * <p>If {@code observer} is already monitoring a package in {@code packageNames},
143 * the monitoring window of that package will be reset to {@code hours}.
144 *
145 * @throws IllegalArgumentException if {@code packageNames} is empty
146 * or {@code hours} is less than 1
147 */
148 public void startObservingHealth(PackageHealthObserver observer, List<String> packageNames,
149 int hours) {
150 if (packageNames.isEmpty() || hours < 1) {
151 throw new IllegalArgumentException("Observation not started, no packages specified"
152 + "or invalid hours");
153 }
154 long durationMs = TimeUnit.HOURS.toMillis(hours);
155 List<MonitoredPackage> packages = new ArrayList<>();
156 for (String packageName : packageNames) {
157 packages.add(new MonitoredPackage(packageName, durationMs));
158 }
159 synchronized (mLock) {
160 ObserverInternal oldObserver = mAllObservers.get(observer.getName());
161 if (oldObserver == null) {
162 Slog.d(TAG, observer.getName() + " started monitoring health of packages "
163 + packageNames);
164 mAllObservers.put(observer.getName(),
165 new ObserverInternal(observer.getName(), packages));
166 } else {
167 Slog.d(TAG, observer.getName() + " added the following packages to monitor "
168 + packageNames);
169 oldObserver.updatePackages(packages);
170 }
171 }
172 registerHealthObserver(observer);
173 // Always reschedule because we may need to expire packages
174 // earlier than we are already scheduled for
175 rescheduleCleanup();
176 sendIoMessage(MESSAGE_SAVE_FILE);
177 }
178
179 /**
180 * Unregisters {@code observer} from listening to package failure.
181 * Additionally, this stops observing any packages that may have previously been observed
182 * even from a previous boot.
183 */
184 public void unregisterHealthObserver(PackageHealthObserver observer) {
185 synchronized (mLock) {
186 mAllObservers.remove(observer.getName());
187 mRegisteredObservers.remove(observer.getName());
188 }
189 sendIoMessage(MESSAGE_SAVE_FILE);
190 }
191
192 // TODO(zezeozue:) Accept current versionCodes of failing packages?
193 /**
194 * Called when a process fails either due to a crash or ANR.
195 *
196 * <p>All registered observers for the packages contained in the process will be notified in
197 * order of priority unitl an observer signifies that it has taken action and other observers
198 * should not notified.
199 *
200 * <p>This method could be called frequently if there is a severe problem on the device.
201 */
202 public void onPackageFailure(String[] packages) {
203 synchronized (mLock) {
204 if (mRegisteredObservers.isEmpty()) {
205 return;
206 }
207 for (String packageName : packages) {
208 for (ObserverInternal observer : mAllObservers.values()) {
209 if (observer.onPackageFailure(packageName)) {
210 PackageHealthObserver activeObserver =
211 mRegisteredObservers.get(observer.mName);
212 if (activeObserver != null
213 && activeObserver.onHealthCheckFailed(packageName)) {
214 // Observer has handled, do not notify other observers
215 break;
216 }
217 }
218 }
219 }
220 }
221 }
222
223 // TODO(zezeozue): Optimize write? Maybe only write a separate smaller file?
224 // This currently adds about 7ms extra to shutdown thread
225 /** Writes the package information to file during shutdown. */
226 public void writeNow() {
227 if (!mAllObservers.isEmpty()) {
228 mIoHandler.removeMessages(MESSAGE_SAVE_FILE);
229 pruneObservers(SystemClock.uptimeMillis() - mUptimeAtLastRescheduleMs);
230 saveToFile();
231 Slog.i(TAG, "Last write to update package durations");
232 }
233 }
234
235 /** Register instances of this interface to receive notifications on package failure. */
236 public interface PackageHealthObserver {
237 /**
238 * Called when health check fails for the {@code packages}.
239 * @return {@code true} if action was taken and other observers should not be notified of
240 * this failure, {@code false} otherwise.
241 */
242 boolean onHealthCheckFailed(String packageName);
243
244 // TODO(zezeozue): Ensure uniqueness?
245 /**
246 * Identifier for the observer, should not change across device updates otherwise the
247 * watchdog may drop observing packages with the old name.
248 */
249 String getName();
250 }
251
252 /** Reschedules handler to prune expired packages from observers. */
253 private void rescheduleCleanup() {
254 synchronized (mLock) {
255 long nextDurationToScheduleMs = getEarliestPackageExpiryLocked();
256 if (nextDurationToScheduleMs == Long.MAX_VALUE) {
257 Slog.i(TAG, "No monitored packages, ending package cleanup");
258 mDurationAtLastReschedule = 0;
259 mUptimeAtLastRescheduleMs = 0;
260 return;
261 }
262 long uptimeMs = SystemClock.uptimeMillis();
263 // O if mPackageCleanup not running
264 long elapsedDurationMs = mUptimeAtLastRescheduleMs == 0
265 ? 0 : uptimeMs - mUptimeAtLastRescheduleMs;
266 // O if mPackageCleanup not running
267 long remainingDurationMs = mDurationAtLastReschedule - elapsedDurationMs;
268
269 if (mUptimeAtLastRescheduleMs == 0 || nextDurationToScheduleMs < remainingDurationMs) {
270 // First schedule or an earlier reschedule
271 pruneObservers(elapsedDurationMs);
272 mTimerHandler.removeCallbacks(mPackageCleanup);
273 mTimerHandler.postDelayed(mPackageCleanup, nextDurationToScheduleMs);
274 mDurationAtLastReschedule = nextDurationToScheduleMs;
275 mUptimeAtLastRescheduleMs = uptimeMs;
276 }
277 }
278 }
279
280 /**
281 * Returns the earliest time a package should expire.
282 * @returns Long#MAX_VALUE if there are no observed packages.
283 */
284 private long getEarliestPackageExpiryLocked() {
285 long shortestDurationMs = Long.MAX_VALUE;
286 for (ObserverInternal observer : mAllObservers.values()) {
287 for (MonitoredPackage p : observer.mPackages.values()) {
288 if (p.mDurationMs < shortestDurationMs) {
289 shortestDurationMs = p.mDurationMs;
290 }
291 }
292 }
293 Slog.v(TAG, "Earliest package time is " + shortestDurationMs);
294 return shortestDurationMs;
295 }
296
297 /**
298 * Removes {@code elapsedMs} milliseconds from all durations on monitored packages.
299 * Discards expired packages and discards observers without any packages.
300 */
301 private void pruneObservers(long elapsedMs) {
302 if (elapsedMs == 0) {
303 return;
304 }
305 synchronized (mLock) {
306 Slog.d(TAG, "Removing expired packages after " + elapsedMs + "ms");
307 Iterator<ObserverInternal> it = mAllObservers.values().iterator();
308 while (it.hasNext()) {
309 ObserverInternal observer = it.next();
310 if (!observer.updateMonitoringDurations(elapsedMs)) {
311 Slog.i(TAG, "Discarding observer " + observer.mName + ". All packages expired");
312 it.remove();
313 }
314 }
315 }
316 sendIoMessage(MESSAGE_SAVE_FILE);
317 }
318
319 /**
320 * Loads mAllObservers from file.
321 *
322 * <p>Note that this is <b>not</b> thread safe and should only called be called
323 * from the constructor.
324 */
325 private void loadFromFile() {
326 InputStream infile = null;
327 mAllObservers.clear();
328 try {
329 infile = mPolicyFile.openRead();
330 final XmlPullParser parser = Xml.newPullParser();
331 parser.setInput(infile, StandardCharsets.UTF_8.name());
332 XmlUtils.beginDocument(parser, TAG_PACKAGE_WATCHDOG);
333 int outerDepth = parser.getDepth();
334 while (XmlUtils.nextElementWithin(parser, outerDepth)) {
335 ObserverInternal observer = ObserverInternal.read(parser);
336 if (observer != null) {
337 mAllObservers.put(observer.mName, observer);
338 }
339 }
340 } catch (FileNotFoundException e) {
341 // Nothing to monitor
342 } catch (IOException e) {
343 Log.wtf(TAG, "Unable to read monitored packages", e);
344 } catch (NumberFormatException e) {
345 Log.wtf(TAG, "Unable to parse monitored package windows", e);
346 } catch (XmlPullParserException e) {
347 Log.wtf(TAG, "Unable to parse monitored packages", e);
348 } finally {
349 IoUtils.closeQuietly(infile);
350 }
351 }
352
353 /**
354 * Persists mAllObservers to file and ignores threshold information.
355 *
356 * <p>Note that this is <b>not</b> thread safe and should only be called on the
357 * single threaded IoHandler.
358 */
359 private boolean saveToFile() {
360 FileOutputStream stream;
361 try {
362 stream = mPolicyFile.startWrite();
363 } catch (IOException e) {
364 Slog.w(TAG, "Cannot update monitored packages", e);
365 return false;
366 }
367
368 try {
369 XmlSerializer out = new FastXmlSerializer();
370 out.setOutput(stream, StandardCharsets.UTF_8.name());
371 out.startDocument(null, true);
372 out.startTag(null, TAG_PACKAGE_WATCHDOG);
373 out.attribute(null, ATTR_VERSION, Integer.toString(DB_VERSION));
374 for (ObserverInternal observer : mAllObservers.values()) {
375 observer.write(out);
376 }
377 out.endTag(null, TAG_PACKAGE_WATCHDOG);
378 out.endDocument();
379 mPolicyFile.finishWrite(stream);
380 return true;
381 } catch (IOException e) {
382 Slog.w(TAG, "Failed to save monitored packages, restoring backup", e);
383 mPolicyFile.failWrite(stream);
384 return false;
385 } finally {
386 IoUtils.closeQuietly(stream);
387 }
388 }
389
390 private void sendIoMessage(int what) {
391 if (!mIoHandler.hasMessages(what)) {
392 Message m = Message.obtain(mIoHandler, what);
393 mIoHandler.sendMessage(m);
394 }
395 }
396
397 /**
398 * Represents an observer monitoring a set of packages along with the failure thresholds for
399 * each package.
400 */
401 static class ObserverInternal {
402 public final String mName;
403 public final ArrayMap<String, MonitoredPackage> mPackages;
404
405 ObserverInternal(String name, List<MonitoredPackage> packages) {
406 mName = name;
407 mPackages = new ArrayMap<>();
408 updatePackages(packages);
409 }
410
411 /**
412 * Writes important details to file. Doesn't persist any package failure thresholds.
413 *
414 * <p>Note that this method is <b>not</b> thread safe. It should only be called from
415 * #saveToFile which runs on a single threaded handler.
416 */
417 public boolean write(XmlSerializer out) {
418 try {
419 out.startTag(null, TAG_OBSERVER);
420 out.attribute(null, ATTR_NAME, mName);
421 for (int i = 0; i < mPackages.size(); i++) {
422 MonitoredPackage p = mPackages.valueAt(i);
423 out.startTag(null, TAG_PACKAGE);
424 out.attribute(null, ATTR_NAME, p.mName);
425 out.attribute(null, ATTR_DURATION, String.valueOf(p.mDurationMs));
426 out.endTag(null, TAG_PACKAGE);
427 }
428 out.endTag(null, TAG_OBSERVER);
429 return true;
430 } catch (IOException e) {
431 Slog.w(TAG, "Cannot save observer", e);
432 return false;
433 }
434 }
435
436 public void updatePackages(List<MonitoredPackage> packages) {
437 synchronized (mName) {
438 for (MonitoredPackage p : packages) {
439 mPackages.put(p.mName, p);
440 }
441 }
442 }
443
444 /**
445 * Reduces the monitoring durations of all packages observed by this observer by
446 * {@code elapsedMs}. If any duration is less than 0, the package is removed from
447 * observation.
448 *
449 * @returns {@code true} if there are still packages to be observed, {@code false} otherwise
450 */
451 public boolean updateMonitoringDurations(long elapsedMs) {
452 List<MonitoredPackage> packages = new ArrayList<>();
453 synchronized (mName) {
454 Iterator<MonitoredPackage> it = mPackages.values().iterator();
455 while (it.hasNext()) {
456 MonitoredPackage p = it.next();
457 long newDuration = p.mDurationMs - elapsedMs;
458 if (newDuration > 0) {
459 p.mDurationMs = newDuration;
460 } else {
461 it.remove();
462 }
463 }
464 return !mPackages.isEmpty();
465 }
466 }
467
468 /**
469 * Increments failure counts of {@code packageName}.
470 * @returns {@code true} if failure threshold is exceeded, {@code false} otherwise
471 */
472 public boolean onPackageFailure(String packageName) {
473 synchronized (mName) {
474 MonitoredPackage p = mPackages.get(packageName);
475 if (p != null) {
476 return p.onFailure();
477 }
478 return false;
479 }
480 }
481
482 /**
483 * Returns one ObserverInternal from the {@code parser} and advances its state.
484 *
485 * <p>Note that this method is <b>not</b> thread safe. It should only be called from
486 * #loadFromFile which in turn is only called on construction of the
487 * singleton PackageWatchdog.
488 **/
489 public static ObserverInternal read(XmlPullParser parser) {
490 String observerName = null;
491 if (TAG_OBSERVER.equals(parser.getName())) {
492 observerName = parser.getAttributeValue(null, ATTR_NAME);
493 if (TextUtils.isEmpty(observerName)) {
494 return null;
495 }
496 }
497 List<MonitoredPackage> packages = new ArrayList<>();
498 int innerDepth = parser.getDepth();
499 try {
500 while (XmlUtils.nextElementWithin(parser, innerDepth)) {
501 if (TAG_PACKAGE.equals(parser.getName())) {
502 String packageName = parser.getAttributeValue(null, ATTR_NAME);
503 long duration = Long.parseLong(
504 parser.getAttributeValue(null, ATTR_DURATION));
505 if (!TextUtils.isEmpty(packageName)) {
506 packages.add(new MonitoredPackage(packageName, duration));
507 }
508 }
509 }
510 } catch (IOException e) {
511 return null;
512 } catch (XmlPullParserException e) {
513 return null;
514 }
515 if (packages.isEmpty()) {
516 return null;
517 }
518 return new ObserverInternal(observerName, packages);
519 }
520 }
521
522 /** Represents a package along with the time it should be monitored for. */
523 static class MonitoredPackage {
524 public final String mName;
525 // System uptime duration to monitor package
526 public long mDurationMs;
527 // System uptime of first package failure
528 private long mUptimeStartMs;
529 // Number of failures since mUptimeStartMs
530 private int mFailures;
531
532 MonitoredPackage(String name, long durationMs) {
533 mName = name;
534 mDurationMs = durationMs;
535 }
536
537 /**
538 * Increment package failures or resets failure count depending on the last package failure.
539 *
540 * @return {@code true} if failure count exceeds a threshold, {@code false} otherwise
541 */
542 public synchronized boolean onFailure() {
543 final long now = SystemClock.uptimeMillis();
544 final long duration = now - mUptimeStartMs;
545 if (duration > TRIGGER_DURATION_MS) {
546 // TODO(zezeozue): Reseting to 1 is not correct
547 // because there may be more than 1 failure in the last trigger window from now
548 // This is the RescueParty impl, will leave for now
549 mFailures = 1;
550 mUptimeStartMs = now;
551 } else {
552 mFailures++;
553 }
554 return mFailures >= TRIGGER_FAILURE_COUNT;
555 }
556 }
557
558 private class IoHandler extends Handler {
559 IoHandler(Looper looper) {
560 super(looper);
561 }
562
563 @Override
564 public void handleMessage(Message msg) {
565 switch (msg.what) {
566 case MESSAGE_SAVE_FILE:
567 saveToFile();
568 break;
569 }
570 }
571 }
572}