blob: 454941ccdb03f427e1dc4620e9bd7859de0f4c5b [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -070019import android.app.IActivityController;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080020import android.content.BroadcastReceiver;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080021import android.content.Context;
22import android.content.Intent;
23import android.content.IntentFilter;
Steven Moreland6b47c542017-03-21 12:52:16 -070024import android.hidl.manager.V1_0.IServiceManager;
Makoto Onuki99029542018-08-27 17:23:09 -070025import android.os.Binder;
26import android.os.Build;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080027import android.os.Debug;
28import android.os.Handler;
Jeff Brown6f357d32014-01-15 20:40:55 -080029import android.os.IPowerManager;
John Michelau11641522013-03-18 18:28:23 -050030import android.os.Looper;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080031import android.os.Process;
Makoto Onuki99029542018-08-27 17:23:09 -070032import android.os.RemoteException;
Suchi Amalapurapu6ffce2e2010-03-08 14:48:40 -080033import android.os.ServiceManager;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080034import android.os.SystemClock;
Makoto Onuki99029542018-08-27 17:23:09 -070035import android.system.ErrnoException;
36import android.system.Os;
37import android.system.OsConstants;
38import android.system.StructRlimit;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080039import android.util.EventLog;
Dan Egnor9bdc94b2010-03-04 14:20:31 -080040import android.util.Log;
Joe Onorato8a9b2202010-02-26 18:56:32 -080041import android.util.Slog;
jianjin8f2dee02019-03-29 14:03:08 -070042import android.util.StatsLog;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080043
Makoto Onuki99029542018-08-27 17:23:09 -070044import com.android.internal.os.ZygoteConnectionConstants;
45import com.android.server.am.ActivityManagerService;
Wale Ogunwale1f5e53d2018-11-05 05:12:46 -080046import com.android.server.wm.SurfaceAnimationThread;
Makoto Onuki99029542018-08-27 17:23:09 -070047
Dan Egnor9bdc94b2010-03-04 14:20:31 -080048import java.io.File;
Colin Cross5df1d872012-11-29 11:42:11 -080049import java.io.FileWriter;
50import java.io.IOException;
Nandana Dutt6647ef52018-07-12 17:02:57 +010051import java.nio.charset.StandardCharsets;
52import java.nio.file.Files;
53import java.nio.file.Path;
54import java.nio.file.Paths;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080055import java.util.ArrayList;
Steven Moreland6b47c542017-03-21 12:52:16 -070056import java.util.Arrays;
Narayan Kamatha0a28082017-07-31 15:58:59 +010057import java.util.Collections;
Steven Moreland6b47c542017-03-21 12:52:16 -070058import java.util.HashSet;
59import java.util.List;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080060
/** This class checks its monitors on a fixed interval, killing this process if they don't return in time. */
62public class Watchdog extends Thread {
63 static final String TAG = "Watchdog";
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080064
Makoto Onukie276b442018-08-30 09:38:44 -070065 /** Debug flag. */
Makoto Onuki729c41e2019-05-30 09:47:10 -070066 public static final boolean DEBUG = false;
Makoto Onuki99029542018-08-27 17:23:09 -070067
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080068 // Set this to true to use debug default values.
69 static final boolean DB = false;
70
Andreas Gampe032a9292017-07-21 11:41:00 -070071 // Note 1: Do not lower this value below thirty seconds without tightening the invoke-with
72 // timeout in com.android.internal.os.ZygoteConnection, or wrapped applications
73 // can trigger the watchdog.
74 // Note 2: The debug value is already below the wait time in ZygoteConnection. Wrapped
75 // applications may not work with a debug build. CTS will fail.
Christopher Tatee6f81cf2013-10-23 17:28:27 -070076 static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
77 static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;
78
79 // These are temporally ordered: larger values as lateness increases
80 static final int COMPLETED = 0;
81 static final int WAITING = 1;
82 static final int WAITED_HALF = 2;
83 static final int OVERDUE = 3;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080084
Igor Murashkin44d04aa2013-10-23 10:56:02 -070085 // Which native processes to dump into dropbox's stack traces
86 public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
Andy Hung3a64ecb2016-03-09 13:55:58 -080087 "/system/bin/audioserver",
Andy Hung689574a2016-04-13 19:27:43 -070088 "/system/bin/cameraserver",
89 "/system/bin/drmserver",
90 "/system/bin/mediadrmserver",
Dianne Hackbornf72467a2012-06-08 17:23:59 -070091 "/system/bin/mediaserver",
Mike Yuc9855952019-11-19 14:36:02 +080092 "/system/bin/netd",
Dianne Hackbornf72467a2012-06-08 17:23:59 -070093 "/system/bin/sdcard",
Eric Laurent05d4e352016-03-14 18:49:08 -070094 "/system/bin/surfaceflinger",
Jeff Sharkey3e40afb2019-01-02 09:21:24 -070095 "/system/bin/vold",
Andy Hung689574a2016-04-13 19:27:43 -070096 "media.extractor", // system/bin/mediaextractor
Andy Hungb1c4c932018-01-04 12:05:43 -080097 "media.metrics", // system/bin/mediametrics
Chong Zhange55e6e02017-06-02 10:52:04 -070098 "media.codec", // vendor/bin/hw/android.hardware.media.omx@1.0-service
Chong Zhangf1f76322019-03-27 16:41:51 -070099 "media.swcodec", // /apex/com.android.media.swcodec/bin/mediaswcodec
Andreas Gampecf9e79b2016-05-11 18:41:25 -0700100 "com.android.bluetooth", // Bluetooth service
Rafal Slawike35d8f82018-10-01 11:39:58 +0100101 "/system/bin/statsd", // Stats daemon
Dianne Hackbornf72467a2012-06-08 17:23:59 -0700102 };
103
Steven Moreland6b47c542017-03-21 12:52:16 -0700104 public static final List<String> HAL_INTERFACES_OF_INTEREST = Arrays.asList(
Hui Yu7e65c3f2019-04-16 17:03:16 -0700105 "android.hardware.audio@2.0::IDevicesFactory",
106 "android.hardware.audio@4.0::IDevicesFactory",
Mikhail Naganov708ea282019-07-10 10:16:32 -0700107 "android.hardware.audio@5.0::IDevicesFactory",
Kevin Rocarddf6e4d52019-09-30 19:54:55 +0100108 "android.hardware.audio@6.0::IDevicesFactory",
Kevin Chyn7733b752019-06-19 13:48:18 -0700109 "android.hardware.biometrics.face@1.0::IBiometricsFace",
Hui Yu7e65c3f2019-04-16 17:03:16 -0700110 "android.hardware.bluetooth@1.0::IBluetoothHci",
111 "android.hardware.camera.provider@2.4::ICameraProvider",
112 "android.hardware.graphics.allocator@2.0::IAllocator",
113 "android.hardware.graphics.composer@2.1::IComposer",
114 "android.hardware.health@2.0::IHealth",
Chong Zhangf1f76322019-03-27 16:41:51 -0700115 "android.hardware.media.c2@1.0::IComponentStore",
Hui Yu7e65c3f2019-04-16 17:03:16 -0700116 "android.hardware.media.omx@1.0::IOmx",
117 "android.hardware.media.omx@1.0::IOmxStore",
Benjamin Schwartzf0a81f62019-06-18 17:58:39 -0700118 "android.hardware.power.stats@1.0::IPowerStats",
Hui Yu7e65c3f2019-04-16 17:03:16 -0700119 "android.hardware.sensors@1.0::ISensors",
Kalesh Singh5fe631d2019-07-02 13:51:53 -0700120 "android.hardware.vr@1.0::IVr",
121 "android.system.suspend@1.0::ISystemSuspend"
Steven Moreland6b47c542017-03-21 12:52:16 -0700122 );
123
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800124 static Watchdog sWatchdog;
125
/* One HandlerChecker per watched handler thread; mMonitorChecker is the one that also runs the registered monitors. */
Wale Ogunwaled7fdd022015-04-13 16:22:38 -0700127 final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700128 final HandlerChecker mMonitorChecker;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800129 ActivityManagerService mActivity;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800130
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800131 int mPhonePid;
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700132 IActivityController mController;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700133 boolean mAllowRestart = true;
Narayan Kamatha0a28082017-07-31 15:58:59 +0100134 final OpenFdMonitor mOpenFdMonitor;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800135
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800136 /**
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700137 * Used for checking status of handle threads and scheduling monitor callbacks.
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800138 */
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700139 public final class HandlerChecker implements Runnable {
140 private final Handler mHandler;
141 private final String mName;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700142 private final long mWaitMax;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700143 private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
Zimuzob065b152019-04-16 13:33:01 +0100144 private final ArrayList<Monitor> mMonitorQueue = new ArrayList<Monitor>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700145 private boolean mCompleted;
146 private Monitor mCurrentMonitor;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700147 private long mStartTime;
Zimuzo2a050392019-05-09 12:51:33 +0100148 private int mPauseCount;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700149
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700150 HandlerChecker(Handler handler, String name, long waitMaxMillis) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700151 mHandler = handler;
152 mName = name;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700153 mWaitMax = waitMaxMillis;
154 mCompleted = true;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700155 }
156
Makoto Onukie276b442018-08-30 09:38:44 -0700157 void addMonitorLocked(Monitor monitor) {
Zimuzob065b152019-04-16 13:33:01 +0100158 // We don't want to update mMonitors when the Handler is in the middle of checking
159 // all monitors. We will update mMonitors on the next schedule if it is safe
160 mMonitorQueue.add(monitor);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700161 }
162
163 public void scheduleCheckLocked() {
Zimuzob065b152019-04-16 13:33:01 +0100164 if (mCompleted) {
165 // Safe to update monitors in queue, Handler is not in the middle of work
166 mMonitors.addAll(mMonitorQueue);
167 mMonitorQueue.clear();
168 }
Zimuzo2a050392019-05-09 12:51:33 +0100169 if ((mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling())
170 || (mPauseCount > 0)) {
171 // Don't schedule until after resume OR
Jeff Brown6c7b41a2015-02-26 14:43:53 -0800172 // If the target looper has recently been polling, then
Dianne Hackbornefa92b22013-05-03 14:11:43 -0700173 // there is no reason to enqueue our checker on it since that
174 // is as good as it not being deadlocked. This avoid having
Zimuzo2a050392019-05-09 12:51:33 +0100175 // to do a context switch to check the thread. Note that we
176 // only do this if we have no monitors since those would need to
177 // be executed at this point.
Dianne Hackbornefa92b22013-05-03 14:11:43 -0700178 mCompleted = true;
179 return;
180 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700181 if (!mCompleted) {
182 // we already have a check in flight, so no need
183 return;
184 }
185
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700186 mCompleted = false;
187 mCurrentMonitor = null;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700188 mStartTime = SystemClock.uptimeMillis();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700189 mHandler.postAtFrontOfQueue(this);
190 }
191
Makoto Onukie276b442018-08-30 09:38:44 -0700192 boolean isOverdueLocked() {
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700193 return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
194 }
195
196 public int getCompletionStateLocked() {
197 if (mCompleted) {
198 return COMPLETED;
199 } else {
200 long latency = SystemClock.uptimeMillis() - mStartTime;
201 if (latency < mWaitMax/2) {
202 return WAITING;
203 } else if (latency < mWaitMax) {
204 return WAITED_HALF;
205 }
206 }
207 return OVERDUE;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700208 }
209
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700210 public Thread getThread() {
211 return mHandler.getLooper().getThread();
212 }
213
214 public String getName() {
215 return mName;
216 }
217
Makoto Onukie276b442018-08-30 09:38:44 -0700218 String describeBlockedStateLocked() {
Jeff Brown7dd2d192013-09-06 15:05:23 -0700219 if (mCurrentMonitor == null) {
220 return "Blocked in handler on " + mName + " (" + getThread().getName() + ")";
221 } else {
222 return "Blocked in monitor " + mCurrentMonitor.getClass().getName()
223 + " on " + mName + " (" + getThread().getName() + ")";
224 }
John Michelau11641522013-03-18 18:28:23 -0500225 }
226
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800227 @Override
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700228 public void run() {
Zimuzob065b152019-04-16 13:33:01 +0100229 // Once we get here, we ensure that mMonitors does not change even if we call
230 // #addMonitorLocked because we first add the new monitors to mMonitorQueue and
231 // move them to mMonitors on the next schedule when mCompleted is true, at which
232 // point we have completed execution of this method.
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700233 final int size = mMonitors.size();
234 for (int i = 0 ; i < size ; i++) {
235 synchronized (Watchdog.this) {
236 mCurrentMonitor = mMonitors.get(i);
237 }
238 mCurrentMonitor.monitor();
239 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800240
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700241 synchronized (Watchdog.this) {
242 mCompleted = true;
243 mCurrentMonitor = null;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800244 }
245 }
Zimuzo2a050392019-05-09 12:51:33 +0100246
247 /** Pause the HandlerChecker. */
248 public void pauseLocked(String reason) {
249 mPauseCount++;
250 // Mark as completed, because there's a chance we called this after the watchog
251 // thread loop called Object#wait after 'WAITED_HALF'. In that case we want to ensure
252 // the next call to #getCompletionStateLocked for this checker returns 'COMPLETED'
253 mCompleted = true;
254 Slog.i(TAG, "Pausing HandlerChecker: " + mName + " for reason: "
255 + reason + ". Pause count: " + mPauseCount);
256 }
257
258 /** Resume the HandlerChecker from the last {@link #pauseLocked}. */
259 public void resumeLocked(String reason) {
260 if (mPauseCount > 0) {
261 mPauseCount--;
262 Slog.i(TAG, "Resuming HandlerChecker: " + mName + " for reason: "
263 + reason + ". Pause count: " + mPauseCount);
264 } else {
265 Slog.wtf(TAG, "Already resumed HandlerChecker: " + mName);
266 }
267 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800268 }
269
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800270 final class RebootRequestReceiver extends BroadcastReceiver {
271 @Override
272 public void onReceive(Context c, Intent intent) {
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700273 if (intent.getIntExtra("nowait", 0) != 0) {
274 rebootSystem("Received ACTION_REBOOT broadcast");
275 return;
276 }
277 Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800278 }
279 }
280
Wale Ogunwale517daec2015-04-15 10:27:24 -0700281 /** Monitor for checking the availability of binder threads. The monitor will block until
282 * there is a binder thread available to process in coming IPCs to make sure other processes
283 * can still communicate with the service.
284 */
285 private static final class BinderThreadMonitor implements Watchdog.Monitor {
286 @Override
287 public void monitor() {
288 Binder.blockUntilThreadAvailable();
289 }
290 }
291
/**
 * Callback registered through {@code addMonitor}; the watchdog's monitor checker invokes it
 * on its handler thread as part of each check.
 */
public interface Monitor {
    /** Runs the check; a call that does not return keeps the owning checker from completing. */
    void monitor();
}
295
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800296 public static Watchdog getInstance() {
297 if (sWatchdog == null) {
298 sWatchdog = new Watchdog();
299 }
300
301 return sWatchdog;
302 }
303
304 private Watchdog() {
305 super("watchdog");
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700306 // Initialize handler checkers for each common thread we want to check. Note
307 // that we are not currently checking the background thread, since it can
308 // potentially hold longer running operations with no guarantees about the timeliness
309 // of operations there.
310
311 // The shared foreground thread is the main checker. It is where we
312 // will also dispatch monitor checks and do other work.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700313 mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
314 "foreground thread", DEFAULT_TIMEOUT);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700315 mHandlerCheckers.add(mMonitorChecker);
316 // Add checker for main thread. We only do a quick check since there
317 // can be UI running on the thread.
318 mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700319 "main thread", DEFAULT_TIMEOUT));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700320 // Add checker for shared UI thread.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700321 mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
322 "ui thread", DEFAULT_TIMEOUT));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700323 // And also check IO thread.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700324 mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
325 "i/o thread", DEFAULT_TIMEOUT));
Jeff Brown4ccb8232014-01-16 22:16:42 -0800326 // And the display thread.
327 mHandlerCheckers.add(new HandlerChecker(DisplayThread.getHandler(),
328 "display thread", DEFAULT_TIMEOUT));
Wale Ogunwale1f5e53d2018-11-05 05:12:46 -0800329 // And the animation thread.
330 mHandlerCheckers.add(new HandlerChecker(AnimationThread.getHandler(),
331 "animation thread", DEFAULT_TIMEOUT));
332 // And the surface animation thread.
333 mHandlerCheckers.add(new HandlerChecker(SurfaceAnimationThread.getHandler(),
334 "surface animation thread", DEFAULT_TIMEOUT));
Wale Ogunwale517daec2015-04-15 10:27:24 -0700335
336 // Initialize monitor for Binder threads.
337 addMonitor(new BinderThreadMonitor());
Andreas Gampe032a9292017-07-21 11:41:00 -0700338
Narayan Kamatha0a28082017-07-31 15:58:59 +0100339 mOpenFdMonitor = OpenFdMonitor.create();
340
Andreas Gampe032a9292017-07-21 11:41:00 -0700341 // See the notes on DEFAULT_TIMEOUT.
342 assert DB ||
343 DEFAULT_TIMEOUT > ZygoteConnectionConstants.WRAPPED_PID_TIMEOUT_MILLIS;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800344 }
345
Zimuzob065b152019-04-16 13:33:01 +0100346 /**
347 * Registers a {@link BroadcastReceiver} to listen to reboot broadcasts and trigger reboot.
348 * Should be called during boot after the ActivityManagerService is up and registered
349 * as a system service so it can handle registration of a {@link BroadcastReceiver}.
350 */
Adam Lesinski182f73f2013-12-05 16:48:06 -0800351 public void init(Context context, ActivityManagerService activity) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800352 mActivity = activity;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800353 context.registerReceiver(new RebootRequestReceiver(),
354 new IntentFilter(Intent.ACTION_REBOOT),
355 android.Manifest.permission.REBOOT, null);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800356 }
357
Christopher Tatec27181c2010-06-30 14:41:09 -0700358 public void processStarted(String name, int pid) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800359 synchronized (this) {
360 if ("com.android.phone".equals(name)) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800361 mPhonePid = pid;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800362 }
363 }
364 }
365
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700366 public void setActivityController(IActivityController controller) {
367 synchronized (this) {
368 mController = controller;
369 }
370 }
371
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700372 public void setAllowRestart(boolean allowRestart) {
373 synchronized (this) {
374 mAllowRestart = allowRestart;
375 }
376 }
377
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800378 public void addMonitor(Monitor monitor) {
379 synchronized (this) {
Makoto Onukie276b442018-08-30 09:38:44 -0700380 mMonitorChecker.addMonitorLocked(monitor);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700381 }
382 }
383
Jeff Brown6f357d32014-01-15 20:40:55 -0800384 public void addThread(Handler thread) {
385 addThread(thread, DEFAULT_TIMEOUT);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700386 }
387
Jeff Brown6f357d32014-01-15 20:40:55 -0800388 public void addThread(Handler thread, long timeoutMillis) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700389 synchronized (this) {
Jeff Brown6f357d32014-01-15 20:40:55 -0800390 final String name = thread.getLooper().getThread().getName();
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700391 mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800392 }
393 }
394
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800395 /**
Zimuzo2a050392019-05-09 12:51:33 +0100396 * Pauses Watchdog action for the currently running thread. Useful before executing long running
397 * operations that could falsely trigger the watchdog. Each call to this will require a matching
398 * call to {@link #resumeWatchingCurrentThread}.
399 *
400 * <p>If the current thread has not been added to the Watchdog, this call is a no-op.
401 *
402 * <p>If the Watchdog is already paused for the current thread, this call adds
403 * adds another pause and will require an additional {@link #resumeCurrentThread} to resume.
404 *
405 * <p>Note: Use with care, as any deadlocks on the current thread will be undetected until all
406 * pauses have been resumed.
407 */
408 public void pauseWatchingCurrentThread(String reason) {
409 synchronized (this) {
410 for (HandlerChecker hc : mHandlerCheckers) {
411 if (Thread.currentThread().equals(hc.getThread())) {
412 hc.pauseLocked(reason);
413 }
414 }
415 }
416 }
417
418 /**
419 * Resumes the last pause from {@link #pauseWatchingCurrentThread} for the currently running
420 * thread.
421 *
422 * <p>If the current thread has not been added to the Watchdog, this call is a no-op.
423 *
424 * <p>If the Watchdog action for the current thread is already resumed, this call logs a wtf.
425 *
426 * <p>If all pauses have been resumed, the Watchdog action is finally resumed, otherwise,
427 * the Watchdog action for the current thread remains paused until resume is called at least
428 * as many times as the calls to pause.
429 */
430 public void resumeWatchingCurrentThread(String reason) {
431 synchronized (this) {
432 for (HandlerChecker hc : mHandlerCheckers) {
433 if (Thread.currentThread().equals(hc.getThread())) {
434 hc.resumeLocked(reason);
435 }
436 }
437 }
438 }
439
440 /**
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800441 * Perform a full reboot of the system.
442 */
443 void rebootSystem(String reason) {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800444 Slog.i(TAG, "Rebooting system because: " + reason);
Jeff Brown6f357d32014-01-15 20:40:55 -0800445 IPowerManager pms = (IPowerManager)ServiceManager.getService(Context.POWER_SERVICE);
446 try {
447 pms.reboot(false, reason, false);
448 } catch (RemoteException ex) {
449 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800450 }
451
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700452 private int evaluateCheckerCompletionLocked() {
453 int state = COMPLETED;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700454 for (int i=0; i<mHandlerCheckers.size(); i++) {
455 HandlerChecker hc = mHandlerCheckers.get(i);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700456 state = Math.max(state, hc.getCompletionStateLocked());
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700457 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700458 return state;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700459 }
460
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700461 private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
462 ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700463 for (int i=0; i<mHandlerCheckers.size(); i++) {
464 HandlerChecker hc = mHandlerCheckers.get(i);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700465 if (hc.isOverdueLocked()) {
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700466 checkers.add(hc);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700467 }
468 }
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700469 return checkers;
470 }
471
Narayan Kamatha0a28082017-07-31 15:58:59 +0100472 private String describeCheckersLocked(List<HandlerChecker> checkers) {
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700473 StringBuilder builder = new StringBuilder(128);
474 for (int i=0; i<checkers.size(); i++) {
475 if (builder.length() > 0) {
476 builder.append(", ");
477 }
478 builder.append(checkers.get(i).describeBlockedStateLocked());
479 }
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700480 return builder.toString();
481 }
482
Hui Yu7e65c3f2019-04-16 17:03:16 -0700483 private static ArrayList<Integer> getInterestingHalPids() {
Steven Moreland6b47c542017-03-21 12:52:16 -0700484 try {
485 IServiceManager serviceManager = IServiceManager.getService();
486 ArrayList<IServiceManager.InstanceDebugInfo> dump =
487 serviceManager.debugDump();
488 HashSet<Integer> pids = new HashSet<>();
489 for (IServiceManager.InstanceDebugInfo info : dump) {
490 if (info.pid == IServiceManager.PidConstant.NO_PID) {
491 continue;
492 }
493
494 if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) {
495 continue;
496 }
497
498 pids.add(info.pid);
499 }
500 return new ArrayList<Integer>(pids);
501 } catch (RemoteException e) {
502 return new ArrayList<Integer>();
503 }
504 }
505
Hui Yu7e65c3f2019-04-16 17:03:16 -0700506 static ArrayList<Integer> getInterestingNativePids() {
Steven Moreland6b47c542017-03-21 12:52:16 -0700507 ArrayList<Integer> pids = getInterestingHalPids();
508
509 int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST);
510 if (nativePids != null) {
511 pids.ensureCapacity(pids.size() + nativePids.length);
512 for (int i : nativePids) {
513 pids.add(i);
514 }
515 }
516
517 return pids;
518 }
519
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800520 @Override
521 public void run() {
Christopher Tate6ee412d2010-05-28 12:01:56 -0700522 boolean waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800523 while (true) {
Narayan Kamatha0a28082017-07-31 15:58:59 +0100524 final List<HandlerChecker> blockedCheckers;
Jeff Brown7dd2d192013-09-06 15:05:23 -0700525 final String subject;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700526 final boolean allowRestart;
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700527 int debuggerWasConnected = 0;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800528 synchronized (this) {
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700529 long timeout = CHECK_INTERVAL;
530 // Make sure we (re)spin the checkers that have become idle within
531 // this wait-and-check interval
532 for (int i=0; i<mHandlerCheckers.size(); i++) {
533 HandlerChecker hc = mHandlerCheckers.get(i);
534 hc.scheduleCheckLocked();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700535 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800536
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700537 if (debuggerWasConnected > 0) {
538 debuggerWasConnected--;
539 }
540
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800541 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
542 // wait while asleep. If the device is asleep then the thing that we are waiting
Christopher Tate6ee412d2010-05-28 12:01:56 -0700543 // to timeout on is asleep as well and won't have a chance to run, causing a false
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800544 // positive on when to kill things.
545 long start = SystemClock.uptimeMillis();
Michael Wright8fa56f62013-04-01 16:36:05 -0700546 while (timeout > 0) {
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700547 if (Debug.isDebuggerConnected()) {
548 debuggerWasConnected = 2;
549 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800550 try {
Michael Wright8fa56f62013-04-01 16:36:05 -0700551 wait(timeout);
Zimuzob065b152019-04-16 13:33:01 +0100552 // Note: mHandlerCheckers and mMonitorChecker may have changed after waiting
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800553 } catch (InterruptedException e) {
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800554 Log.wtf(TAG, e);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800555 }
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700556 if (Debug.isDebuggerConnected()) {
557 debuggerWasConnected = 2;
558 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700559 timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800560 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800561
Narayan Kamatha0a28082017-07-31 15:58:59 +0100562 boolean fdLimitTriggered = false;
563 if (mOpenFdMonitor != null) {
564 fdLimitTriggered = mOpenFdMonitor.monitor();
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800565 }
Michael Wright8fa56f62013-04-01 16:36:05 -0700566
Narayan Kamatha0a28082017-07-31 15:58:59 +0100567 if (!fdLimitTriggered) {
568 final int waitState = evaluateCheckerCompletionLocked();
569 if (waitState == COMPLETED) {
570 // The monitors have returned; reset
571 waitedHalf = false;
572 continue;
573 } else if (waitState == WAITING) {
574 // still waiting but within their configured intervals; back off and recheck
575 continue;
576 } else if (waitState == WAITED_HALF) {
577 if (!waitedHalf) {
Makoto Onuki729c41e2019-05-30 09:47:10 -0700578 Slog.i(TAG, "WAITED_HALF");
Narayan Kamatha0a28082017-07-31 15:58:59 +0100579 // We've waited half the deadlock-detection interval. Pull a stack
580 // trace and wait another half.
581 ArrayList<Integer> pids = new ArrayList<Integer>();
582 pids.add(Process.myPid());
Makoto Onukie276b442018-08-30 09:38:44 -0700583 ActivityManagerService.dumpStackTraces(pids, null, null,
Narayan Kamatha0a28082017-07-31 15:58:59 +0100584 getInterestingNativePids());
585 waitedHalf = true;
586 }
587 continue;
588 }
589
590 // something is overdue!
591 blockedCheckers = getBlockedCheckersLocked();
592 subject = describeCheckersLocked(blockedCheckers);
593 } else {
594 blockedCheckers = Collections.emptyList();
595 subject = "Open FD high water mark reached";
596 }
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700597 allowRestart = mAllowRestart;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800598 }
599
600 // If we got here, that means that the system is most likely hung.
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700601 // First collect stack traces from all threads of the system process.
602 // Then kill this process so that the system will restart.
Jeff Brown7dd2d192013-09-06 15:05:23 -0700603 EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800604
Steven Moreland6b47c542017-03-21 12:52:16 -0700605 ArrayList<Integer> pids = new ArrayList<>();
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800606 pids.add(Process.myPid());
Dan Egnor4bded072010-03-11 22:00:47 -0800607 if (mPhonePid > 0) pids.add(mPhonePid);
Makoto Onukie276b442018-08-30 09:38:44 -0700608
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800609 final File stack = ActivityManagerService.dumpStackTraces(
Makoto Onukie276b442018-08-30 09:38:44 -0700610 pids, null, null, getInterestingNativePids());
Dan Egnor4bded072010-03-11 22:00:47 -0800611
612 // Give some extra time to make sure the stack traces get written.
613 // The system's been hanging for a minute, another second or two won't hurt much.
Makoto Onukie276b442018-08-30 09:38:44 -0700614 SystemClock.sleep(5000);
Dan Egnor4bded072010-03-11 22:00:47 -0800615
Guang Zhu0620c452014-10-29 14:31:48 -0700616 // Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log
617 doSysRq('w');
618 doSysRq('l');
Colin Cross5df1d872012-11-29 11:42:11 -0800619
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800620 // Try to add the error to the dropbox, but assuming that the ActivityManager
621 // itself may be deadlocked. (which has happened, causing this statement to
622 // deadlock and the watchdog as a whole to be ineffective)
623 Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
624 public void run() {
Amith Yamasanifa7c36892019-05-15 17:32:27 -0700625 // If a watched thread hangs before init() is called, we don't have a
626 // valid mActivity. So we can't log the error to dropbox.
627 if (mActivity != null) {
628 mActivity.addErrorToDropBox(
629 "watchdog", null, "system_server", null, null, null,
630 subject, null, stack, null);
631 }
jianjin8f2dee02019-03-29 14:03:08 -0700632 StatsLog.write(StatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, subject);
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800633 }
634 };
635 dropboxThread.start();
636 try {
637 dropboxThread.join(2000); // wait up to 2 seconds for it to return.
638 } catch (InterruptedException ignored) {}
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800639
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700640 IActivityController controller;
641 synchronized (this) {
642 controller = mController;
643 }
644 if (controller != null) {
645 Slog.i(TAG, "Reporting stuck state to activity controller");
646 try {
647 Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
648 // 1 = keep waiting, -1 = kill system
Jeff Brown7dd2d192013-09-06 15:05:23 -0700649 int res = controller.systemNotResponding(subject);
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700650 if (res >= 0) {
651 Slog.i(TAG, "Activity controller requested to coninue to wait");
652 waitedHalf = false;
653 continue;
654 }
655 } catch (RemoteException e) {
656 }
657 }
658
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700659 // Only kill the process if the debugger is not attached.
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700660 if (Debug.isDebuggerConnected()) {
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700661 debuggerWasConnected = 2;
662 }
663 if (debuggerWasConnected >= 2) {
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700664 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700665 } else if (debuggerWasConnected > 0) {
666 Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process");
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700667 } else if (!allowRestart) {
668 Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
669 } else {
Jeff Brown7dd2d192013-09-06 15:05:23 -0700670 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
Andreas Gampe544416e2018-01-26 11:39:46 -0800671 WatchdogDiagnostics.diagnoseCheckers(blockedCheckers);
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700672 Slog.w(TAG, "*** GOODBYE!");
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700673 Process.killProcess(Process.myPid());
674 System.exit(10);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800675 }
Christopher Tate6ee412d2010-05-28 12:01:56 -0700676
677 waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800678 }
679 }
Christopher Tateecaa7b42010-06-04 14:55:02 -0700680
Guang Zhu0620c452014-10-29 14:31:48 -0700681 private void doSysRq(char c) {
682 try {
683 FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
684 sysrq_trigger.write(c);
685 sysrq_trigger.close();
686 } catch (IOException e) {
687 Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
688 }
689 }
690
Narayan Kamatha0a28082017-07-31 15:58:59 +0100691 public static final class OpenFdMonitor {
692 /**
693 * Number of FDs below the soft limit that we trigger a runtime restart at. This was
694 * chosen arbitrarily, but will need to be at least 6 in order to have a sufficient number
695 * of FDs in reserve to complete a dump.
696 */
697 private static final int FD_HIGH_WATER_MARK = 12;
698
699 private final File mDumpDir;
700 private final File mFdHighWaterMark;
701
702 public static OpenFdMonitor create() {
703 // Only run the FD monitor on debuggable builds (such as userdebug and eng builds).
704 if (!Build.IS_DEBUGGABLE) {
705 return null;
706 }
707
Narayan Kamatha0a28082017-07-31 15:58:59 +0100708 final StructRlimit rlimit;
709 try {
710 rlimit = android.system.Os.getrlimit(OsConstants.RLIMIT_NOFILE);
711 } catch (ErrnoException errno) {
712 Slog.w(TAG, "Error thrown from getrlimit(RLIMIT_NOFILE)", errno);
713 return null;
714 }
715
716 // The assumption we're making here is that FD numbers are allocated (more or less)
717 // sequentially, which is currently (and historically) true since open is currently
718 // specified to always return the lowest-numbered non-open file descriptor for the
719 // current process.
720 //
721 // We do this to avoid having to enumerate the contents of /proc/self/fd in order to
722 // count the number of descriptors open in the process.
723 final File fdThreshold = new File("/proc/self/fd/" + (rlimit.rlim_cur - FD_HIGH_WATER_MARK));
Elliott Hughes4e4caa72018-03-23 11:06:36 -0700724 return new OpenFdMonitor(new File("/data/anr"), fdThreshold);
Narayan Kamatha0a28082017-07-31 15:58:59 +0100725 }
726
727 OpenFdMonitor(File dumpDir, File fdThreshold) {
728 mDumpDir = dumpDir;
729 mFdHighWaterMark = fdThreshold;
730 }
731
Nandana Dutt6647ef52018-07-12 17:02:57 +0100732 /**
733 * Dumps open file descriptors and their full paths to a temporary file in {@code mDumpDir}.
734 */
Narayan Kamatha0a28082017-07-31 15:58:59 +0100735 private void dumpOpenDescriptors() {
Nandana Dutt6647ef52018-07-12 17:02:57 +0100736 // We cannot exec lsof to get more info about open file descriptors because a newly
737 // forked process will not have the permissions to readlink. Instead list all open
738 // descriptors from /proc/pid/fd and resolve them.
739 List<String> dumpInfo = new ArrayList<>();
740 String fdDirPath = String.format("/proc/%d/fd/", Process.myPid());
741 File[] fds = new File(fdDirPath).listFiles();
742 if (fds == null) {
743 dumpInfo.add("Unable to list " + fdDirPath);
744 } else {
745 for (File f : fds) {
746 String fdSymLink = f.getAbsolutePath();
747 String resolvedPath = "";
748 try {
749 resolvedPath = Os.readlink(fdSymLink);
750 } catch (ErrnoException ex) {
751 resolvedPath = ex.getMessage();
752 }
753 dumpInfo.add(fdSymLink + "\t" + resolvedPath);
754 }
755 }
756
757 // Dump the fds & paths to a temp file.
Narayan Kamatha0a28082017-07-31 15:58:59 +0100758 try {
759 File dumpFile = File.createTempFile("anr_fd_", "", mDumpDir);
Nandana Dutt6647ef52018-07-12 17:02:57 +0100760 Path out = Paths.get(dumpFile.getAbsolutePath());
761 Files.write(out, dumpInfo, StandardCharsets.UTF_8);
762 } catch (IOException ex) {
763 Slog.w(TAG, "Unable to write open descriptors to file: " + ex);
Narayan Kamatha0a28082017-07-31 15:58:59 +0100764 }
765 }
766
767 /**
768 * @return {@code true} if the high water mark was breached and a dump was written,
769 * {@code false} otherwise.
770 */
771 public boolean monitor() {
772 if (mFdHighWaterMark.exists()) {
773 dumpOpenDescriptors();
774 return true;
775 }
776
777 return false;
778 }
779 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800780}