/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.server;
18
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -070019import android.app.IActivityController;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080020import android.content.BroadcastReceiver;
21import android.content.ContentResolver;
22import android.content.Context;
23import android.content.Intent;
24import android.content.IntentFilter;
Steven Moreland6b47c542017-03-21 12:52:16 -070025import android.hidl.manager.V1_0.IServiceManager;
Makoto Onuki99029542018-08-27 17:23:09 -070026import android.os.Binder;
27import android.os.Build;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080028import android.os.Debug;
29import android.os.Handler;
Jeff Brown6f357d32014-01-15 20:40:55 -080030import android.os.IPowerManager;
John Michelau11641522013-03-18 18:28:23 -050031import android.os.Looper;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080032import android.os.Process;
Makoto Onuki99029542018-08-27 17:23:09 -070033import android.os.RemoteException;
Suchi Amalapurapu6ffce2e2010-03-08 14:48:40 -080034import android.os.ServiceManager;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080035import android.os.SystemClock;
Makoto Onuki99029542018-08-27 17:23:09 -070036import android.system.ErrnoException;
37import android.system.Os;
38import android.system.OsConstants;
39import android.system.StructRlimit;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080040import android.util.EventLog;
Dan Egnor9bdc94b2010-03-04 14:20:31 -080041import android.util.Log;
Joe Onorato8a9b2202010-02-26 18:56:32 -080042import android.util.Slog;
jianjin8f2dee02019-03-29 14:03:08 -070043import android.util.StatsLog;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080044
Makoto Onuki99029542018-08-27 17:23:09 -070045import com.android.internal.os.ZygoteConnectionConstants;
46import com.android.server.am.ActivityManagerService;
Wale Ogunwale1f5e53d2018-11-05 05:12:46 -080047import com.android.server.wm.SurfaceAnimationThread;
Makoto Onuki99029542018-08-27 17:23:09 -070048
Dan Egnor9bdc94b2010-03-04 14:20:31 -080049import java.io.File;
Colin Cross5df1d872012-11-29 11:42:11 -080050import java.io.FileWriter;
51import java.io.IOException;
Nandana Dutt6647ef52018-07-12 17:02:57 +010052import java.nio.charset.StandardCharsets;
53import java.nio.file.Files;
54import java.nio.file.Path;
55import java.nio.file.Paths;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080056import java.util.ArrayList;
Steven Moreland6b47c542017-03-21 12:52:16 -070057import java.util.Arrays;
Narayan Kamatha0a28082017-07-31 15:58:59 +010058import java.util.Collections;
Steven Moreland6b47c542017-03-21 12:52:16 -070059import java.util.HashSet;
60import java.util.List;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080061
/** This class periodically calls its monitors, killing this process if they don't return in time. */
63public class Watchdog extends Thread {
64 static final String TAG = "Watchdog";
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080065
Makoto Onukie276b442018-08-30 09:38:44 -070066 /** Debug flag. */
67 public static final boolean DEBUG = true; // STOPSHIP disable it (b/113252928)
Makoto Onuki99029542018-08-27 17:23:09 -070068
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080069 // Set this to true to use debug default values.
70 static final boolean DB = false;
71
Andreas Gampe032a9292017-07-21 11:41:00 -070072 // Note 1: Do not lower this value below thirty seconds without tightening the invoke-with
73 // timeout in com.android.internal.os.ZygoteConnection, or wrapped applications
74 // can trigger the watchdog.
75 // Note 2: The debug value is already below the wait time in ZygoteConnection. Wrapped
76 // applications may not work with a debug build. CTS will fail.
Christopher Tatee6f81cf2013-10-23 17:28:27 -070077 static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
78 static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;
79
80 // These are temporally ordered: larger values as lateness increases
81 static final int COMPLETED = 0;
82 static final int WAITING = 1;
83 static final int WAITED_HALF = 2;
84 static final int OVERDUE = 3;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080085
Igor Murashkin44d04aa2013-10-23 10:56:02 -070086 // Which native processes to dump into dropbox's stack traces
87 public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
Andy Hung3a64ecb2016-03-09 13:55:58 -080088 "/system/bin/audioserver",
Andy Hung689574a2016-04-13 19:27:43 -070089 "/system/bin/cameraserver",
90 "/system/bin/drmserver",
91 "/system/bin/mediadrmserver",
Dianne Hackbornf72467a2012-06-08 17:23:59 -070092 "/system/bin/mediaserver",
93 "/system/bin/sdcard",
Eric Laurent05d4e352016-03-14 18:49:08 -070094 "/system/bin/surfaceflinger",
Jeff Sharkey3e40afb2019-01-02 09:21:24 -070095 "/system/bin/vold",
Andy Hung689574a2016-04-13 19:27:43 -070096 "media.extractor", // system/bin/mediaextractor
Andy Hungb1c4c932018-01-04 12:05:43 -080097 "media.metrics", // system/bin/mediametrics
Chong Zhange55e6e02017-06-02 10:52:04 -070098 "media.codec", // vendor/bin/hw/android.hardware.media.omx@1.0-service
Andreas Gampecf9e79b2016-05-11 18:41:25 -070099 "com.android.bluetooth", // Bluetooth service
Rafal Slawike35d8f82018-10-01 11:39:58 +0100100 "/system/bin/statsd", // Stats daemon
Dianne Hackbornf72467a2012-06-08 17:23:59 -0700101 };
102
Steven Moreland6b47c542017-03-21 12:52:16 -0700103 public static final List<String> HAL_INTERFACES_OF_INTEREST = Arrays.asList(
104 "android.hardware.audio@2.0::IDevicesFactory",
Mikhail Naganov09f0c5a2018-04-30 15:58:31 -0700105 "android.hardware.audio@4.0::IDevicesFactory",
Steven Moreland6b47c542017-03-21 12:52:16 -0700106 "android.hardware.bluetooth@1.0::IBluetoothHci",
107 "android.hardware.camera.provider@2.4::ICameraProvider",
Chia-I Wu49569e32018-08-01 09:59:02 -0700108 "android.hardware.graphics.allocator@2.0::IAllocator",
Chia-I Wu74debcd2017-04-21 11:14:22 -0700109 "android.hardware.graphics.composer@2.1::IComposer",
Yifan Honge8ea5222018-11-27 16:42:13 -0800110 "android.hardware.health@2.0::IHealth",
Peng Xu102122fb2017-07-11 21:12:11 -0700111 "android.hardware.media.omx@1.0::IOmx",
Pawin Vongmasaf22f5f72018-04-18 07:01:47 -0700112 "android.hardware.media.omx@1.0::IOmxStore",
Peng Xu102122fb2017-07-11 21:12:11 -0700113 "android.hardware.sensors@1.0::ISensors",
114 "android.hardware.vr@1.0::IVr"
Steven Moreland6b47c542017-03-21 12:52:16 -0700115 );
116
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800117 static Watchdog sWatchdog;
118
119 /* This handler will be used to post message back onto the main thread */
Wale Ogunwaled7fdd022015-04-13 16:22:38 -0700120 final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700121 final HandlerChecker mMonitorChecker;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800122 ContentResolver mResolver;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800123 ActivityManagerService mActivity;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800124
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800125 int mPhonePid;
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700126 IActivityController mController;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700127 boolean mAllowRestart = true;
Narayan Kamatha0a28082017-07-31 15:58:59 +0100128 final OpenFdMonitor mOpenFdMonitor;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800129
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800130 /**
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700131 * Used for checking status of handle threads and scheduling monitor callbacks.
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800132 */
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700133 public final class HandlerChecker implements Runnable {
134 private final Handler mHandler;
135 private final String mName;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700136 private final long mWaitMax;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700137 private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700138 private boolean mCompleted;
139 private Monitor mCurrentMonitor;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700140 private long mStartTime;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700141
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700142 HandlerChecker(Handler handler, String name, long waitMaxMillis) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700143 mHandler = handler;
144 mName = name;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700145 mWaitMax = waitMaxMillis;
146 mCompleted = true;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700147 }
148
Makoto Onukie276b442018-08-30 09:38:44 -0700149 void addMonitorLocked(Monitor monitor) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700150 mMonitors.add(monitor);
151 }
152
153 public void scheduleCheckLocked() {
Jeff Brown6c7b41a2015-02-26 14:43:53 -0800154 if (mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling()) {
155 // If the target looper has recently been polling, then
Dianne Hackbornefa92b22013-05-03 14:11:43 -0700156 // there is no reason to enqueue our checker on it since that
157 // is as good as it not being deadlocked. This avoid having
158 // to do a context switch to check the thread. Note that we
159 // only do this if mCheckReboot is false and we have no
160 // monitors, since those would need to be executed at this point.
161 mCompleted = true;
162 return;
163 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700164
165 if (!mCompleted) {
166 // we already have a check in flight, so no need
167 return;
168 }
169
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700170 mCompleted = false;
171 mCurrentMonitor = null;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700172 mStartTime = SystemClock.uptimeMillis();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700173 mHandler.postAtFrontOfQueue(this);
174 }
175
Makoto Onukie276b442018-08-30 09:38:44 -0700176 boolean isOverdueLocked() {
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700177 return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
178 }
179
180 public int getCompletionStateLocked() {
181 if (mCompleted) {
182 return COMPLETED;
183 } else {
184 long latency = SystemClock.uptimeMillis() - mStartTime;
185 if (latency < mWaitMax/2) {
186 return WAITING;
187 } else if (latency < mWaitMax) {
188 return WAITED_HALF;
189 }
190 }
191 return OVERDUE;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700192 }
193
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700194 public Thread getThread() {
195 return mHandler.getLooper().getThread();
196 }
197
198 public String getName() {
199 return mName;
200 }
201
Makoto Onukie276b442018-08-30 09:38:44 -0700202 String describeBlockedStateLocked() {
Jeff Brown7dd2d192013-09-06 15:05:23 -0700203 if (mCurrentMonitor == null) {
204 return "Blocked in handler on " + mName + " (" + getThread().getName() + ")";
205 } else {
206 return "Blocked in monitor " + mCurrentMonitor.getClass().getName()
207 + " on " + mName + " (" + getThread().getName() + ")";
208 }
John Michelau11641522013-03-18 18:28:23 -0500209 }
210
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800211 @Override
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700212 public void run() {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700213 final int size = mMonitors.size();
214 for (int i = 0 ; i < size ; i++) {
215 synchronized (Watchdog.this) {
216 mCurrentMonitor = mMonitors.get(i);
217 }
218 mCurrentMonitor.monitor();
219 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800220
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700221 synchronized (Watchdog.this) {
222 mCompleted = true;
223 mCurrentMonitor = null;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800224 }
225 }
226 }
227
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800228 final class RebootRequestReceiver extends BroadcastReceiver {
229 @Override
230 public void onReceive(Context c, Intent intent) {
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700231 if (intent.getIntExtra("nowait", 0) != 0) {
232 rebootSystem("Received ACTION_REBOOT broadcast");
233 return;
234 }
235 Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800236 }
237 }
238
Wale Ogunwale517daec2015-04-15 10:27:24 -0700239 /** Monitor for checking the availability of binder threads. The monitor will block until
240 * there is a binder thread available to process in coming IPCs to make sure other processes
241 * can still communicate with the service.
242 */
243 private static final class BinderThreadMonitor implements Watchdog.Monitor {
244 @Override
245 public void monitor() {
246 Binder.blockUntilThreadAvailable();
247 }
248 }
249
/** Callback invoked on the watchdog's monitor checker thread during every check. */
public interface Monitor {
    /** Called during a check; a call that does not return makes the checker overdue. */
    void monitor();
}
253
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800254 public static Watchdog getInstance() {
255 if (sWatchdog == null) {
256 sWatchdog = new Watchdog();
257 }
258
259 return sWatchdog;
260 }
261
262 private Watchdog() {
263 super("watchdog");
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700264 // Initialize handler checkers for each common thread we want to check. Note
265 // that we are not currently checking the background thread, since it can
266 // potentially hold longer running operations with no guarantees about the timeliness
267 // of operations there.
268
269 // The shared foreground thread is the main checker. It is where we
270 // will also dispatch monitor checks and do other work.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700271 mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
272 "foreground thread", DEFAULT_TIMEOUT);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700273 mHandlerCheckers.add(mMonitorChecker);
274 // Add checker for main thread. We only do a quick check since there
275 // can be UI running on the thread.
276 mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700277 "main thread", DEFAULT_TIMEOUT));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700278 // Add checker for shared UI thread.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700279 mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
280 "ui thread", DEFAULT_TIMEOUT));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700281 // And also check IO thread.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700282 mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
283 "i/o thread", DEFAULT_TIMEOUT));
Jeff Brown4ccb8232014-01-16 22:16:42 -0800284 // And the display thread.
285 mHandlerCheckers.add(new HandlerChecker(DisplayThread.getHandler(),
286 "display thread", DEFAULT_TIMEOUT));
Wale Ogunwale1f5e53d2018-11-05 05:12:46 -0800287 // And the animation thread.
288 mHandlerCheckers.add(new HandlerChecker(AnimationThread.getHandler(),
289 "animation thread", DEFAULT_TIMEOUT));
290 // And the surface animation thread.
291 mHandlerCheckers.add(new HandlerChecker(SurfaceAnimationThread.getHandler(),
292 "surface animation thread", DEFAULT_TIMEOUT));
Wale Ogunwale517daec2015-04-15 10:27:24 -0700293
294 // Initialize monitor for Binder threads.
295 addMonitor(new BinderThreadMonitor());
Andreas Gampe032a9292017-07-21 11:41:00 -0700296
Narayan Kamatha0a28082017-07-31 15:58:59 +0100297 mOpenFdMonitor = OpenFdMonitor.create();
298
Andreas Gampe032a9292017-07-21 11:41:00 -0700299 // See the notes on DEFAULT_TIMEOUT.
300 assert DB ||
301 DEFAULT_TIMEOUT > ZygoteConnectionConstants.WRAPPED_PID_TIMEOUT_MILLIS;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800302 }
303
Adam Lesinski182f73f2013-12-05 16:48:06 -0800304 public void init(Context context, ActivityManagerService activity) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800305 mResolver = context.getContentResolver();
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800306 mActivity = activity;
307
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800308 context.registerReceiver(new RebootRequestReceiver(),
309 new IntentFilter(Intent.ACTION_REBOOT),
310 android.Manifest.permission.REBOOT, null);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800311 }
312
Christopher Tatec27181c2010-06-30 14:41:09 -0700313 public void processStarted(String name, int pid) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800314 synchronized (this) {
315 if ("com.android.phone".equals(name)) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800316 mPhonePid = pid;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800317 }
318 }
319 }
320
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700321 public void setActivityController(IActivityController controller) {
322 synchronized (this) {
323 mController = controller;
324 }
325 }
326
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700327 public void setAllowRestart(boolean allowRestart) {
328 synchronized (this) {
329 mAllowRestart = allowRestart;
330 }
331 }
332
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800333 public void addMonitor(Monitor monitor) {
334 synchronized (this) {
335 if (isAlive()) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700336 throw new RuntimeException("Monitors can't be added once the Watchdog is running");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800337 }
Makoto Onukie276b442018-08-30 09:38:44 -0700338 mMonitorChecker.addMonitorLocked(monitor);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700339 }
340 }
341
Jeff Brown6f357d32014-01-15 20:40:55 -0800342 public void addThread(Handler thread) {
343 addThread(thread, DEFAULT_TIMEOUT);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700344 }
345
Jeff Brown6f357d32014-01-15 20:40:55 -0800346 public void addThread(Handler thread, long timeoutMillis) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700347 synchronized (this) {
348 if (isAlive()) {
349 throw new RuntimeException("Threads can't be added once the Watchdog is running");
350 }
Jeff Brown6f357d32014-01-15 20:40:55 -0800351 final String name = thread.getLooper().getThread().getName();
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700352 mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800353 }
354 }
355
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800356 /**
357 * Perform a full reboot of the system.
358 */
359 void rebootSystem(String reason) {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800360 Slog.i(TAG, "Rebooting system because: " + reason);
Jeff Brown6f357d32014-01-15 20:40:55 -0800361 IPowerManager pms = (IPowerManager)ServiceManager.getService(Context.POWER_SERVICE);
362 try {
363 pms.reboot(false, reason, false);
364 } catch (RemoteException ex) {
365 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800366 }
367
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700368 private int evaluateCheckerCompletionLocked() {
369 int state = COMPLETED;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700370 for (int i=0; i<mHandlerCheckers.size(); i++) {
371 HandlerChecker hc = mHandlerCheckers.get(i);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700372 state = Math.max(state, hc.getCompletionStateLocked());
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700373 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700374 return state;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700375 }
376
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700377 private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
378 ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700379 for (int i=0; i<mHandlerCheckers.size(); i++) {
380 HandlerChecker hc = mHandlerCheckers.get(i);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700381 if (hc.isOverdueLocked()) {
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700382 checkers.add(hc);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700383 }
384 }
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700385 return checkers;
386 }
387
Narayan Kamatha0a28082017-07-31 15:58:59 +0100388 private String describeCheckersLocked(List<HandlerChecker> checkers) {
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700389 StringBuilder builder = new StringBuilder(128);
390 for (int i=0; i<checkers.size(); i++) {
391 if (builder.length() > 0) {
392 builder.append(", ");
393 }
394 builder.append(checkers.get(i).describeBlockedStateLocked());
395 }
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700396 return builder.toString();
397 }
398
Steven Moreland6b47c542017-03-21 12:52:16 -0700399 private ArrayList<Integer> getInterestingHalPids() {
400 try {
401 IServiceManager serviceManager = IServiceManager.getService();
402 ArrayList<IServiceManager.InstanceDebugInfo> dump =
403 serviceManager.debugDump();
404 HashSet<Integer> pids = new HashSet<>();
405 for (IServiceManager.InstanceDebugInfo info : dump) {
406 if (info.pid == IServiceManager.PidConstant.NO_PID) {
407 continue;
408 }
409
410 if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) {
411 continue;
412 }
413
414 pids.add(info.pid);
415 }
416 return new ArrayList<Integer>(pids);
417 } catch (RemoteException e) {
418 return new ArrayList<Integer>();
419 }
420 }
421
422 private ArrayList<Integer> getInterestingNativePids() {
423 ArrayList<Integer> pids = getInterestingHalPids();
424
425 int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST);
426 if (nativePids != null) {
427 pids.ensureCapacity(pids.size() + nativePids.length);
428 for (int i : nativePids) {
429 pids.add(i);
430 }
431 }
432
433 return pids;
434 }
435
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800436 @Override
437 public void run() {
Christopher Tate6ee412d2010-05-28 12:01:56 -0700438 boolean waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800439 while (true) {
Narayan Kamatha0a28082017-07-31 15:58:59 +0100440 final List<HandlerChecker> blockedCheckers;
Jeff Brown7dd2d192013-09-06 15:05:23 -0700441 final String subject;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700442 final boolean allowRestart;
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700443 int debuggerWasConnected = 0;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800444 synchronized (this) {
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700445 long timeout = CHECK_INTERVAL;
446 // Make sure we (re)spin the checkers that have become idle within
447 // this wait-and-check interval
448 for (int i=0; i<mHandlerCheckers.size(); i++) {
449 HandlerChecker hc = mHandlerCheckers.get(i);
450 hc.scheduleCheckLocked();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700451 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800452
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700453 if (debuggerWasConnected > 0) {
454 debuggerWasConnected--;
455 }
456
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800457 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
458 // wait while asleep. If the device is asleep then the thing that we are waiting
Christopher Tate6ee412d2010-05-28 12:01:56 -0700459 // to timeout on is asleep as well and won't have a chance to run, causing a false
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800460 // positive on when to kill things.
461 long start = SystemClock.uptimeMillis();
Michael Wright8fa56f62013-04-01 16:36:05 -0700462 while (timeout > 0) {
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700463 if (Debug.isDebuggerConnected()) {
464 debuggerWasConnected = 2;
465 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800466 try {
Michael Wright8fa56f62013-04-01 16:36:05 -0700467 wait(timeout);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800468 } catch (InterruptedException e) {
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800469 Log.wtf(TAG, e);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800470 }
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700471 if (Debug.isDebuggerConnected()) {
472 debuggerWasConnected = 2;
473 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700474 timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800475 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800476
Narayan Kamatha0a28082017-07-31 15:58:59 +0100477 boolean fdLimitTriggered = false;
478 if (mOpenFdMonitor != null) {
479 fdLimitTriggered = mOpenFdMonitor.monitor();
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800480 }
Michael Wright8fa56f62013-04-01 16:36:05 -0700481
Narayan Kamatha0a28082017-07-31 15:58:59 +0100482 if (!fdLimitTriggered) {
483 final int waitState = evaluateCheckerCompletionLocked();
484 if (waitState == COMPLETED) {
485 // The monitors have returned; reset
486 waitedHalf = false;
487 continue;
488 } else if (waitState == WAITING) {
489 // still waiting but within their configured intervals; back off and recheck
490 continue;
491 } else if (waitState == WAITED_HALF) {
492 if (!waitedHalf) {
Makoto Onuki99029542018-08-27 17:23:09 -0700493 if (DEBUG) Slog.d(TAG, "WAITED_HALF");
Narayan Kamatha0a28082017-07-31 15:58:59 +0100494 // We've waited half the deadlock-detection interval. Pull a stack
495 // trace and wait another half.
496 ArrayList<Integer> pids = new ArrayList<Integer>();
497 pids.add(Process.myPid());
Makoto Onukie276b442018-08-30 09:38:44 -0700498 ActivityManagerService.dumpStackTraces(pids, null, null,
Narayan Kamatha0a28082017-07-31 15:58:59 +0100499 getInterestingNativePids());
500 waitedHalf = true;
501 }
502 continue;
503 }
504
505 // something is overdue!
506 blockedCheckers = getBlockedCheckersLocked();
507 subject = describeCheckersLocked(blockedCheckers);
508 } else {
509 blockedCheckers = Collections.emptyList();
510 subject = "Open FD high water mark reached";
511 }
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700512 allowRestart = mAllowRestart;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800513 }
514
515 // If we got here, that means that the system is most likely hung.
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700516 // First collect stack traces from all threads of the system process.
517 // Then kill this process so that the system will restart.
Jeff Brown7dd2d192013-09-06 15:05:23 -0700518 EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800519
Steven Moreland6b47c542017-03-21 12:52:16 -0700520 ArrayList<Integer> pids = new ArrayList<>();
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800521 pids.add(Process.myPid());
Dan Egnor4bded072010-03-11 22:00:47 -0800522 if (mPhonePid > 0) pids.add(mPhonePid);
Makoto Onukie276b442018-08-30 09:38:44 -0700523
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800524 final File stack = ActivityManagerService.dumpStackTraces(
Makoto Onukie276b442018-08-30 09:38:44 -0700525 pids, null, null, getInterestingNativePids());
Dan Egnor4bded072010-03-11 22:00:47 -0800526
527 // Give some extra time to make sure the stack traces get written.
528 // The system's been hanging for a minute, another second or two won't hurt much.
Makoto Onukie276b442018-08-30 09:38:44 -0700529 SystemClock.sleep(5000);
Dan Egnor4bded072010-03-11 22:00:47 -0800530
Guang Zhu0620c452014-10-29 14:31:48 -0700531 // Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log
532 doSysRq('w');
533 doSysRq('l');
Colin Cross5df1d872012-11-29 11:42:11 -0800534
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800535 // Try to add the error to the dropbox, but assuming that the ActivityManager
536 // itself may be deadlocked. (which has happened, causing this statement to
537 // deadlock and the watchdog as a whole to be ineffective)
538 Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
539 public void run() {
540 mActivity.addErrorToDropBox(
Wale Ogunwale51cc98a2018-10-15 10:41:05 -0700541 "watchdog", null, "system_server", null, null, null,
Jeff Brown7dd2d192013-09-06 15:05:23 -0700542 subject, null, stack, null);
jianjin8f2dee02019-03-29 14:03:08 -0700543 StatsLog.write(StatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, subject);
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800544 }
545 };
546 dropboxThread.start();
547 try {
548 dropboxThread.join(2000); // wait up to 2 seconds for it to return.
549 } catch (InterruptedException ignored) {}
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800550
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700551 IActivityController controller;
552 synchronized (this) {
553 controller = mController;
554 }
555 if (controller != null) {
556 Slog.i(TAG, "Reporting stuck state to activity controller");
557 try {
558 Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
559 // 1 = keep waiting, -1 = kill system
Jeff Brown7dd2d192013-09-06 15:05:23 -0700560 int res = controller.systemNotResponding(subject);
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700561 if (res >= 0) {
562 Slog.i(TAG, "Activity controller requested to coninue to wait");
563 waitedHalf = false;
564 continue;
565 }
566 } catch (RemoteException e) {
567 }
568 }
569
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700570 // Only kill the process if the debugger is not attached.
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700571 if (Debug.isDebuggerConnected()) {
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700572 debuggerWasConnected = 2;
573 }
574 if (debuggerWasConnected >= 2) {
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700575 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700576 } else if (debuggerWasConnected > 0) {
577 Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process");
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700578 } else if (!allowRestart) {
579 Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
580 } else {
Jeff Brown7dd2d192013-09-06 15:05:23 -0700581 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
Andreas Gampe544416e2018-01-26 11:39:46 -0800582 WatchdogDiagnostics.diagnoseCheckers(blockedCheckers);
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700583 Slog.w(TAG, "*** GOODBYE!");
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700584 Process.killProcess(Process.myPid());
585 System.exit(10);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800586 }
Christopher Tate6ee412d2010-05-28 12:01:56 -0700587
588 waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800589 }
590 }
Christopher Tateecaa7b42010-06-04 14:55:02 -0700591
Guang Zhu0620c452014-10-29 14:31:48 -0700592 private void doSysRq(char c) {
593 try {
594 FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
595 sysrq_trigger.write(c);
596 sysrq_trigger.close();
597 } catch (IOException e) {
598 Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
599 }
600 }
601
Narayan Kamatha0a28082017-07-31 15:58:59 +0100602 public static final class OpenFdMonitor {
603 /**
604 * Number of FDs below the soft limit that we trigger a runtime restart at. This was
605 * chosen arbitrarily, but will need to be at least 6 in order to have a sufficient number
606 * of FDs in reserve to complete a dump.
607 */
608 private static final int FD_HIGH_WATER_MARK = 12;
609
610 private final File mDumpDir;
611 private final File mFdHighWaterMark;
612
613 public static OpenFdMonitor create() {
614 // Only run the FD monitor on debuggable builds (such as userdebug and eng builds).
615 if (!Build.IS_DEBUGGABLE) {
616 return null;
617 }
618
Narayan Kamatha0a28082017-07-31 15:58:59 +0100619 final StructRlimit rlimit;
620 try {
621 rlimit = android.system.Os.getrlimit(OsConstants.RLIMIT_NOFILE);
622 } catch (ErrnoException errno) {
623 Slog.w(TAG, "Error thrown from getrlimit(RLIMIT_NOFILE)", errno);
624 return null;
625 }
626
627 // The assumption we're making here is that FD numbers are allocated (more or less)
628 // sequentially, which is currently (and historically) true since open is currently
629 // specified to always return the lowest-numbered non-open file descriptor for the
630 // current process.
631 //
632 // We do this to avoid having to enumerate the contents of /proc/self/fd in order to
633 // count the number of descriptors open in the process.
634 final File fdThreshold = new File("/proc/self/fd/" + (rlimit.rlim_cur - FD_HIGH_WATER_MARK));
Elliott Hughes4e4caa72018-03-23 11:06:36 -0700635 return new OpenFdMonitor(new File("/data/anr"), fdThreshold);
Narayan Kamatha0a28082017-07-31 15:58:59 +0100636 }
637
638 OpenFdMonitor(File dumpDir, File fdThreshold) {
639 mDumpDir = dumpDir;
640 mFdHighWaterMark = fdThreshold;
641 }
642
Nandana Dutt6647ef52018-07-12 17:02:57 +0100643 /**
644 * Dumps open file descriptors and their full paths to a temporary file in {@code mDumpDir}.
645 */
Narayan Kamatha0a28082017-07-31 15:58:59 +0100646 private void dumpOpenDescriptors() {
Nandana Dutt6647ef52018-07-12 17:02:57 +0100647 // We cannot exec lsof to get more info about open file descriptors because a newly
648 // forked process will not have the permissions to readlink. Instead list all open
649 // descriptors from /proc/pid/fd and resolve them.
650 List<String> dumpInfo = new ArrayList<>();
651 String fdDirPath = String.format("/proc/%d/fd/", Process.myPid());
652 File[] fds = new File(fdDirPath).listFiles();
653 if (fds == null) {
654 dumpInfo.add("Unable to list " + fdDirPath);
655 } else {
656 for (File f : fds) {
657 String fdSymLink = f.getAbsolutePath();
658 String resolvedPath = "";
659 try {
660 resolvedPath = Os.readlink(fdSymLink);
661 } catch (ErrnoException ex) {
662 resolvedPath = ex.getMessage();
663 }
664 dumpInfo.add(fdSymLink + "\t" + resolvedPath);
665 }
666 }
667
668 // Dump the fds & paths to a temp file.
Narayan Kamatha0a28082017-07-31 15:58:59 +0100669 try {
670 File dumpFile = File.createTempFile("anr_fd_", "", mDumpDir);
Nandana Dutt6647ef52018-07-12 17:02:57 +0100671 Path out = Paths.get(dumpFile.getAbsolutePath());
672 Files.write(out, dumpInfo, StandardCharsets.UTF_8);
673 } catch (IOException ex) {
674 Slog.w(TAG, "Unable to write open descriptors to file: " + ex);
Narayan Kamatha0a28082017-07-31 15:58:59 +0100675 }
676 }
677
678 /**
679 * @return {@code true} if the high water mark was breached and a dump was written,
680 * {@code false} otherwise.
681 */
682 public boolean monitor() {
683 if (mFdHighWaterMark.exists()) {
684 dumpOpenDescriptors();
685 return true;
686 }
687
688 return false;
689 }
690 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800691}