blob: 713da305ed55b7f5c28d9fbd29bb0e6b4834d3e8 [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -070019import android.app.IActivityController;
20import android.os.Binder;
Narayan Kamatha0a28082017-07-31 15:58:59 +010021import android.os.Build;
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -070022import android.os.RemoteException;
Narayan Kamatha0a28082017-07-31 15:58:59 +010023import android.system.ErrnoException;
Nandana Dutt6647ef52018-07-12 17:02:57 +010024import android.system.Os;
Narayan Kamatha0a28082017-07-31 15:58:59 +010025import android.system.OsConstants;
26import android.system.StructRlimit;
Andreas Gampe032a9292017-07-21 11:41:00 -070027import com.android.internal.os.ZygoteConnectionConstants;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080028import com.android.server.am.ActivityManagerService;
29
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080030import android.content.BroadcastReceiver;
31import android.content.ContentResolver;
32import android.content.Context;
33import android.content.Intent;
34import android.content.IntentFilter;
Steven Moreland6b47c542017-03-21 12:52:16 -070035import android.hidl.manager.V1_0.IServiceManager;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080036import android.os.Debug;
37import android.os.Handler;
Jeff Brown6f357d32014-01-15 20:40:55 -080038import android.os.IPowerManager;
John Michelau11641522013-03-18 18:28:23 -050039import android.os.Looper;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080040import android.os.Process;
Suchi Amalapurapu6ffce2e2010-03-08 14:48:40 -080041import android.os.ServiceManager;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080042import android.os.SystemClock;
43import android.os.SystemProperties;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080044import android.util.EventLog;
Dan Egnor9bdc94b2010-03-04 14:20:31 -080045import android.util.Log;
Joe Onorato8a9b2202010-02-26 18:56:32 -080046import android.util.Slog;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080047
Dan Egnor9bdc94b2010-03-04 14:20:31 -080048import java.io.File;
Colin Cross5df1d872012-11-29 11:42:11 -080049import java.io.FileWriter;
50import java.io.IOException;
Nandana Dutt6647ef52018-07-12 17:02:57 +010051import java.nio.charset.StandardCharsets;
52import java.nio.file.Files;
53import java.nio.file.Path;
54import java.nio.file.Paths;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080055import java.util.ArrayList;
Steven Moreland6b47c542017-03-21 12:52:16 -070056import java.util.Arrays;
Narayan Kamatha0a28082017-07-31 15:58:59 +010057import java.util.Collections;
Steven Moreland6b47c542017-03-21 12:52:16 -070058import java.util.HashSet;
59import java.util.List;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080060
/** This class calls its monitors every minute, killing this process if they don't return. **/
62public class Watchdog extends Thread {
63 static final String TAG = "Watchdog";
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080064
65 // Set this to true to use debug default values.
66 static final boolean DB = false;
67
Andreas Gampe032a9292017-07-21 11:41:00 -070068 // Note 1: Do not lower this value below thirty seconds without tightening the invoke-with
69 // timeout in com.android.internal.os.ZygoteConnection, or wrapped applications
70 // can trigger the watchdog.
71 // Note 2: The debug value is already below the wait time in ZygoteConnection. Wrapped
72 // applications may not work with a debug build. CTS will fail.
Christopher Tatee6f81cf2013-10-23 17:28:27 -070073 static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
74 static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;
75
76 // These are temporally ordered: larger values as lateness increases
77 static final int COMPLETED = 0;
78 static final int WAITING = 1;
79 static final int WAITED_HALF = 2;
80 static final int OVERDUE = 3;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080081
Igor Murashkin44d04aa2013-10-23 10:56:02 -070082 // Which native processes to dump into dropbox's stack traces
83 public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
Andy Hung3a64ecb2016-03-09 13:55:58 -080084 "/system/bin/audioserver",
Andy Hung689574a2016-04-13 19:27:43 -070085 "/system/bin/cameraserver",
86 "/system/bin/drmserver",
87 "/system/bin/mediadrmserver",
Dianne Hackbornf72467a2012-06-08 17:23:59 -070088 "/system/bin/mediaserver",
89 "/system/bin/sdcard",
Eric Laurent05d4e352016-03-14 18:49:08 -070090 "/system/bin/surfaceflinger",
Andy Hung689574a2016-04-13 19:27:43 -070091 "media.extractor", // system/bin/mediaextractor
Andy Hungb1c4c932018-01-04 12:05:43 -080092 "media.metrics", // system/bin/mediametrics
Chong Zhange55e6e02017-06-02 10:52:04 -070093 "media.codec", // vendor/bin/hw/android.hardware.media.omx@1.0-service
Andreas Gampecf9e79b2016-05-11 18:41:25 -070094 "com.android.bluetooth", // Bluetooth service
Andreas Gampee54440c2018-01-24 17:55:17 -080095 "statsd", // Stats daemon
Dianne Hackbornf72467a2012-06-08 17:23:59 -070096 };
97
Steven Moreland6b47c542017-03-21 12:52:16 -070098 public static final List<String> HAL_INTERFACES_OF_INTEREST = Arrays.asList(
99 "android.hardware.audio@2.0::IDevicesFactory",
Mikhail Naganov09f0c5a2018-04-30 15:58:31 -0700100 "android.hardware.audio@4.0::IDevicesFactory",
Steven Moreland6b47c542017-03-21 12:52:16 -0700101 "android.hardware.bluetooth@1.0::IBluetoothHci",
102 "android.hardware.camera.provider@2.4::ICameraProvider",
Chia-I Wu49569e32018-08-01 09:59:02 -0700103 "android.hardware.graphics.allocator@2.0::IAllocator",
Chia-I Wu74debcd2017-04-21 11:14:22 -0700104 "android.hardware.graphics.composer@2.1::IComposer",
Peng Xu102122fb2017-07-11 21:12:11 -0700105 "android.hardware.media.omx@1.0::IOmx",
Pawin Vongmasaf22f5f72018-04-18 07:01:47 -0700106 "android.hardware.media.omx@1.0::IOmxStore",
Peng Xu102122fb2017-07-11 21:12:11 -0700107 "android.hardware.sensors@1.0::ISensors",
108 "android.hardware.vr@1.0::IVr"
Steven Moreland6b47c542017-03-21 12:52:16 -0700109 );
110
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800111 static Watchdog sWatchdog;
112
113 /* This handler will be used to post message back onto the main thread */
Wale Ogunwaled7fdd022015-04-13 16:22:38 -0700114 final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700115 final HandlerChecker mMonitorChecker;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800116 ContentResolver mResolver;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800117 ActivityManagerService mActivity;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800118
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800119 int mPhonePid;
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700120 IActivityController mController;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700121 boolean mAllowRestart = true;
Narayan Kamatha0a28082017-07-31 15:58:59 +0100122 final OpenFdMonitor mOpenFdMonitor;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800123
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800124 /**
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700125 * Used for checking status of handle threads and scheduling monitor callbacks.
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800126 */
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700127 public final class HandlerChecker implements Runnable {
128 private final Handler mHandler;
129 private final String mName;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700130 private final long mWaitMax;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700131 private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700132 private boolean mCompleted;
133 private Monitor mCurrentMonitor;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700134 private long mStartTime;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700135
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700136 HandlerChecker(Handler handler, String name, long waitMaxMillis) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700137 mHandler = handler;
138 mName = name;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700139 mWaitMax = waitMaxMillis;
140 mCompleted = true;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700141 }
142
143 public void addMonitor(Monitor monitor) {
144 mMonitors.add(monitor);
145 }
146
147 public void scheduleCheckLocked() {
Jeff Brown6c7b41a2015-02-26 14:43:53 -0800148 if (mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling()) {
149 // If the target looper has recently been polling, then
Dianne Hackbornefa92b22013-05-03 14:11:43 -0700150 // there is no reason to enqueue our checker on it since that
151 // is as good as it not being deadlocked. This avoid having
152 // to do a context switch to check the thread. Note that we
153 // only do this if mCheckReboot is false and we have no
154 // monitors, since those would need to be executed at this point.
155 mCompleted = true;
156 return;
157 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700158
159 if (!mCompleted) {
160 // we already have a check in flight, so no need
161 return;
162 }
163
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700164 mCompleted = false;
165 mCurrentMonitor = null;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700166 mStartTime = SystemClock.uptimeMillis();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700167 mHandler.postAtFrontOfQueue(this);
168 }
169
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700170 public boolean isOverdueLocked() {
171 return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
172 }
173
174 public int getCompletionStateLocked() {
175 if (mCompleted) {
176 return COMPLETED;
177 } else {
178 long latency = SystemClock.uptimeMillis() - mStartTime;
179 if (latency < mWaitMax/2) {
180 return WAITING;
181 } else if (latency < mWaitMax) {
182 return WAITED_HALF;
183 }
184 }
185 return OVERDUE;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700186 }
187
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700188 public Thread getThread() {
189 return mHandler.getLooper().getThread();
190 }
191
192 public String getName() {
193 return mName;
194 }
195
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700196 public String describeBlockedStateLocked() {
Jeff Brown7dd2d192013-09-06 15:05:23 -0700197 if (mCurrentMonitor == null) {
198 return "Blocked in handler on " + mName + " (" + getThread().getName() + ")";
199 } else {
200 return "Blocked in monitor " + mCurrentMonitor.getClass().getName()
201 + " on " + mName + " (" + getThread().getName() + ")";
202 }
John Michelau11641522013-03-18 18:28:23 -0500203 }
204
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800205 @Override
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700206 public void run() {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700207 final int size = mMonitors.size();
208 for (int i = 0 ; i < size ; i++) {
209 synchronized (Watchdog.this) {
210 mCurrentMonitor = mMonitors.get(i);
211 }
212 mCurrentMonitor.monitor();
213 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800214
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700215 synchronized (Watchdog.this) {
216 mCompleted = true;
217 mCurrentMonitor = null;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800218 }
219 }
220 }
221
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800222 final class RebootRequestReceiver extends BroadcastReceiver {
223 @Override
224 public void onReceive(Context c, Intent intent) {
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700225 if (intent.getIntExtra("nowait", 0) != 0) {
226 rebootSystem("Received ACTION_REBOOT broadcast");
227 return;
228 }
229 Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800230 }
231 }
232
Wale Ogunwale517daec2015-04-15 10:27:24 -0700233 /** Monitor for checking the availability of binder threads. The monitor will block until
234 * there is a binder thread available to process in coming IPCs to make sure other processes
235 * can still communicate with the service.
236 */
237 private static final class BinderThreadMonitor implements Watchdog.Monitor {
238 @Override
239 public void monitor() {
240 Binder.blockUntilThreadAvailable();
241 }
242 }
243
/**
 * Callback registered through {@code addMonitor()}. Implementations are invoked from
 * {@code HandlerChecker.run()}; a call that does not return keeps the check incomplete.
 */
public interface Monitor {
    /** Invoked on every check; should return promptly when the monitored component is healthy. */
    void monitor();
}
247
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800248 public static Watchdog getInstance() {
249 if (sWatchdog == null) {
250 sWatchdog = new Watchdog();
251 }
252
253 return sWatchdog;
254 }
255
256 private Watchdog() {
257 super("watchdog");
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700258 // Initialize handler checkers for each common thread we want to check. Note
259 // that we are not currently checking the background thread, since it can
260 // potentially hold longer running operations with no guarantees about the timeliness
261 // of operations there.
262
263 // The shared foreground thread is the main checker. It is where we
264 // will also dispatch monitor checks and do other work.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700265 mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
266 "foreground thread", DEFAULT_TIMEOUT);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700267 mHandlerCheckers.add(mMonitorChecker);
268 // Add checker for main thread. We only do a quick check since there
269 // can be UI running on the thread.
270 mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700271 "main thread", DEFAULT_TIMEOUT));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700272 // Add checker for shared UI thread.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700273 mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
274 "ui thread", DEFAULT_TIMEOUT));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700275 // And also check IO thread.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700276 mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
277 "i/o thread", DEFAULT_TIMEOUT));
Jeff Brown4ccb8232014-01-16 22:16:42 -0800278 // And the display thread.
279 mHandlerCheckers.add(new HandlerChecker(DisplayThread.getHandler(),
280 "display thread", DEFAULT_TIMEOUT));
Wale Ogunwale517daec2015-04-15 10:27:24 -0700281
282 // Initialize monitor for Binder threads.
283 addMonitor(new BinderThreadMonitor());
Andreas Gampe032a9292017-07-21 11:41:00 -0700284
Narayan Kamatha0a28082017-07-31 15:58:59 +0100285 mOpenFdMonitor = OpenFdMonitor.create();
286
Andreas Gampe032a9292017-07-21 11:41:00 -0700287 // See the notes on DEFAULT_TIMEOUT.
288 assert DB ||
289 DEFAULT_TIMEOUT > ZygoteConnectionConstants.WRAPPED_PID_TIMEOUT_MILLIS;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800290 }
291
Adam Lesinski182f73f2013-12-05 16:48:06 -0800292 public void init(Context context, ActivityManagerService activity) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800293 mResolver = context.getContentResolver();
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800294 mActivity = activity;
295
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800296 context.registerReceiver(new RebootRequestReceiver(),
297 new IntentFilter(Intent.ACTION_REBOOT),
298 android.Manifest.permission.REBOOT, null);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800299 }
300
Christopher Tatec27181c2010-06-30 14:41:09 -0700301 public void processStarted(String name, int pid) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800302 synchronized (this) {
303 if ("com.android.phone".equals(name)) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800304 mPhonePid = pid;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800305 }
306 }
307 }
308
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700309 public void setActivityController(IActivityController controller) {
310 synchronized (this) {
311 mController = controller;
312 }
313 }
314
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700315 public void setAllowRestart(boolean allowRestart) {
316 synchronized (this) {
317 mAllowRestart = allowRestart;
318 }
319 }
320
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800321 public void addMonitor(Monitor monitor) {
322 synchronized (this) {
323 if (isAlive()) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700324 throw new RuntimeException("Monitors can't be added once the Watchdog is running");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800325 }
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700326 mMonitorChecker.addMonitor(monitor);
327 }
328 }
329
Jeff Brown6f357d32014-01-15 20:40:55 -0800330 public void addThread(Handler thread) {
331 addThread(thread, DEFAULT_TIMEOUT);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700332 }
333
Jeff Brown6f357d32014-01-15 20:40:55 -0800334 public void addThread(Handler thread, long timeoutMillis) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700335 synchronized (this) {
336 if (isAlive()) {
337 throw new RuntimeException("Threads can't be added once the Watchdog is running");
338 }
Jeff Brown6f357d32014-01-15 20:40:55 -0800339 final String name = thread.getLooper().getThread().getName();
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700340 mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800341 }
342 }
343
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800344 /**
345 * Perform a full reboot of the system.
346 */
347 void rebootSystem(String reason) {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800348 Slog.i(TAG, "Rebooting system because: " + reason);
Jeff Brown6f357d32014-01-15 20:40:55 -0800349 IPowerManager pms = (IPowerManager)ServiceManager.getService(Context.POWER_SERVICE);
350 try {
351 pms.reboot(false, reason, false);
352 } catch (RemoteException ex) {
353 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800354 }
355
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700356 private int evaluateCheckerCompletionLocked() {
357 int state = COMPLETED;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700358 for (int i=0; i<mHandlerCheckers.size(); i++) {
359 HandlerChecker hc = mHandlerCheckers.get(i);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700360 state = Math.max(state, hc.getCompletionStateLocked());
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700361 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700362 return state;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700363 }
364
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700365 private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
366 ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700367 for (int i=0; i<mHandlerCheckers.size(); i++) {
368 HandlerChecker hc = mHandlerCheckers.get(i);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700369 if (hc.isOverdueLocked()) {
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700370 checkers.add(hc);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700371 }
372 }
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700373 return checkers;
374 }
375
Narayan Kamatha0a28082017-07-31 15:58:59 +0100376 private String describeCheckersLocked(List<HandlerChecker> checkers) {
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700377 StringBuilder builder = new StringBuilder(128);
378 for (int i=0; i<checkers.size(); i++) {
379 if (builder.length() > 0) {
380 builder.append(", ");
381 }
382 builder.append(checkers.get(i).describeBlockedStateLocked());
383 }
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700384 return builder.toString();
385 }
386
Steven Moreland6b47c542017-03-21 12:52:16 -0700387 private ArrayList<Integer> getInterestingHalPids() {
388 try {
389 IServiceManager serviceManager = IServiceManager.getService();
390 ArrayList<IServiceManager.InstanceDebugInfo> dump =
391 serviceManager.debugDump();
392 HashSet<Integer> pids = new HashSet<>();
393 for (IServiceManager.InstanceDebugInfo info : dump) {
394 if (info.pid == IServiceManager.PidConstant.NO_PID) {
395 continue;
396 }
397
398 if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) {
399 continue;
400 }
401
402 pids.add(info.pid);
403 }
404 return new ArrayList<Integer>(pids);
405 } catch (RemoteException e) {
406 return new ArrayList<Integer>();
407 }
408 }
409
410 private ArrayList<Integer> getInterestingNativePids() {
411 ArrayList<Integer> pids = getInterestingHalPids();
412
413 int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST);
414 if (nativePids != null) {
415 pids.ensureCapacity(pids.size() + nativePids.length);
416 for (int i : nativePids) {
417 pids.add(i);
418 }
419 }
420
421 return pids;
422 }
423
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800424 @Override
425 public void run() {
Christopher Tate6ee412d2010-05-28 12:01:56 -0700426 boolean waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800427 while (true) {
Narayan Kamatha0a28082017-07-31 15:58:59 +0100428 final List<HandlerChecker> blockedCheckers;
Jeff Brown7dd2d192013-09-06 15:05:23 -0700429 final String subject;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700430 final boolean allowRestart;
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700431 int debuggerWasConnected = 0;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800432 synchronized (this) {
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700433 long timeout = CHECK_INTERVAL;
434 // Make sure we (re)spin the checkers that have become idle within
435 // this wait-and-check interval
436 for (int i=0; i<mHandlerCheckers.size(); i++) {
437 HandlerChecker hc = mHandlerCheckers.get(i);
438 hc.scheduleCheckLocked();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700439 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800440
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700441 if (debuggerWasConnected > 0) {
442 debuggerWasConnected--;
443 }
444
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800445 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
446 // wait while asleep. If the device is asleep then the thing that we are waiting
Christopher Tate6ee412d2010-05-28 12:01:56 -0700447 // to timeout on is asleep as well and won't have a chance to run, causing a false
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800448 // positive on when to kill things.
449 long start = SystemClock.uptimeMillis();
Michael Wright8fa56f62013-04-01 16:36:05 -0700450 while (timeout > 0) {
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700451 if (Debug.isDebuggerConnected()) {
452 debuggerWasConnected = 2;
453 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800454 try {
Michael Wright8fa56f62013-04-01 16:36:05 -0700455 wait(timeout);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800456 } catch (InterruptedException e) {
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800457 Log.wtf(TAG, e);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800458 }
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700459 if (Debug.isDebuggerConnected()) {
460 debuggerWasConnected = 2;
461 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700462 timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800463 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800464
Narayan Kamatha0a28082017-07-31 15:58:59 +0100465 boolean fdLimitTriggered = false;
466 if (mOpenFdMonitor != null) {
467 fdLimitTriggered = mOpenFdMonitor.monitor();
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800468 }
Michael Wright8fa56f62013-04-01 16:36:05 -0700469
Narayan Kamatha0a28082017-07-31 15:58:59 +0100470 if (!fdLimitTriggered) {
471 final int waitState = evaluateCheckerCompletionLocked();
472 if (waitState == COMPLETED) {
473 // The monitors have returned; reset
474 waitedHalf = false;
475 continue;
476 } else if (waitState == WAITING) {
477 // still waiting but within their configured intervals; back off and recheck
478 continue;
479 } else if (waitState == WAITED_HALF) {
480 if (!waitedHalf) {
481 // We've waited half the deadlock-detection interval. Pull a stack
482 // trace and wait another half.
483 ArrayList<Integer> pids = new ArrayList<Integer>();
484 pids.add(Process.myPid());
485 ActivityManagerService.dumpStackTraces(true, pids, null, null,
486 getInterestingNativePids());
487 waitedHalf = true;
488 }
489 continue;
490 }
491
492 // something is overdue!
493 blockedCheckers = getBlockedCheckersLocked();
494 subject = describeCheckersLocked(blockedCheckers);
495 } else {
496 blockedCheckers = Collections.emptyList();
497 subject = "Open FD high water mark reached";
498 }
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700499 allowRestart = mAllowRestart;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800500 }
501
502 // If we got here, that means that the system is most likely hung.
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700503 // First collect stack traces from all threads of the system process.
504 // Then kill this process so that the system will restart.
Jeff Brown7dd2d192013-09-06 15:05:23 -0700505 EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800506
Steven Moreland6b47c542017-03-21 12:52:16 -0700507 ArrayList<Integer> pids = new ArrayList<>();
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800508 pids.add(Process.myPid());
Dan Egnor4bded072010-03-11 22:00:47 -0800509 if (mPhonePid > 0) pids.add(mPhonePid);
Christopher Tate6ee412d2010-05-28 12:01:56 -0700510 // Pass !waitedHalf so that just in case we somehow wind up here without having
511 // dumped the halfway stacks, we properly re-initialize the trace file.
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800512 final File stack = ActivityManagerService.dumpStackTraces(
Steven Moreland6b47c542017-03-21 12:52:16 -0700513 !waitedHalf, pids, null, null, getInterestingNativePids());
Dan Egnor4bded072010-03-11 22:00:47 -0800514
515 // Give some extra time to make sure the stack traces get written.
516 // The system's been hanging for a minute, another second or two won't hurt much.
517 SystemClock.sleep(2000);
518
Guang Zhu0620c452014-10-29 14:31:48 -0700519 // Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log
520 doSysRq('w');
521 doSysRq('l');
Colin Cross5df1d872012-11-29 11:42:11 -0800522
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800523 // Try to add the error to the dropbox, but assuming that the ActivityManager
524 // itself may be deadlocked. (which has happened, causing this statement to
525 // deadlock and the watchdog as a whole to be ineffective)
526 Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
527 public void run() {
528 mActivity.addErrorToDropBox(
Jeff Sharkeya353d262011-10-28 11:12:06 -0700529 "watchdog", null, "system_server", null, null,
Jeff Brown7dd2d192013-09-06 15:05:23 -0700530 subject, null, stack, null);
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800531 }
532 };
533 dropboxThread.start();
534 try {
535 dropboxThread.join(2000); // wait up to 2 seconds for it to return.
536 } catch (InterruptedException ignored) {}
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800537
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700538 IActivityController controller;
539 synchronized (this) {
540 controller = mController;
541 }
542 if (controller != null) {
543 Slog.i(TAG, "Reporting stuck state to activity controller");
544 try {
545 Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
546 // 1 = keep waiting, -1 = kill system
Jeff Brown7dd2d192013-09-06 15:05:23 -0700547 int res = controller.systemNotResponding(subject);
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700548 if (res >= 0) {
549 Slog.i(TAG, "Activity controller requested to coninue to wait");
550 waitedHalf = false;
551 continue;
552 }
553 } catch (RemoteException e) {
554 }
555 }
556
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700557 // Only kill the process if the debugger is not attached.
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700558 if (Debug.isDebuggerConnected()) {
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700559 debuggerWasConnected = 2;
560 }
561 if (debuggerWasConnected >= 2) {
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700562 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700563 } else if (debuggerWasConnected > 0) {
564 Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process");
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700565 } else if (!allowRestart) {
566 Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
567 } else {
Jeff Brown7dd2d192013-09-06 15:05:23 -0700568 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
Andreas Gampe544416e2018-01-26 11:39:46 -0800569 WatchdogDiagnostics.diagnoseCheckers(blockedCheckers);
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700570 Slog.w(TAG, "*** GOODBYE!");
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700571 Process.killProcess(Process.myPid());
572 System.exit(10);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800573 }
Christopher Tate6ee412d2010-05-28 12:01:56 -0700574
575 waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800576 }
577 }
Christopher Tateecaa7b42010-06-04 14:55:02 -0700578
Guang Zhu0620c452014-10-29 14:31:48 -0700579 private void doSysRq(char c) {
580 try {
581 FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
582 sysrq_trigger.write(c);
583 sysrq_trigger.close();
584 } catch (IOException e) {
585 Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
586 }
587 }
588
Narayan Kamatha0a28082017-07-31 15:58:59 +0100589 public static final class OpenFdMonitor {
590 /**
591 * Number of FDs below the soft limit that we trigger a runtime restart at. This was
592 * chosen arbitrarily, but will need to be at least 6 in order to have a sufficient number
593 * of FDs in reserve to complete a dump.
594 */
595 private static final int FD_HIGH_WATER_MARK = 12;
596
597 private final File mDumpDir;
598 private final File mFdHighWaterMark;
599
600 public static OpenFdMonitor create() {
601 // Only run the FD monitor on debuggable builds (such as userdebug and eng builds).
602 if (!Build.IS_DEBUGGABLE) {
603 return null;
604 }
605
Narayan Kamatha0a28082017-07-31 15:58:59 +0100606 final StructRlimit rlimit;
607 try {
608 rlimit = android.system.Os.getrlimit(OsConstants.RLIMIT_NOFILE);
609 } catch (ErrnoException errno) {
610 Slog.w(TAG, "Error thrown from getrlimit(RLIMIT_NOFILE)", errno);
611 return null;
612 }
613
614 // The assumption we're making here is that FD numbers are allocated (more or less)
615 // sequentially, which is currently (and historically) true since open is currently
616 // specified to always return the lowest-numbered non-open file descriptor for the
617 // current process.
618 //
619 // We do this to avoid having to enumerate the contents of /proc/self/fd in order to
620 // count the number of descriptors open in the process.
621 final File fdThreshold = new File("/proc/self/fd/" + (rlimit.rlim_cur - FD_HIGH_WATER_MARK));
Elliott Hughes4e4caa72018-03-23 11:06:36 -0700622 return new OpenFdMonitor(new File("/data/anr"), fdThreshold);
Narayan Kamatha0a28082017-07-31 15:58:59 +0100623 }
624
625 OpenFdMonitor(File dumpDir, File fdThreshold) {
626 mDumpDir = dumpDir;
627 mFdHighWaterMark = fdThreshold;
628 }
629
Nandana Dutt6647ef52018-07-12 17:02:57 +0100630 /**
631 * Dumps open file descriptors and their full paths to a temporary file in {@code mDumpDir}.
632 */
Narayan Kamatha0a28082017-07-31 15:58:59 +0100633 private void dumpOpenDescriptors() {
Nandana Dutt6647ef52018-07-12 17:02:57 +0100634 // We cannot exec lsof to get more info about open file descriptors because a newly
635 // forked process will not have the permissions to readlink. Instead list all open
636 // descriptors from /proc/pid/fd and resolve them.
637 List<String> dumpInfo = new ArrayList<>();
638 String fdDirPath = String.format("/proc/%d/fd/", Process.myPid());
639 File[] fds = new File(fdDirPath).listFiles();
640 if (fds == null) {
641 dumpInfo.add("Unable to list " + fdDirPath);
642 } else {
643 for (File f : fds) {
644 String fdSymLink = f.getAbsolutePath();
645 String resolvedPath = "";
646 try {
647 resolvedPath = Os.readlink(fdSymLink);
648 } catch (ErrnoException ex) {
649 resolvedPath = ex.getMessage();
650 }
651 dumpInfo.add(fdSymLink + "\t" + resolvedPath);
652 }
653 }
654
655 // Dump the fds & paths to a temp file.
Narayan Kamatha0a28082017-07-31 15:58:59 +0100656 try {
657 File dumpFile = File.createTempFile("anr_fd_", "", mDumpDir);
Nandana Dutt6647ef52018-07-12 17:02:57 +0100658 Path out = Paths.get(dumpFile.getAbsolutePath());
659 Files.write(out, dumpInfo, StandardCharsets.UTF_8);
660 } catch (IOException ex) {
661 Slog.w(TAG, "Unable to write open descriptors to file: " + ex);
Narayan Kamatha0a28082017-07-31 15:58:59 +0100662 }
663 }
664
665 /**
666 * @return {@code true} if the high water mark was breached and a dump was written,
667 * {@code false} otherwise.
668 */
669 public boolean monitor() {
670 if (mFdHighWaterMark.exists()) {
671 dumpOpenDescriptors();
672 return true;
673 }
674
675 return false;
676 }
677 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800678}