blob: 8d46d1e272354d48ad6e41098f3b55450efc4fdb [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -070019import android.app.IActivityController;
20import android.os.Binder;
Narayan Kamatha0a28082017-07-31 15:58:59 +010021import android.os.Build;
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -070022import android.os.RemoteException;
Narayan Kamatha0a28082017-07-31 15:58:59 +010023import android.system.ErrnoException;
24import android.system.OsConstants;
25import android.system.StructRlimit;
Andreas Gampe032a9292017-07-21 11:41:00 -070026import com.android.internal.os.ZygoteConnectionConstants;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080027import com.android.server.am.ActivityManagerService;
28
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080029import android.content.BroadcastReceiver;
30import android.content.ContentResolver;
31import android.content.Context;
32import android.content.Intent;
33import android.content.IntentFilter;
Steven Moreland6b47c542017-03-21 12:52:16 -070034import android.hidl.manager.V1_0.IServiceManager;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080035import android.os.Debug;
36import android.os.Handler;
Jeff Brown6f357d32014-01-15 20:40:55 -080037import android.os.IPowerManager;
John Michelau11641522013-03-18 18:28:23 -050038import android.os.Looper;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080039import android.os.Process;
Suchi Amalapurapu6ffce2e2010-03-08 14:48:40 -080040import android.os.ServiceManager;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080041import android.os.SystemClock;
42import android.os.SystemProperties;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080043import android.util.EventLog;
Dan Egnor9bdc94b2010-03-04 14:20:31 -080044import android.util.Log;
Joe Onorato8a9b2202010-02-26 18:56:32 -080045import android.util.Slog;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080046
Dan Egnor9bdc94b2010-03-04 14:20:31 -080047import java.io.File;
Colin Cross5df1d872012-11-29 11:42:11 -080048import java.io.FileWriter;
49import java.io.IOException;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080050import java.util.ArrayList;
Steven Moreland6b47c542017-03-21 12:52:16 -070051import java.util.Arrays;
Narayan Kamatha0a28082017-07-31 15:58:59 +010052import java.util.Collections;
Steven Moreland6b47c542017-03-21 12:52:16 -070053import java.util.HashSet;
54import java.util.List;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080055
/**
 * Watchdog thread that periodically schedules checks on the monitored handler threads and
 * their monitors, and kills this process if any of them fails to return within its timeout.
 */
57public class Watchdog extends Thread {
58 static final String TAG = "Watchdog";
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080059
60 // Set this to true to use debug default values.
61 static final boolean DB = false;
62
Christopher Tateecaa7b42010-06-04 14:55:02 -070063 // Set this to true to have the watchdog record kernel thread stacks when it fires
64 static final boolean RECORD_KERNEL_THREADS = true;
65
Andreas Gampe032a9292017-07-21 11:41:00 -070066 // Note 1: Do not lower this value below thirty seconds without tightening the invoke-with
67 // timeout in com.android.internal.os.ZygoteConnection, or wrapped applications
68 // can trigger the watchdog.
69 // Note 2: The debug value is already below the wait time in ZygoteConnection. Wrapped
70 // applications may not work with a debug build. CTS will fail.
Christopher Tatee6f81cf2013-10-23 17:28:27 -070071 static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
72 static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;
73
74 // These are temporally ordered: larger values as lateness increases
75 static final int COMPLETED = 0;
76 static final int WAITING = 1;
77 static final int WAITED_HALF = 2;
78 static final int OVERDUE = 3;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080079
Igor Murashkin44d04aa2013-10-23 10:56:02 -070080 // Which native processes to dump into dropbox's stack traces
81 public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
Andy Hung3a64ecb2016-03-09 13:55:58 -080082 "/system/bin/audioserver",
Andy Hung689574a2016-04-13 19:27:43 -070083 "/system/bin/cameraserver",
84 "/system/bin/drmserver",
85 "/system/bin/mediadrmserver",
Dianne Hackbornf72467a2012-06-08 17:23:59 -070086 "/system/bin/mediaserver",
87 "/system/bin/sdcard",
Eric Laurent05d4e352016-03-14 18:49:08 -070088 "/system/bin/surfaceflinger",
Andy Hung689574a2016-04-13 19:27:43 -070089 "media.extractor", // system/bin/mediaextractor
Chong Zhange55e6e02017-06-02 10:52:04 -070090 "media.codec", // vendor/bin/hw/android.hardware.media.omx@1.0-service
Andreas Gampecf9e79b2016-05-11 18:41:25 -070091 "com.android.bluetooth", // Bluetooth service
Dianne Hackbornf72467a2012-06-08 17:23:59 -070092 };
93
Steven Moreland6b47c542017-03-21 12:52:16 -070094 public static final List<String> HAL_INTERFACES_OF_INTEREST = Arrays.asList(
95 "android.hardware.audio@2.0::IDevicesFactory",
96 "android.hardware.bluetooth@1.0::IBluetoothHci",
97 "android.hardware.camera.provider@2.4::ICameraProvider",
Chia-I Wu74debcd2017-04-21 11:14:22 -070098 "android.hardware.graphics.composer@2.1::IComposer",
Peng Xu102122fb2017-07-11 21:12:11 -070099 "android.hardware.media.omx@1.0::IOmx",
100 "android.hardware.sensors@1.0::ISensors",
101 "android.hardware.vr@1.0::IVr"
Steven Moreland6b47c542017-03-21 12:52:16 -0700102 );
103
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800104 static Watchdog sWatchdog;
105
106 /* This handler will be used to post message back onto the main thread */
Wale Ogunwaled7fdd022015-04-13 16:22:38 -0700107 final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700108 final HandlerChecker mMonitorChecker;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800109 ContentResolver mResolver;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800110 ActivityManagerService mActivity;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800111
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800112 int mPhonePid;
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700113 IActivityController mController;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700114 boolean mAllowRestart = true;
Narayan Kamatha0a28082017-07-31 15:58:59 +0100115 final OpenFdMonitor mOpenFdMonitor;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800116
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800117 /**
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700118 * Used for checking status of handle threads and scheduling monitor callbacks.
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800119 */
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700120 public final class HandlerChecker implements Runnable {
121 private final Handler mHandler;
122 private final String mName;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700123 private final long mWaitMax;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700124 private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700125 private boolean mCompleted;
126 private Monitor mCurrentMonitor;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700127 private long mStartTime;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700128
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700129 HandlerChecker(Handler handler, String name, long waitMaxMillis) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700130 mHandler = handler;
131 mName = name;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700132 mWaitMax = waitMaxMillis;
133 mCompleted = true;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700134 }
135
136 public void addMonitor(Monitor monitor) {
137 mMonitors.add(monitor);
138 }
139
140 public void scheduleCheckLocked() {
Jeff Brown6c7b41a2015-02-26 14:43:53 -0800141 if (mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling()) {
142 // If the target looper has recently been polling, then
Dianne Hackbornefa92b22013-05-03 14:11:43 -0700143 // there is no reason to enqueue our checker on it since that
144 // is as good as it not being deadlocked. This avoid having
145 // to do a context switch to check the thread. Note that we
146 // only do this if mCheckReboot is false and we have no
147 // monitors, since those would need to be executed at this point.
148 mCompleted = true;
149 return;
150 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700151
152 if (!mCompleted) {
153 // we already have a check in flight, so no need
154 return;
155 }
156
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700157 mCompleted = false;
158 mCurrentMonitor = null;
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700159 mStartTime = SystemClock.uptimeMillis();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700160 mHandler.postAtFrontOfQueue(this);
161 }
162
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700163 public boolean isOverdueLocked() {
164 return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
165 }
166
167 public int getCompletionStateLocked() {
168 if (mCompleted) {
169 return COMPLETED;
170 } else {
171 long latency = SystemClock.uptimeMillis() - mStartTime;
172 if (latency < mWaitMax/2) {
173 return WAITING;
174 } else if (latency < mWaitMax) {
175 return WAITED_HALF;
176 }
177 }
178 return OVERDUE;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700179 }
180
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700181 public Thread getThread() {
182 return mHandler.getLooper().getThread();
183 }
184
185 public String getName() {
186 return mName;
187 }
188
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700189 public String describeBlockedStateLocked() {
Jeff Brown7dd2d192013-09-06 15:05:23 -0700190 if (mCurrentMonitor == null) {
191 return "Blocked in handler on " + mName + " (" + getThread().getName() + ")";
192 } else {
193 return "Blocked in monitor " + mCurrentMonitor.getClass().getName()
194 + " on " + mName + " (" + getThread().getName() + ")";
195 }
John Michelau11641522013-03-18 18:28:23 -0500196 }
197
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800198 @Override
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700199 public void run() {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700200 final int size = mMonitors.size();
201 for (int i = 0 ; i < size ; i++) {
202 synchronized (Watchdog.this) {
203 mCurrentMonitor = mMonitors.get(i);
204 }
205 mCurrentMonitor.monitor();
206 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800207
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700208 synchronized (Watchdog.this) {
209 mCompleted = true;
210 mCurrentMonitor = null;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800211 }
212 }
213 }
214
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800215 final class RebootRequestReceiver extends BroadcastReceiver {
216 @Override
217 public void onReceive(Context c, Intent intent) {
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700218 if (intent.getIntExtra("nowait", 0) != 0) {
219 rebootSystem("Received ACTION_REBOOT broadcast");
220 return;
221 }
222 Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800223 }
224 }
225
Wale Ogunwale517daec2015-04-15 10:27:24 -0700226 /** Monitor for checking the availability of binder threads. The monitor will block until
227 * there is a binder thread available to process in coming IPCs to make sure other processes
228 * can still communicate with the service.
229 */
230 private static final class BinderThreadMonitor implements Watchdog.Monitor {
231 @Override
232 public void monitor() {
233 Binder.blockUntilThreadAvailable();
234 }
235 }
236
/** Callback the watchdog invokes on a watched thread during each check. */
public interface Monitor {
    /** Called during a check; the check counts as blocked until this method returns. */
    void monitor();
}
240
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800241 public static Watchdog getInstance() {
242 if (sWatchdog == null) {
243 sWatchdog = new Watchdog();
244 }
245
246 return sWatchdog;
247 }
248
249 private Watchdog() {
250 super("watchdog");
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700251 // Initialize handler checkers for each common thread we want to check. Note
252 // that we are not currently checking the background thread, since it can
253 // potentially hold longer running operations with no guarantees about the timeliness
254 // of operations there.
255
256 // The shared foreground thread is the main checker. It is where we
257 // will also dispatch monitor checks and do other work.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700258 mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
259 "foreground thread", DEFAULT_TIMEOUT);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700260 mHandlerCheckers.add(mMonitorChecker);
261 // Add checker for main thread. We only do a quick check since there
262 // can be UI running on the thread.
263 mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700264 "main thread", DEFAULT_TIMEOUT));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700265 // Add checker for shared UI thread.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700266 mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
267 "ui thread", DEFAULT_TIMEOUT));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700268 // And also check IO thread.
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700269 mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
270 "i/o thread", DEFAULT_TIMEOUT));
Jeff Brown4ccb8232014-01-16 22:16:42 -0800271 // And the display thread.
272 mHandlerCheckers.add(new HandlerChecker(DisplayThread.getHandler(),
273 "display thread", DEFAULT_TIMEOUT));
Wale Ogunwale517daec2015-04-15 10:27:24 -0700274
275 // Initialize monitor for Binder threads.
276 addMonitor(new BinderThreadMonitor());
Andreas Gampe032a9292017-07-21 11:41:00 -0700277
Narayan Kamatha0a28082017-07-31 15:58:59 +0100278 mOpenFdMonitor = OpenFdMonitor.create();
279
Andreas Gampe032a9292017-07-21 11:41:00 -0700280 // See the notes on DEFAULT_TIMEOUT.
281 assert DB ||
282 DEFAULT_TIMEOUT > ZygoteConnectionConstants.WRAPPED_PID_TIMEOUT_MILLIS;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800283 }
284
Adam Lesinski182f73f2013-12-05 16:48:06 -0800285 public void init(Context context, ActivityManagerService activity) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800286 mResolver = context.getContentResolver();
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800287 mActivity = activity;
288
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800289 context.registerReceiver(new RebootRequestReceiver(),
290 new IntentFilter(Intent.ACTION_REBOOT),
291 android.Manifest.permission.REBOOT, null);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800292 }
293
Christopher Tatec27181c2010-06-30 14:41:09 -0700294 public void processStarted(String name, int pid) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800295 synchronized (this) {
296 if ("com.android.phone".equals(name)) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800297 mPhonePid = pid;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800298 }
299 }
300 }
301
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700302 public void setActivityController(IActivityController controller) {
303 synchronized (this) {
304 mController = controller;
305 }
306 }
307
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700308 public void setAllowRestart(boolean allowRestart) {
309 synchronized (this) {
310 mAllowRestart = allowRestart;
311 }
312 }
313
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800314 public void addMonitor(Monitor monitor) {
315 synchronized (this) {
316 if (isAlive()) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700317 throw new RuntimeException("Monitors can't be added once the Watchdog is running");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800318 }
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700319 mMonitorChecker.addMonitor(monitor);
320 }
321 }
322
Jeff Brown6f357d32014-01-15 20:40:55 -0800323 public void addThread(Handler thread) {
324 addThread(thread, DEFAULT_TIMEOUT);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700325 }
326
Jeff Brown6f357d32014-01-15 20:40:55 -0800327 public void addThread(Handler thread, long timeoutMillis) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700328 synchronized (this) {
329 if (isAlive()) {
330 throw new RuntimeException("Threads can't be added once the Watchdog is running");
331 }
Jeff Brown6f357d32014-01-15 20:40:55 -0800332 final String name = thread.getLooper().getThread().getName();
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700333 mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800334 }
335 }
336
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800337 /**
338 * Perform a full reboot of the system.
339 */
340 void rebootSystem(String reason) {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800341 Slog.i(TAG, "Rebooting system because: " + reason);
Jeff Brown6f357d32014-01-15 20:40:55 -0800342 IPowerManager pms = (IPowerManager)ServiceManager.getService(Context.POWER_SERVICE);
343 try {
344 pms.reboot(false, reason, false);
345 } catch (RemoteException ex) {
346 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800347 }
348
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700349 private int evaluateCheckerCompletionLocked() {
350 int state = COMPLETED;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700351 for (int i=0; i<mHandlerCheckers.size(); i++) {
352 HandlerChecker hc = mHandlerCheckers.get(i);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700353 state = Math.max(state, hc.getCompletionStateLocked());
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700354 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700355 return state;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700356 }
357
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700358 private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
359 ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700360 for (int i=0; i<mHandlerCheckers.size(); i++) {
361 HandlerChecker hc = mHandlerCheckers.get(i);
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700362 if (hc.isOverdueLocked()) {
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700363 checkers.add(hc);
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700364 }
365 }
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700366 return checkers;
367 }
368
Narayan Kamatha0a28082017-07-31 15:58:59 +0100369 private String describeCheckersLocked(List<HandlerChecker> checkers) {
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700370 StringBuilder builder = new StringBuilder(128);
371 for (int i=0; i<checkers.size(); i++) {
372 if (builder.length() > 0) {
373 builder.append(", ");
374 }
375 builder.append(checkers.get(i).describeBlockedStateLocked());
376 }
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700377 return builder.toString();
378 }
379
Steven Moreland6b47c542017-03-21 12:52:16 -0700380 private ArrayList<Integer> getInterestingHalPids() {
381 try {
382 IServiceManager serviceManager = IServiceManager.getService();
383 ArrayList<IServiceManager.InstanceDebugInfo> dump =
384 serviceManager.debugDump();
385 HashSet<Integer> pids = new HashSet<>();
386 for (IServiceManager.InstanceDebugInfo info : dump) {
387 if (info.pid == IServiceManager.PidConstant.NO_PID) {
388 continue;
389 }
390
391 if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) {
392 continue;
393 }
394
395 pids.add(info.pid);
396 }
397 return new ArrayList<Integer>(pids);
398 } catch (RemoteException e) {
399 return new ArrayList<Integer>();
400 }
401 }
402
403 private ArrayList<Integer> getInterestingNativePids() {
404 ArrayList<Integer> pids = getInterestingHalPids();
405
406 int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST);
407 if (nativePids != null) {
408 pids.ensureCapacity(pids.size() + nativePids.length);
409 for (int i : nativePids) {
410 pids.add(i);
411 }
412 }
413
414 return pids;
415 }
416
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800417 @Override
418 public void run() {
Christopher Tate6ee412d2010-05-28 12:01:56 -0700419 boolean waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800420 while (true) {
Narayan Kamatha0a28082017-07-31 15:58:59 +0100421 final List<HandlerChecker> blockedCheckers;
Jeff Brown7dd2d192013-09-06 15:05:23 -0700422 final String subject;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700423 final boolean allowRestart;
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700424 int debuggerWasConnected = 0;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800425 synchronized (this) {
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700426 long timeout = CHECK_INTERVAL;
427 // Make sure we (re)spin the checkers that have become idle within
428 // this wait-and-check interval
429 for (int i=0; i<mHandlerCheckers.size(); i++) {
430 HandlerChecker hc = mHandlerCheckers.get(i);
431 hc.scheduleCheckLocked();
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700432 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800433
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700434 if (debuggerWasConnected > 0) {
435 debuggerWasConnected--;
436 }
437
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800438 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
439 // wait while asleep. If the device is asleep then the thing that we are waiting
Christopher Tate6ee412d2010-05-28 12:01:56 -0700440 // to timeout on is asleep as well and won't have a chance to run, causing a false
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800441 // positive on when to kill things.
442 long start = SystemClock.uptimeMillis();
Michael Wright8fa56f62013-04-01 16:36:05 -0700443 while (timeout > 0) {
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700444 if (Debug.isDebuggerConnected()) {
445 debuggerWasConnected = 2;
446 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800447 try {
Michael Wright8fa56f62013-04-01 16:36:05 -0700448 wait(timeout);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800449 } catch (InterruptedException e) {
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800450 Log.wtf(TAG, e);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800451 }
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700452 if (Debug.isDebuggerConnected()) {
453 debuggerWasConnected = 2;
454 }
Christopher Tatee6f81cf2013-10-23 17:28:27 -0700455 timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800456 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800457
Narayan Kamatha0a28082017-07-31 15:58:59 +0100458 boolean fdLimitTriggered = false;
459 if (mOpenFdMonitor != null) {
460 fdLimitTriggered = mOpenFdMonitor.monitor();
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800461 }
Michael Wright8fa56f62013-04-01 16:36:05 -0700462
Narayan Kamatha0a28082017-07-31 15:58:59 +0100463 if (!fdLimitTriggered) {
464 final int waitState = evaluateCheckerCompletionLocked();
465 if (waitState == COMPLETED) {
466 // The monitors have returned; reset
467 waitedHalf = false;
468 continue;
469 } else if (waitState == WAITING) {
470 // still waiting but within their configured intervals; back off and recheck
471 continue;
472 } else if (waitState == WAITED_HALF) {
473 if (!waitedHalf) {
474 // We've waited half the deadlock-detection interval. Pull a stack
475 // trace and wait another half.
476 ArrayList<Integer> pids = new ArrayList<Integer>();
477 pids.add(Process.myPid());
478 ActivityManagerService.dumpStackTraces(true, pids, null, null,
479 getInterestingNativePids());
480 waitedHalf = true;
481 }
482 continue;
483 }
484
485 // something is overdue!
486 blockedCheckers = getBlockedCheckersLocked();
487 subject = describeCheckersLocked(blockedCheckers);
488 } else {
489 blockedCheckers = Collections.emptyList();
490 subject = "Open FD high water mark reached";
491 }
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700492 allowRestart = mAllowRestart;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800493 }
494
495 // If we got here, that means that the system is most likely hung.
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700496 // First collect stack traces from all threads of the system process.
497 // Then kill this process so that the system will restart.
Jeff Brown7dd2d192013-09-06 15:05:23 -0700498 EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800499
Steven Moreland6b47c542017-03-21 12:52:16 -0700500 ArrayList<Integer> pids = new ArrayList<>();
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800501 pids.add(Process.myPid());
Dan Egnor4bded072010-03-11 22:00:47 -0800502 if (mPhonePid > 0) pids.add(mPhonePid);
Christopher Tate6ee412d2010-05-28 12:01:56 -0700503 // Pass !waitedHalf so that just in case we somehow wind up here without having
504 // dumped the halfway stacks, we properly re-initialize the trace file.
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800505 final File stack = ActivityManagerService.dumpStackTraces(
Steven Moreland6b47c542017-03-21 12:52:16 -0700506 !waitedHalf, pids, null, null, getInterestingNativePids());
Dan Egnor4bded072010-03-11 22:00:47 -0800507
508 // Give some extra time to make sure the stack traces get written.
509 // The system's been hanging for a minute, another second or two won't hurt much.
510 SystemClock.sleep(2000);
511
Christopher Tateecaa7b42010-06-04 14:55:02 -0700512 // Pull our own kernel thread stacks as well if we're configured for that
513 if (RECORD_KERNEL_THREADS) {
514 dumpKernelStackTraces();
515 }
516
Guang Zhu0620c452014-10-29 14:31:48 -0700517 // Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log
518 doSysRq('w');
519 doSysRq('l');
Colin Cross5df1d872012-11-29 11:42:11 -0800520
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800521 // Try to add the error to the dropbox, but assuming that the ActivityManager
522 // itself may be deadlocked. (which has happened, causing this statement to
523 // deadlock and the watchdog as a whole to be ineffective)
524 Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
525 public void run() {
526 mActivity.addErrorToDropBox(
Jeff Sharkeya353d262011-10-28 11:12:06 -0700527 "watchdog", null, "system_server", null, null,
Jeff Brown7dd2d192013-09-06 15:05:23 -0700528 subject, null, stack, null);
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800529 }
530 };
531 dropboxThread.start();
532 try {
533 dropboxThread.join(2000); // wait up to 2 seconds for it to return.
534 } catch (InterruptedException ignored) {}
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800535
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700536 IActivityController controller;
537 synchronized (this) {
538 controller = mController;
539 }
540 if (controller != null) {
541 Slog.i(TAG, "Reporting stuck state to activity controller");
542 try {
543 Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
544 // 1 = keep waiting, -1 = kill system
Jeff Brown7dd2d192013-09-06 15:05:23 -0700545 int res = controller.systemNotResponding(subject);
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700546 if (res >= 0) {
547 Slog.i(TAG, "Activity controller requested to coninue to wait");
548 waitedHalf = false;
549 continue;
550 }
551 } catch (RemoteException e) {
552 }
553 }
554
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700555 // Only kill the process if the debugger is not attached.
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700556 if (Debug.isDebuggerConnected()) {
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700557 debuggerWasConnected = 2;
558 }
559 if (debuggerWasConnected >= 2) {
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700560 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
Dianne Hackborn89ad4562014-08-24 16:45:38 -0700561 } else if (debuggerWasConnected > 0) {
562 Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process");
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700563 } else if (!allowRestart) {
564 Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
565 } else {
Jeff Brown7dd2d192013-09-06 15:05:23 -0700566 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700567 for (int i=0; i<blockedCheckers.size(); i++) {
568 Slog.w(TAG, blockedCheckers.get(i).getName() + " stack trace:");
569 StackTraceElement[] stackTrace
570 = blockedCheckers.get(i).getThread().getStackTrace();
571 for (StackTraceElement element: stackTrace) {
572 Slog.w(TAG, " at " + element);
573 }
Michael Wright56a6c662013-04-30 20:13:07 -0700574 }
Dianne Hackbornfa012b32013-05-10 15:23:28 -0700575 Slog.w(TAG, "*** GOODBYE!");
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700576 Process.killProcess(Process.myPid());
577 System.exit(10);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800578 }
Christopher Tate6ee412d2010-05-28 12:01:56 -0700579
580 waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800581 }
582 }
Christopher Tateecaa7b42010-06-04 14:55:02 -0700583
Guang Zhu0620c452014-10-29 14:31:48 -0700584 private void doSysRq(char c) {
585 try {
586 FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
587 sysrq_trigger.write(c);
588 sysrq_trigger.close();
589 } catch (IOException e) {
590 Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
591 }
592 }
593
Christopher Tateecaa7b42010-06-04 14:55:02 -0700594 private File dumpKernelStackTraces() {
595 String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
596 if (tracesPath == null || tracesPath.length() == 0) {
597 return null;
598 }
599
600 native_dumpKernelStacks(tracesPath);
601 return new File(tracesPath);
602 }
603
604 private native void native_dumpKernelStacks(String tracesPath);
Narayan Kamatha0a28082017-07-31 15:58:59 +0100605
606 public static final class OpenFdMonitor {
607 /**
608 * Number of FDs below the soft limit that we trigger a runtime restart at. This was
609 * chosen arbitrarily, but will need to be at least 6 in order to have a sufficient number
610 * of FDs in reserve to complete a dump.
611 */
612 private static final int FD_HIGH_WATER_MARK = 12;
613
614 private final File mDumpDir;
615 private final File mFdHighWaterMark;
616
617 public static OpenFdMonitor create() {
618 // Only run the FD monitor on debuggable builds (such as userdebug and eng builds).
619 if (!Build.IS_DEBUGGABLE) {
620 return null;
621 }
622
623 // Don't run the FD monitor on builds that have a global ANR trace file. We're using
624 // the ANR trace directory as a quick hack in order to get these traces in bugreports
625 // and we wouldn't want to overwrite something important.
626 final String dumpDirStr = SystemProperties.get("dalvik.vm.stack-trace-dir", "");
627 if (dumpDirStr.isEmpty()) {
628 return null;
629 }
630
631 final StructRlimit rlimit;
632 try {
633 rlimit = android.system.Os.getrlimit(OsConstants.RLIMIT_NOFILE);
634 } catch (ErrnoException errno) {
635 Slog.w(TAG, "Error thrown from getrlimit(RLIMIT_NOFILE)", errno);
636 return null;
637 }
638
639 // The assumption we're making here is that FD numbers are allocated (more or less)
640 // sequentially, which is currently (and historically) true since open is currently
641 // specified to always return the lowest-numbered non-open file descriptor for the
642 // current process.
643 //
644 // We do this to avoid having to enumerate the contents of /proc/self/fd in order to
645 // count the number of descriptors open in the process.
646 final File fdThreshold = new File("/proc/self/fd/" + (rlimit.rlim_cur - FD_HIGH_WATER_MARK));
647 return new OpenFdMonitor(new File(dumpDirStr), fdThreshold);
648 }
649
650 OpenFdMonitor(File dumpDir, File fdThreshold) {
651 mDumpDir = dumpDir;
652 mFdHighWaterMark = fdThreshold;
653 }
654
655 private void dumpOpenDescriptors() {
656 try {
657 File dumpFile = File.createTempFile("anr_fd_", "", mDumpDir);
658 java.lang.Process proc = new ProcessBuilder()
659 .command("/system/bin/lsof", "-p", String.valueOf(Process.myPid()))
660 .redirectErrorStream(true)
661 .redirectOutput(dumpFile)
662 .start();
663
664 int returnCode = proc.waitFor();
665 if (returnCode != 0) {
666 Slog.w(TAG, "Unable to dump open descriptors, lsof return code: "
667 + returnCode);
668 dumpFile.delete();
669 }
670 } catch (IOException | InterruptedException ex) {
671 Slog.w(TAG, "Unable to dump open descriptors: " + ex);
672 }
673 }
674
675 /**
676 * @return {@code true} if the high water mark was breached and a dump was written,
677 * {@code false} otherwise.
678 */
679 public boolean monitor() {
680 if (mFdHighWaterMark.exists()) {
681 dumpOpenDescriptors();
682 return true;
683 }
684
685 return false;
686 }
687 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800688}