blob: 070b8e59d09d537f0cd65572a6d119144d5b89d0 [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -070019import android.app.IActivityController;
20import android.os.Binder;
21import android.os.RemoteException;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080022import com.android.server.am.ActivityManagerService;
Jeff Brown4f8ecd82012-06-18 18:29:13 -070023import com.android.server.power.PowerManagerService;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080024
25import android.app.AlarmManager;
26import android.app.PendingIntent;
27import android.content.BroadcastReceiver;
28import android.content.ContentResolver;
29import android.content.Context;
30import android.content.Intent;
31import android.content.IntentFilter;
Jeff Browna4d82042012-10-02 19:11:19 -070032import android.os.BatteryManager;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080033import android.os.Debug;
34import android.os.Handler;
John Michelau11641522013-03-18 18:28:23 -050035import android.os.Looper;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080036import android.os.Process;
Suchi Amalapurapu6ffce2e2010-03-08 14:48:40 -080037import android.os.ServiceManager;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080038import android.os.SystemClock;
39import android.os.SystemProperties;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080040import android.util.EventLog;
Dan Egnor9bdc94b2010-03-04 14:20:31 -080041import android.util.Log;
Joe Onorato8a9b2202010-02-26 18:56:32 -080042import android.util.Slog;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080043
Dan Egnor9bdc94b2010-03-04 14:20:31 -080044import java.io.File;
Colin Cross5df1d872012-11-29 11:42:11 -080045import java.io.FileWriter;
46import java.io.IOException;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080047import java.util.ArrayList;
48import java.util.Calendar;
49
/** This class periodically runs its monitors and handler checks, killing this process if they do not return in time. **/
51public class Watchdog extends Thread {
52 static final String TAG = "Watchdog";
Joe Onorato43a17652011-04-06 19:22:23 -070053 static final boolean localLOGV = false || false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080054
55 // Set this to true to use debug default values.
56 static final boolean DB = false;
57
Christopher Tateecaa7b42010-06-04 14:55:02 -070058 // Set this to true to have the watchdog record kernel thread stacks when it fires
59 static final boolean RECORD_KERNEL_THREADS = true;
60
Dianne Hackbornf6438b12013-05-09 18:53:48 -070061 static final int TIME_TO_WAIT = DB ? 5*1000 : 30*1000;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080062
Dianne Hackbornf72467a2012-06-08 17:23:59 -070063 static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
64 "/system/bin/mediaserver",
65 "/system/bin/sdcard",
66 "/system/bin/surfaceflinger"
67 };
68
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080069 static Watchdog sWatchdog;
70
71 /* This handler will be used to post message back onto the main thread */
Dianne Hackborn8d044e82013-04-30 17:24:15 -070072 final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<HandlerChecker>();
73 final HandlerChecker mMonitorChecker;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080074 ContentResolver mResolver;
75 BatteryService mBattery;
76 PowerManagerService mPower;
77 AlarmManagerService mAlarm;
78 ActivityManagerService mActivity;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080079
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080080 int mPhonePid;
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -070081 IActivityController mController;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -070082 boolean mAllowRestart = true;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080083
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080084 /**
Dianne Hackborn8d044e82013-04-30 17:24:15 -070085 * Used for checking status of handle threads and scheduling monitor callbacks.
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080086 */
Dianne Hackborn8d044e82013-04-30 17:24:15 -070087 public final class HandlerChecker implements Runnable {
88 private final Handler mHandler;
89 private final String mName;
90 private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
Dianne Hackborn8d044e82013-04-30 17:24:15 -070091 private boolean mCompleted;
92 private Monitor mCurrentMonitor;
93
Dianne Hackbornf6438b12013-05-09 18:53:48 -070094 HandlerChecker(Handler handler, String name) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -070095 mHandler = handler;
96 mName = name;
Dianne Hackborn8d044e82013-04-30 17:24:15 -070097 }
98
99 public void addMonitor(Monitor monitor) {
100 mMonitors.add(monitor);
101 }
102
103 public void scheduleCheckLocked() {
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700104 if (mMonitors.size() == 0 && mHandler.getLooper().isIdling()) {
Dianne Hackbornefa92b22013-05-03 14:11:43 -0700105 // If the target looper is or just recently was idling, then
106 // there is no reason to enqueue our checker on it since that
107 // is as good as it not being deadlocked. This avoid having
108 // to do a context switch to check the thread. Note that we
109 // only do this if mCheckReboot is false and we have no
110 // monitors, since those would need to be executed at this point.
111 mCompleted = true;
112 return;
113 }
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700114 mCompleted = false;
115 mCurrentMonitor = null;
116 mHandler.postAtFrontOfQueue(this);
117 }
118
119 public boolean isCompletedLocked() {
120 return mCompleted;
121 }
122
123 public String describeBlockedStateLocked() {
124 return mCurrentMonitor == null ? mName : mCurrentMonitor.getClass().getName();
John Michelau11641522013-03-18 18:28:23 -0500125 }
126
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800127 @Override
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700128 public void run() {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700129 final int size = mMonitors.size();
130 for (int i = 0 ; i < size ; i++) {
131 synchronized (Watchdog.this) {
132 mCurrentMonitor = mMonitors.get(i);
133 }
134 mCurrentMonitor.monitor();
135 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800136
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700137 synchronized (Watchdog.this) {
138 mCompleted = true;
139 mCurrentMonitor = null;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800140 }
141 }
142 }
143
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800144 final class RebootRequestReceiver extends BroadcastReceiver {
145 @Override
146 public void onReceive(Context c, Intent intent) {
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700147 if (intent.getIntExtra("nowait", 0) != 0) {
148 rebootSystem("Received ACTION_REBOOT broadcast");
149 return;
150 }
151 Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800152 }
153 }
154
/**
 * Callback registered through {@code addMonitor}. The watchdog treats a
 * call that does not return in time as a hang.
 */
public interface Monitor {
    /** Invoked by the watchdog's checker; must return for the check to complete. */
    void monitor();
}
158
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800159 public static Watchdog getInstance() {
160 if (sWatchdog == null) {
161 sWatchdog = new Watchdog();
162 }
163
164 return sWatchdog;
165 }
166
167 private Watchdog() {
168 super("watchdog");
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700169 // Initialize handler checkers for each common thread we want to check. Note
170 // that we are not currently checking the background thread, since it can
171 // potentially hold longer running operations with no guarantees about the timeliness
172 // of operations there.
173
174 // The shared foreground thread is the main checker. It is where we
175 // will also dispatch monitor checks and do other work.
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700176 mMonitorChecker = new HandlerChecker(FgThread.getHandler(), "foreground thread");
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700177 mHandlerCheckers.add(mMonitorChecker);
178 // Add checker for main thread. We only do a quick check since there
179 // can be UI running on the thread.
180 mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700181 "main thread"));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700182 // Add checker for shared UI thread.
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700183 mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(), "ui thread"));
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700184 // And also check IO thread.
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700185 mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(), "i/o thread"));
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800186 }
187
188 public void init(Context context, BatteryService battery,
189 PowerManagerService power, AlarmManagerService alarm,
190 ActivityManagerService activity) {
191 mResolver = context.getContentResolver();
192 mBattery = battery;
193 mPower = power;
194 mAlarm = alarm;
195 mActivity = activity;
196
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800197 context.registerReceiver(new RebootRequestReceiver(),
198 new IntentFilter(Intent.ACTION_REBOOT),
199 android.Manifest.permission.REBOOT, null);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800200 }
201
Christopher Tatec27181c2010-06-30 14:41:09 -0700202 public void processStarted(String name, int pid) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800203 synchronized (this) {
204 if ("com.android.phone".equals(name)) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800205 mPhonePid = pid;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800206 }
207 }
208 }
209
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700210 public void setActivityController(IActivityController controller) {
211 synchronized (this) {
212 mController = controller;
213 }
214 }
215
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700216 public void setAllowRestart(boolean allowRestart) {
217 synchronized (this) {
218 mAllowRestart = allowRestart;
219 }
220 }
221
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800222 public void addMonitor(Monitor monitor) {
223 synchronized (this) {
224 if (isAlive()) {
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700225 throw new RuntimeException("Monitors can't be added once the Watchdog is running");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800226 }
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700227 mMonitorChecker.addMonitor(monitor);
228 }
229 }
230
231 public void addThread(Handler thread, String name) {
232 synchronized (this) {
233 if (isAlive()) {
234 throw new RuntimeException("Threads can't be added once the Watchdog is running");
235 }
Dianne Hackbornf6438b12013-05-09 18:53:48 -0700236 mHandlerCheckers.add(new HandlerChecker(thread, name));
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800237 }
238 }
239
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800240 /**
241 * Perform a full reboot of the system.
242 */
243 void rebootSystem(String reason) {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800244 Slog.i(TAG, "Rebooting system because: " + reason);
Suchi Amalapurapu6ffce2e2010-03-08 14:48:40 -0800245 PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
Dianne Hackbornc428aae2012-10-03 16:38:22 -0700246 pms.reboot(false, reason, false);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800247 }
248
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700249 private boolean haveAllCheckersCompletedLocked() {
250 for (int i=0; i<mHandlerCheckers.size(); i++) {
251 HandlerChecker hc = mHandlerCheckers.get(i);
252 if (!hc.isCompletedLocked()) {
253 return false;
254 }
255 }
256 return true;
257 }
258
259 private String describeBlockedCheckersLocked() {
260 StringBuilder builder = new StringBuilder(128);
261 for (int i=0; i<mHandlerCheckers.size(); i++) {
262 HandlerChecker hc = mHandlerCheckers.get(i);
263 if (!hc.isCompletedLocked()) {
264 if (builder.length() > 0) {
265 builder.append(", ");
266 }
267 builder.append(hc.describeBlockedStateLocked());
268 }
269 }
270 return builder.toString();
271 }
272
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800273 @Override
274 public void run() {
Christopher Tate6ee412d2010-05-28 12:01:56 -0700275 boolean waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800276 while (true) {
Michael Wright8fa56f62013-04-01 16:36:05 -0700277 final String name;
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700278 final boolean allowRestart;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800279 synchronized (this) {
280 long timeout = TIME_TO_WAIT;
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700281 if (!waitedHalf) {
282 // If we are not at the half-point of waiting, perform a
283 // new set of checks. Otherwise we are still waiting for a previous set.
284 for (int i=0; i<mHandlerCheckers.size(); i++) {
285 HandlerChecker hc = mHandlerCheckers.get(i);
286 hc.scheduleCheckLocked();
287 }
288 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800289
290 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
291 // wait while asleep. If the device is asleep then the thing that we are waiting
Christopher Tate6ee412d2010-05-28 12:01:56 -0700292 // to timeout on is asleep as well and won't have a chance to run, causing a false
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800293 // positive on when to kill things.
294 long start = SystemClock.uptimeMillis();
Michael Wright8fa56f62013-04-01 16:36:05 -0700295 while (timeout > 0) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800296 try {
Michael Wright8fa56f62013-04-01 16:36:05 -0700297 wait(timeout);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800298 } catch (InterruptedException e) {
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800299 Log.wtf(TAG, e);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800300 }
301 timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800302 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800303
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700304 if (haveAllCheckersCompletedLocked()) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800305 // The monitors have returned.
Christopher Tate6ee412d2010-05-28 12:01:56 -0700306 waitedHalf = false;
307 continue;
308 }
309
310 if (!waitedHalf) {
311 // We've waited half the deadlock-detection interval. Pull a stack
312 // trace and wait another half.
Dianne Hackborn6b1afeb2010-08-31 15:40:21 -0700313 ArrayList<Integer> pids = new ArrayList<Integer>();
Christopher Tate6ee412d2010-05-28 12:01:56 -0700314 pids.add(Process.myPid());
Dianne Hackbornf72467a2012-06-08 17:23:59 -0700315 ActivityManagerService.dumpStackTraces(true, pids, null, null,
316 NATIVE_STACKS_OF_INTEREST);
Christopher Tate6ee412d2010-05-28 12:01:56 -0700317 waitedHalf = true;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800318 continue;
319 }
Michael Wright8fa56f62013-04-01 16:36:05 -0700320
Dianne Hackborn8d044e82013-04-30 17:24:15 -0700321 name = describeBlockedCheckersLocked();
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700322 allowRestart = mAllowRestart;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800323 }
324
325 // If we got here, that means that the system is most likely hung.
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700326 // First collect stack traces from all threads of the system process.
327 // Then kill this process so that the system will restart.
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800328 EventLog.writeEvent(EventLogTags.WATCHDOG, name);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800329
Dianne Hackborn6b1afeb2010-08-31 15:40:21 -0700330 ArrayList<Integer> pids = new ArrayList<Integer>();
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800331 pids.add(Process.myPid());
Dan Egnor4bded072010-03-11 22:00:47 -0800332 if (mPhonePid > 0) pids.add(mPhonePid);
Christopher Tate6ee412d2010-05-28 12:01:56 -0700333 // Pass !waitedHalf so that just in case we somehow wind up here without having
334 // dumped the halfway stacks, we properly re-initialize the trace file.
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800335 final File stack = ActivityManagerService.dumpStackTraces(
Dianne Hackbornf72467a2012-06-08 17:23:59 -0700336 !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST);
Dan Egnor4bded072010-03-11 22:00:47 -0800337
338 // Give some extra time to make sure the stack traces get written.
339 // The system's been hanging for a minute, another second or two won't hurt much.
340 SystemClock.sleep(2000);
341
Christopher Tateecaa7b42010-06-04 14:55:02 -0700342 // Pull our own kernel thread stacks as well if we're configured for that
343 if (RECORD_KERNEL_THREADS) {
344 dumpKernelStackTraces();
345 }
346
Colin Cross5df1d872012-11-29 11:42:11 -0800347 // Trigger the kernel to dump all blocked threads to the kernel log
348 try {
349 FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
350 sysrq_trigger.write("w");
351 sysrq_trigger.close();
352 } catch (IOException e) {
353 Slog.e(TAG, "Failed to write to /proc/sysrq-trigger");
354 Slog.e(TAG, e.getMessage());
355 }
356
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800357 // Try to add the error to the dropbox, but assuming that the ActivityManager
358 // itself may be deadlocked. (which has happened, causing this statement to
359 // deadlock and the watchdog as a whole to be ineffective)
360 Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
361 public void run() {
362 mActivity.addErrorToDropBox(
Jeff Sharkeya353d262011-10-28 11:12:06 -0700363 "watchdog", null, "system_server", null, null,
364 name, null, stack, null);
Brad Fitzpatrick9765c722011-01-14 11:28:22 -0800365 }
366 };
367 dropboxThread.start();
368 try {
369 dropboxThread.join(2000); // wait up to 2 seconds for it to return.
370 } catch (InterruptedException ignored) {}
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800371
Dianne Hackborn5b88a2f2013-05-03 16:25:11 -0700372 IActivityController controller;
373 synchronized (this) {
374 controller = mController;
375 }
376 if (controller != null) {
377 Slog.i(TAG, "Reporting stuck state to activity controller");
378 try {
379 Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
380 // 1 = keep waiting, -1 = kill system
381 int res = controller.systemNotResponding(name);
382 if (res >= 0) {
383 Slog.i(TAG, "Activity controller requested to coninue to wait");
384 waitedHalf = false;
385 continue;
386 }
387 } catch (RemoteException e) {
388 }
389 }
390
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700391 // Only kill the process if the debugger is not attached.
Dianne Hackborn8bd64df2013-05-06 16:07:26 -0700392 if (Debug.isDebuggerConnected()) {
393 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
394 } else if (!allowRestart) {
395 Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
396 } else {
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800397 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
Michael Wright56a6c662013-04-30 20:13:07 -0700398 Slog.w(TAG, "Main thread stack trace:");
Dianne Hackborn98eb06a2013-05-02 19:50:00 -0700399 StackTraceElement[] stackTrace = Looper.getMainLooper().getThread().getStackTrace();
Michael Wright56a6c662013-04-30 20:13:07 -0700400 for (StackTraceElement element: stackTrace) {
401 Slog.w(TAG, "\tat " + element);
402 }
403 Slog.w(TAG, "<End of main thread stack trace>");
Jean-Baptiste Queru784827b2012-09-04 13:35:12 -0700404 Process.killProcess(Process.myPid());
405 System.exit(10);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800406 }
Christopher Tate6ee412d2010-05-28 12:01:56 -0700407
408 waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800409 }
410 }
Christopher Tateecaa7b42010-06-04 14:55:02 -0700411
412 private File dumpKernelStackTraces() {
413 String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
414 if (tracesPath == null || tracesPath.length() == 0) {
415 return null;
416 }
417
418 native_dumpKernelStacks(tracesPath);
419 return new File(tracesPath);
420 }
421
422 private native void native_dumpKernelStacks(String tracesPath);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800423}