blob: d4133f309338b1a00019e53e2478d0ece91f5fa2 [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.server;
18
19import com.android.server.am.ActivityManagerService;
20
21import android.app.AlarmManager;
22import android.app.PendingIntent;
23import android.content.BroadcastReceiver;
24import android.content.ContentResolver;
25import android.content.Context;
26import android.content.Intent;
27import android.content.IntentFilter;
28import android.os.Debug;
29import android.os.Handler;
30import android.os.Message;
31import android.os.Process;
Suchi Amalapurapu6ffce2e2010-03-08 14:48:40 -080032import android.os.ServiceManager;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080033import android.os.SystemClock;
34import android.os.SystemProperties;
35import android.provider.Settings;
36import android.util.Config;
37import android.util.EventLog;
Dan Egnor9bdc94b2010-03-04 14:20:31 -080038import android.util.Log;
Joe Onorato8a9b2202010-02-26 18:56:32 -080039import android.util.Slog;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080040
Dan Egnor9bdc94b2010-03-04 14:20:31 -080041import java.io.File;
Christopher Tateecaa7b42010-06-04 14:55:02 -070042import java.io.FileInputStream;
43import java.io.FileOutputStream;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080044import java.io.IOException;
45import java.util.ArrayList;
46import java.util.Calendar;
47
/** This class calls its monitors every minute, killing this process if they don't return. */
49public class Watchdog extends Thread {
50 static final String TAG = "Watchdog";
51 static final boolean localLOGV = false || Config.LOGV;
52
53 // Set this to true to use debug default values.
54 static final boolean DB = false;
55
Christopher Tateecaa7b42010-06-04 14:55:02 -070056 // Set this to true to have the watchdog record kernel thread stacks when it fires
57 static final boolean RECORD_KERNEL_THREADS = true;
58
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080059 static final int MONITOR = 2718;
60 static final int GLOBAL_PSS = 2719;
61
Christopher Tate6ee412d2010-05-28 12:01:56 -070062 static final int TIME_TO_RESTART = DB ? 15*1000 : 60*1000;
63 static final int TIME_TO_WAIT = TIME_TO_RESTART / 2;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080064
65 static final int MEMCHECK_DEFAULT_INTERVAL = DB ? 30 : 30*60; // 30 minutes
66 static final int MEMCHECK_DEFAULT_LOG_REALTIME_INTERVAL = DB ? 60 : 2*60*60; // 2 hours
67 static final int MEMCHECK_DEFAULT_SYSTEM_SOFT_THRESHOLD = (DB ? 10:16)*1024*1024; // 16MB
68 static final int MEMCHECK_DEFAULT_SYSTEM_HARD_THRESHOLD = (DB ? 14:20)*1024*1024; // 20MB
69 static final int MEMCHECK_DEFAULT_PHONE_SOFT_THRESHOLD = (DB ? 4:8)*1024*1024; // 8MB
70 static final int MEMCHECK_DEFAULT_PHONE_HARD_THRESHOLD = (DB ? 8:12)*1024*1024; // 12MB
71
72 static final int MEMCHECK_DEFAULT_EXEC_START_TIME = 1*60*60; // 1:00am
73 static final int MEMCHECK_DEFAULT_EXEC_END_TIME = 5*60*60; // 5:00am
74 static final int MEMCHECK_DEFAULT_MIN_SCREEN_OFF = DB ? 1*60 : 5*60; // 5 minutes
75 static final int MEMCHECK_DEFAULT_MIN_ALARM = DB ? 1*60 : 3*60; // 3 minutes
76 static final int MEMCHECK_DEFAULT_RECHECK_INTERVAL = DB ? 1*60 : 5*60; // 5 minutes
77
78 static final int REBOOT_DEFAULT_INTERVAL = DB ? 1 : 0; // never force reboot
79 static final int REBOOT_DEFAULT_START_TIME = 3*60*60; // 3:00am
80 static final int REBOOT_DEFAULT_WINDOW = 60*60; // within 1 hour
81
82 static final String CHECKUP_ACTION = "com.android.service.Watchdog.CHECKUP";
83 static final String REBOOT_ACTION = "com.android.service.Watchdog.REBOOT";
84
85 static Watchdog sWatchdog;
86
87 /* This handler will be used to post message back onto the main thread */
88 final Handler mHandler;
89 final Runnable mGlobalPssCollected;
90 final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
91 ContentResolver mResolver;
92 BatteryService mBattery;
93 PowerManagerService mPower;
94 AlarmManagerService mAlarm;
95 ActivityManagerService mActivity;
96 boolean mCompleted;
97 boolean mForceKillSystem;
98 Monitor mCurrentMonitor;
99
100 PssRequestor mPhoneReq;
101 int mPhonePid;
102 int mPhonePss;
103
104 long mLastMemCheckTime = -(MEMCHECK_DEFAULT_INTERVAL*1000);
105 boolean mHavePss;
106 long mLastMemCheckRealtime = -(MEMCHECK_DEFAULT_LOG_REALTIME_INTERVAL*1000);
107 boolean mHaveGlobalPss;
108 final MemMonitor mSystemMemMonitor = new MemMonitor("system",
Doug Zongkerf6888892010-01-06 16:38:14 -0800109 Settings.Secure.MEMCHECK_SYSTEM_ENABLED,
110 Settings.Secure.MEMCHECK_SYSTEM_SOFT_THRESHOLD,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800111 MEMCHECK_DEFAULT_SYSTEM_SOFT_THRESHOLD,
Doug Zongkerf6888892010-01-06 16:38:14 -0800112 Settings.Secure.MEMCHECK_SYSTEM_HARD_THRESHOLD,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800113 MEMCHECK_DEFAULT_SYSTEM_HARD_THRESHOLD);
114 final MemMonitor mPhoneMemMonitor = new MemMonitor("com.android.phone",
Doug Zongkerf6888892010-01-06 16:38:14 -0800115 Settings.Secure.MEMCHECK_PHONE_ENABLED,
116 Settings.Secure.MEMCHECK_PHONE_SOFT_THRESHOLD,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800117 MEMCHECK_DEFAULT_PHONE_SOFT_THRESHOLD,
Doug Zongkerf6888892010-01-06 16:38:14 -0800118 Settings.Secure.MEMCHECK_PHONE_HARD_THRESHOLD,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800119 MEMCHECK_DEFAULT_PHONE_HARD_THRESHOLD);
120
121 final Calendar mCalendar = Calendar.getInstance();
122 long mMemcheckLastTime;
123 long mMemcheckExecStartTime;
124 long mMemcheckExecEndTime;
125 int mMinScreenOff = MEMCHECK_DEFAULT_MIN_SCREEN_OFF;
126 int mMinAlarm = MEMCHECK_DEFAULT_MIN_ALARM;
127 boolean mNeedScheduledCheck;
128 PendingIntent mCheckupIntent;
129 PendingIntent mRebootIntent;
130
131 long mBootTime;
132 int mRebootInterval;
133
134 boolean mReqRebootNoWait; // should wait for one interval before reboot?
135 int mReqRebootInterval = -1; // >= 0 if a reboot has been requested
136 int mReqRebootStartTime = -1; // >= 0 if a specific start time has been requested
137 int mReqRebootWindow = -1; // >= 0 if a specific window has been requested
138 int mReqMinScreenOff = -1; // >= 0 if a specific screen off time has been requested
139 int mReqMinNextAlarm = -1; // >= 0 if specific time to next alarm has been requested
140 int mReqRecheckInterval= -1; // >= 0 if a specific recheck interval has been requested
141
142 /**
143 * This class monitors the memory in a particular process.
144 */
145 final class MemMonitor {
146 final String mProcessName;
147 final String mEnabledSetting;
148 final String mSoftSetting;
149 final String mHardSetting;
150
151 int mSoftThreshold;
152 int mHardThreshold;
153 boolean mEnabled;
154 long mLastPss;
155
156 static final int STATE_OK = 0;
157 static final int STATE_SOFT = 1;
158 static final int STATE_HARD = 2;
159 int mState;
160
161 MemMonitor(String processName, String enabledSetting,
162 String softSetting, int defSoftThreshold,
163 String hardSetting, int defHardThreshold) {
164 mProcessName = processName;
165 mEnabledSetting = enabledSetting;
166 mSoftSetting = softSetting;
167 mHardSetting = hardSetting;
168 mSoftThreshold = defSoftThreshold;
169 mHardThreshold = defHardThreshold;
170 }
171
172 void retrieveSettings(ContentResolver resolver) {
Doug Zongkerf6888892010-01-06 16:38:14 -0800173 mSoftThreshold = Settings.Secure.getInt(
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800174 resolver, mSoftSetting, mSoftThreshold);
Doug Zongkerf6888892010-01-06 16:38:14 -0800175 mHardThreshold = Settings.Secure.getInt(
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800176 resolver, mHardSetting, mHardThreshold);
Doug Zongkerf6888892010-01-06 16:38:14 -0800177 mEnabled = Settings.Secure.getInt(
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800178 resolver, mEnabledSetting, 0) != 0;
179 }
180
181 boolean checkLocked(long curTime, int pid, int pss) {
182 mLastPss = pss;
183 if (mLastPss < mSoftThreshold) {
184 mState = STATE_OK;
185 } else if (mLastPss < mHardThreshold) {
186 mState = STATE_SOFT;
187 } else {
188 mState = STATE_HARD;
189 }
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800190 EventLog.writeEvent(EventLogTags.WATCHDOG_PROC_PSS, mProcessName, pid, mLastPss);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800191
192 if (mState == STATE_OK) {
193 // Memory is good, don't recover.
194 return false;
195 }
196
197 if (mState == STATE_HARD) {
198 // Memory is really bad, kill right now.
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800199 EventLog.writeEvent(EventLogTags.WATCHDOG_HARD_RESET, mProcessName, pid,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800200 mHardThreshold, mLastPss);
201 return mEnabled;
202 }
203
204 // It is time to schedule a reset...
205 // Check if we are currently within the time to kill processes due
206 // to memory use.
207 computeMemcheckTimesLocked(curTime);
208 String skipReason = null;
209 if (curTime < mMemcheckExecStartTime || curTime > mMemcheckExecEndTime) {
210 skipReason = "time";
211 } else {
212 skipReason = shouldWeBeBrutalLocked(curTime);
213 }
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800214 EventLog.writeEvent(EventLogTags.WATCHDOG_SOFT_RESET, mProcessName, pid,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800215 mSoftThreshold, mLastPss, skipReason != null ? skipReason : "");
216 if (skipReason != null) {
217 mNeedScheduledCheck = true;
218 return false;
219 }
220 return mEnabled;
221 }
222
223 void clear() {
224 mLastPss = 0;
225 mState = STATE_OK;
226 }
227 }
228
229 /**
230 * Used for scheduling monitor callbacks and checking memory usage.
231 */
232 final class HeartbeatHandler extends Handler {
233 @Override
234 public void handleMessage(Message msg) {
235 switch (msg.what) {
236 case GLOBAL_PSS: {
237 if (mHaveGlobalPss) {
238 // During the last pass we collected pss information, so
239 // now it is time to report it.
240 mHaveGlobalPss = false;
Joe Onorato8a9b2202010-02-26 18:56:32 -0800241 if (localLOGV) Slog.v(TAG, "Received global pss, logging.");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800242 logGlobalMemory();
243 }
244 } break;
245
246 case MONITOR: {
247 if (mHavePss) {
248 // During the last pass we collected pss information, so
249 // now it is time to report it.
250 mHavePss = false;
Joe Onorato8a9b2202010-02-26 18:56:32 -0800251 if (localLOGV) Slog.v(TAG, "Have pss, checking memory.");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800252 checkMemory();
253 }
254
255 if (mHaveGlobalPss) {
256 // During the last pass we collected pss information, so
257 // now it is time to report it.
258 mHaveGlobalPss = false;
Joe Onorato8a9b2202010-02-26 18:56:32 -0800259 if (localLOGV) Slog.v(TAG, "Have global pss, logging.");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800260 logGlobalMemory();
261 }
262
263 long now = SystemClock.uptimeMillis();
264
265 // See if we should force a reboot.
266 int rebootInterval = mReqRebootInterval >= 0
Doug Zongkerf6888892010-01-06 16:38:14 -0800267 ? mReqRebootInterval : Settings.Secure.getInt(
268 mResolver, Settings.Secure.REBOOT_INTERVAL,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800269 REBOOT_DEFAULT_INTERVAL);
270 if (mRebootInterval != rebootInterval) {
271 mRebootInterval = rebootInterval;
272 // We have been running long enough that a reboot can
273 // be considered...
274 checkReboot(false);
275 }
276
277 // See if we should check memory conditions.
Doug Zongkerf6888892010-01-06 16:38:14 -0800278 long memCheckInterval = Settings.Secure.getLong(
279 mResolver, Settings.Secure.MEMCHECK_INTERVAL,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800280 MEMCHECK_DEFAULT_INTERVAL) * 1000;
281 if ((mLastMemCheckTime+memCheckInterval) < now) {
282 // It is now time to collect pss information. This
283 // is async so we won't report it now. And to keep
284 // things simple, we will assume that everyone has
285 // reported back by the next MONITOR message.
286 mLastMemCheckTime = now;
Joe Onorato8a9b2202010-02-26 18:56:32 -0800287 if (localLOGV) Slog.v(TAG, "Collecting memory usage.");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800288 collectMemory();
289 mHavePss = true;
290
Doug Zongkerf6888892010-01-06 16:38:14 -0800291 long memCheckRealtimeInterval = Settings.Secure.getLong(
292 mResolver, Settings.Secure.MEMCHECK_LOG_REALTIME_INTERVAL,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800293 MEMCHECK_DEFAULT_LOG_REALTIME_INTERVAL) * 1000;
294 long realtimeNow = SystemClock.elapsedRealtime();
295 if ((mLastMemCheckRealtime+memCheckRealtimeInterval) < realtimeNow) {
296 mLastMemCheckRealtime = realtimeNow;
Joe Onorato8a9b2202010-02-26 18:56:32 -0800297 if (localLOGV) Slog.v(TAG, "Collecting global memory usage.");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800298 collectGlobalMemory();
299 mHaveGlobalPss = true;
300 }
301 }
302
303 final int size = mMonitors.size();
304 for (int i = 0 ; i < size ; i++) {
305 mCurrentMonitor = mMonitors.get(i);
306 mCurrentMonitor.monitor();
307 }
308
309 synchronized (Watchdog.this) {
310 mCompleted = true;
311 mCurrentMonitor = null;
312 }
313 } break;
314 }
315 }
316 }
317
318 final class GlobalPssCollected implements Runnable {
319 public void run() {
320 mHandler.sendEmptyMessage(GLOBAL_PSS);
321 }
322 }
323
324 final class CheckupReceiver extends BroadcastReceiver {
325 @Override
326 public void onReceive(Context c, Intent intent) {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800327 if (localLOGV) Slog.v(TAG, "Alarm went off, checking memory.");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800328 checkMemory();
329 }
330 }
331
332 final class RebootReceiver extends BroadcastReceiver {
333 @Override
334 public void onReceive(Context c, Intent intent) {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800335 if (localLOGV) Slog.v(TAG, "Alarm went off, checking reboot.");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800336 checkReboot(true);
337 }
338 }
339
340 final class RebootRequestReceiver extends BroadcastReceiver {
341 @Override
342 public void onReceive(Context c, Intent intent) {
343 mReqRebootNoWait = intent.getIntExtra("nowait", 0) != 0;
344 mReqRebootInterval = intent.getIntExtra("interval", -1);
345 mReqRebootStartTime = intent.getIntExtra("startTime", -1);
346 mReqRebootWindow = intent.getIntExtra("window", -1);
347 mReqMinScreenOff = intent.getIntExtra("minScreenOff", -1);
348 mReqMinNextAlarm = intent.getIntExtra("minNextAlarm", -1);
349 mReqRecheckInterval = intent.getIntExtra("recheckInterval", -1);
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800350 EventLog.writeEvent(EventLogTags.WATCHDOG_REQUESTED_REBOOT,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800351 mReqRebootNoWait ? 1 : 0, mReqRebootInterval,
352 mReqRecheckInterval, mReqRebootStartTime,
353 mReqRebootWindow, mReqMinScreenOff, mReqMinNextAlarm);
354 checkReboot(true);
355 }
356 }
357
/**
 * Callback registered with the watchdog via addMonitor(); the heartbeat
 * handler invokes {@link #monitor()} on every registered instance during
 * each MONITOR pass.
 */
public interface Monitor {
    /** Invoked once per heartbeat pass. */
    public abstract void monitor();
}
361
/**
 * Handle through which the watchdog asks a process for its PSS; see
 * collectMemory(), which calls {@link #requestPss()} on the phone
 * process's requestor.
 */
public interface PssRequestor {
    /** Asks the implementor to produce a PSS sample. */
    public abstract void requestPss();
}
365
/**
 * Plain holder for the aggregate PSS figures and per-category process
 * counts that logGlobalMemory() writes to the event log.
 */
public class PssStats {
    // PSS total and process count for each category.
    public int mEmptyPss;
    public int mEmptyCount;

    public int mBackgroundPss;
    public int mBackgroundCount;

    public int mServicePss;
    public int mServiceCount;

    public int mVisiblePss;
    public int mVisibleCount;

    public int mForegroundPss;
    public int mForegroundCount;

    // Number of processes for which no PSS was available.
    public int mNoPssCount;

    // Process death counters; only the first five slots are logged by
    // logGlobalMemory().
    public int[] mProcDeaths = new int[10];
}
382
383 public static Watchdog getInstance() {
384 if (sWatchdog == null) {
385 sWatchdog = new Watchdog();
386 }
387
388 return sWatchdog;
389 }
390
/**
 * Creates the watchdog thread (named "watchdog") together with its
 * heartbeat handler and the global-PSS completion callback. Private:
 * instances are obtained through getInstance().
 */
private Watchdog() {
    super("watchdog");
    this.mHandler = new HeartbeatHandler();
    this.mGlobalPssCollected = new GlobalPssCollected();
}
396
397 public void init(Context context, BatteryService battery,
398 PowerManagerService power, AlarmManagerService alarm,
399 ActivityManagerService activity) {
400 mResolver = context.getContentResolver();
401 mBattery = battery;
402 mPower = power;
403 mAlarm = alarm;
404 mActivity = activity;
405
406 context.registerReceiver(new CheckupReceiver(),
407 new IntentFilter(CHECKUP_ACTION));
408 mCheckupIntent = PendingIntent.getBroadcast(context,
409 0, new Intent(CHECKUP_ACTION), 0);
410
411 context.registerReceiver(new RebootReceiver(),
412 new IntentFilter(REBOOT_ACTION));
413 mRebootIntent = PendingIntent.getBroadcast(context,
414 0, new Intent(REBOOT_ACTION), 0);
415
416 context.registerReceiver(new RebootRequestReceiver(),
417 new IntentFilter(Intent.ACTION_REBOOT),
418 android.Manifest.permission.REBOOT, null);
419
420 mBootTime = System.currentTimeMillis();
421 }
422
423 public void processStarted(PssRequestor req, String name, int pid) {
424 synchronized (this) {
425 if ("com.android.phone".equals(name)) {
426 mPhoneReq = req;
427 mPhonePid = pid;
428 mPhonePss = 0;
429 }
430 }
431 }
432
433 public void reportPss(PssRequestor req, String name, int pss) {
434 synchronized (this) {
435 if (mPhoneReq == req) {
436 mPhonePss = pss;
437 }
438 }
439 }
440
441 public void addMonitor(Monitor monitor) {
442 synchronized (this) {
443 if (isAlive()) {
444 throw new RuntimeException("Monitors can't be added while the Watchdog is running");
445 }
446 mMonitors.add(monitor);
447 }
448 }
449
450 /**
451 * Retrieve memory usage information from specific processes being
452 * monitored. This is an async operation, so must be done before doing
453 * memory checks.
454 */
455 void collectMemory() {
456 synchronized (this) {
457 if (mPhoneReq != null) {
458 mPhoneReq.requestPss();
459 }
460 }
461 }
462
463 /**
464 * Retrieve memory usage over all application processes. This is an
465 * async operation, so must be done before doing memory checks.
466 */
467 void collectGlobalMemory() {
468 mActivity.requestPss(mGlobalPssCollected);
469 }
470
471 /**
472 * Check memory usage in the system, scheduling kills/reboots as needed.
473 * This always runs on the mHandler thread.
474 */
475 void checkMemory() {
476 boolean needScheduledCheck;
477 long curTime;
478 long nextTime = 0;
479
Doug Zongkerf6888892010-01-06 16:38:14 -0800480 long recheckInterval = Settings.Secure.getLong(
481 mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800482 MEMCHECK_DEFAULT_RECHECK_INTERVAL) * 1000;
483
484 mSystemMemMonitor.retrieveSettings(mResolver);
485 mPhoneMemMonitor.retrieveSettings(mResolver);
486 retrieveBrutalityAmount();
487
488 synchronized (this) {
489 curTime = System.currentTimeMillis();
490 mNeedScheduledCheck = false;
491
492 // How is the system doing?
493 if (mSystemMemMonitor.checkLocked(curTime, Process.myPid(),
494 (int)Process.getPss(Process.myPid()))) {
495 // Not good! Time to suicide.
496 mForceKillSystem = true;
497 notifyAll();
498 return;
499 }
500
501 // How is the phone process doing?
502 if (mPhoneReq != null) {
503 if (mPhoneMemMonitor.checkLocked(curTime, mPhonePid,
504 mPhonePss)) {
505 // Just kill the phone process and let it restart.
Joe Onorato8a9b2202010-02-26 18:56:32 -0800506 Slog.i(TAG, "Watchdog is killing the phone process");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800507 Process.killProcess(mPhonePid);
508 }
509 } else {
510 mPhoneMemMonitor.clear();
511 }
512
513 needScheduledCheck = mNeedScheduledCheck;
514 if (needScheduledCheck) {
515 // Something is going bad, but now is not a good time to
516 // tear things down... schedule an alarm to check again soon.
517 nextTime = curTime + recheckInterval;
518 if (nextTime < mMemcheckExecStartTime) {
519 nextTime = mMemcheckExecStartTime;
520 } else if (nextTime >= mMemcheckExecEndTime){
521 // Need to check during next exec time... so that needs
522 // to be computed.
Joe Onorato8a9b2202010-02-26 18:56:32 -0800523 if (localLOGV) Slog.v(TAG, "Computing next time range");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800524 computeMemcheckTimesLocked(nextTime);
525 nextTime = mMemcheckExecStartTime;
526 }
527
528 if (localLOGV) {
529 mCalendar.setTimeInMillis(nextTime);
Joe Onorato8a9b2202010-02-26 18:56:32 -0800530 Slog.v(TAG, "Next Alarm Time: " + mCalendar);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800531 }
532 }
533 }
534
535 if (needScheduledCheck) {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800536 if (localLOGV) Slog.v(TAG, "Scheduling next memcheck alarm for "
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800537 + ((nextTime-curTime)/1000/60) + "m from now");
538 mAlarm.remove(mCheckupIntent);
539 mAlarm.set(AlarmManager.RTC_WAKEUP, nextTime, mCheckupIntent);
540 } else {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800541 if (localLOGV) Slog.v(TAG, "No need to schedule a memcheck alarm!");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800542 mAlarm.remove(mCheckupIntent);
543 }
544 }
545
546 final PssStats mPssStats = new PssStats();
547 final String[] mMemInfoFields = new String[] {
548 "MemFree:", "Buffers:", "Cached:",
549 "Active:", "Inactive:",
550 "AnonPages:", "Mapped:", "Slab:",
551 "SReclaimable:", "SUnreclaim:", "PageTables:" };
552 final long[] mMemInfoSizes = new long[mMemInfoFields.length];
553 final String[] mVMStatFields = new String[] {
554 "pgfree ", "pgactivate ", "pgdeactivate ",
555 "pgfault ", "pgmajfault " };
556 final long[] mVMStatSizes = new long[mVMStatFields.length];
557 final long[] mPrevVMStatSizes = new long[mVMStatFields.length];
558 long mLastLogGlobalMemoryTime;
559
560 void logGlobalMemory() {
561 PssStats stats = mPssStats;
562 mActivity.collectPss(stats);
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800563 EventLog.writeEvent(EventLogTags.WATCHDOG_PSS_STATS,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800564 stats.mEmptyPss, stats.mEmptyCount,
565 stats.mBackgroundPss, stats.mBackgroundCount,
566 stats.mServicePss, stats.mServiceCount,
567 stats.mVisiblePss, stats.mVisibleCount,
568 stats.mForegroundPss, stats.mForegroundCount,
569 stats.mNoPssCount);
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800570 EventLog.writeEvent(EventLogTags.WATCHDOG_PROC_STATS,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800571 stats.mProcDeaths[0], stats.mProcDeaths[1], stats.mProcDeaths[2],
572 stats.mProcDeaths[3], stats.mProcDeaths[4]);
573 Process.readProcLines("/proc/meminfo", mMemInfoFields, mMemInfoSizes);
574 for (int i=0; i<mMemInfoSizes.length; i++) {
575 mMemInfoSizes[i] *= 1024;
576 }
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800577 EventLog.writeEvent(EventLogTags.WATCHDOG_MEMINFO,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800578 (int)mMemInfoSizes[0], (int)mMemInfoSizes[1], (int)mMemInfoSizes[2],
579 (int)mMemInfoSizes[3], (int)mMemInfoSizes[4],
580 (int)mMemInfoSizes[5], (int)mMemInfoSizes[6], (int)mMemInfoSizes[7],
581 (int)mMemInfoSizes[8], (int)mMemInfoSizes[9], (int)mMemInfoSizes[10]);
582 long now = SystemClock.uptimeMillis();
583 long dur = now - mLastLogGlobalMemoryTime;
584 mLastLogGlobalMemoryTime = now;
585 Process.readProcLines("/proc/vmstat", mVMStatFields, mVMStatSizes);
586 for (int i=0; i<mVMStatSizes.length; i++) {
587 long v = mVMStatSizes[i];
588 mVMStatSizes[i] -= mPrevVMStatSizes[i];
589 mPrevVMStatSizes[i] = v;
590 }
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800591 EventLog.writeEvent(EventLogTags.WATCHDOG_VMSTAT, dur,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800592 (int)mVMStatSizes[0], (int)mVMStatSizes[1], (int)mVMStatSizes[2],
593 (int)mVMStatSizes[3], (int)mVMStatSizes[4]);
594 }
595
596 void checkReboot(boolean fromAlarm) {
597 int rebootInterval = mReqRebootInterval >= 0 ? mReqRebootInterval
Doug Zongkerf6888892010-01-06 16:38:14 -0800598 : Settings.Secure.getInt(
599 mResolver, Settings.Secure.REBOOT_INTERVAL,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800600 REBOOT_DEFAULT_INTERVAL);
601 mRebootInterval = rebootInterval;
602 if (rebootInterval <= 0) {
603 // No reboot interval requested.
Joe Onorato8a9b2202010-02-26 18:56:32 -0800604 if (localLOGV) Slog.v(TAG, "No need to schedule a reboot alarm!");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800605 mAlarm.remove(mRebootIntent);
606 return;
607 }
608
609 long rebootStartTime = mReqRebootStartTime >= 0 ? mReqRebootStartTime
Doug Zongkerf6888892010-01-06 16:38:14 -0800610 : Settings.Secure.getLong(
611 mResolver, Settings.Secure.REBOOT_START_TIME,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800612 REBOOT_DEFAULT_START_TIME);
613 long rebootWindowMillis = (mReqRebootWindow >= 0 ? mReqRebootWindow
Doug Zongkerf6888892010-01-06 16:38:14 -0800614 : Settings.Secure.getLong(
615 mResolver, Settings.Secure.REBOOT_WINDOW,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800616 REBOOT_DEFAULT_WINDOW)) * 1000;
617 long recheckInterval = (mReqRecheckInterval >= 0 ? mReqRecheckInterval
Doug Zongkerf6888892010-01-06 16:38:14 -0800618 : Settings.Secure.getLong(
619 mResolver, Settings.Secure.MEMCHECK_RECHECK_INTERVAL,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800620 MEMCHECK_DEFAULT_RECHECK_INTERVAL)) * 1000;
621
622 retrieveBrutalityAmount();
623
624 long realStartTime;
625 long now;
626
627 synchronized (this) {
628 now = System.currentTimeMillis();
629 realStartTime = computeCalendarTime(mCalendar, now,
630 rebootStartTime);
631
632 long rebootIntervalMillis = rebootInterval*24*60*60*1000;
633 if (DB || mReqRebootNoWait ||
634 (now-mBootTime) >= (rebootIntervalMillis-rebootWindowMillis)) {
635 if (fromAlarm && rebootWindowMillis <= 0) {
636 // No reboot window -- just immediately reboot.
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800637 EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800638 (int)rebootIntervalMillis, (int)rebootStartTime*1000,
639 (int)rebootWindowMillis, "");
640 rebootSystem("Checkin scheduled forced");
641 return;
642 }
643
644 // Are we within the reboot window?
645 if (now < realStartTime) {
646 // Schedule alarm for next check interval.
647 realStartTime = computeCalendarTime(mCalendar,
648 now, rebootStartTime);
649 } else if (now < (realStartTime+rebootWindowMillis)) {
650 String doit = shouldWeBeBrutalLocked(now);
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800651 EventLog.writeEvent(EventLogTags.WATCHDOG_SCHEDULED_REBOOT, now,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800652 (int)rebootInterval, (int)rebootStartTime*1000,
653 (int)rebootWindowMillis, doit != null ? doit : "");
654 if (doit == null) {
655 rebootSystem("Checked scheduled range");
656 return;
657 }
658
659 // Schedule next alarm either within the window or in the
660 // next interval.
661 if ((now+recheckInterval) >= (realStartTime+rebootWindowMillis)) {
662 realStartTime = computeCalendarTime(mCalendar,
663 now + rebootIntervalMillis, rebootStartTime);
664 } else {
665 realStartTime = now + recheckInterval;
666 }
667 } else {
668 // Schedule alarm for next check interval.
669 realStartTime = computeCalendarTime(mCalendar,
670 now + rebootIntervalMillis, rebootStartTime);
671 }
672 }
673 }
674
Joe Onorato8a9b2202010-02-26 18:56:32 -0800675 if (localLOGV) Slog.v(TAG, "Scheduling next reboot alarm for "
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800676 + ((realStartTime-now)/1000/60) + "m from now");
677 mAlarm.remove(mRebootIntent);
678 mAlarm.set(AlarmManager.RTC_WAKEUP, realStartTime, mRebootIntent);
679 }
680
681 /**
682 * Perform a full reboot of the system.
683 */
684 void rebootSystem(String reason) {
Joe Onorato8a9b2202010-02-26 18:56:32 -0800685 Slog.i(TAG, "Rebooting system because: " + reason);
Suchi Amalapurapu6ffce2e2010-03-08 14:48:40 -0800686 PowerManagerService pms = (PowerManagerService) ServiceManager.getService("power");
687 pms.reboot(reason);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800688 }
689
690 /**
691 * Load the current Gservices settings for when
692 * {@link #shouldWeBeBrutalLocked} will allow the brutality to happen.
693 * Must not be called with the lock held.
694 */
695 void retrieveBrutalityAmount() {
696 mMinScreenOff = (mReqMinScreenOff >= 0 ? mReqMinScreenOff
Doug Zongkerf6888892010-01-06 16:38:14 -0800697 : Settings.Secure.getInt(
698 mResolver, Settings.Secure.MEMCHECK_MIN_SCREEN_OFF,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800699 MEMCHECK_DEFAULT_MIN_SCREEN_OFF)) * 1000;
700 mMinAlarm = (mReqMinNextAlarm >= 0 ? mReqMinNextAlarm
Doug Zongkerf6888892010-01-06 16:38:14 -0800701 : Settings.Secure.getInt(
702 mResolver, Settings.Secure.MEMCHECK_MIN_ALARM,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800703 MEMCHECK_DEFAULT_MIN_ALARM)) * 1000;
704 }
705
706 /**
707 * Determine whether it is a good time to kill, crash, or otherwise
708 * plunder the current situation for the overall long-term benefit of
709 * the world.
710 *
711 * @param curTime The current system time.
712 * @return Returns null if this is a good time, else a String with the
713 * text of why it is not a good time.
714 */
715 String shouldWeBeBrutalLocked(long curTime) {
716 if (mBattery == null || !mBattery.isPowered()) {
717 return "battery";
718 }
719
720 if (mMinScreenOff >= 0 && (mPower == null ||
721 mPower.timeSinceScreenOn() < mMinScreenOff)) {
722 return "screen";
723 }
724
725 if (mMinAlarm >= 0 && (mAlarm == null ||
726 mAlarm.timeToNextAlarm() < mMinAlarm)) {
727 return "alarm";
728 }
729
730 return null;
731 }
732
733 /**
734 * Compute the times during which we next would like to perform process
735 * restarts.
736 *
737 * @param curTime The current system time.
738 */
739 void computeMemcheckTimesLocked(long curTime) {
740 if (mMemcheckLastTime == curTime) {
741 return;
742 }
743
744 mMemcheckLastTime = curTime;
745
Doug Zongkerf6888892010-01-06 16:38:14 -0800746 long memcheckExecStartTime = Settings.Secure.getLong(
747 mResolver, Settings.Secure.MEMCHECK_EXEC_START_TIME,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800748 MEMCHECK_DEFAULT_EXEC_START_TIME);
Doug Zongkerf6888892010-01-06 16:38:14 -0800749 long memcheckExecEndTime = Settings.Secure.getLong(
750 mResolver, Settings.Secure.MEMCHECK_EXEC_END_TIME,
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800751 MEMCHECK_DEFAULT_EXEC_END_TIME);
752
753 mMemcheckExecEndTime = computeCalendarTime(mCalendar, curTime,
754 memcheckExecEndTime);
755 if (mMemcheckExecEndTime < curTime) {
756 memcheckExecStartTime += 24*60*60;
757 memcheckExecEndTime += 24*60*60;
758 mMemcheckExecEndTime = computeCalendarTime(mCalendar, curTime,
759 memcheckExecEndTime);
760 }
761 mMemcheckExecStartTime = computeCalendarTime(mCalendar, curTime,
762 memcheckExecStartTime);
763
764 if (localLOGV) {
765 mCalendar.setTimeInMillis(curTime);
Joe Onorato8a9b2202010-02-26 18:56:32 -0800766 Slog.v(TAG, "Current Time: " + mCalendar);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800767 mCalendar.setTimeInMillis(mMemcheckExecStartTime);
Joe Onorato8a9b2202010-02-26 18:56:32 -0800768 Slog.v(TAG, "Start Check Time: " + mCalendar);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800769 mCalendar.setTimeInMillis(mMemcheckExecEndTime);
Joe Onorato8a9b2202010-02-26 18:56:32 -0800770 Slog.v(TAG, "End Check Time: " + mCalendar);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800771 }
772 }
773
774 static long computeCalendarTime(Calendar c, long curTime,
775 long secondsSinceMidnight) {
776
777 // start with now
778 c.setTimeInMillis(curTime);
779
780 int val = (int)secondsSinceMidnight / (60*60);
781 c.set(Calendar.HOUR_OF_DAY, val);
782 secondsSinceMidnight -= val * (60*60);
783 val = (int)secondsSinceMidnight / 60;
784 c.set(Calendar.MINUTE, val);
785 c.set(Calendar.SECOND, (int)secondsSinceMidnight - (val*60));
786 c.set(Calendar.MILLISECOND, 0);
787
788 long newTime = c.getTimeInMillis();
789 if (newTime < curTime) {
790 // The given time (in seconds since midnight) has already passed for today, so advance
791 // by one day (due to daylight savings, etc., the delta may differ from 24 hours).
792 c.add(Calendar.DAY_OF_MONTH, 1);
793 newTime = c.getTimeInMillis();
794 }
795
796 return newTime;
797 }
798
799 @Override
800 public void run() {
Christopher Tate6ee412d2010-05-28 12:01:56 -0700801 boolean waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800802 while (true) {
803 mCompleted = false;
804 mHandler.sendEmptyMessage(MONITOR);
805
806 synchronized (this) {
807 long timeout = TIME_TO_WAIT;
808
809 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
810 // wait while asleep. If the device is asleep then the thing that we are waiting
Christopher Tate6ee412d2010-05-28 12:01:56 -0700811 // to timeout on is asleep as well and won't have a chance to run, causing a false
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800812 // positive on when to kill things.
813 long start = SystemClock.uptimeMillis();
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800814 while (timeout > 0 && !mForceKillSystem) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800815 try {
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800816 wait(timeout); // notifyAll() is called when mForceKillSystem is set
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800817 } catch (InterruptedException e) {
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800818 Log.wtf(TAG, e);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800819 }
820 timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800821 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800822
823 if (mCompleted && !mForceKillSystem) {
824 // The monitors have returned.
Christopher Tate6ee412d2010-05-28 12:01:56 -0700825 waitedHalf = false;
826 continue;
827 }
828
829 if (!waitedHalf) {
830 // We've waited half the deadlock-detection interval. Pull a stack
831 // trace and wait another half.
832 ArrayList pids = new ArrayList();
833 pids.add(Process.myPid());
834 File stack = ActivityManagerService.dumpStackTraces(true, pids);
835 waitedHalf = true;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800836 continue;
837 }
838 }
839
840 // If we got here, that means that the system is most likely hung.
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800841 // First collect stack traces from all threads of the system process.
842 // Then kill this process so that the system will restart.
843
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800844 String name = (mCurrentMonitor != null) ? mCurrentMonitor.getClass().getName() : "null";
Doug Zongkerab5c49c2009-12-04 10:31:43 -0800845 EventLog.writeEvent(EventLogTags.WATCHDOG, name);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800846
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800847 ArrayList pids = new ArrayList();
848 pids.add(Process.myPid());
Dan Egnor4bded072010-03-11 22:00:47 -0800849 if (mPhonePid > 0) pids.add(mPhonePid);
Christopher Tate6ee412d2010-05-28 12:01:56 -0700850 // Pass !waitedHalf so that just in case we somehow wind up here without having
851 // dumped the halfway stacks, we properly re-initialize the trace file.
852 File stack = ActivityManagerService.dumpStackTraces(!waitedHalf, pids);
Dan Egnor4bded072010-03-11 22:00:47 -0800853
854 // Give some extra time to make sure the stack traces get written.
855 // The system's been hanging for a minute, another second or two won't hurt much.
856 SystemClock.sleep(2000);
857
Christopher Tateecaa7b42010-06-04 14:55:02 -0700858 // Pull our own kernel thread stacks as well if we're configured for that
859 if (RECORD_KERNEL_THREADS) {
860 dumpKernelStackTraces();
861 }
862
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800863 mActivity.addErrorToDropBox("watchdog", null, null, null, name, null, stack, null);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800864
865 // Only kill the process if the debugger is not attached.
866 if (!Debug.isDebuggerConnected()) {
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800867 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + name);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800868 Process.killProcess(Process.myPid());
Dan Egnor9bdc94b2010-03-04 14:20:31 -0800869 System.exit(10);
870 } else {
871 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800872 }
Christopher Tate6ee412d2010-05-28 12:01:56 -0700873
874 waitedHalf = false;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800875 }
876 }
Christopher Tateecaa7b42010-06-04 14:55:02 -0700877
878 private File dumpKernelStackTraces() {
879 String tracesPath = SystemProperties.get("dalvik.vm.stack-trace-file", null);
880 if (tracesPath == null || tracesPath.length() == 0) {
881 return null;
882 }
883
884 native_dumpKernelStacks(tracesPath);
885 return new File(tracesPath);
886 }
887
888 private native void native_dumpKernelStacks(String tracesPath);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800889}