lmkd: adjust thrashing detection strategy
When a device is thrashing the file cache, workingset refaults can
grow slowly for various reasons. Current thrashing detection
mechanism could reset the thrashing counter frequently as it relies
on presence of reclaim activity, however refaults can keep increasing
even when the device is not actively reclaiming. In addition, the
thrashing counter gets reset when conditions require a kill but lmkd
could not find an eligible process to be killed. This is problematic
because when this happens thrashing is being ignored.
Use fixed 1 sec periods to aggregate the thrashing counter. Also we
need to keep monitoring thrashing counter while retrying as someone
could release the memory to mitigate the thrashing. If thrashing
counter is greater than the limit at the end of the 1 sec period this
means lmkd failed to find an eligible process to kill. In this case
we store accumulated thrashing in case a new eligible process appears
until accumulated thrashing is less than the limit or we miss an
entire 1 sec window.
Bug: 163134367
Test: heavy loading launch
Signed-off-by: Martin Liu <liumartin@google.com>
Change-Id: Ie9f4121ea604179c0ad510cc8430e7a6aec6e6b2
diff --git a/lmkd.cpp b/lmkd.cpp
index 1daf198..a4367c3 100644
--- a/lmkd.cpp
+++ b/lmkd.cpp
@@ -100,6 +100,7 @@
#define EIGHT_MEGA (1 << 23)
#define TARGET_UPDATE_MIN_INTERVAL_MS 1000
+#define THRASHING_RESET_INTERVAL_MS 1000
#define NS_PER_MS (NS_PER_SEC / MS_PER_SEC)
#define US_PER_MS (US_PER_SEC / MS_PER_SEC)
@@ -2313,16 +2314,18 @@
DIRECT_RECLAIM,
};
static int64_t init_ws_refault;
+ static int64_t prev_workingset_refault;
static int64_t base_file_lru;
static int64_t init_pgscan_kswapd;
static int64_t init_pgscan_direct;
static int64_t swap_low_threshold;
static bool killing;
- static int thrashing_limit;
- static bool in_reclaim;
+ static int thrashing_limit = thrashing_limit_pct;
static struct zone_watermarks watermarks;
static struct timespec wmark_update_tm;
static struct wakeup_info wi;
+ static struct timespec thrashing_reset_tm;
+ static int64_t prev_thrash_growth = 0;
union meminfo mi;
union vmstat vs;
@@ -2338,6 +2341,7 @@
bool cut_thrashing_limit = false;
int min_score_adj = 0;
int swap_util = 0;
+ long since_thrashing_reset_ms;
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
ALOGE("Failed to get current time");
@@ -2376,6 +2380,8 @@
/* Reset file-backed pagecache size and refault amounts after a kill */
base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file;
init_ws_refault = vs.field.workingset_refault;
+ thrashing_reset_tm = curr_tm;
+ prev_thrash_growth = 0;
}
/* Check free swap levels */
@@ -2394,22 +2400,50 @@
} else if (vs.field.pgscan_kswapd > init_pgscan_kswapd) {
init_pgscan_kswapd = vs.field.pgscan_kswapd;
reclaim = KSWAPD_RECLAIM;
- } else {
- in_reclaim = false;
- /* Skip if system is not reclaiming */
+ } else if (vs.field.workingset_refault == prev_workingset_refault) {
+ /* Device is not thrashing and not reclaiming, bail out early until we see these stats changing*/
goto no_kill;
}
- if (!in_reclaim) {
- /* Record file-backed pagecache size when entering reclaim cycle */
+ prev_workingset_refault = vs.field.workingset_refault;
+
+ /*
+ * It's possible we fail to find an eligible process to kill (ex. no process is
+ * above oom_adj_min). When this happens, we should retry to find a new process
+ * for a kill whenever a new eligible process is available. This is especially
+ * important for a slow growing refault case. While retrying, we should keep
+ * monitoring new thrashing counter as someone could release the memory to mitigate
+ * the thrashing. Thus, when thrashing reset window comes, we decay the prev thrashing
+ * counter by window counts. if the counter is still greater than thrashing limit,
+ * we preserve the current prev_thrash counter so we will retry kill again. Otherwise,
+ * we reset the prev_thrash counter so we will stop retrying.
+ */
+ since_thrashing_reset_ms = get_time_diff_ms(&thrashing_reset_tm, &curr_tm);
+ if (since_thrashing_reset_ms > THRASHING_RESET_INTERVAL_MS) {
+ long windows_passed;
+ /* Calculate prev_thrash_growth if we crossed THRASHING_RESET_INTERVAL_MS */
+ prev_thrash_growth = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru;
+ windows_passed = (since_thrashing_reset_ms / THRASHING_RESET_INTERVAL_MS);
+ /*
+ * Decay prev_thrashing unless over-the-limit thrashing was registered in the window we
+ * just crossed, which means there were no eligible processes to kill. We preserve the
+ * counter in that case to ensure a kill if a new eligible process appears.
+ */
+ if (windows_passed > 1 || prev_thrash_growth < thrashing_limit) {
+ prev_thrash_growth >>= windows_passed;
+ }
+
+ /* Record file-backed pagecache size when crossing THRASHING_RESET_INTERVAL_MS */
base_file_lru = vs.field.nr_inactive_file + vs.field.nr_active_file;
init_ws_refault = vs.field.workingset_refault;
+ thrashing_reset_tm = curr_tm;
thrashing_limit = thrashing_limit_pct;
} else {
/* Calculate what % of the file-backed pagecache refaulted so far */
thrashing = (vs.field.workingset_refault - init_ws_refault) * 100 / base_file_lru;
}
- in_reclaim = true;
+ /* Add previous cycle's decayed thrashing amount */
+ thrashing += prev_thrash_growth;
/*
* Refresh watermarks once per min in case user updated one of the margins.
@@ -2426,7 +2460,7 @@
calc_zone_watermarks(&zi, &watermarks);
wmark_update_tm = curr_tm;
- }
+ }
/* Find out which watermark is breached if any */
wmark = get_lowest_watermark(&mi, &watermarks);