lmkd: Add ro.lmk.filecache_min_kb property for min filecache watermark
We see many cases where the device keeps thrashing despite lmkd kills.
This happens because the killed processes do not free enough filecache
to fit the current workingset completely.
To prevent such cases, introduce the ro.lmk.filecache_min_kb property
to specify the minimum filecache size in KB that should be reached after
thrashing is detected. Lmkd will keep killing background processes until
this filecache size limit is satisfied. The default value is 0, which
leaves the check disabled.
Bug: 193293513
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: I49ca4cd2f33b27fdbc432d9ce6944b1a1794b749
diff --git a/lmkd.cpp b/lmkd.cpp
index 0afcf2f..f3c301e 100644
--- a/lmkd.cpp
+++ b/lmkd.cpp
@@ -201,6 +201,7 @@
static int thrashing_limit_decay_pct;
static int thrashing_critical_pct;
static int swap_util_max;
+static int64_t filecache_min_kb;
static bool use_psi_monitors = false;
static int kpoll_fd;
static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
@@ -2397,6 +2398,7 @@
static struct wakeup_info wi;
static struct timespec thrashing_reset_tm;
static int64_t prev_thrash_growth = 0;
+ static bool check_filecache = false;
union meminfo mi;
union vmstat vs;
@@ -2475,7 +2477,10 @@
init_pgscan_kswapd = vs.field.pgscan_kswapd;
reclaim = KSWAPD_RECLAIM;
} else if (workingset_refault_file == prev_workingset_refault) {
- /* Device is not thrashing and not reclaiming, bail out early until we see these stats changing*/
+ /*
+ * Device is not thrashing and not reclaiming, bail out early until we see these stats
+ * changing
+ */
goto no_kill;
}
@@ -2488,7 +2493,7 @@
* important for a slow growing refault case. While retrying, we should keep
* monitoring new thrashing counter as someone could release the memory to mitigate
* the thrashing. Thus, when thrashing reset window comes, we decay the prev thrashing
- * counter by window counts. if the counter is still greater than thrashing limit,
+ * counter by window counts. If the counter is still greater than thrashing limit,
* we preserve the current prev_thrash counter so we will retry kill again. Otherwise,
* we reset the prev_thrash counter so we will stop retrying.
*/
@@ -2570,6 +2575,7 @@
if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) {
min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
}
+ check_filecache = true;
} else if (swap_is_low && wmark < WMARK_HIGH) {
/* Both free memory and swap are low */
kill_reason = LOW_MEM_AND_SWAP;
@@ -2600,6 +2606,7 @@
if (thrashing < thrashing_critical_pct) {
min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
}
+ check_filecache = true;
} else if (reclaim == DIRECT_RECLAIM && thrashing > thrashing_limit) {
/* Page cache is thrashing while in direct reclaim (mostly happens on lowram devices) */
kill_reason = DIRECT_RECL_AND_THRASHING;
@@ -2610,6 +2617,21 @@
if (thrashing < thrashing_critical_pct) {
min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
}
+ check_filecache = true;
+ } else if (check_filecache) {
+ int64_t file_lru_kb = (vs.field.nr_inactive_file + vs.field.nr_active_file) * page_k;
+
+ if (file_lru_kb < filecache_min_kb) {
+ /* File cache is too low after thrashing, keep killing background processes */
+ kill_reason = LOW_FILECACHE_AFTER_THRASHING;
+ snprintf(kill_desc, sizeof(kill_desc),
+ "filecache is low (%" PRId64 "kB < %" PRId64 "kB) after thrashing",
+ file_lru_kb, filecache_min_kb);
+ min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
+ } else {
+ /* File cache is big enough, stop checking */
+ check_filecache = false;
+ }
}
/* Kill a process if necessary */
@@ -3409,6 +3431,7 @@
thrashing_critical_pct = max(0, property_get_int32("ro.lmk.thrashing_limit_critical",
thrashing_limit_pct * 2));
swap_util_max = clamp(0, 100, property_get_int32("ro.lmk.swap_util_max", 100));
+ filecache_min_kb = property_get_int64("ro.lmk.filecache_min_kb", 0);
}
int main(int argc, char **argv) {