lmkd: Allow lmkd to kill perceptible apps during heavy thrashing

Occasionally a system can get into a heavy file cache thrashing situation
and become unresponsive. In these situations we observe lmkd wakeups,
but it does not kill anything because all non-perceptible apps are
already killed and the system manages to reclaim enough memory to stay
above the min watermark.
Add the ro.lmk.thrashing_limit_critical property which, when breached,
allows lmkd to kill perceptible apps. The property is expressed as the
percentage of refaulted workingset pages relative to the overall file
cache size. By default it is set to twice the regular thrashing limit
(thrashing_limit_pct * 2).

Bug: 181778155
Test: thrashing.py 500 10 200
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: Icb38ef6c90adaa4f5c956593b6ea0c4febc91dc0
diff --git a/lmkd.cpp b/lmkd.cpp
index 2136ab0..4467699 100644
--- a/lmkd.cpp
+++ b/lmkd.cpp
@@ -197,6 +197,7 @@
 static int psi_complete_stall_ms;
 static int thrashing_limit_pct;
 static int thrashing_limit_decay_pct;
+static int thrashing_critical_pct;
 static int swap_util_max;
 static bool use_psi_monitors = false;
 static int kpoll_fd;
@@ -2501,8 +2502,8 @@
         snprintf(kill_desc, sizeof(kill_desc), "device is low on swap (%" PRId64
             "kB < %" PRId64 "kB) and thrashing (%" PRId64 "%%)",
             mi.field.free_swap * page_k, swap_low_threshold * page_k, thrashing);
-        /* Do not kill perceptible apps unless below min watermark */
-        if (wmark > WMARK_MIN) {
+        /* Do not kill perceptible apps unless below min watermark or heavily thrashing */
+        if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) {
             min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
         }
     } else if (swap_is_low && wmark < WMARK_HIGH) {
@@ -2511,8 +2512,8 @@
         snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap is low (%"
             PRId64 "kB < %" PRId64 "kB)", wmark < WMARK_LOW ? "min" : "low",
             mi.field.free_swap * page_k, swap_low_threshold * page_k);
-        /* Do not kill perceptible apps unless below min watermark */
-        if (wmark > WMARK_MIN) {
+        /* Do not kill perceptible apps unless below min watermark or heavily thrashing */
+        if (wmark > WMARK_MIN && thrashing < thrashing_critical_pct) {
             min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
         }
     } else if (wmark < WMARK_HIGH && swap_util_max < 100 &&
@@ -2531,16 +2532,20 @@
         snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and thrashing (%"
             PRId64 "%%)", wmark < WMARK_LOW ? "min" : "low", thrashing);
         cut_thrashing_limit = true;
-        /* Do not kill perceptible apps because of thrashing */
-        min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
+        /* Do not kill perceptible apps unless thrashing at critical levels */
+        if (thrashing < thrashing_critical_pct) {
+            min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
+        }
     } else if (reclaim == DIRECT_RECLAIM && thrashing > thrashing_limit) {
         /* Page cache is thrashing while in direct reclaim (mostly happens on lowram devices) */
         kill_reason = DIRECT_RECL_AND_THRASHING;
         snprintf(kill_desc, sizeof(kill_desc), "device is in direct reclaim and thrashing (%"
             PRId64 "%%)", thrashing);
         cut_thrashing_limit = true;
-        /* Do not kill perceptible apps because of thrashing */
-        min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
+        /* Do not kill perceptible apps unless thrashing at critical levels */
+        if (thrashing < thrashing_critical_pct) {
+            min_score_adj = PERCEPTIBLE_APP_ADJ + 1;
+        }
     }
 
     /* Kill a process if necessary */
@@ -3337,6 +3342,8 @@
         low_ram_device ? DEF_THRASHING_LOWRAM : DEF_THRASHING));
     thrashing_limit_decay_pct = clamp(0, 100, property_get_int32("ro.lmk.thrashing_limit_decay",
         low_ram_device ? DEF_THRASHING_DECAY_LOWRAM : DEF_THRASHING_DECAY));
+    thrashing_critical_pct = max(0, property_get_int32("ro.lmk.thrashing_limit_critical",
+        thrashing_limit_pct * 2));
     swap_util_max = clamp(0, 100, property_get_int32("ro.lmk.swap_util_max", 100));
 }