lmkd: add ability to monitor all vmpressure events

Ability to monitor all available vmpressure event levels is needed
to accommodate systems with different memory resources. Low memory
systems can rely on medium and critical level events because working
under memory pressure is usual mode of operation. High performance
systems with more memory need to react earlier using also low
vmpressure level events to free memory early and prevent low memory
condition affecting its performance.

Bug: 63631020
Test: alloc-stress

Change-Id: I0cef1bd4c97d32c005045ae47f0ce3464ed98899
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
diff --git a/lmkd/lmkd.c b/lmkd/lmkd.c
index 5cfa2c8..87e674a 100644
--- a/lmkd/lmkd.c
+++ b/lmkd/lmkd.c
@@ -44,8 +44,6 @@
 #define MEMCG_SYSFS_PATH "/dev/memcg/"
 #define MEMCG_MEMORY_USAGE "/dev/memcg/memory.usage_in_bytes"
 #define MEMCG_MEMORYSW_USAGE "/dev/memcg/memory.memsw.usage_in_bytes"
-#define MEMPRESSURE_WATCH_MEDIUM_LEVEL "medium"
-#define MEMPRESSURE_WATCH_CRITICAL_LEVEL "critical"
 #define ZONEINFO_PATH "/proc/zoneinfo"
 #define LINE_MAX 128
 
@@ -72,13 +70,22 @@
 static int use_inkernel_interface = 1;
 static bool has_inkernel_module;
 
-/* memory pressure level medium event */
-static int mpevfd[2];
-#define CRITICAL_INDEX 1
-#define MEDIUM_INDEX 0
+/* memory pressure levels */
+enum vmpressure_level {
+    VMPRESS_LEVEL_LOW = 0,
+    VMPRESS_LEVEL_MEDIUM,
+    VMPRESS_LEVEL_CRITICAL,
+    VMPRESS_LEVEL_COUNT
+};
 
-static int medium_oomadj;
-static int critical_oomadj;
+static const char *level_name[] = {
+    "low",
+    "medium",
+    "critical"
+};
+
+static int level_oomadj[VMPRESS_LEVEL_COUNT];
+static int mpevfd[VMPRESS_LEVEL_COUNT];
 static bool debug_process_killing;
 static bool enable_pressure_upgrade;
 static int64_t upgrade_pressure;
@@ -90,8 +97,8 @@
 static int ctrl_dfd = -1;
 static int ctrl_dfd_reopened; /* did we reopen ctrl conn on this loop? */
 
-/* 2 memory pressure levels, 1 ctrl listen socket, 1 ctrl data socket */
-#define MAX_EPOLL_EVENTS 4
+/* 3 memory pressure levels, 1 ctrl listen socket, 1 ctrl data socket */
+#define MAX_EPOLL_EVENTS 5
 static int epollfd;
 static int maxevents;
 
@@ -226,7 +233,7 @@
     return 0;
 }
 
-static void writefilestring(char *path, char *s) {
+static void writefilestring(const char *path, char *s) {
     int fd = open(path, O_WRONLY | O_CLOEXEC);
     int len = strlen(s);
     int ret;
@@ -587,7 +594,8 @@
 }
 
 /* Kill one process specified by procp.  Returns the size of the process killed */
-static int kill_one_process(struct proc* procp, int min_score_adj, bool is_critical) {
+static int kill_one_process(struct proc* procp, int min_score_adj,
+                            enum vmpressure_level level) {
     int pid = procp->pid;
     uid_t uid = procp->uid;
     char *taskname;
@@ -606,12 +614,12 @@
         return -1;
     }
 
+    r = kill(pid, SIGKILL);
     ALOGI(
         "Killing '%s' (%d), uid %d, adj %d\n"
         "   to free %ldkB because system is under %s memory pressure oom_adj %d\n",
-        taskname, pid, uid, procp->oomadj, tasksize * page_k, is_critical ? "critical" : "medium",
-        min_score_adj);
-    r = kill(pid, SIGKILL);
+        taskname, pid, uid, procp->oomadj, tasksize * page_k,
+        level_name[level], min_score_adj);
     pid_remove(pid);
 
     if (r) {
@@ -626,10 +634,10 @@
  * Find a process to kill based on the current (possibly estimated) free memory
  * and cached memory sizes.  Returns the size of the killed processes.
  */
-static int find_and_kill_process(bool is_critical) {
+static int find_and_kill_process(enum vmpressure_level level) {
     int i;
     int killed_size = 0;
-    int min_score_adj = is_critical ? critical_oomadj : medium_oomadj;
+    int min_score_adj = level_oomadj[level];
 
     for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
         struct proc *procp;
@@ -638,7 +646,7 @@
         procp = proc_adj_lru(i);
 
         if (procp) {
-            killed_size = kill_one_process(procp, min_score_adj, is_critical);
+            killed_size = kill_one_process(procp, min_score_adj, level);
             if (killed_size < 0) {
                 goto retry;
             } else {
@@ -674,14 +682,23 @@
     return mem_usage;
 }
 
-static void mp_event_common(bool is_critical) {
+enum vmpressure_level upgrade_level(enum vmpressure_level level) {
+    return (enum vmpressure_level)((level < VMPRESS_LEVEL_CRITICAL) ?
+        level + 1 : level);
+}
+
+enum vmpressure_level downgrade_level(enum vmpressure_level level) {
+    return (enum vmpressure_level)((level > VMPRESS_LEVEL_LOW) ?
+        level - 1 : level);
+}
+
+static void mp_event_common(enum vmpressure_level level) {
     int ret;
     unsigned long long evcount;
-    int index = is_critical ? CRITICAL_INDEX : MEDIUM_INDEX;
     int64_t mem_usage, memsw_usage;
     int64_t mem_pressure;
 
-    ret = read(mpevfd[index], &evcount, sizeof(evcount));
+    ret = read(mpevfd[level], &evcount, sizeof(evcount));
     if (ret < 0)
         ALOGE("Error reading memory pressure event fd; errno=%d",
               errno);
@@ -689,18 +706,19 @@
     mem_usage = get_memory_usage(MEMCG_MEMORY_USAGE);
     memsw_usage = get_memory_usage(MEMCG_MEMORYSW_USAGE);
     if (memsw_usage < 0 || mem_usage < 0) {
-        find_and_kill_process(is_critical);
-        return;
+        goto do_kill;
     }
 
     // Calculate percent for swappinness.
     mem_pressure = (mem_usage * 100) / memsw_usage;
 
-    if (enable_pressure_upgrade && !is_critical) {
+    if (enable_pressure_upgrade && level != VMPRESS_LEVEL_CRITICAL) {
         // We are swapping too much.
         if (mem_pressure < upgrade_pressure) {
-            ALOGI("Event upgraded to critical.");
-            is_critical = true;
+            level = upgrade_level(level);
+            if (debug_process_killing) {
+                ALOGI("Event upgraded to %s", level_name[level]);
+            }
         }
     }
 
@@ -708,41 +726,51 @@
     // kill any process, since enough memory is available.
     if (mem_pressure > downgrade_pressure) {
         if (debug_process_killing) {
-            ALOGI("Ignore %s memory pressure", is_critical ? "critical" : "medium");
+            ALOGI("Ignore %s memory pressure", level_name[level]);
         }
         return;
-    } else if (is_critical && mem_pressure > upgrade_pressure) {
+    } else if (level == VMPRESS_LEVEL_CRITICAL &&
+               mem_pressure > upgrade_pressure) {
         if (debug_process_killing) {
             ALOGI("Downgrade critical memory pressure");
         }
-        // Downgrade event to medium, since enough memory available.
-        is_critical = false;
+        // Downgrade event, since enough memory available.
+        level = downgrade_level(level);
     }
 
-    if (find_and_kill_process(is_critical) == 0) {
+do_kill:
+    if (find_and_kill_process(level) == 0) {
         if (debug_process_killing) {
             ALOGI("Nothing to kill");
         }
     }
 }
 
-static void mp_event(uint32_t events __unused) {
-    mp_event_common(false);
+static void mp_event_low(uint32_t events __unused) {
+    mp_event_common(VMPRESS_LEVEL_LOW);
+}
+
+static void mp_event_medium(uint32_t events __unused) {
+    mp_event_common(VMPRESS_LEVEL_MEDIUM);
 }
 
 static void mp_event_critical(uint32_t events __unused) {
-    mp_event_common(true);
+    mp_event_common(VMPRESS_LEVEL_CRITICAL);
 }
 
-static int init_mp_common(char *levelstr, void *event_handler, bool is_critical)
-{
+static bool init_mp_common(void *event_handler, enum vmpressure_level level) {
     int mpfd;
     int evfd;
     int evctlfd;
     char buf[256];
     struct epoll_event epev;
     int ret;
-    int mpevfd_index = is_critical ? CRITICAL_INDEX : MEDIUM_INDEX;
+    const char *levelstr = level_name[level];
+
+    if (level_oomadj[level] > OOM_SCORE_ADJ_MAX) {
+        ALOGI("%s pressure events are disabled", levelstr);
+        return true;
+    }
 
     mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY | O_CLOEXEC);
     if (mpfd < 0) {
@@ -783,8 +811,8 @@
         goto err;
     }
     maxevents++;
-    mpevfd[mpevfd_index] = evfd;
-    return 0;
+    mpevfd[level] = evfd;
+    return true;
 
 err:
     close(evfd);
@@ -793,17 +821,7 @@
 err_open_evctlfd:
     close(mpfd);
 err_open_mpfd:
-    return -1;
-}
-
-static int init_mp_medium()
-{
-    return init_mp_common(MEMPRESSURE_WATCH_MEDIUM_LEVEL, (void *)&mp_event, false);
-}
-
-static int init_mp_critical()
-{
-    return init_mp_common(MEMPRESSURE_WATCH_CRITICAL_LEVEL, (void *)&mp_event_critical, true);
+    return false;
 }
 
 static int init(void) {
@@ -848,10 +866,13 @@
     if (use_inkernel_interface) {
         ALOGI("Using in-kernel low memory killer interface");
     } else {
-        ret = init_mp_medium();
-        ret |= init_mp_critical();
-        if (ret)
+        if (!init_mp_common((void *)&mp_event_low, VMPRESS_LEVEL_LOW) ||
+            !init_mp_common((void *)&mp_event_medium, VMPRESS_LEVEL_MEDIUM) ||
+            !init_mp_common((void *)&mp_event_critical,
+                            VMPRESS_LEVEL_CRITICAL)) {
             ALOGE("Kernel does not support memory pressure events or in-kernel low memory killer");
+            return -1;
+        }
     }
 
     for (i = 0; i <= ADJTOSLOT(OOM_SCORE_ADJ_MAX); i++) {
@@ -892,8 +913,13 @@
             .sched_priority = 1,
     };
 
-    medium_oomadj = property_get_int32("ro.lmk.medium", 800);
-    critical_oomadj = property_get_int32("ro.lmk.critical", 0);
+    /* By default disable low level vmpressure events */
+    level_oomadj[VMPRESS_LEVEL_LOW] =
+        property_get_int32("ro.lmk.low", OOM_SCORE_ADJ_MAX + 1);
+    level_oomadj[VMPRESS_LEVEL_MEDIUM] =
+        property_get_int32("ro.lmk.medium", 800);
+    level_oomadj[VMPRESS_LEVEL_CRITICAL] =
+        property_get_int32("ro.lmk.critical", 0);
     debug_process_killing = property_get_bool("ro.lmk.debug", false);
     enable_pressure_upgrade = property_get_bool("ro.lmk.critical_upgrade", false);
     upgrade_pressure = (int64_t)property_get_int32("ro.lmk.upgrade_pressure", 50);