memcg: skip scanning active lists based on individual size
Reclaim decides to skip scanning an active list when the corresponding
inactive list is above a certain size in comparison to leave the assumed
working set alone while there are still enough reclaim candidates around.
The memcg implementation of comparing those lists instead reports whether
the whole memcg is low on the requested type of inactive pages,
considering all nodes and zones.
This can lead to an oversized active list not being scanned because of the
state of the other lists in the memcg, as well as an active list being
scanned while its corresponding inactive list has enough pages.
Not only is this wrong, it's also a scalability hazard, because the global
memory state over all nodes and zones has to be gathered for each memcg
and zone scanned.
Make these calculations purely based on the size of the two LRU lists
that are actually affected by the outcome of the decision.
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f6c4beb..ce7b35d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1104,15 +1104,19 @@
return ret;
}
-static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
{
- unsigned long active;
- unsigned long inactive;
- unsigned long gb;
unsigned long inactive_ratio;
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
+ unsigned long inactive;
+ unsigned long active;
+ unsigned long gb;
- inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
- active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
+ inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_INACTIVE_ANON));
+ active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_ACTIVE_ANON));
gb = (inactive + active) >> (30 - PAGE_SHIFT);
if (gb)
@@ -1120,39 +1124,20 @@
else
inactive_ratio = 1;
- if (present_pages) {
- present_pages[0] = inactive;
- present_pages[1] = active;
- }
-
- return inactive_ratio;
+ return inactive * inactive_ratio < active;
}
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
{
unsigned long active;
unsigned long inactive;
- unsigned long present_pages[2];
- unsigned long inactive_ratio;
+ int zid = zone_idx(zone);
+ int nid = zone_to_nid(zone);
- inactive_ratio = calc_inactive_ratio(memcg, present_pages);
-
- inactive = present_pages[0];
- active = present_pages[1];
-
- if (inactive * inactive_ratio < active)
- return 1;
-
- return 0;
-}
-
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
-{
- unsigned long active;
- unsigned long inactive;
-
- inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
- active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
+ inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_INACTIVE_FILE));
+ active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_ACTIVE_FILE));
return (active > inactive);
}
@@ -4192,8 +4177,6 @@
}
#ifdef CONFIG_DEBUG_VM
- cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
-
{
int nid, zid;
struct mem_cgroup_per_zone *mz;