mm: per-cgroup memory reclaim stats
Track the following reclaim counters for every memory cgroup: PGREFILL,
PGSCAN, PGSTEAL, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE and PGLAZYFREED.
These values are exposed using the memory.stats interface of cgroup v2.
The meaning of each value is the same as for global counters, available
using /proc/vmstat.
Also, for consistency, rename mem_cgroup_count_vm_event() to
count_memcg_event_mm().
Link: http://lkml.kernel.org/r/1494530183-30808-1-git-send-email-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 558c3a7..5ac2fbd 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -956,6 +956,34 @@
Number of times a shadow node has been reclaimed
+ pgrefill
+
+ Amount of scanned pages (in an active LRU list)
+
+ pgscan
+
+ Amount of scanned pages (in an inactive LRU list)
+
+ pgsteal
+
+ Amount of reclaimed pages
+
+ pgactivate
+
+ Amount of pages moved to the active LRU list
+
+ pgdeactivate
+
+ Amount of pages moved to the inactive LRU lis
+
+ pglazyfree
+
+ Amount of pages postponed to be freed under memory pressure
+
+ pglazyfreed
+
+ Amount of reclaimed lazyfree pages
+
memory.swap.current
A read-only single value file which exists on non-root
diff --git a/fs/dax.c b/fs/dax.c
index 33d05aa0..cd8fced 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1213,7 +1213,7 @@
case IOMAP_MAPPED:
if (iomap.flags & IOMAP_F_NEW) {
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+ count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
major = VM_FAULT_MAJOR;
}
error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 0c3905e..6719c0b 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -89,7 +89,7 @@
* -- nyc
*/
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+ count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
return VM_FAULT_MAJOR;
}
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 899949b..b2a5b1c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -357,6 +357,17 @@
}
struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
+static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
+{
+ struct mem_cgroup_per_node *mz;
+
+ if (mem_cgroup_disabled())
+ return NULL;
+
+ mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+ return mz->memcg;
+}
+
/**
* parent_mem_cgroup - find the accounting parent of a memcg
* @memcg: memcg whose parent to find
@@ -546,8 +557,23 @@
gfp_t gfp_mask,
unsigned long *total_scanned);
-static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
- enum vm_event_item idx)
+static inline void count_memcg_events(struct mem_cgroup *memcg,
+ enum vm_event_item idx,
+ unsigned long count)
+{
+ if (!mem_cgroup_disabled())
+ this_cpu_add(memcg->stat->events[idx], count);
+}
+
+static inline void count_memcg_page_event(struct page *page,
+ enum memcg_stat_item idx)
+{
+ if (page->mem_cgroup)
+ count_memcg_events(page->mem_cgroup, idx, 1);
+}
+
+static inline void count_memcg_event_mm(struct mm_struct *mm,
+ enum vm_event_item idx)
{
struct mem_cgroup *memcg;
@@ -675,6 +701,11 @@
return NULL;
}
+static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
+{
+ return NULL;
+}
+
static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
{
return true;
@@ -789,8 +820,19 @@
{
}
+static inline void count_memcg_events(struct mem_cgroup *memcg,
+ enum vm_event_item idx,
+ unsigned long count)
+{
+}
+
+static inline void count_memcg_page_event(struct page *page,
+ enum memcg_stat_item idx)
+{
+}
+
static inline
-void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
+void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
}
#endif /* CONFIG_MEMCG */
diff --git a/mm/filemap.c b/mm/filemap.c
index aea58e9..2e906ef 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2265,7 +2265,7 @@
/* No page in the page cache at all */
do_sync_mmap_readahead(vmf->vma, ra, file, offset);
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
+ count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
ret = VM_FAULT_MAJOR;
retry_find:
page = find_get_page(mapping, offset);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fc51a33..3e2f8cf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5230,6 +5230,16 @@
seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
+ seq_printf(m, "pgrefill %lu\n", events[PGREFILL]);
+ seq_printf(m, "pgscan %lu\n", events[PGSCAN_KSWAPD] +
+ events[PGSCAN_DIRECT]);
+ seq_printf(m, "pgsteal %lu\n", events[PGSTEAL_KSWAPD] +
+ events[PGSTEAL_DIRECT]);
+ seq_printf(m, "pgactivate %lu\n", events[PGACTIVATE]);
+ seq_printf(m, "pgdeactivate %lu\n", events[PGDEACTIVATE]);
+ seq_printf(m, "pglazyfree %lu\n", events[PGLAZYFREE]);
+ seq_printf(m, "pglazyfreed %lu\n", events[PGLAZYFREED]);
+
seq_printf(m, "workingset_refault %lu\n",
stat[WORKINGSET_REFAULT]);
seq_printf(m, "workingset_activate %lu\n",
diff --git a/mm/memory.c b/mm/memory.c
index bf3aab1..e31dd97 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2719,7 +2719,7 @@
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
} else if (PageHWPoison(page)) {
/*
* hwpoisoned dirty swapcache pages are kept for killing
@@ -3837,7 +3837,7 @@
__set_current_state(TASK_RUNNING);
count_vm_event(PGFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGFAULT);
+ count_memcg_event_mm(vma->vm_mm, PGFAULT);
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);
diff --git a/mm/shmem.c b/mm/shmem.c
index a06f237..9418f5a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1646,8 +1646,7 @@
if (fault_type) {
*fault_type |= VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(charge_mm,
- PGMAJFAULT);
+ count_memcg_event_mm(charge_mm, PGMAJFAULT);
}
/* Here we actually start the io */
page = shmem_swapin(swap, gfp, info, index);
diff --git a/mm/swap.c b/mm/swap.c
index 98d08b4..4f44dbd 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -591,6 +591,7 @@
add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE);
__count_vm_events(PGLAZYFREE, hpage_nr_pages(page));
+ count_memcg_page_event(page, PGLAZYFREE);
update_page_reclaim_stat(lruvec, 1, 0);
}
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index aebb157..7d3c6c5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1294,6 +1294,7 @@
}
count_vm_event(PGLAZYFREED);
+ count_memcg_page_event(page, PGLAZYFREED);
} else if (!mapping || !__remove_mapping(mapping, page, true))
goto keep_locked;
/*
@@ -1323,6 +1324,7 @@
if (!PageMlocked(page)) {
SetPageActive(page);
pgactivate++;
+ count_memcg_page_event(page, PGACTIVATE);
}
keep_locked:
unlock_page(page);
@@ -1762,11 +1764,16 @@
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
reclaim_stat->recent_scanned[file] += nr_taken;
- if (global_reclaim(sc)) {
- if (current_is_kswapd())
+ if (current_is_kswapd()) {
+ if (global_reclaim(sc))
__count_vm_events(PGSCAN_KSWAPD, nr_scanned);
- else
+ count_memcg_events(lruvec_memcg(lruvec), PGSCAN_KSWAPD,
+ nr_scanned);
+ } else {
+ if (global_reclaim(sc))
__count_vm_events(PGSCAN_DIRECT, nr_scanned);
+ count_memcg_events(lruvec_memcg(lruvec), PGSCAN_DIRECT,
+ nr_scanned);
}
spin_unlock_irq(&pgdat->lru_lock);
@@ -1778,11 +1785,16 @@
spin_lock_irq(&pgdat->lru_lock);
- if (global_reclaim(sc)) {
- if (current_is_kswapd())
+ if (current_is_kswapd()) {
+ if (global_reclaim(sc))
__count_vm_events(PGSTEAL_KSWAPD, nr_reclaimed);
- else
+ count_memcg_events(lruvec_memcg(lruvec), PGSTEAL_KSWAPD,
+ nr_reclaimed);
+ } else {
+ if (global_reclaim(sc))
__count_vm_events(PGSTEAL_DIRECT, nr_reclaimed);
+ count_memcg_events(lruvec_memcg(lruvec), PGSTEAL_DIRECT,
+ nr_reclaimed);
}
putback_inactive_pages(lruvec, &page_list);
@@ -1927,8 +1939,11 @@
}
}
- if (!is_active_lru(lru))
+ if (!is_active_lru(lru)) {
__count_vm_events(PGDEACTIVATE, nr_moved);
+ count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
+ nr_moved);
+ }
return nr_moved;
}
@@ -1966,6 +1981,7 @@
reclaim_stat->recent_scanned[file] += nr_taken;
__count_vm_events(PGREFILL, nr_scanned);
+ count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
spin_unlock_irq(&pgdat->lru_lock);