mm: memcontrol: take a css reference for each charged page
Charges currently pin the css indirectly by playing tricks during
css_offline(): user pages stall the offlining process until all of them
have been reparented, whereas kmemcg acquires a keep-alive reference if
outstanding kernel pages are detected at that point.
In preparation for removing all this complexity, make the pinning explicit
and acquire a css reference for every charged page.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1d51968..9f96b25 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -113,6 +113,19 @@
}
/**
+ * css_get_many - obtain @n references on the specified css
+ * @css: target css
+ * @n: number of references to get
+ *
+ * The caller must already have a reference. No-op if CSS_NO_REF is set.
+ */
+static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_get_many(&css->refcnt, n);
+}
+
+/**
* css_tryget - try to obtain a reference on the specified css
* @css: target css
*
@@ -159,6 +172,19 @@
percpu_ref_put(&css->refcnt);
}
+/**
+ * css_put_many - put @n css references
+ * @css: target css
+ * @n: number of references to put
+ *
+ * Put references obtained via css_get(), css_get_many() or css_tryget_online().
+ */
+static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+ if (!(css->flags & CSS_NO_REF))
+ percpu_ref_put_many(&css->refcnt, n);
+}
+
/* bits in struct cgroup flags field */
enum {
/* Control Group requires release notifications to userspace */
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 51ce60c..530b249 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -147,6 +147,29 @@
}
/**
+ * percpu_ref_get_many - increment a percpu refcount by @nr
+ * @ref: percpu_ref to get
+ * @nr: number of references to get
+ *
+ * Analogous to atomic_long_add().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
+{
+ unsigned long __percpu *percpu_count;
+
+ rcu_read_lock_sched();
+
+ if (__ref_is_percpu(ref, &percpu_count))
+ this_cpu_add(*percpu_count, nr);
+ else
+ atomic_long_add(nr, &ref->count);
+
+ rcu_read_unlock_sched();
+}
+
+/**
* percpu_ref_get - increment a percpu refcount
* @ref: percpu_ref to get
*
@@ -156,16 +179,7 @@
*/
static inline void percpu_ref_get(struct percpu_ref *ref)
{
- unsigned long __percpu *percpu_count;
-
- rcu_read_lock_sched();
-
- if (__ref_is_percpu(ref, &percpu_count))
- this_cpu_inc(*percpu_count);
- else
- atomic_long_inc(&ref->count);
-
- rcu_read_unlock_sched();
+ percpu_ref_get_many(ref, 1);
}
/**
@@ -231,6 +245,30 @@
}
/**
+ * percpu_ref_put_many - decrement a percpu refcount by @nr
+ * @ref: percpu_ref to put
+ * @nr: number of references to put
+ *
+ * Drop @nr references, and if the count hits 0, call the release function
+ * (which was passed to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
+{
+ unsigned long __percpu *percpu_count;
+
+ rcu_read_lock_sched();
+
+ if (__ref_is_percpu(ref, &percpu_count))
+ this_cpu_sub(*percpu_count, nr);
+ else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
+ ref->release(ref);
+
+ rcu_read_unlock_sched();
+}
+
+/**
* percpu_ref_put - decrement a percpu refcount
* @ref: percpu_ref to put
*
@@ -241,16 +279,7 @@
*/
static inline void percpu_ref_put(struct percpu_ref *ref)
{
- unsigned long __percpu *percpu_count;
-
- rcu_read_lock_sched();
-
- if (__ref_is_percpu(ref, &percpu_count))
- this_cpu_dec(*percpu_count);
- else if (unlikely(atomic_long_dec_and_test(&ref->count)))
- ref->release(ref);
-
- rcu_read_unlock_sched();
+ percpu_ref_put_many(ref, 1);
}
/**
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c3cd3bb..f69da2a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2273,6 +2273,7 @@
page_counter_uncharge(&old->memory, stock->nr_pages);
if (do_swap_account)
page_counter_uncharge(&old->memsw, stock->nr_pages);
+ css_put_many(&old->css, stock->nr_pages);
stock->nr_pages = 0;
}
stock->cached = NULL;
@@ -2530,6 +2531,7 @@
return -EINTR;
done_restock:
+ css_get_many(&memcg->css, batch);
if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages);
done:
@@ -2544,6 +2546,8 @@
page_counter_uncharge(&memcg->memory, nr_pages);
if (do_swap_account)
page_counter_uncharge(&memcg->memsw, nr_pages);
+
+ css_put_many(&memcg->css, nr_pages);
}
/*
@@ -2739,6 +2743,7 @@
page_counter_charge(&memcg->memory, nr_pages);
if (do_swap_account)
page_counter_charge(&memcg->memsw, nr_pages);
+ css_get_many(&memcg->css, nr_pages);
ret = 0;
} else if (ret)
page_counter_uncharge(&memcg->kmem, nr_pages);
@@ -2754,8 +2759,10 @@
page_counter_uncharge(&memcg->memsw, nr_pages);
/* Not down to 0 */
- if (page_counter_uncharge(&memcg->kmem, nr_pages))
+ if (page_counter_uncharge(&memcg->kmem, nr_pages)) {
+ css_put_many(&memcg->css, nr_pages);
return;
+ }
/*
* Releases a reference taken in kmem_cgroup_css_offline in case
@@ -2767,6 +2774,8 @@
*/
if (memcg_kmem_test_and_clear_dead(memcg))
css_put(&memcg->css);
+
+ css_put_many(&memcg->css, nr_pages);
}
/*
@@ -3394,10 +3403,13 @@
ret = mem_cgroup_move_account(page, nr_pages,
pc, child, parent);
if (!ret) {
+ if (!mem_cgroup_is_root(parent))
+ css_get_many(&parent->css, nr_pages);
/* Take charge off the local counters */
page_counter_cancel(&child->memory, nr_pages);
if (do_swap_account)
page_counter_cancel(&child->memsw, nr_pages);
+ css_put_many(&child->css, nr_pages);
}
if (nr_pages > 1)
@@ -5767,7 +5779,6 @@
{
struct mem_cgroup *from = mc.from;
struct mem_cgroup *to = mc.to;
- int i;
/* we must uncharge all the leftover precharges from mc.to */
if (mc.precharge) {
@@ -5795,8 +5806,7 @@
if (!mem_cgroup_is_root(mc.to))
page_counter_uncharge(&mc.to->memory, mc.moved_swap);
- for (i = 0; i < mc.moved_swap; i++)
- css_put(&mc.from->css);
+ css_put_many(&mc.from->css, mc.moved_swap);
/* we've already done css_get(mc.to) */
mc.moved_swap = 0;
@@ -6343,6 +6353,9 @@
__this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
memcg_check_events(memcg, dummy_page);
local_irq_restore(flags);
+
+ if (!mem_cgroup_is_root(memcg))
+ css_put_many(&memcg->css, max(nr_mem, nr_memsw));
}
static void uncharge_list(struct list_head *page_list)