Merge branch 'for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
"Nothing too interesting. Just a handful of cleanup patches"
* 'for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
Revert "cgroup: remove redundant variable in cgroup_mount()"
cgroup: remove redundant variable in cgroup_mount()
cgroup: fix missing unlock in cgroup_release_agent()
cgroup: remove CGRP_RELEASABLE flag
perf/cgroup: Remove perf_put_cgroup()
cgroup: remove redundant check in cgroup_ino()
cpuset: simplify proc_cpuset_show()
cgroup: simplify proc_cgroup_show()
cgroup: use a per-cgroup work for release agent
cgroup: remove bogus comments
cgroup: remove redundant code in cgroup_rmdir()
cgroup: remove some useless forward declarations
cgroup: fix a typo in comment.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4c542b9..950100e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -376,37 +376,6 @@
#endif
-#ifdef CONFIG_CGROUPS
-static int cgroup_open(struct inode *inode, struct file *file)
-{
- struct pid *pid = PROC_I(inode)->pid;
- return single_open(file, proc_cgroup_show, pid);
-}
-
-static const struct file_operations proc_cgroup_operations = {
- .open = cgroup_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif
-
-#ifdef CONFIG_PROC_PID_CPUSET
-
-static int cpuset_open(struct inode *inode, struct file *file)
-{
- struct pid *pid = PROC_I(inode)->pid;
- return single_open(file, proc_cpuset_show, pid);
-}
-
-static const struct file_operations proc_cpuset_operations = {
- .open = cpuset_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif
-
static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -2579,10 +2548,10 @@
REG("latency", S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
- REG("cpuset", S_IRUGO, proc_cpuset_operations),
+ ONE("cpuset", S_IRUGO, proc_cpuset_show),
#endif
#ifdef CONFIG_CGROUPS
- REG("cgroup", S_IRUGO, proc_cgroup_operations),
+ ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
@@ -2925,10 +2894,10 @@
REG("latency", S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
- REG("cpuset", S_IRUGO, proc_cpuset_operations),
+ ONE("cpuset", S_IRUGO, proc_cpuset_show),
#endif
#ifdef CONFIG_CGROUPS
- REG("cgroup", S_IRUGO, proc_cgroup_operations),
+ ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b5223c5..1d51968 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -27,7 +27,6 @@
struct cgroup_root;
struct cgroup_subsys;
-struct inode;
struct cgroup;
extern int cgroup_init_early(void);
@@ -38,7 +37,8 @@
extern int cgroupstats_build(struct cgroupstats *stats,
struct dentry *dentry);
-extern int proc_cgroup_show(struct seq_file *, void *);
+extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *tsk);
/* define the enumeration of all cgroup subsystems */
#define SUBSYS(_x) _x ## _cgrp_id,
@@ -161,11 +161,6 @@
/* bits in struct cgroup flags field */
enum {
- /*
- * Control Group has previously had a child cgroup or a task,
- * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
- */
- CGRP_RELEASABLE,
/* Control Group requires release notifications to userspace */
CGRP_NOTIFY_ON_RELEASE,
/*
@@ -235,13 +230,6 @@
struct list_head e_csets[CGROUP_SUBSYS_COUNT];
/*
- * Linked list running through all cgroups that can
- * potentially be reaped by the release agent. Protected by
- * release_list_lock
- */
- struct list_head release_list;
-
- /*
* list of pidlists, up to two for each namespace (one for procs, one
* for tasks); created on demand.
*/
@@ -250,6 +238,9 @@
/* used to wait for offlining of csses */
wait_queue_head_t offline_waitq;
+
+ /* used to schedule release agent */
+ struct work_struct release_agent_work;
};
#define MAX_CGROUP_ROOT_NAMELEN 64
@@ -536,13 +527,10 @@
return !list_empty(&cgrp->cset_links);
}
-/* returns ino associated with a cgroup, 0 indicates unmounted root */
+/* returns ino associated with a cgroup */
static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
- if (cgrp->kn)
- return cgrp->kn->ino;
- else
- return 0;
+ return cgrp->kn->ino;
}
/* cft/css accessors for cftype->write() operation */
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 6e39c9b..2f073db 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -86,7 +86,8 @@
extern void cpuset_task_status_allowed(struct seq_file *m,
struct task_struct *task);
-extern int proc_cpuset_show(struct seq_file *, void *);
+extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *tsk);
extern int cpuset_mem_spread_node(void);
extern int cpuset_slab_spread_node(void);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3a73f99..cab7dc4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -185,7 +185,6 @@
static struct cftype cgroup_dfl_base_files[];
static struct cftype cgroup_legacy_base_files[];
-static void cgroup_put(struct cgroup *cgrp);
static int rebind_subsystems(struct cgroup_root *dst_root,
unsigned int ss_mask);
static int cgroup_destroy_locked(struct cgroup *cgrp);
@@ -195,7 +194,6 @@
static void kill_css(struct cgroup_subsys_state *css);
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
bool is_add);
-static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
/* IDR wrappers which synchronize using cgroup_idr_lock */
static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
@@ -331,14 +329,6 @@
return false;
}
-static int cgroup_is_releasable(const struct cgroup *cgrp)
-{
- const int bits =
- (1 << CGRP_RELEASABLE) |
- (1 << CGRP_NOTIFY_ON_RELEASE);
- return (cgrp->flags & bits) == bits;
-}
-
static int notify_on_release(const struct cgroup *cgrp)
{
return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -394,12 +384,7 @@
; \
else
-/* the list of cgroups eligible for automatic release. Protected by
- * release_list_lock */
-static LIST_HEAD(release_list);
-static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
-static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
/*
@@ -498,7 +483,7 @@
return key;
}
-static void put_css_set_locked(struct css_set *cset, bool taskexit)
+static void put_css_set_locked(struct css_set *cset)
{
struct cgrp_cset_link *link, *tmp_link;
struct cgroup_subsys *ss;
@@ -524,11 +509,7 @@
/* @cgrp can't go away while we're holding css_set_rwsem */
if (list_empty(&cgrp->cset_links)) {
cgroup_update_populated(cgrp, false);
- if (notify_on_release(cgrp)) {
- if (taskexit)
- set_bit(CGRP_RELEASABLE, &cgrp->flags);
- check_for_release(cgrp);
- }
+ check_for_release(cgrp);
}
kfree(link);
@@ -537,7 +518,7 @@
kfree_rcu(cset, rcu_head);
}
-static void put_css_set(struct css_set *cset, bool taskexit)
+static void put_css_set(struct css_set *cset)
{
/*
* Ensure that the refcount doesn't hit zero while any readers
@@ -548,7 +529,7 @@
return;
down_write(&css_set_rwsem);
- put_css_set_locked(cset, taskexit);
+ put_css_set_locked(cset);
up_write(&css_set_rwsem);
}
@@ -969,14 +950,6 @@
* knows that the cgroup won't be removed, as cgroup_rmdir()
* needs that mutex.
*
- * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
- * (usually) take cgroup_mutex. These are the two most performance
- * critical pieces of code here. The exception occurs on cgroup_exit(),
- * when a task in a notify_on_release cgroup exits. Then cgroup_mutex
- * is taken, and if the cgroup count is zero, a usermode call made
- * to the release agent with the name of the cgroup (path relative to
- * the root of cgroup file system) as the argument.
- *
* A cgroup can only be deleted if both its 'count' of using tasks
* is zero, and its list of 'children' cgroups is empty. Since all
* tasks in the system use _some_ cgroup, and since there is always at
@@ -1587,7 +1560,6 @@
INIT_LIST_HEAD(&cgrp->self.sibling);
INIT_LIST_HEAD(&cgrp->self.children);
INIT_LIST_HEAD(&cgrp->cset_links);
- INIT_LIST_HEAD(&cgrp->release_list);
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
cgrp->self.cgroup = cgrp;
@@ -1597,6 +1569,7 @@
INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
init_waitqueue_head(&cgrp->offline_waitq);
+ INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent);
}
static void init_cgroup_root(struct cgroup_root *root,
@@ -2052,8 +2025,7 @@
* task. As trading it for new_cset is protected by cgroup_mutex,
* we're safe to drop it here; it will be freed under RCU.
*/
- set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
- put_css_set_locked(old_cset, false);
+ put_css_set_locked(old_cset);
}
/**
@@ -2074,7 +2046,7 @@
cset->mg_src_cgrp = NULL;
cset->mg_dst_cset = NULL;
list_del_init(&cset->mg_preload_node);
- put_css_set_locked(cset, false);
+ put_css_set_locked(cset);
}
up_write(&css_set_rwsem);
}
@@ -2168,8 +2140,8 @@
if (src_cset == dst_cset) {
src_cset->mg_src_cgrp = NULL;
list_del_init(&src_cset->mg_preload_node);
- put_css_set(src_cset, false);
- put_css_set(dst_cset, false);
+ put_css_set(src_cset);
+ put_css_set(dst_cset);
continue;
}
@@ -2178,7 +2150,7 @@
if (list_empty(&dst_cset->mg_preload_node))
list_add(&dst_cset->mg_preload_node, &csets);
else
- put_css_set(dst_cset, false);
+ put_css_set(dst_cset);
}
list_splice_tail(&csets, preloaded_csets);
@@ -4173,7 +4145,6 @@
static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
- clear_bit(CGRP_RELEASABLE, &css->cgroup->flags);
if (val)
set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
else
@@ -4351,6 +4322,7 @@
/* cgroup free path */
atomic_dec(&cgrp->root->nr_cgrps);
cgroup_pidlist_destroy_all(cgrp);
+ cancel_work_sync(&cgrp->release_agent_work);
if (cgroup_parent(cgrp)) {
/*
@@ -4813,19 +4785,12 @@
for_each_css(css, ssid, cgrp)
kill_css(css);
- /* CSS_ONLINE is clear, remove from ->release_list for the last time */
- raw_spin_lock(&release_list_lock);
- if (!list_empty(&cgrp->release_list))
- list_del_init(&cgrp->release_list);
- raw_spin_unlock(&release_list_lock);
-
/*
* Remove @cgrp directory along with the base files. @cgrp has an
* extra ref on its kn.
*/
kernfs_remove(cgrp->kn);
- set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags);
check_for_release(cgroup_parent(cgrp));
/* put the base reference */
@@ -4842,13 +4807,10 @@
cgrp = cgroup_kn_lock_live(kn);
if (!cgrp)
return 0;
- cgroup_get(cgrp); /* for @kn->priv clearing */
ret = cgroup_destroy_locked(cgrp);
cgroup_kn_unlock(kn);
-
- cgroup_put(cgrp);
return ret;
}
@@ -5052,12 +5014,9 @@
* - Print task's cgroup paths into seq_file, one line for each hierarchy
* - Used for /proc/<pid>/cgroup.
*/
-
-/* TODO: Use a proper seq_file iterator */
-int proc_cgroup_show(struct seq_file *m, void *v)
+int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *tsk)
{
- struct pid *pid;
- struct task_struct *tsk;
char *buf, *path;
int retval;
struct cgroup_root *root;
@@ -5067,14 +5026,6 @@
if (!buf)
goto out;
- retval = -ESRCH;
- pid = m->private;
- tsk = get_pid_task(pid, PIDTYPE_PID);
- if (!tsk)
- goto out_free;
-
- retval = 0;
-
mutex_lock(&cgroup_mutex);
down_read(&css_set_rwsem);
@@ -5104,11 +5055,10 @@
seq_putc(m, '\n');
}
+ retval = 0;
out_unlock:
up_read(&css_set_rwsem);
mutex_unlock(&cgroup_mutex);
- put_task_struct(tsk);
-out_free:
kfree(buf);
out:
return retval;
@@ -5179,7 +5129,7 @@
int i;
/*
- * This may race against cgroup_enable_task_cg_links(). As that
+ * This may race against cgroup_enable_task_cg_lists(). As that
* function sets use_task_css_set_links before grabbing
* tasklist_lock and we just went through tasklist_lock to add
* @child, it's guaranteed that either we see the set
@@ -5194,7 +5144,7 @@
* when implementing operations which need to migrate all tasks of
* a cgroup to another.
*
- * Note that if we lose to cgroup_enable_task_cg_links(), @child
+ * Note that if we lose to cgroup_enable_task_cg_lists(), @child
* will remain in init_css_set. This is safe because all tasks are
* in the init_css_set before cg_links is enabled and there's no
* operation which transfers all tasks out of init_css_set.
@@ -5278,30 +5228,14 @@
}
if (put_cset)
- put_css_set(cset, true);
+ put_css_set(cset);
}
static void check_for_release(struct cgroup *cgrp)
{
- if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) &&
- !css_has_online_children(&cgrp->self)) {
- /*
- * Control Group is currently removeable. If it's not
- * already queued for a userspace notification, queue
- * it now
- */
- int need_schedule_work = 0;
-
- raw_spin_lock(&release_list_lock);
- if (!cgroup_is_dead(cgrp) &&
- list_empty(&cgrp->release_list)) {
- list_add(&cgrp->release_list, &release_list);
- need_schedule_work = 1;
- }
- raw_spin_unlock(&release_list_lock);
- if (need_schedule_work)
- schedule_work(&release_agent_work);
- }
+ if (notify_on_release(cgrp) && !cgroup_has_tasks(cgrp) &&
+ !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
+ schedule_work(&cgrp->release_agent_work);
}
/*
@@ -5329,52 +5263,39 @@
*/
static void cgroup_release_agent(struct work_struct *work)
{
- BUG_ON(work != &release_agent_work);
+ struct cgroup *cgrp =
+ container_of(work, struct cgroup, release_agent_work);
+ char *pathbuf = NULL, *agentbuf = NULL, *path;
+ char *argv[3], *envp[3];
+
mutex_lock(&cgroup_mutex);
- raw_spin_lock(&release_list_lock);
- while (!list_empty(&release_list)) {
- char *argv[3], *envp[3];
- int i;
- char *pathbuf = NULL, *agentbuf = NULL, *path;
- struct cgroup *cgrp = list_entry(release_list.next,
- struct cgroup,
- release_list);
- list_del_init(&cgrp->release_list);
- raw_spin_unlock(&release_list_lock);
- pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
- if (!pathbuf)
- goto continue_free;
- path = cgroup_path(cgrp, pathbuf, PATH_MAX);
- if (!path)
- goto continue_free;
- agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
- if (!agentbuf)
- goto continue_free;
- i = 0;
- argv[i++] = agentbuf;
- argv[i++] = path;
- argv[i] = NULL;
+ pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+ agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+ if (!pathbuf || !agentbuf)
+ goto out;
- i = 0;
- /* minimal command environment */
- envp[i++] = "HOME=/";
- envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
- envp[i] = NULL;
+ path = cgroup_path(cgrp, pathbuf, PATH_MAX);
+ if (!path)
+ goto out;
- /* Drop the lock while we invoke the usermode helper,
- * since the exec could involve hitting disk and hence
- * be a slow process */
- mutex_unlock(&cgroup_mutex);
- call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
- mutex_lock(&cgroup_mutex);
- continue_free:
- kfree(pathbuf);
- kfree(agentbuf);
- raw_spin_lock(&release_list_lock);
- }
- raw_spin_unlock(&release_list_lock);
+ argv[0] = agentbuf;
+ argv[1] = path;
+ argv[2] = NULL;
+
+ /* minimal command environment */
+ envp[0] = "HOME=/";
+ envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+ envp[2] = NULL;
+
mutex_unlock(&cgroup_mutex);
+ call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+ goto out_free;
+out:
+ mutex_unlock(&cgroup_mutex);
+out_free:
+ kfree(agentbuf);
+ kfree(pathbuf);
}
static int __init cgroup_disable(char *str)
@@ -5562,7 +5483,8 @@
static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
- return test_bit(CGRP_RELEASABLE, &css->cgroup->flags);
+ return (!cgroup_has_tasks(css->cgroup) &&
+ !css_has_online_children(&css->cgroup->self));
}
static struct cftype debug_files[] = {
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 52cb04c..1f107c7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2730,10 +2730,9 @@
* and we take cpuset_mutex, keeping cpuset_attach() from changing it
* anyway.
*/
-int proc_cpuset_show(struct seq_file *m, void *unused_v)
+int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *tsk)
{
- struct pid *pid;
- struct task_struct *tsk;
char *buf, *p;
struct cgroup_subsys_state *css;
int retval;
@@ -2743,24 +2742,16 @@
if (!buf)
goto out;
- retval = -ESRCH;
- pid = m->private;
- tsk = get_pid_task(pid, PIDTYPE_PID);
- if (!tsk)
- goto out_free;
-
retval = -ENAMETOOLONG;
rcu_read_lock();
css = task_css(tsk, cpuset_cgrp_id);
p = cgroup_path(css->cgroup, buf, PATH_MAX);
rcu_read_unlock();
if (!p)
- goto out_put_task;
+ goto out_free;
seq_puts(m, p);
seq_putc(m, '\n');
retval = 0;
-out_put_task:
- put_task_struct(tsk);
out_free:
kfree(buf);
out:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 963bf13..b1c6635 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -392,14 +392,9 @@
event->cgrp->css.cgroup);
}
-static inline void perf_put_cgroup(struct perf_event *event)
-{
- css_put(&event->cgrp->css);
-}
-
static inline void perf_detach_cgroup(struct perf_event *event)
{
- perf_put_cgroup(event);
+ css_put(&event->cgrp->css);
event->cgrp = NULL;
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 44c6bd2..8639f6b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -148,7 +148,7 @@
ino = cgroup_ino(css->cgroup);
css_put(css);
- if (!ino || ino != hwpoison_filter_memcg)
+ if (ino != hwpoison_filter_memcg)
return -EINVAL;
return 0;