Blame - kernel/bpf/cgroup.c - kernel/msm-4.9

blob: 54c47a93e86e76e692f8e79e478c75eb61a59052 [file] [log] [blame]

Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	1	/*
				2	* Functions to manage eBPF programs attached to cgroups
				3	*
				4	* Copyright (c) 2016 Daniel Mack
				5	*
				6	* This file is subject to the terms and conditions of version 2 of the GNU
				7	* General Public License. See the file COPYING in the main directory of the
				8	* Linux distribution for more details.
				9	*/
				10
				11	#include <linux/kernel.h>
				12	#include <linux/atomic.h>
				13	#include <linux/cgroup.h>
				14	#include <linux/slab.h>
				15	#include <linux/bpf.h>
				16	#include <linux/bpf-cgroup.h>
				17	#include <net/sock.h>
				18
					/* Static key, defined in the "false" state. Within this file it is
					 * incremented in __cgroup_bpf_attach() and decremented in
					 * __cgroup_bpf_detach() and cgroup_bpf_put().
					 * NOTE(review): presumably tested by the fast-path wrappers in
					 * <linux/bpf-cgroup.h> - confirm against that header.
					 */
				19	DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
				20	EXPORT_SYMBOL(cgroup_bpf_enabled_key);
				21
				22	/**
				23	* cgroup_bpf_put() - put references of all bpf programs
				24	* @cgrp: the cgroup to modify
				25	*/
				26	void cgroup_bpf_put(struct cgroup *cgrp)
				27	{
				28	unsigned int type;
				29
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	30	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
				31	struct list_head *progs = &cgrp->bpf.progs[type];
				32	struct bpf_prog_list pl, tmp;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	33
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	34	list_for_each_entry_safe(pl, tmp, progs, node) {
				35	list_del(&pl->node);
				36	bpf_prog_put(pl->prog);
				37	kfree(pl);
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	38	static_branch_dec(&cgroup_bpf_enabled_key);
				39	}
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	40	bpf_prog_array_free(cgrp->bpf.effective[type]);
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	41	}
				42	}
				43
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	44	/* count number of elements in the list.
				45	* it's slow but the list cannot be long
				46	*/
				47	static u32 prog_list_length(struct list_head *head)
				48	{
				49	struct bpf_prog_list *pl;
				50	u32 cnt = 0;
				51
				52	list_for_each_entry(pl, head, node) {
				53	if (!pl->prog)
				54	continue;
				55	cnt++;
				56	}
				57	return cnt;
				58	}
				59
				60	/* if parent has non-overridable prog attached,
				61	* disallow attaching new programs to the descendent cgroup.
				62	* if parent has overridable or multi-prog, allow attaching
				63	*/
				64	static bool hierarchy_allows_attach(struct cgroup *cgrp,
				65	enum bpf_attach_type type,
				66	u32 new_flags)
				67	{
				68	struct cgroup *p;
				69
				70	p = cgroup_parent(cgrp);
				71	if (!p)
				72	return true;
				73	do {
				74	u32 flags = p->bpf.flags[type];
				75	u32 cnt;
				76
				77	if (flags & BPF_F_ALLOW_MULTI)
				78	return true;
				79	cnt = prog_list_length(&p->bpf.progs[type]);
				80	WARN_ON_ONCE(cnt > 1);
				81	if (cnt == 1)
				82	return !!(flags & BPF_F_ALLOW_OVERRIDE);
				83	p = cgroup_parent(p);
				84	} while (p);
				85	return true;
				86	}
				87
				88	/* compute a chain of effective programs for a given cgroup:
				89	* start from the list of programs in this cgroup and add
				90	* all parent programs.
				91	* Note that parent's F_ALLOW_OVERRIDE-type program is yielding
				92	* to programs in this cgroup
				93	*/
				94	static int compute_effective_progs(struct cgroup *cgrp,
				95	enum bpf_attach_type type,
				96	struct bpf_prog_array __rcu **array)
				97	{
				98	struct bpf_prog_array __rcu *progs;
				99	struct bpf_prog_list *pl;
				100	struct cgroup *p = cgrp;
				101	int cnt = 0;
				102
				103	/* count number of effective programs by walking parents */
				104	do {
				105	if (cnt == 0 \|\| (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
				106	cnt += prog_list_length(&p->bpf.progs[type]);
				107	p = cgroup_parent(p);
				108	} while (p);
				109
				110	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
				111	if (!progs)
				112	return -ENOMEM;
				113
				114	/* populate the array with effective progs */
				115	cnt = 0;
				116	p = cgrp;
				117	do {
				118	if (cnt == 0 \|\| (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
				119	list_for_each_entry(pl,
				120	&p->bpf.progs[type], node) {
				121	if (!pl->prog)
				122	continue;
				123	rcu_dereference_protected(progs, 1)->
				124	progs[cnt++] = pl->prog;
				125	}
				126	p = cgroup_parent(p);
				127	} while (p);
				128
				129	*array = progs;
				130	return 0;
				131	}
				132
				133	static void activate_effective_progs(struct cgroup *cgrp,
				134	enum bpf_attach_type type,
				135	struct bpf_prog_array __rcu *array)
				136	{
				137	struct bpf_prog_array __rcu *old_array;
				138
				139	old_array = xchg(&cgrp->bpf.effective[type], array);
				140	/* free prog array after grace period, since __cgroup_bpf_run_*()
				141	* might be still walking the array
				142	*/
				143	bpf_prog_array_free(old_array);
				144	}
				145
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	146	/**
				147	* cgroup_bpf_inherit() - inherit effective programs from parent
				148	* @cgrp: the cgroup to modify
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	149	*/
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	150	int cgroup_bpf_inherit(struct cgroup *cgrp)
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	151	{
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	152	/* has to use marco instead of const int, since compiler thinks
				153	* that array below is variable length
				154	*/
				155	#define NR ARRAY_SIZE(cgrp->bpf.effective)
				156	struct bpf_prog_array __rcu *arrays[NR] = {};
				157	int i;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	158
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	159	for (i = 0; i < NR; i++)
				160	INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	161
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	162	for (i = 0; i < NR; i++)
				163	if (compute_effective_progs(cgrp, i, &arrays[i]))
				164	goto cleanup;
				165
				166	for (i = 0; i < NR; i++)
				167	activate_effective_progs(cgrp, i, arrays[i]);
				168
				169	return 0;
				170	cleanup:
				171	for (i = 0; i < NR; i++)
				172	bpf_prog_array_free(arrays[i]);
				173	return -ENOMEM;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	174	}
				175
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	176	#define BPF_CGROUP_MAX_PROGS 64
				177
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	178	/**
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	179	* __cgroup_bpf_attach() - Attach the program to a cgroup, and
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	180	* propagate the change to descendants
				181	* @cgrp: The cgroup which descendants to traverse
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	182	* @prog: A program to attach
				183	* @type: Type of attach operation
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	184	*
				185	* Must be called with cgroup_mutex held.
				186	*/
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	187	int __cgroup_bpf_attach(struct cgroup cgrp, struct bpf_prog prog,
				188	enum bpf_attach_type type, u32 flags)
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	189	{
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	190	struct list_head *progs = &cgrp->bpf.progs[type];
				191	struct bpf_prog *old_prog = NULL;
				192	struct cgroup_subsys_state *css;
				193	struct bpf_prog_list *pl;
				194	bool pl_was_allocated;
				195	u32 old_flags;
				196	int err;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	197
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	198	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
				199	/* invalid combination */
				200	return -EINVAL;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	201
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	202	if (!hierarchy_allows_attach(cgrp, type, flags))
				203	return -EPERM;
				204
				205	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
				206	/* Disallow attaching non-overridable on top
				207	* of existing overridable in this cgroup.
				208	* Disallow attaching multi-prog if overridable or none
Alexei Starovoitov	1ee2b4b	2017-02-10 20:28:24 -0800	[diff] [blame]	209	*/
				210	return -EPERM;
				211
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	212	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
				213	return -E2BIG;
Alexei Starovoitov	1ee2b4b	2017-02-10 20:28:24 -0800	[diff] [blame]	214
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	215	if (flags & BPF_F_ALLOW_MULTI) {
				216	list_for_each_entry(pl, progs, node)
				217	if (pl->prog == prog)
				218	/* disallow attaching the same prog twice */
				219	return -EINVAL;
Alexei Starovoitov	1ee2b4b	2017-02-10 20:28:24 -0800	[diff] [blame]	220
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	221	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
				222	if (!pl)
				223	return -ENOMEM;
				224	pl_was_allocated = true;
				225	pl->prog = prog;
				226	list_add_tail(&pl->node, progs);
				227	} else {
				228	if (list_empty(progs)) {
				229	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
				230	if (!pl)
				231	return -ENOMEM;
				232	pl_was_allocated = true;
				233	list_add_tail(&pl->node, progs);
Alexei Starovoitov	1ee2b4b	2017-02-10 20:28:24 -0800	[diff] [blame]	234	} else {
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	235	pl = list_first_entry(progs, typeof(*pl), node);
				236	old_prog = pl->prog;
				237	pl_was_allocated = false;
Alexei Starovoitov	1ee2b4b	2017-02-10 20:28:24 -0800	[diff] [blame]	238	}
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	239	pl->prog = prog;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	240	}
				241
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	242	old_flags = cgrp->bpf.flags[type];
				243	cgrp->bpf.flags[type] = flags;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	244
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	245	/* allocate and recompute effective prog arrays */
				246	css_for_each_descendant_pre(css, &cgrp->self) {
				247	struct cgroup *desc = container_of(css, struct cgroup, self);
				248
				249	err = compute_effective_progs(desc, type, &desc->bpf.inactive);
				250	if (err)
				251	goto cleanup;
				252	}
				253
				254	/* all allocations were successful. Activate all prog arrays */
				255	css_for_each_descendant_pre(css, &cgrp->self) {
				256	struct cgroup *desc = container_of(css, struct cgroup, self);
				257
				258	activate_effective_progs(desc, type, desc->bpf.inactive);
				259	desc->bpf.inactive = NULL;
				260	}
				261
				262	static_branch_inc(&cgroup_bpf_enabled_key);
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	263	if (old_prog) {
				264	bpf_prog_put(old_prog);
				265	static_branch_dec(&cgroup_bpf_enabled_key);
				266	}
Alexei Starovoitov	1ee2b4b	2017-02-10 20:28:24 -0800	[diff] [blame]	267	return 0;
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	268
				269	cleanup:
				270	/* oom while computing effective. Free all computed effective arrays
				271	* since they were not activated
				272	*/
				273	css_for_each_descendant_pre(css, &cgrp->self) {
				274	struct cgroup *desc = container_of(css, struct cgroup, self);
				275
				276	bpf_prog_array_free(desc->bpf.inactive);
				277	desc->bpf.inactive = NULL;
				278	}
				279
				280	/* and cleanup the prog list */
				281	pl->prog = old_prog;
				282	if (pl_was_allocated) {
				283	list_del(&pl->node);
				284	kfree(pl);
				285	}
				286	return err;
				287	}
				288
				289	/**
				290	* __cgroup_bpf_detach() - Detach the program from a cgroup, and
				291	* propagate the change to descendants
				292	* @cgrp: The cgroup which descendants to traverse
				293	* @prog: A program to detach or NULL
				294	* @type: Type of detach operation
				295	*
				296	* Must be called with cgroup_mutex held.
				297	*/
				298	int __cgroup_bpf_detach(struct cgroup cgrp, struct bpf_prog prog,
				299	enum bpf_attach_type type, u32 unused_flags)
				300	{
				301	struct list_head *progs = &cgrp->bpf.progs[type];
				302	u32 flags = cgrp->bpf.flags[type];
				303	struct bpf_prog *old_prog = NULL;
				304	struct cgroup_subsys_state *css;
				305	struct bpf_prog_list *pl;
				306	int err;
				307
				308	if (flags & BPF_F_ALLOW_MULTI) {
				309	if (!prog)
				310	/* to detach MULTI prog the user has to specify valid FD
				311	* of the program to be detached
				312	*/
				313	return -EINVAL;
				314	} else {
				315	if (list_empty(progs))
				316	/* report error when trying to detach and nothing is attached */
				317	return -ENOENT;
				318	}
				319
				320	if (flags & BPF_F_ALLOW_MULTI) {
				321	/* find the prog and detach it */
				322	list_for_each_entry(pl, progs, node) {
				323	if (pl->prog != prog)
				324	continue;
				325	old_prog = prog;
				326	/* mark it deleted, so it's ignored while
				327	* recomputing effective
				328	*/
				329	pl->prog = NULL;
				330	break;
				331	}
				332	if (!old_prog)
				333	return -ENOENT;
				334	} else {
				335	/* to maintain backward compatibility NONE and OVERRIDE cgroups
				336	* allow detaching with invalid FD (prog==NULL)
				337	*/
				338	pl = list_first_entry(progs, typeof(*pl), node);
				339	old_prog = pl->prog;
				340	pl->prog = NULL;
				341	}
				342
				343	/* allocate and recompute effective prog arrays */
				344	css_for_each_descendant_pre(css, &cgrp->self) {
				345	struct cgroup *desc = container_of(css, struct cgroup, self);
				346
				347	err = compute_effective_progs(desc, type, &desc->bpf.inactive);
				348	if (err)
				349	goto cleanup;
				350	}
				351
				352	/* all allocations were successful. Activate all prog arrays */
				353	css_for_each_descendant_pre(css, &cgrp->self) {
				354	struct cgroup *desc = container_of(css, struct cgroup, self);
				355
				356	activate_effective_progs(desc, type, desc->bpf.inactive);
				357	desc->bpf.inactive = NULL;
				358	}
				359
				360	/* now can actually delete it from this cgroup list */
				361	list_del(&pl->node);
				362	kfree(pl);
				363	if (list_empty(progs))
				364	/* last program was detached, reset flags to zero */
				365	cgrp->bpf.flags[type] = 0;
				366
				367	bpf_prog_put(old_prog);
				368	static_branch_dec(&cgroup_bpf_enabled_key);
				369	return 0;
				370
				371	cleanup:
				372	/* oom while computing effective. Free all computed effective arrays
				373	* since they were not activated
				374	*/
				375	css_for_each_descendant_pre(css, &cgrp->self) {
				376	struct cgroup *desc = container_of(css, struct cgroup, self);
				377
				378	bpf_prog_array_free(desc->bpf.inactive);
				379	desc->bpf.inactive = NULL;
				380	}
				381
				382	/* and restore back old_prog */
				383	pl->prog = old_prog;
				384	return err;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	385	}
				386
				387	/**
				388	* __cgroup_bpf_run_filter() - Run a program for packet filtering
Willem de Bruijn	2c1289c	2017-04-11 14:08:08 -0400	[diff] [blame]	389	* @sk: The socket sending or receiving traffic
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	390	* @skb: The skb that is being sent or received
				391	* @type: The type of program to be exectuted
				392	*
				393	* If no socket is passed, or the socket is not of type INET or INET6,
				394	* this function does nothing and returns 0.
				395	*
				396	* The program type passed in via @type must be suitable for network
				397	* filtering. No further check is performed to assert that.
				398	*
				399	* This function will return %-EPERM if any if an attached program was found
				400	* and if it returned != 1 during execution. In all other cases, 0 is returned.
				401	*/
				402	int __cgroup_bpf_run_filter(struct sock *sk,
				403	struct sk_buff *skb,
				404	enum bpf_attach_type type)
				405	{
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	406	unsigned int offset = skb->data - skb_network_header(skb);
				407	struct sock *save_sk;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	408	struct cgroup *cgrp;
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	409	int ret;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	410
				411	if (!sk \|\| !sk_fullsock(sk))
				412	return 0;
				413
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	414	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	415	return 0;
				416
				417	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
Alexei Starovoitov	148f111	2017-10-02 22:50:21 -0700	[diff] [blame]	418	save_sk = skb->sk;
				419	skb->sk = sk;
				420	__skb_push(skb, offset);
				421	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
				422	bpf_prog_run_save_cb);
				423	__skb_pull(skb, offset);
				424	skb->sk = save_sk;
				425	return ret == 1 ? 0 : -EPERM;
Daniel Mack	f791c42	2016-11-23 16:52:26 +0100	[diff] [blame]	426	}
				427	EXPORT_SYMBOL(__cgroup_bpf_run_filter);