/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/moduleloader.h>
#include <linux/bpf.h>
#include <linux/frame.h>

#include <asm/unaligned.h>

/* Registers */
#define BPF_R0	regs[BPF_REG_0]
#define BPF_R1	regs[BPF_REG_1]
#define BPF_R2	regs[BPF_REG_2]
#define BPF_R3	regs[BPF_REG_3]
#define BPF_R4	regs[BPF_REG_4]
#define BPF_R5	regs[BPF_REG_5]
#define BPF_R6	regs[BPF_REG_6]
#define BPF_R7	regs[BPF_REG_7]
#define BPF_R8	regs[BPF_REG_8]
#define BPF_R9	regs[BPF_REG_9]
#define BPF_R10	regs[BPF_REG_10]

/* Named registers */
#define DST	regs[insn->dst_reg]
#define SRC	regs[insn->src_reg]
#define FP	regs[BPF_REG_FP]
#define ARG1	regs[BPF_REG_ARG1]
#define CTX	regs[BPF_REG_CTX]
#define IMM	insn->imm

/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
		return ptr;

	return NULL;
}

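/* For illustration: classic BPF reaches headers in front of skb->data
 * via magic negative offsets. A load with k = SKF_LL_OFF + 12 and
 * size = 2, for instance, would fetch the EtherType field of an
 * Ethernet MAC header, or NULL if the access falls outside the linear
 * skb area.
 */
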
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
			  gfp_extra_flags;
	struct bpf_prog_aux *aux;
	struct bpf_prog *fp;

	size = round_up(size, PAGE_SIZE);
	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
	if (fp == NULL)
		return NULL;

	kmemcheck_annotate_bitfield(fp, meta);

	aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
	if (aux == NULL) {
		vfree(fp);
		return NULL;
	}

	fp->pages = size / PAGE_SIZE;
	fp->aux = aux;
	fp->aux->prog = fp;

	return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_alloc);

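/* A minimal usage sketch (hypothetical caller, not part of this file):
 * allocate room for a program with 'len' insns and copy in the image.
 *
 *	fp = bpf_prog_alloc(bpf_prog_size(len), 0);
 *	if (fp == NULL)
 *		return -ENOMEM;
 *	fp->len = len;
 *	memcpy(fp->insnsi, insns, len * sizeof(struct bpf_insn));
 *
 * bpf_prog_size() accounts for the struct bpf_prog header in front of
 * the instruction image, which is why callers do not add it themselves.
 */
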
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
				  gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
			  gfp_extra_flags;
	struct bpf_prog *fp;

	BUG_ON(fp_old == NULL);

	size = round_up(size, PAGE_SIZE);
	if (size <= fp_old->pages * PAGE_SIZE)
		return fp_old;

	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
	if (fp != NULL) {
		kmemcheck_annotate_bitfield(fp, meta);

		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
		fp->pages = size / PAGE_SIZE;
		fp->aux->prog = fp;

		/* We keep fp->aux from fp_old around in the new
		 * reallocated structure.
		 */
		fp_old->aux = NULL;
		__bpf_prog_free(fp_old);
	}

	return fp;
}

void __bpf_prog_free(struct bpf_prog *fp)
{
	kfree(fp->aux);
	vfree(fp);
}

static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_JMP &&
	       /* Call and Exit are both special jumps with no
		* target inside the BPF instruction image.
		*/
	       BPF_OP(insn->code) != BPF_CALL &&
	       BPF_OP(insn->code) != BPF_EXIT;
}

static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
{
	struct bpf_insn *insn = prog->insnsi;
	u32 i, insn_cnt = prog->len;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (!bpf_is_jmp_and_has_target(insn))
			continue;

		/* Adjust offset of jmps if we cross boundaries. */
		if (i < pos && i + insn->off + 1 > pos)
			insn->off += delta;
		else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
			insn->off -= delta;
	}
}

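/* A worked example with hypothetical numbers: suppose insn 2 jumps
 * forward over insn 3 to insn 4 (off = +1), and a patch at pos = 3
 * grows the image by delta = 2. The jump's target i + off + 1 = 4
 * crosses 'pos', so off is bumped to +3, which again lands on the
 * (now shifted) old insn 4. Jumps that start and end on the same side
 * of the patched region are left untouched.
 */
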
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
				       const struct bpf_insn *patch, u32 len)
{
	u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
	struct bpf_prog *prog_adj;

	/* Since our patchlet doesn't expand the image, we're done. */
	if (insn_delta == 0) {
		memcpy(prog->insnsi + off, patch, sizeof(*patch));
		return prog;
	}

	insn_adj_cnt = prog->len + insn_delta;

	/* Several new instructions need to be inserted. Make room
	 * for them. Likely, there's no need for a new allocation, as
	 * the last page could have enough tailroom.
	 */
	prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
				    GFP_USER);
	if (!prog_adj)
		return NULL;

	prog_adj->len = insn_adj_cnt;

	/* Patching happens in 3 steps:
	 *
	 * 1) Move over tail of insnsi from next instruction onwards,
	 *    so we can patch the single target insn with one or more
	 *    new ones (patching is always from 1 to n insns, n > 0).
	 * 2) Inject new instructions at the target location.
	 * 3) Adjust branch offsets if necessary.
	 */
	insn_rest = insn_adj_cnt - off - len;

	memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1,
		sizeof(*patch) * insn_rest);
	memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);

	bpf_adj_branches(prog_adj, off, insn_delta);

	return prog_adj;
}

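/* To make the three steps above concrete (hypothetical numbers): with
 * prog->len = 10, off = 4 and len = 3, the single insn at slot 4 is
 * replaced by three new ones. Then insn_delta = 2, insn_adj_cnt = 12
 * and insn_rest = 5, so the old insns 5..9 are first moved to slots
 * 7..11, the patch is copied into slots 4..6, and branches crossing
 * slot 4 are widened by 2 via bpf_adj_branches().
 */
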
#ifdef CONFIG_BPF_JIT
struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
		     unsigned int alignment,
		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
	struct bpf_binary_header *hdr;
	unsigned int size, hole, start;

	/* Most BPF filters are really small, but if some of them
	 * fill a page, allow at least 128 extra bytes to insert a
	 * random section of illegal instructions.
	 */
	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
	hdr = module_alloc(size);
	if (hdr == NULL)
		return NULL;

	/* Fill space with illegal/arch-dep instructions. */
	bpf_fill_ill_insns(hdr, size);

	hdr->pages = size / PAGE_SIZE;
	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
		     PAGE_SIZE - sizeof(*hdr));
	start = (prandom_u32() % hole) & ~(alignment - 1);

	/* Leave a random number of instructions before BPF code. */
	*image_ptr = &hdr->image[start];

	return hdr;
}

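/* Resulting image layout, sketched for illustration:
 *
 *	hdr ----------------> +----------------------------+
 *	(page aligned)        | struct bpf_binary_header   |
 *	                      +----------------------------+
 *	                      | random-size hole, filled   |
 *	                      | with illegal instructions  |
 *	*image_ptr ---------> +----------------------------+
 *	(aligned, randomized) | JITed program              |
 *	                      +----------------------------+
 *	                      | remaining fill             |
 *	                      +----------------------------+
 *
 * Randomizing the start offset within the hole makes the exact load
 * address of the JITed code harder to guess for an attacker.
 */
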
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
	module_memfree(hdr);
}

int bpf_jit_harden __read_mostly;

static int bpf_jit_blind_insn(const struct bpf_insn *from,
			      const struct bpf_insn *aux,
			      struct bpf_insn *to_buff)
{
	struct bpf_insn *to = to_buff;
	u32 imm_rnd = prandom_u32();
	s16 off;

	BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
	BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);

	if (from->imm == 0 &&
	    (from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
	     from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
		*to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
		goto out;
	}

	switch (from->code) {
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU | BPF_MOD | BPF_K:
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
		break;

	case BPF_ALU64 | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX);
		break;

	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSET | BPF_K:
		/* Accommodate the extra offset in case of a backjump. */
		off = from->off;
		if (off < 0)
			off -= 2;
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
		break;

	case BPF_LD | BPF_ABS | BPF_W:
	case BPF_LD | BPF_ABS | BPF_H:
	case BPF_LD | BPF_ABS | BPF_B:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
		break;

	case BPF_LD | BPF_IND | BPF_W:
	case BPF_LD | BPF_IND | BPF_H:
	case BPF_LD | BPF_IND | BPF_B:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg);
		*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
		break;

	case BPF_LD | BPF_IMM | BPF_DW:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
		*to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
		break;
	case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX);
		break;

	case BPF_ST | BPF_MEM | BPF_DW:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
		break;
	}
out:
	return to - to_buff;
}

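/* Blinding in a nutshell (illustrative, with a made-up random value):
 * an attacker-controlled constant such as
 *
 *	r1 += 0x12345678
 *
 * is rewritten so the original immediate never appears verbatim in the
 * JITed image:
 *
 *	ax  = 0x12345678 ^ imm_rnd
 *	ax ^= imm_rnd
 *	r1 += ax
 *
 * The backjump case needs off - 2 above because the two insns feeding
 * BPF_REG_AX are emitted in front of the branch, lengthening the
 * backward distance the rewritten jump has to cover.
 */
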
static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
					      gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
			  gfp_extra_flags;
	struct bpf_prog *fp;

	fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
	if (fp != NULL) {
		kmemcheck_annotate_bitfield(fp, meta);

		/* aux->prog still points to the fp_other one, so
		 * when promoting the clone to the real program,
		 * this still needs to be adapted.
		 */
		memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE);
	}

	return fp;
}

static void bpf_prog_clone_free(struct bpf_prog *fp)
{
	/* aux was stolen by the other clone, so we cannot free
	 * it from this path! It will be freed eventually by the
	 * other program on release.
	 *
	 * At this point, we don't need a deferred release since
	 * the clone is guaranteed to not be locked.
	 */
	fp->aux = NULL;
	__bpf_prog_free(fp);
}

void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
{
	/* We have to repoint aux->prog to self, as we don't
	 * know whether fp here is the clone or the original.
	 */
	fp->aux->prog = fp;
	bpf_prog_clone_free(fp_other);
}

struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
{
	struct bpf_insn insn_buff[16], aux[2];
	struct bpf_prog *clone, *tmp;
	int insn_delta, insn_cnt;
	struct bpf_insn *insn;
	int i, rewritten;

	if (!bpf_jit_blinding_enabled())
		return prog;

	clone = bpf_prog_clone_create(prog, GFP_USER);
	if (!clone)
		return ERR_PTR(-ENOMEM);

	insn_cnt = clone->len;
	insn = clone->insnsi;

	for (i = 0; i < insn_cnt; i++, insn++) {
		/* We temporarily need to hold the original ld64 insn
		 * so that we can still access the first part in the
		 * second blinding run.
		 */
		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
		    insn[1].code == 0)
			memcpy(aux, insn, sizeof(aux));

		rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
		if (!rewritten)
			continue;

		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
		if (!tmp) {
			/* Patching may have repointed aux->prog during
			 * realloc from the original one, so we need to
			 * fix it up here on error.
			 */
			bpf_jit_prog_release_other(prog, clone);
			return ERR_PTR(-ENOMEM);
		}

		clone = tmp;
		insn_delta = rewritten - 1;

		/* Walk new program and skip insns we just inserted. */
		insn = clone->insnsi + i + insn_delta;
		insn_cnt += insn_delta;
		i += insn_delta;
	}

	return clone;
}
#endif /* CONFIG_BPF_JIT */

/* Base function for offset calculation. Needs to go into .text section,
 * therefore keeping it non-static as well; will also be used by JITs
 * anyway later on, so do not let the compiler omit it.
 */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	return 0;
}
EXPORT_SYMBOL_GPL(__bpf_call_base);

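/* For orientation (a sketch of the calling convention, not new logic):
 * a helper call is encoded as BPF_JMP | BPF_CALL with insn->imm holding
 * the helper's address relative to __bpf_call_base. The interpreter
 * therefore dispatches via
 *
 *	BPF_R0 = (__bpf_call_base + insn->imm)(R1, ..., R5);
 *
 * and JITs can compute the absolute target the same way.
 */
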
/**
 *	__bpf_prog_run - run eBPF program on a given context
 *	@ctx: is the data we are operating on
 *	@insn: is the array of eBPF instructions
 *
 * Decode and execute eBPF instructions.
 */
static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
{
	u64 stack[MAX_BPF_STACK / sizeof(u64)];
	u64 regs[MAX_BPF_REG], tmp;
	static const void *jumptable[256] = {
		[0 ... 255] = &&default_label,
		/* Now overwrite non-defaults ... */
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
		[BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
		[BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
		[BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
		[BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
		[BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
		[BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
		[BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
		[BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
		[BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
		[BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
		[BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
		[BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
		[BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
		[BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
		[BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
		[BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
		[BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
		[BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
		[BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
		[BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
		[BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
		[BPF_ALU | BPF_NEG] = &&ALU_NEG,
		[BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
		[BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
		/* 64 bit ALU operations */
		[BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
		[BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
		[BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
		[BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
		[BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
		[BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
		[BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
		[BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
		[BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
		[BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
		[BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
		[BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
		[BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
		[BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
		[BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
		[BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
		[BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
		[BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
		[BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
		[BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
		[BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
		[BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
		[BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
		[BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
		[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
		/* Call instruction */
		[BPF_JMP | BPF_CALL] = &&JMP_CALL,
		[BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL,
		/* Jumps */
		[BPF_JMP | BPF_JA] = &&JMP_JA,
		[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
		[BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
		[BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
		[BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
		[BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
		[BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
		[BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
		[BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
		[BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
		[BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
		[BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
		[BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
		[BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
		[BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
		/* Program return */
		[BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
		/* Store instructions */
		[BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
		[BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
		[BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
		[BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
		[BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
		[BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
		[BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
		[BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
		[BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
		[BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
		/* Load instructions */
		[BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
		[BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
		[BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
		[BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
		[BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
		[BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
		[BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
		[BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
		[BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
		[BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
		[BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW,
	};
	u32 tail_call_cnt = 0;
	void *ptr;
	int off;

#define CONT	 ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })

	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
	ARG1 = (u64) (unsigned long) ctx;

select_insn:
	goto *jumptable[insn->code];

	/* ALU */
#define ALU(OPCODE, OP)			\
	ALU64_##OPCODE##_X:		\
		DST = DST OP SRC;	\
		CONT;			\
	ALU_##OPCODE##_X:		\
		DST = (u32) DST OP (u32) SRC;	\
		CONT;			\
	ALU64_##OPCODE##_K:		\
		DST = DST OP IMM;	\
		CONT;			\
	ALU_##OPCODE##_K:		\
		DST = (u32) DST OP (u32) IMM;	\
		CONT;

	ALU(ADD, +)
	ALU(SUB, -)
	ALU(AND, &)
	ALU(OR, |)
	ALU(LSH, <<)
	ALU(RSH, >>)
	ALU(XOR, ^)
	ALU(MUL, *)
#undef ALU
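	/* For reference, one instantiation expanded by hand: ALU(ADD, +)
	 * emits the four labels ALU64_ADD_X, ALU_ADD_X, ALU64_ADD_K and
	 * ALU_ADD_K, i.e. the 64/32-bit variants of the register and
	 * immediate forms, each falling through to CONT to fetch the
	 * next insn.
	 */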
	ALU_NEG:
		DST = (u32) -DST;
		CONT;
	ALU64_NEG:
		DST = -DST;
		CONT;
	ALU_MOV_X:
		DST = (u32) SRC;
		CONT;
	ALU_MOV_K:
		DST = (u32) IMM;
		CONT;
	ALU64_MOV_X:
		DST = SRC;
		CONT;
	ALU64_MOV_K:
		DST = IMM;
		CONT;
	LD_IMM_DW:
		DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
		insn++;
		CONT;
	ALU64_ARSH_X:
		(*(s64 *) &DST) >>= SRC;
		CONT;
	ALU64_ARSH_K:
		(*(s64 *) &DST) >>= IMM;
		CONT;
	ALU64_MOD_X:
		if (unlikely(SRC == 0))
			return 0;
		div64_u64_rem(DST, SRC, &tmp);
		DST = tmp;
		CONT;
	ALU_MOD_X:
		if (unlikely(SRC == 0))
			return 0;
		tmp = (u32) DST;
		DST = do_div(tmp, (u32) SRC);
		CONT;
	ALU64_MOD_K:
		div64_u64_rem(DST, IMM, &tmp);
		DST = tmp;
		CONT;
	ALU_MOD_K:
		tmp = (u32) DST;
		DST = do_div(tmp, (u32) IMM);
		CONT;
	ALU64_DIV_X:
		if (unlikely(SRC == 0))
			return 0;
		DST = div64_u64(DST, SRC);
		CONT;
	ALU_DIV_X:
		if (unlikely(SRC == 0))
			return 0;
		tmp = (u32) DST;
		do_div(tmp, (u32) SRC);
		DST = (u32) tmp;
		CONT;
	ALU64_DIV_K:
		DST = div64_u64(DST, IMM);
		CONT;
	ALU_DIV_K:
		tmp = (u32) DST;
		do_div(tmp, (u32) IMM);
		DST = (u32) tmp;
		CONT;
	ALU_END_TO_BE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_be16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_be32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_be64(DST);
			break;
		}
		CONT;
	ALU_END_TO_LE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_le16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_le32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_le64(DST);
			break;
		}
		CONT;

	/* CALL */
	JMP_CALL:
		/* Function call scratches BPF_R1-BPF_R5 registers,
		 * preserves BPF_R6-BPF_R9, and stores return value
		 * into BPF_R0.
		 */
		BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
						       BPF_R4, BPF_R5);
		CONT;

	JMP_TAIL_CALL: {
		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
		struct bpf_array *array = container_of(map, struct bpf_array, map);
		struct bpf_prog *prog;
		u64 index = BPF_R3;

		if (unlikely(index >= array->map.max_entries))
			goto out;

		if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
			goto out;

		tail_call_cnt++;

		prog = READ_ONCE(array->ptrs[index]);
		if (unlikely(!prog))
			goto out;

		/* ARG1 at this point is guaranteed to point to CTX from
		 * the verifier side due to the fact that the tail call is
		 * handled like a helper, that is, bpf_tail_call_proto,
		 * where arg1_type is ARG_PTR_TO_CTX.
		 */
		insn = prog->insnsi;
		goto select_insn;
out:
		CONT;
	}
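	/* Tail calls in one sentence (informational): bpf_tail_call(ctx,
	 * &prog_array, index) from program text lands here, and instead
	 * of returning to the caller, execution restarts at the first
	 * insn of the selected program; MAX_TAIL_CALL_CNT bounds the
	 * chain so a cycle of programs cannot loop forever.
	 */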
	/* JMP */
	JMP_JA:
		insn += insn->off;
		CONT;
	JMP_JEQ_X:
		if (DST == SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JEQ_K:
		if (DST == IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JNE_X:
		if (DST != SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JNE_K:
		if (DST != IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGT_X:
		if (DST > SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGT_K:
		if (DST > IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGE_X:
		if (DST >= SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGE_K:
		if (DST >= IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGT_X:
		if (((s64) DST) > ((s64) SRC)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGT_K:
		if (((s64) DST) > ((s64) IMM)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGE_X:
		if (((s64) DST) >= ((s64) SRC)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGE_K:
		if (((s64) DST) >= ((s64) IMM)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSET_X:
		if (DST & SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSET_K:
		if (DST & IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_EXIT:
		return BPF_R0;

	/* STX and ST and LDX */
#define LDST(SIZEOP, SIZE)						\
	STX_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
		CONT;							\
	ST_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = IMM;	\
		CONT;							\
	LDX_MEM_##SIZEOP:						\
		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
		CONT;

	LDST(B, u8)
	LDST(H, u16)
	LDST(W, u32)
	LDST(DW, u64)
#undef LDST
	STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
		atomic_add((u32) SRC, (atomic_t *)(unsigned long)
			   (DST + insn->off));
		CONT;
	STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
		atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
			     (DST + insn->off));
		CONT;
	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
		off = IMM;
load_word:
		/* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns appear
		 * only in programs where ctx == skb. All programs
		 * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6:
		 * bpf_convert_filter() saves it in BPF_R6, and the
		 * internal BPF verifier will check that BPF_R6 ==
		 * ctx.
		 *
		 * BPF_ABS and BPF_IND are wrappers of function calls,
		 * so they scratch BPF_R1-BPF_R5 registers, preserve
		 * BPF_R6-BPF_R9, and store return value into BPF_R0.
		 *
		 * Implicit input:
		 *   ctx == skb == BPF_R6 == CTX
		 *
		 * Explicit input:
		 *   SRC == any register
		 *   IMM == 32-bit immediate
		 *
		 * Output:
		 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
		 */

		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = get_unaligned_be32(ptr);
			CONT;
		}

		return 0;
	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
		off = IMM;
load_half:
		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = get_unaligned_be16(ptr);
			CONT;
		}

		return 0;
	LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
		off = IMM;
load_byte:
		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = *(u8 *)ptr;
			CONT;
		}

		return 0;
	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
		off = IMM + SRC;
		goto load_word;
	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
		off = IMM + SRC;
		goto load_half;
	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
		off = IMM + SRC;
		goto load_byte;

	default_label:
		/* If we ever reach this, we have a bug somewhere. */
		WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
		return 0;
}
STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */

bool bpf_prog_array_compatible(struct bpf_array *array,
			       const struct bpf_prog *fp)
{
	if (!array->owner_prog_type) {
		/* There's no owner yet where we could check for
		 * compatibility.
		 */
		array->owner_prog_type = fp->type;
		array->owner_jited = fp->jited;

		return true;
	}

	return array->owner_prog_type == fp->type &&
	       array->owner_jited == fp->jited;
}

static int bpf_check_tail_call(const struct bpf_prog *fp)
{
	struct bpf_prog_aux *aux = fp->aux;
	int i;

	for (i = 0; i < aux->used_map_cnt; i++) {
		struct bpf_map *map = aux->used_maps[i];
		struct bpf_array *array;

		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
			continue;

		array = container_of(map, struct bpf_array, map);
		if (!bpf_prog_array_compatible(array, fp))
			return -EINVAL;
	}

	return 0;
}

Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700964/**
Daniel Borkmann3324b582015-05-29 23:23:07 +0200965 * bpf_prog_select_runtime - select exec runtime for BPF program
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -0700966 * @fp: bpf_prog populated with internal BPF program
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +0200967 * @err: pointer to error variable
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700968 *
Daniel Borkmann3324b582015-05-29 23:23:07 +0200969 * Try to JIT eBPF program, if JIT is not available, use interpreter.
970 * The BPF program will be executed via BPF_PROG_RUN() macro.
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700971 */
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +0200972struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700973{
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -0700974 fp->bpf_func = (void *) __bpf_prog_run;
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700975
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +0200976 /* eBPF JITs can rewrite the program in case constant
977 * blinding is active. However, in case of error during
978 * blinding, bpf_int_jit_compile() must always return a
979 * valid program, which in this case would simply not
980 * be JITed, but falls back to the interpreter.
981 */
982 fp = bpf_int_jit_compile(fp);
Daniel Borkmann60a3b222014-09-02 22:53:44 +0200983 bpf_prog_lock_ro(fp);
Alexei Starovoitov04fd61a2015-05-19 16:59:03 -0700984
Daniel Borkmann3324b582015-05-29 23:23:07 +0200985 /* The tail call compatibility check can only be done at
986 * this late stage as we need to determine, if we deal
987 * with JITed or non JITed program concatenations and not
988 * all eBPF JITs might immediately support all features.
989 */
Daniel Borkmannd1c55ab2016-05-13 19:08:31 +0200990 *err = bpf_check_tail_call(fp);
991
992 return fp;
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700993}
Alexei Starovoitov7ae457c2014-07-30 20:34:16 -0700994EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
Alexei Starovoitovf5bffec2014-07-22 23:01:58 -0700995
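/* A typical caller sequence (sketch; the program load path in
 * kernel/bpf/syscall.c is the canonical user):
 *
 *	fp = bpf_prog_select_runtime(fp, &err);
 *	if (err)
 *		goto free_prog;
 *	...
 *	ret = BPF_PROG_RUN(fp, ctx);
 *
 * After this call, fp->bpf_func points either at __bpf_prog_run or at
 * the JITed image, so BPF_PROG_RUN() does not care which runtime won.
 */
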
static void bpf_prog_free_deferred(struct work_struct *work)
{
	struct bpf_prog_aux *aux;

	aux = container_of(work, struct bpf_prog_aux, work);
	bpf_jit_free(aux->prog);
}

/* Free internal BPF program */
void bpf_prog_free(struct bpf_prog *fp)
{
	struct bpf_prog_aux *aux = fp->aux;

	INIT_WORK(&aux->work, bpf_prog_free_deferred);
	schedule_work(&aux->work);
}
EXPORT_SYMBOL_GPL(bpf_prog_free);

/* RNG for unprivileged user space with separated state from prandom_u32(). */
static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);

void bpf_user_rnd_init_once(void)
{
	prandom_init_once(&bpf_user_rnd_state);
}

u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	/* Should someone ever have the rather unwise idea to use some
	 * of the registers passed into this function, then note that
	 * this function is called from native eBPF and classic-to-eBPF
	 * transformations. Register assignments from both sides are
	 * different, e.g. classic always sets fn(ctx, A, X) here.
	 */
	struct rnd_state *state;
	u32 res;

	state = &get_cpu_var(bpf_user_rnd_state);
	res = prandom_u32_state(state);
	put_cpu_var(state);

	return res;
}

/* Weak definitions of helper functions in case we don't have bpf syscall. */
const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;

const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;

const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
	return NULL;
}

const struct bpf_func_proto * __weak bpf_get_event_output_proto(void)
{
	return NULL;
}

/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
	.func		= NULL,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */
struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
{
	return prog;
}

bool __weak bpf_helper_changes_skb_data(void *func)
{
	return false;
}

/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
 */
int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
			 int len)
{
	return -EFAULT;
}