/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/moduleloader.h>
#include <linux/bpf.h>
#include <linux/frame.h>

#include <asm/unaligned.h>

/* Registers */
#define BPF_R0	regs[BPF_REG_0]
#define BPF_R1	regs[BPF_REG_1]
#define BPF_R2	regs[BPF_REG_2]
#define BPF_R3	regs[BPF_REG_3]
#define BPF_R4	regs[BPF_REG_4]
#define BPF_R5	regs[BPF_REG_5]
#define BPF_R6	regs[BPF_REG_6]
#define BPF_R7	regs[BPF_REG_7]
#define BPF_R8	regs[BPF_REG_8]
#define BPF_R9	regs[BPF_REG_9]
#define BPF_R10	regs[BPF_REG_10]

/* Named registers */
#define DST	regs[insn->dst_reg]
#define SRC	regs[insn->src_reg]
#define FP	regs[BPF_REG_FP]
#define ARG1	regs[BPF_REG_ARG1]
#define CTX	regs[BPF_REG_CTX]
#define IMM	insn->imm

/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
		return ptr;

	return NULL;
}

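/* Illustrative note on bpf_internal_load_pointer_neg_helper() above (not
 * part of the original file): classic BPF uses the magic negative offsets
 * SKF_NET_OFF and SKF_LL_OFF as bases relative to the network resp. link
 * layer header. E.g. a classic filter doing "ld [SKF_NET_OFF + 9]" ends up
 * here with k = SKF_NET_OFF + 9 and, for an IPv4 packet, reads the
 * protocol byte of the IP header.
 */
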
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
			  gfp_extra_flags;
	struct bpf_prog_aux *aux;
	struct bpf_prog *fp;

	size = round_up(size, PAGE_SIZE);
	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
	if (fp == NULL)
		return NULL;

	kmemcheck_annotate_bitfield(fp, meta);

	aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
	if (aux == NULL) {
		vfree(fp);
		return NULL;
	}

	fp->pages = size / PAGE_SIZE;
	fp->aux = aux;
	fp->aux->prog = fp;

	return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_alloc);

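/* Usage sketch for bpf_prog_alloc() above (illustrative only): callers
 * size the allocation via bpf_prog_size(), which covers the bpf_prog
 * header plus the instruction array, e.g.:
 *
 *	fp = bpf_prog_alloc(bpf_prog_size(insn_cnt), 0);
 *	if (!fp)
 *		return -ENOMEM;
 *	fp->len = insn_cnt;
 *	memcpy(fp->insnsi, insns, insn_cnt * sizeof(struct bpf_insn));
 */
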
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
				  gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
			  gfp_extra_flags;
	struct bpf_prog *fp;

	BUG_ON(fp_old == NULL);

	size = round_up(size, PAGE_SIZE);
	if (size <= fp_old->pages * PAGE_SIZE)
		return fp_old;

	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
	if (fp != NULL) {
		kmemcheck_annotate_bitfield(fp, meta);

		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
		fp->pages = size / PAGE_SIZE;
		fp->aux->prog = fp;

		/* We keep fp->aux from fp_old around in the new
		 * reallocated structure.
		 */
		fp_old->aux = NULL;
		__bpf_prog_free(fp_old);
	}

	return fp;
}

void __bpf_prog_free(struct bpf_prog *fp)
{
	kfree(fp->aux);
	vfree(fp);
}

static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_JMP &&
	       /* Call and Exit are both special jumps with no
		* target inside the BPF instruction image.
		*/
	       BPF_OP(insn->code) != BPF_CALL &&
	       BPF_OP(insn->code) != BPF_EXIT;
}

static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
{
	struct bpf_insn *insn = prog->insnsi;
	u32 i, insn_cnt = prog->len;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (!bpf_is_jmp_and_has_target(insn))
			continue;

		/* Adjust offset of jmps if we cross boundaries. */
		if (i < pos && i + insn->off + 1 > pos)
			insn->off += delta;
		else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
			insn->off -= delta;
	}
}

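/* Worked example for bpf_adj_branches() above (illustrative): assume a
 * jump at index i = 1 with off = 3, i.e. targeting index 5, and a patch
 * that grows the image by delta = 2 instructions at pos = 3. The jump
 * crosses the patched region (1 < 3 && 1 + 3 + 1 > 3), so its offset is
 * bumped to 5 and it still lands on the original, now shifted, target at
 * index 7.
 */
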
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
				       const struct bpf_insn *patch, u32 len)
{
	u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
	struct bpf_prog *prog_adj;

	/* Since our patchlet doesn't expand the image, we're done. */
	if (insn_delta == 0) {
		memcpy(prog->insnsi + off, patch, sizeof(*patch));
		return prog;
	}

	insn_adj_cnt = prog->len + insn_delta;

	/* Several new instructions need to be inserted. Make room
	 * for them. Likely, there's no need for a new allocation as
	 * last page could have large enough tailroom.
	 */
	prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
				    GFP_USER);
	if (!prog_adj)
		return NULL;

	prog_adj->len = insn_adj_cnt;

	/* Patching happens in 3 steps:
	 *
	 * 1) Move over tail of insnsi from next instruction onwards,
	 *    so we can patch the single target insn with one or more
	 *    new ones (patching is always from 1 to n insns, n > 0).
	 * 2) Inject new instructions at the target location.
	 * 3) Adjust branch offsets if necessary.
	 */
	insn_rest = insn_adj_cnt - off - len;

	memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1,
		sizeof(*patch) * insn_rest);
	memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);

	bpf_adj_branches(prog_adj, off, insn_delta);

	return prog_adj;
}

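/* Usage sketch for bpf_patch_insn_single() above (illustrative, the patch
 * contents are made up): replacing instruction i with a two-insn sequence
 * could look like
 *
 *	struct bpf_insn patch[] = {
 *		BPF_MOV64_REG(BPF_REG_AX, BPF_REG_0),
 *		BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, 1),
 *	};
 *
 *	prog = bpf_patch_insn_single(prog, i, patch, ARRAY_SIZE(patch));
 *	if (!prog)
 *		return -ENOMEM;
 *
 * On growth the old prog may be freed by the realloc, so callers must
 * continue with the returned pointer only.
 */
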
#ifdef CONFIG_BPF_JIT
/* All BPF JIT sysctl knobs here. */
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
int bpf_jit_harden __read_mostly;
long bpf_jit_limit __read_mostly;

static atomic_long_t bpf_jit_current;

/* Can be overridden by an arch's JIT compiler if it has a custom,
 * dedicated BPF backend memory area, or if neither of the two
 * below apply.
 */
u64 __weak bpf_jit_alloc_exec_limit(void)
{
#if defined(MODULES_VADDR)
	return MODULES_END - MODULES_VADDR;
#else
	return VMALLOC_END - VMALLOC_START;
#endif
}

static int __init bpf_jit_charge_init(void)
{
	/* Only used as heuristic here to derive limit. */
	bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
					    PAGE_SIZE), LONG_MAX);
	return 0;
}
pure_initcall(bpf_jit_charge_init);

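/* For illustration: with the heuristic above, the default limit ends up
 * at a quarter of the JIT memory area (the module space if MODULES_VADDR
 * is defined, otherwise the vmalloc area), rounded up to a page. Once
 * bpf_jit_current exceeds that many pages, bpf_jit_charge_modmem() below
 * rejects further allocations from unprivileged users with -EPERM.
 */
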
static int bpf_jit_charge_modmem(u32 pages)
{
	if (atomic_long_add_return(pages, &bpf_jit_current) >
	    (bpf_jit_limit >> PAGE_SHIFT)) {
		if (!capable(CAP_SYS_ADMIN)) {
			atomic_long_sub(pages, &bpf_jit_current);
			return -EPERM;
		}
	}

	return 0;
}

static void bpf_jit_uncharge_modmem(u32 pages)
{
	atomic_long_sub(pages, &bpf_jit_current);
}

struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
		     unsigned int alignment,
		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
	struct bpf_binary_header *hdr;
	u32 size, hole, start, pages;

	/* Most BPF filters are really small, but if some of them
	 * fill a page, allow at least 128 extra bytes to insert a
	 * random section of illegal instructions.
	 */
	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
	pages = size / PAGE_SIZE;

	if (bpf_jit_charge_modmem(pages))
		return NULL;
	hdr = module_alloc(size);
	if (!hdr) {
		bpf_jit_uncharge_modmem(pages);
		return NULL;
	}

	/* Fill space with illegal/arch-dep instructions. */
	bpf_fill_ill_insns(hdr, size);

	hdr->pages = pages;
	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
		     PAGE_SIZE - sizeof(*hdr));
	start = (get_random_int() % hole) & ~(alignment - 1);

	/* Leave a random number of instructions before BPF code. */
	*image_ptr = &hdr->image[start];

	return hdr;
}

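/* Resulting layout of the image returned by bpf_jit_binary_alloc() above
 * (illustrative sketch):
 *
 *	+------------------------------+ <- hdr, page aligned
 *	| struct bpf_binary_header     |
 *	+------------------------------+
 *	| random pad (illegal insns)   |
 *	+------------------------------+ <- *image_ptr = &hdr->image[start]
 *	| JITed program, proglen bytes |
 *	+------------------------------+
 *	| remaining fill up to size    |
 *	+------------------------------+
 */
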
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
	u32 pages = hdr->pages;

	module_memfree(hdr);
	bpf_jit_uncharge_modmem(pages);
}

static int bpf_jit_blind_insn(const struct bpf_insn *from,
			      const struct bpf_insn *aux,
			      struct bpf_insn *to_buff)
{
	struct bpf_insn *to = to_buff;
	u32 imm_rnd = get_random_int();
	s16 off;

	BUILD_BUG_ON(BPF_REG_AX  + 1 != MAX_BPF_JIT_REG);
	BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);

	if (from->imm == 0 &&
	    (from->code == (BPF_ALU   | BPF_MOV | BPF_K) ||
	     from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
		*to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
		goto out;
	}

	switch (from->code) {
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_OR  | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU | BPF_MOD | BPF_K:
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
		break;

	case BPF_ALU64 | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_OR  | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX);
		break;

	case BPF_JMP | BPF_JEQ  | BPF_K:
	case BPF_JMP | BPF_JNE  | BPF_K:
	case BPF_JMP | BPF_JGT  | BPF_K:
	case BPF_JMP | BPF_JGE  | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSET | BPF_K:
		/* Accommodate for extra offset in case of a backjump. */
		off = from->off;
		if (off < 0)
			off -= 2;
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
		break;

	case BPF_LD | BPF_ABS | BPF_W:
	case BPF_LD | BPF_ABS | BPF_H:
	case BPF_LD | BPF_ABS | BPF_B:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
		break;

	case BPF_LD | BPF_IND | BPF_W:
	case BPF_LD | BPF_IND | BPF_H:
	case BPF_LD | BPF_IND | BPF_B:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg);
		*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
		break;

	case BPF_LD | BPF_IMM | BPF_DW:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
		*to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
		break;
	case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX);
		break;

	case BPF_ST | BPF_MEM | BPF_DW:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
		break;
	}
out:
	return to - to_buff;
}

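/* Example of the rewrite done by bpf_jit_blind_insn() above (illustrative,
 * the constant is made up): with a random imm_rnd, an instruction like
 *
 *	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x12345678)
 *
 * is emitted as
 *
 *	BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ 0x12345678)
 *	BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd)
 *	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_AX)
 *
 * so the user-controlled constant never appears verbatim in the JIT image.
 */
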
static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
					      gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
			  gfp_extra_flags;
	struct bpf_prog *fp;

	fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
	if (fp != NULL) {
		kmemcheck_annotate_bitfield(fp, meta);

		/* aux->prog still points to the fp_other one, so
		 * when promoting the clone to the real program,
		 * this still needs to be adapted.
		 */
		memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE);
	}

	return fp;
}

static void bpf_prog_clone_free(struct bpf_prog *fp)
{
	/* aux was stolen by the other clone, so we cannot free
	 * it from this path! It will be freed eventually by the
	 * other program on release.
	 *
	 * At this point, we don't need a deferred release since
	 * clone is guaranteed to not be locked.
	 */
	fp->aux = NULL;
	__bpf_prog_free(fp);
}

void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
{
	/* We have to repoint aux->prog to self, as we don't
	 * know whether fp here is the clone or the original.
	 */
	fp->aux->prog = fp;
	bpf_prog_clone_free(fp_other);
}

struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
{
	struct bpf_insn insn_buff[16], aux[2];
	struct bpf_prog *clone, *tmp;
	int insn_delta, insn_cnt;
	struct bpf_insn *insn;
	int i, rewritten;

	if (!bpf_jit_blinding_enabled())
		return prog;

	clone = bpf_prog_clone_create(prog, GFP_USER);
	if (!clone)
		return ERR_PTR(-ENOMEM);

	insn_cnt = clone->len;
	insn = clone->insnsi;

	for (i = 0; i < insn_cnt; i++, insn++) {
		/* We temporarily need to hold the original ld64 insn
		 * so that we can still access the first part in the
		 * second blinding run.
		 */
		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
		    insn[1].code == 0)
			memcpy(aux, insn, sizeof(aux));

		rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
		if (!rewritten)
			continue;

		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
		if (!tmp) {
			/* Patching may have repointed aux->prog during
			 * realloc from the original one, so we need to
			 * fix it up here on error.
			 */
			bpf_jit_prog_release_other(prog, clone);
			return ERR_PTR(-ENOMEM);
		}

		clone = tmp;
		insn_delta = rewritten - 1;

		/* Walk new program and skip insns we just inserted. */
		insn = clone->insnsi + i + insn_delta;
		insn_cnt += insn_delta;
		i += insn_delta;
	}

	return clone;
}
#endif /* CONFIG_BPF_JIT */

/* Base function for offset calculation. Needs to go into .text section,
 * therefore keeping it non-static as well; will also be used by JITs
 * anyway later on, so do not let the compiler omit it.
 */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	return 0;
}
EXPORT_SYMBOL_GPL(__bpf_call_base);

#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
 *	__bpf_prog_run - run eBPF program on a given context
 *	@ctx: is the data we are operating on
 *	@insn: is the array of eBPF instructions
 *
 * Decode and execute eBPF instructions.
 */
static unsigned int __bpf_prog_run(const struct sk_buff *ctx, const struct bpf_insn *insn)
{
	u64 stack[MAX_BPF_STACK / sizeof(u64)];
	u64 regs[MAX_BPF_REG], tmp;
	static const void *jumptable[256] = {
		[0 ... 255] = &&default_label,
		/* Now overwrite non-defaults ... */
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
		[BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
		[BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
		[BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
		[BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
		[BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
		[BPF_ALU | BPF_OR | BPF_X]  = &&ALU_OR_X,
		[BPF_ALU | BPF_OR | BPF_K]  = &&ALU_OR_K,
		[BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
		[BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
		[BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
		[BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
		[BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
		[BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
		[BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
		[BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
		[BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
		[BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
		[BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
		[BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
		[BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
		[BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
		[BPF_ALU | BPF_NEG] = &&ALU_NEG,
		[BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
		[BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
		/* 64 bit ALU operations */
		[BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
		[BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
		[BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
		[BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
		[BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
		[BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
		[BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
		[BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
		[BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
		[BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
		[BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
		[BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
		[BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
		[BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
		[BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
		[BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
		[BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
		[BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
		[BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
		[BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
		[BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
		[BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
		[BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
		[BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
		[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
		/* Call instruction */
		[BPF_JMP | BPF_CALL] = &&JMP_CALL,
		[BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL,
		/* Jumps */
		[BPF_JMP | BPF_JA] = &&JMP_JA,
		[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
		[BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
		[BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
		[BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
		[BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
		[BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
		[BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
		[BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
		[BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
		[BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
		[BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
		[BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
		[BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
		[BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
		/* Program return */
		[BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
		/* Store instructions */
		[BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
		[BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
		[BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
		[BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
		[BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
		[BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
		[BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
		[BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
		[BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
		[BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
		/* Load instructions */
		[BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
		[BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
		[BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
		[BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
		[BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
		[BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
		[BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
		[BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
		[BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
		[BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
		[BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW,
	};
	u32 tail_call_cnt = 0;
	void *ptr;
	int off;

#define CONT	 ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })

	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
	ARG1 = (u64) (unsigned long) ctx;

select_insn:
	goto *jumptable[insn->code];

	/* ALU */
#define ALU(OPCODE, OP)			\
	ALU64_##OPCODE##_X:		\
		DST = DST OP SRC;	\
		CONT;			\
	ALU_##OPCODE##_X:		\
		DST = (u32) DST OP (u32) SRC;	\
		CONT;			\
	ALU64_##OPCODE##_K:		\
		DST = DST OP IMM;	\
		CONT;			\
	ALU_##OPCODE##_K:		\
		DST = (u32) DST OP (u32) IMM;	\
		CONT;

	ALU(ADD,  +)
	ALU(SUB,  -)
	ALU(AND,  &)
	ALU(OR,   |)
	ALU(LSH, <<)
	ALU(RSH, >>)
	ALU(XOR,  ^)
	ALU(MUL,  *)
#undef ALU
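	/* For reference (illustrative): ALU(ADD, +) above expands to the
	 * four labels ALU64_ADD_X, ALU_ADD_X, ALU64_ADD_K and ALU_ADD_K,
	 * e.g. the 32 bit register form becomes
	 *
	 *	ALU_ADD_X:
	 *		DST = (u32) DST + (u32) SRC;
	 *		CONT;
	 */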
	ALU_NEG:
		DST = (u32) -DST;
		CONT;
	ALU64_NEG:
		DST = -DST;
		CONT;
	ALU_MOV_X:
		DST = (u32) SRC;
		CONT;
	ALU_MOV_K:
		DST = (u32) IMM;
		CONT;
	ALU64_MOV_X:
		DST = SRC;
		CONT;
	ALU64_MOV_K:
		DST = IMM;
		CONT;
	LD_IMM_DW:
		DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
		insn++;
		CONT;
	ALU64_ARSH_X:
		(*(s64 *) &DST) >>= SRC;
		CONT;
	ALU64_ARSH_K:
		(*(s64 *) &DST) >>= IMM;
		CONT;
	ALU64_MOD_X:
		if (unlikely(SRC == 0))
			return 0;
		div64_u64_rem(DST, SRC, &tmp);
		DST = tmp;
		CONT;
	ALU_MOD_X:
		if (unlikely((u32)SRC == 0))
			return 0;
		tmp = (u32) DST;
		DST = do_div(tmp, (u32) SRC);
		CONT;
	ALU64_MOD_K:
		div64_u64_rem(DST, IMM, &tmp);
		DST = tmp;
		CONT;
	ALU_MOD_K:
		tmp = (u32) DST;
		DST = do_div(tmp, (u32) IMM);
		CONT;
	ALU64_DIV_X:
		if (unlikely(SRC == 0))
			return 0;
		DST = div64_u64(DST, SRC);
		CONT;
	ALU_DIV_X:
		if (unlikely((u32)SRC == 0))
			return 0;
		tmp = (u32) DST;
		do_div(tmp, (u32) SRC);
		DST = (u32) tmp;
		CONT;
	ALU64_DIV_K:
		DST = div64_u64(DST, IMM);
		CONT;
	ALU_DIV_K:
		tmp = (u32) DST;
		do_div(tmp, (u32) IMM);
		DST = (u32) tmp;
		CONT;
	ALU_END_TO_BE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_be16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_be32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_be64(DST);
			break;
		}
		CONT;
	ALU_END_TO_LE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_le16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_le32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_le64(DST);
			break;
		}
		CONT;

	/* CALL */
	JMP_CALL:
		/* Function call scratches BPF_R1-BPF_R5 registers,
		 * preserves BPF_R6-BPF_R9, and stores return value
		 * into BPF_R0.
		 */
		BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
						       BPF_R4, BPF_R5);
		CONT;

	JMP_TAIL_CALL: {
		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
		struct bpf_array *array = container_of(map, struct bpf_array, map);
		struct bpf_prog *prog;
		u32 index = BPF_R3;

		if (unlikely(index >= array->map.max_entries))
			goto out;
		if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
			goto out;

		tail_call_cnt++;

		prog = READ_ONCE(array->ptrs[index]);
		if (!prog)
			goto out;

		/* ARG1 at this point is guaranteed to point to CTX from
		 * the verifier side due to the fact that the tail call is
		 * handled like a helper, that is, bpf_tail_call_proto,
		 * where arg1_type is ARG_PTR_TO_CTX.
		 */
		insn = prog->insnsi;
		goto select_insn;
out:
		CONT;
	}
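	/* Illustrative note (not from the original file): the block above is
	 * what the helper call bpf_tail_call(ctx, &prog_array, index) boils
	 * down to in the interpreter: bounds and nesting-limit checks, then
	 * a jump straight into the target program's first instruction
	 * without returning to the caller.
	 */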
	/* JMP */
	JMP_JA:
		insn += insn->off;
		CONT;
	JMP_JEQ_X:
		if (DST == SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JEQ_K:
		if (DST == IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JNE_X:
		if (DST != SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JNE_K:
		if (DST != IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGT_X:
		if (DST > SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGT_K:
		if (DST > IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGE_X:
		if (DST >= SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGE_K:
		if (DST >= IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGT_X:
		if (((s64) DST) > ((s64) SRC)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGT_K:
		if (((s64) DST) > ((s64) IMM)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGE_X:
		if (((s64) DST) >= ((s64) SRC)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGE_K:
		if (((s64) DST) >= ((s64) IMM)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSET_X:
		if (DST & SRC) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSET_K:
		if (DST & IMM) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_EXIT:
		return BPF_R0;

	/* STX and ST and LDX */
#define LDST(SIZEOP, SIZE)						\
	STX_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
		CONT;							\
	ST_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = IMM;	\
		CONT;							\
	LDX_MEM_##SIZEOP:						\
		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
		CONT;

	LDST(B,   u8)
	LDST(H,  u16)
	LDST(W,  u32)
	LDST(DW, u64)
#undef LDST
	STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
		atomic_add((u32) SRC, (atomic_t *)(unsigned long)
			   (DST + insn->off));
		CONT;
	STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
		atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
			     (DST + insn->off));
		CONT;
	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
		off = IMM;
load_word:
		/* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns are
		 * only appearing in the programs where ctx ==
		 * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
		 * == BPF_R6, bpf_convert_filter() saves it in BPF_R6,
		 * internal BPF verifier will check that BPF_R6 ==
		 * ctx.
		 *
		 * BPF_ABS and BPF_IND are wrappers of function calls,
		 * so they scratch BPF_R1-BPF_R5 registers, preserve
		 * BPF_R6-BPF_R9, and store return value into BPF_R0.
		 *
		 * Implicit input:
		 *   ctx == skb == BPF_R6 == CTX
		 *
		 * Explicit input:
		 *   SRC == any register
		 *   IMM == 32-bit immediate
		 *
		 * Output:
		 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
		 */

		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = get_unaligned_be32(ptr);
			CONT;
		}

		return 0;
	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
		off = IMM;
load_half:
		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = get_unaligned_be16(ptr);
			CONT;
		}

		return 0;
	LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
		off = IMM;
load_byte:
		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = *(u8 *)ptr;
			CONT;
		}

		return 0;
	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
		off = IMM + SRC;
		goto load_word;
	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
		off = IMM + SRC;
		goto load_half;
	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
		off = IMM + SRC;
		goto load_byte;

	default_label:
		/* If we ever reach this, we have a bug somewhere. */
		WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
		return 0;
}
STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */

#else
static unsigned int __bpf_prog_ret0_warn(void *ctx,
					 const struct bpf_insn *insn)
{
	/* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
	 * is not working properly, so warn about it!
	 */
	WARN_ON_ONCE(1);
	return 0;
}
#endif

bool bpf_prog_array_compatible(struct bpf_array *array,
			       const struct bpf_prog *fp)
{
	if (!array->owner_prog_type) {
		/* There's no owner yet where we could check for
		 * compatibility.
		 */
		array->owner_prog_type = fp->type;
		array->owner_jited = fp->jited;

		return true;
	}

	return array->owner_prog_type == fp->type &&
	       array->owner_jited == fp->jited;
}

static int bpf_check_tail_call(const struct bpf_prog *fp)
{
	struct bpf_prog_aux *aux = fp->aux;
	int i;

	for (i = 0; i < aux->used_map_cnt; i++) {
		struct bpf_map *map = aux->used_maps[i];
		struct bpf_array *array;

		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
			continue;

		array = container_of(map, struct bpf_array, map);
		if (!bpf_prog_array_compatible(array, fp))
			return -EINVAL;
	}

	return 0;
}

/**
 *	bpf_prog_select_runtime - select exec runtime for BPF program
 *	@fp: bpf_prog populated with internal BPF program
 *	@err: pointer to error variable
 *
 * Try to JIT the eBPF program; if the JIT is not available, use the
 * interpreter. The BPF program will be executed via the BPF_PROG_RUN()
 * macro.
 */
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
	fp->bpf_func = (void *) __bpf_prog_run;
#else
	fp->bpf_func = (void *) __bpf_prog_ret0_warn;
#endif

	/* eBPF JITs can rewrite the program in case constant
	 * blinding is active. However, in case of error during
	 * blinding, bpf_int_jit_compile() must always return a
	 * valid program, which in this case would simply not
	 * be JITed, but falls back to the interpreter.
	 */
	fp = bpf_int_jit_compile(fp);
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
	if (!fp->jited) {
		*err = -ENOTSUPP;
		return fp;
	}
#endif
	bpf_prog_lock_ro(fp);

	/* The tail call compatibility check can only be done at
	 * this late stage as we need to determine, if we deal
	 * with JITed or non JITed program concatenations and not
	 * all eBPF JITs might immediately support all features.
	 */
	*err = bpf_check_tail_call(fp);

	return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);

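/* Usage sketch for bpf_prog_select_runtime() above (illustrative, label
 * names are made up): the program load path roughly does
 *
 *	prog = bpf_prog_select_runtime(prog, &err);
 *	if (err < 0)
 *		goto free_prog;
 *
 * after which BPF_PROG_RUN(prog, ctx) dispatches either to the JIT image
 * or to the interpreter selected above.
 */
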
static unsigned int __bpf_prog_ret1(const struct sk_buff *ctx,
				    const struct bpf_insn *insn)
{
	return 1;
}

static struct bpf_prog_dummy {
	struct bpf_prog prog;
} dummy_bpf_prog = {
	.prog = {
		.bpf_func = __bpf_prog_ret1,
	},
};

/* To avoid allocating empty bpf_prog_array for cgroups that don't have
 * a bpf program attached, use one global 'empty_prog_array'. It will not
 * be modified by the caller of bpf_prog_array_alloc() (since the caller
 * requested prog_cnt == 0), and that pointer should be 'freed' by
 * bpf_prog_array_free().
 */
static struct {
	struct bpf_prog_array hdr;
	struct bpf_prog *null_prog;
} empty_prog_array = {
	.null_prog = NULL,
};

struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
{
	if (prog_cnt)
		return kzalloc(sizeof(struct bpf_prog_array) +
			       sizeof(struct bpf_prog *) * (prog_cnt + 1),
			       flags);

	return &empty_prog_array.hdr;
}

void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
{
	if (!progs ||
	    progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
		return;
	kfree_rcu(progs, rcu);
}

void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
				struct bpf_prog *old_prog)
{
	struct bpf_prog **prog = progs->progs;

	for (; *prog; prog++)
		if (*prog == old_prog) {
			WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
			break;
		}
}

int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
			struct bpf_prog *exclude_prog,
			struct bpf_prog *include_prog,
			struct bpf_prog_array **new_array)
{
	int new_prog_cnt, carry_prog_cnt = 0;
	struct bpf_prog **existing_prog;
	struct bpf_prog_array *array;
	int new_prog_idx = 0;

	/* Figure out how many existing progs we need to carry over to
	 * the new array.
	 */
	if (old_array) {
		existing_prog = old_array->progs;
		for (; *existing_prog; existing_prog++) {
			if (*existing_prog != exclude_prog &&
			    *existing_prog != &dummy_bpf_prog.prog)
				carry_prog_cnt++;
			if (*existing_prog == include_prog)
				return -EEXIST;
		}
	}

	/* How many progs (not NULL) will be in the new array? */
	new_prog_cnt = carry_prog_cnt;
	if (include_prog)
		new_prog_cnt += 1;

	/* Do we have any prog (not NULL) in the new array? */
	if (!new_prog_cnt) {
		*new_array = NULL;
		return 0;
	}

	/* +1 as the end of prog_array is marked with NULL */
	array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
	if (!array)
		return -ENOMEM;

	/* Fill in the new prog array */
	if (carry_prog_cnt) {
		existing_prog = old_array->progs;
		for (; *existing_prog; existing_prog++)
			if (*existing_prog != exclude_prog &&
			    *existing_prog != &dummy_bpf_prog.prog)
				array->progs[new_prog_idx++] = *existing_prog;
	}
	if (include_prog)
		array->progs[new_prog_idx++] = include_prog;
	array->progs[new_prog_idx] = NULL;
	*new_array = array;
	return 0;
}

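/* Worked example for bpf_prog_array_copy() above (illustrative): given an
 * old array [A, B, C, NULL], a call with exclude_prog == B and
 * include_prog == D yields a freshly allocated [A, C, D, NULL], leaving
 * the RCU-protected old array untouched for concurrent readers.
 */
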
static void bpf_prog_free_deferred(struct work_struct *work)
{
	struct bpf_prog_aux *aux;

	aux = container_of(work, struct bpf_prog_aux, work);
	bpf_jit_free(aux->prog);
}

/* Free internal BPF program */
void bpf_prog_free(struct bpf_prog *fp)
{
	struct bpf_prog_aux *aux = fp->aux;

	INIT_WORK(&aux->work, bpf_prog_free_deferred);
	schedule_work(&aux->work);
}
EXPORT_SYMBOL_GPL(bpf_prog_free);

/* RNG for unprivileged user space with separated state from prandom_u32(). */
static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);

void bpf_user_rnd_init_once(void)
{
	prandom_init_once(&bpf_user_rnd_state);
}

BPF_CALL_0(bpf_user_rnd_u32)
{
	/* Should someone ever have the rather unwise idea to use some
	 * of the registers passed into this function, then note that
	 * this function is called from native eBPF and classic-to-eBPF
	 * transformations. Register assignments from both sides are
	 * different, f.e. classic always sets fn(ctx, A, X) here.
	 */
	struct rnd_state *state;
	u32 res;

	state = &get_cpu_var(bpf_user_rnd_state);
	res = prandom_u32_state(state);
	put_cpu_var(bpf_user_rnd_state);

	return res;
}

/* Weak definitions of helper functions in case we don't have bpf syscall. */
const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;

const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;

const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;

const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
	return NULL;
}

u64 __weak
bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	return -ENOTSUPP;
}

/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
	.func		= NULL,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */
struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
{
	return prog;
}

bool __weak bpf_helper_changes_skb_data(void *func)
{
	return false;
}

/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
 */
int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
			 int len)
{
	return -EFAULT;
}