Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Linux Socket Filter Data Structures |
| 3 | */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 4 | #ifndef __LINUX_FILTER_H__ |
| 5 | #define __LINUX_FILTER_H__ |
| 6 | |
Arun Sharma | 60063497 | 2011-07-26 16:09:06 -0700 | [diff] [blame] | 7 | #include <linux/atomic.h> |
Will Drewry | 0c5fe1b | 2012-04-12 16:47:53 -0500 | [diff] [blame] | 8 | #include <linux/compat.h> |
Alexei Starovoitov | d45ed4a | 2013-10-04 00:14:06 -0700 | [diff] [blame] | 9 | #include <linux/workqueue.h> |
David Howells | 607ca46 | 2012-10-13 10:46:48 +0100 | [diff] [blame] | 10 | #include <uapi/linux/filter.h> |
Heiko Carstens | 792d4b5 | 2011-05-22 07:08:11 +0000 | [diff] [blame] | 11 | |
Alexei Starovoitov | bd4cf0e | 2014-03-28 18:58:25 +0100 | [diff] [blame] | 12 | /* Internally used and optimized filter representation with extended |
| 13 | * instruction set based on top of classic BPF. |
Will Drewry | 0c5fe1b | 2012-04-12 16:47:53 -0500 | [diff] [blame] | 14 | */ |
Alexei Starovoitov | bd4cf0e | 2014-03-28 18:58:25 +0100 | [diff] [blame] | 15 | |
/* Instruction class added on top of the classic BPF classes. */
#define BPF_ALU64	0x07	/* ALU mode operating on double word width */

/* Additional size/mode values for the ld/ldx opcode fields. */
#define BPF_DW		0x18	/* double word access */
#define BPF_XADD	0xc0	/* exclusive add */

/* Additional operation codes for the alu/jmp opcode fields. */
#define BPF_MOV		0xb0	/* move register to register */
#define BPF_ARSH	0xc0	/* arithmetic shift right, sign extending */

/* Endianness change of a register, followed by its direction flags. */
#define BPF_END		0xd0
#define BPF_TO_LE	0x00	/* convert to little-endian */
#define BPF_TO_BE	0x08	/* convert to big-endian */
#define BPF_FROM_LE	BPF_TO_LE
#define BPF_FROM_BE	BPF_TO_BE

#define BPF_JNE		0x50	/* jump if != */
#define BPF_JSGT	0x60	/* signed '>' (GT in x86) */
#define BPF_JSGE	0x70	/* signed '>=' (GE in x86) */
#define BPF_CALL	0x80	/* function call */
#define BPF_EXIT	0x90	/* function return */
| 39 | |
/*
 * Register numbers. The values are consecutive starting at zero, so a
 * register's enumerator equals its index.
 */
enum {
	BPF_REG_0	= 0,
	BPF_REG_1	= 1,
	BPF_REG_2	= 2,
	BPF_REG_3	= 3,
	BPF_REG_4	= 4,
	BPF_REG_5	= 5,
	BPF_REG_6	= 6,
	BPF_REG_7	= 7,
	BPF_REG_8	= 8,
	BPF_REG_9	= 9,
	BPF_REG_10	= 10,
	__MAX_BPF_REG,	/* one past the last register number */
};
| 55 | |
/*
 * Number of register slots: BPF_REG_0 through BPF_REG_10, where
 * BPF_REG_10 doubles as the stack frame pointer (see BPF_REG_FP).
 */
#define MAX_BPF_REG	__MAX_BPF_REG

/*
 * Positions of the argument, context and frame pointer registers.
 * Arg1..Arg5 are the argument mappings of function calls made through
 * the BPF_CALL instruction.
 */
#define BPF_REG_ARG1	BPF_REG_1
#define BPF_REG_ARG2	BPF_REG_2
#define BPF_REG_ARG3	BPF_REG_3
#define BPF_REG_ARG4	BPF_REG_4
#define BPF_REG_ARG5	BPF_REG_5
#define BPF_REG_CTX	BPF_REG_6
#define BPF_REG_FP	BPF_REG_10

/* Extra register mappings used for converted user programs. */
#define BPF_REG_A	BPF_REG_0
#define BPF_REG_X	BPF_REG_7
#define BPF_REG_TMP	BPF_REG_8

/* A BPF program may use at most this many bytes of stack space. */
#define MAX_BPF_STACK	512
| 78 | |
/*
 * Helpers building a single struct sock_filter_int as a compound literal.
 *
 * Fields are set through designated initializers so the macros do not
 * depend on the member order of struct sock_filter_int. Macro parameters
 * are upper-case on purpose: a parameter called 'off' or 'imm' would also
 * be substituted into the '.off' / '.imm' designators.
 */

/* ALU ops on registers: bpf_add|sub|...: A += X, bpf_mov: A = X */
#define BPF_ALU64_REG(OP, A, X)					\
	((struct sock_filter_int) {				\
		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,	\
		.a_reg = A,					\
		.x_reg = X,					\
		.off   = 0,					\
		.imm   = 0 })

#define BPF_ALU32_REG(OP, A, X)					\
	((struct sock_filter_int) {				\
		.code  = BPF_ALU | BPF_OP(OP) | BPF_X,		\
		.a_reg = A,					\
		.x_reg = X,					\
		.off   = 0,					\
		.imm   = 0 })

/* ALU ops on immediates: bpf_add|sub|...: A += IMM, bpf_mov: A = IMM */
#define BPF_ALU64_IMM(OP, A, IMM)				\
	((struct sock_filter_int) {				\
		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
		.a_reg = A,					\
		.x_reg = 0,					\
		.off   = 0,					\
		.imm   = IMM })

#define BPF_ALU32_IMM(OP, A, IMM)				\
	((struct sock_filter_int) {				\
		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
		.a_reg = A,					\
		.x_reg = 0,					\
		.off   = 0,					\
		.imm   = IMM })

/* R0 = *(uint *) (skb->data + OFF); OFF is carried in the imm field */
#define BPF_LD_ABS(SIZE, OFF)					\
	((struct sock_filter_int) {				\
		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,	\
		.a_reg = 0,					\
		.x_reg = 0,					\
		.off   = 0,					\
		.imm   = OFF })

/* R0 = *(uint *) (skb->data + X + OFF); OFF is carried in the imm field */
#define BPF_LD_IND(SIZE, X, OFF)				\
	((struct sock_filter_int) {				\
		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_IND,	\
		.a_reg = 0,					\
		.x_reg = X,					\
		.off   = 0,					\
		.imm   = OFF })

/* A = *(uint *) (X + OFF) */
#define BPF_LDX_MEM(SZ, A, X, OFF)				\
	((struct sock_filter_int) {				\
		.code  = BPF_LDX | BPF_SIZE(SZ) | BPF_MEM,	\
		.a_reg = A,					\
		.x_reg = X,					\
		.off   = OFF,					\
		.imm   = 0 })

/* if (A 'OP' X) goto pc + OFF */
#define BPF_JMP_REG(OP, A, X, OFF)				\
	((struct sock_filter_int) {				\
		.code  = BPF_JMP | BPF_OP(OP) | BPF_X,		\
		.a_reg = A,					\
		.x_reg = X,					\
		.off   = OFF,					\
		.imm   = 0 })

/* if (A 'OP' IMM) goto pc + OFF */
#define BPF_JMP_IMM(OP, A, IMM, OFF)				\
	((struct sock_filter_int) {				\
		.code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
		.a_reg = A,					\
		.x_reg = 0,					\
		.off   = OFF,					\
		.imm   = IMM })

/* Function return: only the opcode is set, every other field is zero. */
#define BPF_EXIT_INSN()						\
	((struct sock_filter_int) {				\
		.code  = BPF_JMP | BPF_EXIT,			\
		.a_reg = 0,					\
		.x_reg = 0,					\
		.off   = 0,					\
		.imm   = 0 })
| 113 | |
| 114 | static inline int size_to_bpf(int size) |
| 115 | { |
| 116 | switch (size) { |
| 117 | case 1: |
| 118 | return BPF_B; |
| 119 | case 2: |
| 120 | return BPF_H; |
| 121 | case 4: |
| 122 | return BPF_W; |
| 123 | case 8: |
| 124 | return BPF_DW; |
| 125 | default: |
| 126 | return -EINVAL; |
| 127 | } |
| 128 | } |
| 129 | |
/*
 * Invoke a filter: calls filter->bpf_func with @ctx and the filter's
 * insnsi array as arguments and yields the function's return value.
 *
 * @filter is parenthesised so that any pointer-valued expression (for
 * example '&f' or a conditional expression) can be passed. It is expanded
 * twice, so it must not have side effects.
 */
#define SK_RUN_FILTER(filter, ctx) \
	(*(filter)->bpf_func)(ctx, (filter)->insnsi)
Alexei Starovoitov | bd4cf0e | 2014-03-28 18:58:25 +0100 | [diff] [blame] | 132 | |
| 133 | struct sock_filter_int { |
| 134 | __u8 code; /* opcode */ |
| 135 | __u8 a_reg:4; /* dest register */ |
| 136 | __u8 x_reg:4; /* source register */ |
| 137 | __s16 off; /* signed offset */ |
| 138 | __s32 imm; /* signed immediate constant */ |
| 139 | }; |
| 140 | |
#ifdef CONFIG_COMPAT
/*
 * Compat variant of the filter program descriptor. Only the pointer
 * representation differs; a struct sock_filter itself is architecture
 * independent.
 */
struct compat_sock_fprog {
	u16		len;
	compat_uptr_t	filter;		/* struct sock_filter * */
};
#endif
| 148 | |
Daniel Borkmann | a3ea269 | 2014-03-28 18:58:19 +0100 | [diff] [blame] | 149 | struct sock_fprog_kern { |
| 150 | u16 len; |
| 151 | struct sock_filter *filter; |
| 152 | }; |
| 153 | |
Heiko Carstens | 792d4b5 | 2011-05-22 07:08:11 +0000 | [diff] [blame] | 154 | struct sk_buff; |
| 155 | struct sock; |
Alexei Starovoitov | bd4cf0e | 2014-03-28 18:58:25 +0100 | [diff] [blame] | 156 | struct seccomp_data; |
Heiko Carstens | 792d4b5 | 2011-05-22 07:08:11 +0000 | [diff] [blame] | 157 | |
Daniel Borkmann | a3ea269 | 2014-03-28 18:58:19 +0100 | [diff] [blame] | 158 | struct sk_filter { |
Stephen Hemminger | b715631 | 2008-04-10 01:33:47 -0700 | [diff] [blame] | 159 | atomic_t refcnt; |
Daniel Borkmann | f8bbbfc | 2014-03-28 18:58:18 +0100 | [diff] [blame] | 160 | u32 jited:1, /* Is our filter JIT'ed? */ |
| 161 | len:31; /* Number of filter blocks */ |
Daniel Borkmann | a3ea269 | 2014-03-28 18:58:19 +0100 | [diff] [blame] | 162 | struct sock_fprog_kern *orig_prog; /* Original BPF program */ |
Alexei Starovoitov | d45ed4a | 2013-10-04 00:14:06 -0700 | [diff] [blame] | 163 | struct rcu_head rcu; |
Eric Dumazet | 0a14842 | 2011-04-20 09:27:32 +0000 | [diff] [blame] | 164 | unsigned int (*bpf_func)(const struct sk_buff *skb, |
Alexei Starovoitov | bd4cf0e | 2014-03-28 18:58:25 +0100 | [diff] [blame] | 165 | const struct sock_filter_int *filter); |
Alexei Starovoitov | d45ed4a | 2013-10-04 00:14:06 -0700 | [diff] [blame] | 166 | union { |
Alexei Starovoitov | bd4cf0e | 2014-03-28 18:58:25 +0100 | [diff] [blame] | 167 | struct sock_filter insns[0]; |
| 168 | struct sock_filter_int insnsi[0]; |
Alexei Starovoitov | d45ed4a | 2013-10-04 00:14:06 -0700 | [diff] [blame] | 169 | struct work_struct work; |
| 170 | }; |
Stephen Hemminger | b715631 | 2008-04-10 01:33:47 -0700 | [diff] [blame] | 171 | }; |
| 172 | |
Alexei Starovoitov | d45ed4a | 2013-10-04 00:14:06 -0700 | [diff] [blame] | 173 | static inline unsigned int sk_filter_size(unsigned int proglen) |
Stephen Hemminger | b715631 | 2008-04-10 01:33:47 -0700 | [diff] [blame] | 174 | { |
Alexei Starovoitov | d45ed4a | 2013-10-04 00:14:06 -0700 | [diff] [blame] | 175 | return max(sizeof(struct sk_filter), |
| 176 | offsetof(struct sk_filter, insns[proglen])); |
Stephen Hemminger | b715631 | 2008-04-10 01:33:47 -0700 | [diff] [blame] | 177 | } |
| 178 | |
/*
 * Size in bytes of the instruction array described by @fprog, i.e.
 * fprog->len multiplied by the size of one fprog->filter element.
 *
 * @fprog is parenthesised so that any pointer-valued expression (such as
 * '&prog') can be passed. Only the len member is evaluated at run time;
 * the filter pointer is used inside sizeof and is never dereferenced.
 */
#define sk_filter_proglen(fprog)			\
	((fprog)->len * sizeof((fprog)->filter[0]))
| 181 | |
Daniel Borkmann | fbc907f | 2014-03-28 18:58:20 +0100 | [diff] [blame] | 182 | int sk_filter(struct sock *sk, struct sk_buff *skb); |
Alexei Starovoitov | bd4cf0e | 2014-03-28 18:58:25 +0100 | [diff] [blame] | 183 | |
Alexei Starovoitov | 5fe821a | 2014-05-19 14:56:14 -0700 | [diff] [blame] | 184 | void sk_filter_select_runtime(struct sk_filter *fp); |
| 185 | void sk_filter_free(struct sk_filter *fp); |
Alexei Starovoitov | bd4cf0e | 2014-03-28 18:58:25 +0100 | [diff] [blame] | 186 | |
| 187 | int sk_convert_filter(struct sock_filter *prog, int len, |
| 188 | struct sock_filter_int *new_prog, int *new_len); |
Daniel Borkmann | a3ea269 | 2014-03-28 18:58:19 +0100 | [diff] [blame] | 189 | |
Daniel Borkmann | fbc907f | 2014-03-28 18:58:20 +0100 | [diff] [blame] | 190 | int sk_unattached_filter_create(struct sk_filter **pfp, |
Daniel Borkmann | b1fcd35 | 2014-05-23 18:43:58 +0200 | [diff] [blame^] | 191 | struct sock_fprog_kern *fprog); |
Daniel Borkmann | fbc907f | 2014-03-28 18:58:20 +0100 | [diff] [blame] | 192 | void sk_unattached_filter_destroy(struct sk_filter *fp); |
Daniel Borkmann | a3ea269 | 2014-03-28 18:58:19 +0100 | [diff] [blame] | 193 | |
Daniel Borkmann | fbc907f | 2014-03-28 18:58:20 +0100 | [diff] [blame] | 194 | int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); |
| 195 | int sk_detach_filter(struct sock *sk); |
Daniel Borkmann | a3ea269 | 2014-03-28 18:58:19 +0100 | [diff] [blame] | 196 | |
Daniel Borkmann | fbc907f | 2014-03-28 18:58:20 +0100 | [diff] [blame] | 197 | int sk_chk_filter(struct sock_filter *filter, unsigned int flen); |
| 198 | int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, |
| 199 | unsigned int len); |
| 200 | void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); |
| 201 | |
| 202 | void sk_filter_charge(struct sock *sk, struct sk_filter *fp); |
| 203 | void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); |
Eric Dumazet | 0a14842 | 2011-04-20 09:27:32 +0000 | [diff] [blame] | 204 | |
Alexei Starovoitov | 6225827 | 2014-05-13 19:50:46 -0700 | [diff] [blame] | 205 | u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); |
| 206 | void bpf_int_jit_compile(struct sk_filter *fp); |
| 207 | |
Eric Dumazet | 0a14842 | 2011-04-20 09:27:32 +0000 | [diff] [blame] | 208 | #ifdef CONFIG_BPF_JIT |
Xi Wang | 20074f3 | 2013-05-01 16:24:08 -0400 | [diff] [blame] | 209 | #include <stdarg.h> |
Chen Gang | a691ce7 | 2013-03-28 15:24:53 +0000 | [diff] [blame] | 210 | #include <linux/linkage.h> |
| 211 | #include <linux/printk.h> |
| 212 | |
Daniel Borkmann | fbc907f | 2014-03-28 18:58:20 +0100 | [diff] [blame] | 213 | void bpf_jit_compile(struct sk_filter *fp); |
| 214 | void bpf_jit_free(struct sk_filter *fp); |
Daniel Borkmann | 7961780 | 2013-03-21 22:22:03 +0100 | [diff] [blame] | 215 | |
| 216 | static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, |
| 217 | u32 pass, void *image) |
| 218 | { |
Eric Dumazet | 1649544 | 2013-05-17 16:57:37 +0000 | [diff] [blame] | 219 | pr_err("flen=%u proglen=%u pass=%u image=%pK\n", |
Daniel Borkmann | 7961780 | 2013-03-21 22:22:03 +0100 | [diff] [blame] | 220 | flen, proglen, pass, image); |
| 221 | if (image) |
Eric Dumazet | 1649544 | 2013-05-17 16:57:37 +0000 | [diff] [blame] | 222 | print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, |
Daniel Borkmann | 7961780 | 2013-03-21 22:22:03 +0100 | [diff] [blame] | 223 | 16, 1, image, proglen, false); |
| 224 | } |
Eric Dumazet | 0a14842 | 2011-04-20 09:27:32 +0000 | [diff] [blame] | 225 | #else |
Alexei Starovoitov | d45ed4a | 2013-10-04 00:14:06 -0700 | [diff] [blame] | 226 | #include <linux/slab.h> |
/* Without CONFIG_BPF_JIT there is nothing to compile: intentionally empty. */
static inline void bpf_jit_compile(struct sk_filter *fp)
{
}

/* Without CONFIG_BPF_JIT, freeing a filter is a plain kfree() of @fp. */
static inline void bpf_jit_free(struct sk_filter *fp)
{
	kfree(fp);
}
Eric Dumazet | 0a14842 | 2011-04-20 09:27:32 +0000 | [diff] [blame] | 234 | #endif |
| 235 | |
Michal Sekletar | ea02f94 | 2014-01-17 17:09:45 +0100 | [diff] [blame] | 236 | static inline int bpf_tell_extensions(void) |
| 237 | { |
Daniel Borkmann | 3769229 | 2014-01-21 00:19:37 +0100 | [diff] [blame] | 238 | return SKF_AD_MAX; |
Michal Sekletar | ea02f94 | 2014-01-17 17:09:45 +0100 | [diff] [blame] | 239 | } |
| 240 | |
/*
 * BPF_S_* instruction codes.
 *
 * Values are assigned implicitly, starting at 1, so the order of the
 * enumerators below determines their numeric values. Do not reorder;
 * note that BPF_S_ALU_DIV_K deliberately sits apart from the other ALU
 * entries.
 */
enum {
	/* Return */
	BPF_S_RET_K = 1,
	BPF_S_RET_A,

	/* ALU */
	BPF_S_ALU_ADD_K,
	BPF_S_ALU_ADD_X,
	BPF_S_ALU_SUB_K,
	BPF_S_ALU_SUB_X,
	BPF_S_ALU_MUL_K,
	BPF_S_ALU_MUL_X,
	BPF_S_ALU_DIV_X,
	BPF_S_ALU_MOD_K,
	BPF_S_ALU_MOD_X,
	BPF_S_ALU_AND_K,
	BPF_S_ALU_AND_X,
	BPF_S_ALU_OR_K,
	BPF_S_ALU_OR_X,
	BPF_S_ALU_XOR_K,
	BPF_S_ALU_XOR_X,
	BPF_S_ALU_LSH_K,
	BPF_S_ALU_LSH_X,
	BPF_S_ALU_RSH_K,
	BPF_S_ALU_RSH_X,
	BPF_S_ALU_NEG,

	/* Loads */
	BPF_S_LD_W_ABS,
	BPF_S_LD_H_ABS,
	BPF_S_LD_B_ABS,
	BPF_S_LD_W_LEN,
	BPF_S_LD_W_IND,
	BPF_S_LD_H_IND,
	BPF_S_LD_B_IND,
	BPF_S_LD_IMM,
	BPF_S_LDX_W_LEN,
	BPF_S_LDX_B_MSH,
	BPF_S_LDX_IMM,

	/* Misc, followed by entries kept in their historical positions */
	BPF_S_MISC_TAX,
	BPF_S_MISC_TXA,
	BPF_S_ALU_DIV_K,
	BPF_S_LD_MEM,
	BPF_S_LDX_MEM,

	/* Stores */
	BPF_S_ST,
	BPF_S_STX,

	/* Jumps */
	BPF_S_JMP_JA,
	BPF_S_JMP_JEQ_K,
	BPF_S_JMP_JEQ_X,
	BPF_S_JMP_JGE_K,
	BPF_S_JMP_JGE_X,
	BPF_S_JMP_JGT_K,
	BPF_S_JMP_JGT_X,
	BPF_S_JMP_JSET_K,
	BPF_S_JMP_JSET_X,

	/* Ancillary data */
	BPF_S_ANC_PROTOCOL,
	BPF_S_ANC_PKTTYPE,
	BPF_S_ANC_IFINDEX,
	BPF_S_ANC_NLATTR,
	BPF_S_ANC_NLATTR_NEST,
	BPF_S_ANC_MARK,
	BPF_S_ANC_QUEUE,
	BPF_S_ANC_HATYPE,
	BPF_S_ANC_RXHASH,
	BPF_S_ANC_CPU,
	BPF_S_ANC_ALU_XOR_X,
	BPF_S_ANC_VLAN_TAG,
	BPF_S_ANC_VLAN_TAG_PRESENT,
	BPF_S_ANC_PAY_OFFSET,
	BPF_S_ANC_RANDOM,
};
| 308 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 309 | #endif /* __LINUX_FILTER_H__ */ |