/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>

/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
		return ptr;
	return NULL;
}

static inline void *load_pointer(const struct sk_buff *skb, int k,
				 unsigned int size, void *buffer)
{
	if (k >= 0)
		return skb_header_pointer(skb, k, size, buffer);
	return bpf_internal_load_pointer_neg_helper(skb, k, size);
}

/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to the correct size returned by
 * sk_run_filter. If pkt_len is 0 we toss the packet. If skb->len is smaller
 * than pkt_len we keep the whole skb->data. This is the socket level
 * wrapper to sk_run_filter. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
		return -ENOMEM;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);

		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter);
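
/* Usage note (a sketch, not code from this file): sk_filter() is the generic
 * ingress hook; protocol receive paths are expected to call it, directly or
 * via their socket queueing helpers, before an skb is queued to a socket, so
 * that a zero return from the attached filter drops the packet early.
 */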

/* Base function for offset calculation. Needs to go into .text section,
 * therefore keeping it non-static as well; will also be used by JITs
 * anyway later on, so do not let the compiler omit it.
 */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	return 0;
}

/* Register mappings for user programs. */
#define A_REG		0
#define X_REG		7
#define TMP_REG		8
#define ARG2_REG	2
#define ARG3_REG	3

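/* Rough sketch of how the converter below uses these (the remaining register
 * names such as CTX_REG, ARG1_REG and FP_REG come from linux/filter.h in this
 * tree):
 *
 *	classic A	-> regs[A_REG]
 *	classic X	-> regs[X_REG]
 *	scratch		-> regs[TMP_REG]
 *	ctx		-> regs[CTX_REG] (copied from ARG1_REG on entry)
 *	stack base	-> regs[FP_REG]
 */
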
/**
 *	__sk_run_filter - run a filter on a given context
 *	@ctx: buffer to run the filter on
 *	@insn: filter to apply
 *
 * Decode and apply filter instructions to skb->data. Return the length to
 * keep, 0 for none. @ctx is the data we are operating on, @insn is the
 * array of filter instructions.
 */
unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
{
	u64 stack[MAX_BPF_STACK / sizeof(u64)];
	u64 regs[MAX_BPF_REG], tmp;
	void *ptr;
	int off;

#define K  insn->imm
#define A  regs[insn->a_reg]
#define X  regs[insn->x_reg]
#define R0 regs[0]

#define CONT	 ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })

	static const void *jumptable[256] = {
		[0 ... 255] = &&default_label,
		/* Now overwrite non-defaults ... */
#define DL(A, B, C)	[A|B|C] = &&A##_##B##_##C
		DL(BPF_ALU, BPF_ADD, BPF_X),
		DL(BPF_ALU, BPF_ADD, BPF_K),
		DL(BPF_ALU, BPF_SUB, BPF_X),
		DL(BPF_ALU, BPF_SUB, BPF_K),
		DL(BPF_ALU, BPF_AND, BPF_X),
		DL(BPF_ALU, BPF_AND, BPF_K),
		DL(BPF_ALU, BPF_OR, BPF_X),
		DL(BPF_ALU, BPF_OR, BPF_K),
		DL(BPF_ALU, BPF_LSH, BPF_X),
		DL(BPF_ALU, BPF_LSH, BPF_K),
		DL(BPF_ALU, BPF_RSH, BPF_X),
		DL(BPF_ALU, BPF_RSH, BPF_K),
		DL(BPF_ALU, BPF_XOR, BPF_X),
		DL(BPF_ALU, BPF_XOR, BPF_K),
		DL(BPF_ALU, BPF_MUL, BPF_X),
		DL(BPF_ALU, BPF_MUL, BPF_K),
		DL(BPF_ALU, BPF_MOV, BPF_X),
		DL(BPF_ALU, BPF_MOV, BPF_K),
		DL(BPF_ALU, BPF_DIV, BPF_X),
		DL(BPF_ALU, BPF_DIV, BPF_K),
		DL(BPF_ALU, BPF_MOD, BPF_X),
		DL(BPF_ALU, BPF_MOD, BPF_K),
		DL(BPF_ALU, BPF_NEG, 0),
		DL(BPF_ALU, BPF_END, BPF_TO_BE),
		DL(BPF_ALU, BPF_END, BPF_TO_LE),
		DL(BPF_ALU64, BPF_ADD, BPF_X),
		DL(BPF_ALU64, BPF_ADD, BPF_K),
		DL(BPF_ALU64, BPF_SUB, BPF_X),
		DL(BPF_ALU64, BPF_SUB, BPF_K),
		DL(BPF_ALU64, BPF_AND, BPF_X),
		DL(BPF_ALU64, BPF_AND, BPF_K),
		DL(BPF_ALU64, BPF_OR, BPF_X),
		DL(BPF_ALU64, BPF_OR, BPF_K),
		DL(BPF_ALU64, BPF_LSH, BPF_X),
		DL(BPF_ALU64, BPF_LSH, BPF_K),
		DL(BPF_ALU64, BPF_RSH, BPF_X),
		DL(BPF_ALU64, BPF_RSH, BPF_K),
		DL(BPF_ALU64, BPF_XOR, BPF_X),
		DL(BPF_ALU64, BPF_XOR, BPF_K),
		DL(BPF_ALU64, BPF_MUL, BPF_X),
		DL(BPF_ALU64, BPF_MUL, BPF_K),
		DL(BPF_ALU64, BPF_MOV, BPF_X),
		DL(BPF_ALU64, BPF_MOV, BPF_K),
		DL(BPF_ALU64, BPF_ARSH, BPF_X),
		DL(BPF_ALU64, BPF_ARSH, BPF_K),
		DL(BPF_ALU64, BPF_DIV, BPF_X),
		DL(BPF_ALU64, BPF_DIV, BPF_K),
		DL(BPF_ALU64, BPF_MOD, BPF_X),
		DL(BPF_ALU64, BPF_MOD, BPF_K),
		DL(BPF_ALU64, BPF_NEG, 0),
		DL(BPF_JMP, BPF_CALL, 0),
		DL(BPF_JMP, BPF_JA, 0),
		DL(BPF_JMP, BPF_JEQ, BPF_X),
		DL(BPF_JMP, BPF_JEQ, BPF_K),
		DL(BPF_JMP, BPF_JNE, BPF_X),
		DL(BPF_JMP, BPF_JNE, BPF_K),
		DL(BPF_JMP, BPF_JGT, BPF_X),
		DL(BPF_JMP, BPF_JGT, BPF_K),
		DL(BPF_JMP, BPF_JGE, BPF_X),
		DL(BPF_JMP, BPF_JGE, BPF_K),
		DL(BPF_JMP, BPF_JSGT, BPF_X),
		DL(BPF_JMP, BPF_JSGT, BPF_K),
		DL(BPF_JMP, BPF_JSGE, BPF_X),
		DL(BPF_JMP, BPF_JSGE, BPF_K),
		DL(BPF_JMP, BPF_JSET, BPF_X),
		DL(BPF_JMP, BPF_JSET, BPF_K),
		DL(BPF_JMP, BPF_EXIT, 0),
		DL(BPF_STX, BPF_MEM, BPF_B),
		DL(BPF_STX, BPF_MEM, BPF_H),
		DL(BPF_STX, BPF_MEM, BPF_W),
		DL(BPF_STX, BPF_MEM, BPF_DW),
		DL(BPF_STX, BPF_XADD, BPF_W),
		DL(BPF_STX, BPF_XADD, BPF_DW),
		DL(BPF_ST, BPF_MEM, BPF_B),
		DL(BPF_ST, BPF_MEM, BPF_H),
		DL(BPF_ST, BPF_MEM, BPF_W),
		DL(BPF_ST, BPF_MEM, BPF_DW),
		DL(BPF_LDX, BPF_MEM, BPF_B),
		DL(BPF_LDX, BPF_MEM, BPF_H),
		DL(BPF_LDX, BPF_MEM, BPF_W),
		DL(BPF_LDX, BPF_MEM, BPF_DW),
		DL(BPF_LD, BPF_ABS, BPF_W),
		DL(BPF_LD, BPF_ABS, BPF_H),
		DL(BPF_LD, BPF_ABS, BPF_B),
		DL(BPF_LD, BPF_IND, BPF_W),
		DL(BPF_LD, BPF_IND, BPF_H),
		DL(BPF_LD, BPF_IND, BPF_B),
#undef DL
	};

	regs[FP_REG]  = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
	regs[ARG1_REG] = (u64) (unsigned long) ctx;
	regs[A_REG] = 0;
	regs[X_REG] = 0;

select_insn:
	goto *jumptable[insn->code];

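	/* Dispatch works by computed goto: insn->code is an 8-bit opcode
	 * (class | op | source), and jumptable[] maps every opcode directly
	 * to the label that handles it, e.g. (illustrative):
	 *
	 *	insn->code == (BPF_ALU64 | BPF_ADD | BPF_X)
	 *		-> goto BPF_ALU64_BPF_ADD_BPF_X
	 *
	 * Unknown opcodes fall through to default_label.
	 */
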
	/* ALU */
#define ALU(OPCODE, OP) \
	BPF_ALU64_##OPCODE##_BPF_X: \
		A = A OP X; \
		CONT; \
	BPF_ALU_##OPCODE##_BPF_X: \
		A = (u32) A OP (u32) X; \
		CONT; \
	BPF_ALU64_##OPCODE##_BPF_K: \
		A = A OP K; \
		CONT; \
	BPF_ALU_##OPCODE##_BPF_K: \
		A = (u32) A OP (u32) K; \
		CONT;

	ALU(BPF_ADD,  +)
	ALU(BPF_SUB,  -)
	ALU(BPF_AND,  &)
	ALU(BPF_OR,   |)
	ALU(BPF_LSH, <<)
	ALU(BPF_RSH, >>)
	ALU(BPF_XOR,  ^)
	ALU(BPF_MUL,  *)
#undef ALU
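	/* For reference, ALU(BPF_ADD, +) above expands (roughly) to:
	 *
	 *	BPF_ALU64_BPF_ADD_BPF_X: A = A + X;              CONT;
	 *	BPF_ALU_BPF_ADD_BPF_X:   A = (u32) A + (u32) X;  CONT;
	 *	BPF_ALU64_BPF_ADD_BPF_K: A = A + K;              CONT;
	 *	BPF_ALU_BPF_ADD_BPF_K:   A = (u32) A + (u32) K;  CONT;
	 */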
	BPF_ALU_BPF_NEG_0:
		A = (u32) -A;
		CONT;
	BPF_ALU64_BPF_NEG_0:
		A = -A;
		CONT;
	BPF_ALU_BPF_MOV_BPF_X:
		A = (u32) X;
		CONT;
	BPF_ALU_BPF_MOV_BPF_K:
		A = (u32) K;
		CONT;
	BPF_ALU64_BPF_MOV_BPF_X:
		A = X;
		CONT;
	BPF_ALU64_BPF_MOV_BPF_K:
		A = K;
		CONT;
	BPF_ALU64_BPF_ARSH_BPF_X:
		(*(s64 *) &A) >>= X;
		CONT;
	BPF_ALU64_BPF_ARSH_BPF_K:
		(*(s64 *) &A) >>= K;
		CONT;
	BPF_ALU64_BPF_MOD_BPF_X:
		if (unlikely(X == 0))
			return 0;
		tmp = A;
		A = do_div(tmp, X);
		CONT;
	BPF_ALU_BPF_MOD_BPF_X:
		if (unlikely(X == 0))
			return 0;
		tmp = (u32) A;
		A = do_div(tmp, (u32) X);
		CONT;
	BPF_ALU64_BPF_MOD_BPF_K:
		tmp = A;
		A = do_div(tmp, K);
		CONT;
	BPF_ALU_BPF_MOD_BPF_K:
		tmp = (u32) A;
		A = do_div(tmp, (u32) K);
		CONT;
	BPF_ALU64_BPF_DIV_BPF_X:
		if (unlikely(X == 0))
			return 0;
		do_div(A, X);
		CONT;
	BPF_ALU_BPF_DIV_BPF_X:
		if (unlikely(X == 0))
			return 0;
		tmp = (u32) A;
		do_div(tmp, (u32) X);
		A = (u32) tmp;
		CONT;
	BPF_ALU64_BPF_DIV_BPF_K:
		do_div(A, K);
		CONT;
	BPF_ALU_BPF_DIV_BPF_K:
		tmp = (u32) A;
		do_div(tmp, (u32) K);
		A = (u32) tmp;
		CONT;
	BPF_ALU_BPF_END_BPF_TO_BE:
		switch (K) {
		case 16:
			A = (__force u16) cpu_to_be16(A);
			break;
		case 32:
			A = (__force u32) cpu_to_be32(A);
			break;
		case 64:
			A = (__force u64) cpu_to_be64(A);
			break;
		}
		CONT;
	BPF_ALU_BPF_END_BPF_TO_LE:
		switch (K) {
		case 16:
			A = (__force u16) cpu_to_le16(A);
			break;
		case 32:
			A = (__force u32) cpu_to_le32(A);
			break;
		case 64:
			A = (__force u64) cpu_to_le64(A);
			break;
		}
		CONT;

	/* CALL */
	BPF_JMP_BPF_CALL_0:
		/* Function call scratches R1-R5 registers, preserves R6-R9,
		 * and stores return value into R0.
		 */
		R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
						   regs[4], regs[5]);
		CONT;

	/* JMP */
	BPF_JMP_BPF_JA_0:
		insn += insn->off;
		CONT;
	BPF_JMP_BPF_JEQ_BPF_X:
		if (A == X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JEQ_BPF_K:
		if (A == K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JNE_BPF_X:
		if (A != X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JNE_BPF_K:
		if (A != K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JGT_BPF_X:
		if (A > X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JGT_BPF_K:
		if (A > K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JGE_BPF_X:
		if (A >= X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JGE_BPF_K:
		if (A >= K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSGT_BPF_X:
		if (((s64)A) > ((s64)X)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSGT_BPF_K:
		if (((s64)A) > ((s64)K)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSGE_BPF_X:
		if (((s64)A) >= ((s64)X)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSGE_BPF_K:
		if (((s64)A) >= ((s64)K)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSET_BPF_X:
		if (A & X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSET_BPF_K:
		if (A & K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_EXIT_0:
		return R0;

	/* STX and ST and LDX */
#define LDST(SIZEOP, SIZE) \
	BPF_STX_BPF_MEM_##SIZEOP: \
		*(SIZE *)(unsigned long) (A + insn->off) = X; \
		CONT; \
	BPF_ST_BPF_MEM_##SIZEOP: \
		*(SIZE *)(unsigned long) (A + insn->off) = K; \
		CONT; \
	BPF_LDX_BPF_MEM_##SIZEOP: \
		A = *(SIZE *)(unsigned long) (X + insn->off); \
		CONT;

	LDST(BPF_B,   u8)
	LDST(BPF_H,  u16)
	LDST(BPF_W,  u32)
	LDST(BPF_DW, u64)
#undef LDST
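	/* Likewise, LDST(BPF_W, u32) above expands (roughly) to:
	 *
	 *	BPF_STX_BPF_MEM_BPF_W: *(u32 *)(unsigned long) (A + insn->off) = X; CONT;
	 *	BPF_ST_BPF_MEM_BPF_W:  *(u32 *)(unsigned long) (A + insn->off) = K; CONT;
	 *	BPF_LDX_BPF_MEM_BPF_W: A = *(u32 *)(unsigned long) (X + insn->off); CONT;
	 */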
	BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
		atomic_add((u32) X, (atomic_t *)(unsigned long)
			   (A + insn->off));
		CONT;
	BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
		atomic64_add((u64) X, (atomic64_t *)(unsigned long)
			     (A + insn->off));
		CONT;
	BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
		off = K;
load_word:
		/* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns only appear
		 * in programs where ctx == skb. All programs keep 'ctx'
		 * in regs[CTX_REG] == R6, sk_convert_filter() saves it
		 * in R6, and the internal BPF verifier will check that
		 * R6 == ctx.
		 *
		 * BPF_ABS and BPF_IND are wrappers of function calls, so
		 * they scratch R1-R5 registers, preserve R6-R9, and store
		 * return value into R0.
		 *
		 * Implicit input:
		 *   ctx
		 *
		 * Explicit input:
		 *   X == any register
		 *   K == 32-bit immediate
		 *
		 * Output:
		 *   R0 - 8/16/32-bit skb data converted to cpu endianness
		 */
		ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
		if (likely(ptr != NULL)) {
			R0 = get_unaligned_be32(ptr);
			CONT;
		}
		return 0;
	BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
		off = K;
load_half:
		ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
		if (likely(ptr != NULL)) {
			R0 = get_unaligned_be16(ptr);
			CONT;
		}
		return 0;
	BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
		off = K;
load_byte:
		ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
		if (likely(ptr != NULL)) {
			R0 = *(u8 *)ptr;
			CONT;
		}
		return 0;
	BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
		off = K + X;
		goto load_word;
	BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
		off = K + X;
		goto load_half;
	BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
		off = K + X;
		goto load_byte;

	default_label:
		/* If we ever reach this, we have a bug somewhere. */
		WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
		return 0;
#undef CONT_JMP
#undef CONT

#undef R0
#undef X
#undef A
#undef K
}

u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
			      const struct sock_filter_int *insni)
    __attribute__ ((alias ("__sk_run_filter")));

u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
			  const struct sock_filter_int *insni)
    __attribute__ ((alias ("__sk_run_filter")));
EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);

/* Helper to find the offset of pkt_type in the sk_buff structure. We want
 * to make sure it's still a 3-bit field starting at a byte boundary;
 * taken from arch/x86/net/bpf_jit_comp.c.
 */
#ifdef __BIG_ENDIAN_BITFIELD
#define PKT_TYPE_MAX	(7 << 5)
#else
#define PKT_TYPE_MAX	7
#endif
static unsigned int pkt_type_offset(void)
{
	struct sk_buff skb_probe = { .pkt_type = ~0, };
	u8 *ct = (u8 *) &skb_probe;
	unsigned int off;

	for (off = 0; off < sizeof(struct sk_buff); off++) {
		if (ct[off] == PKT_TYPE_MAX)
			return off;
	}

	pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
	return -1;
}

static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(long) ctx;

	return __skb_get_poff(skb);
}

static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (A > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (A > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[A];
	if (nla->nla_len > skb->len - A)
		return 0;

	nla = nla_find_nested(nla, X);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
{
	return raw_smp_processor_id();
}

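/* The helpers above all follow the internal BPF calling convention used by
 * BPF_JMP | BPF_CALL: r1 carries the context (the skb), r2 and r3 carry the
 * converted program's A and X, r4/r5 are unused here, and the result comes
 * back in R0. convert_bpf_extensions() below emits the argument moves and
 * encodes each helper as an offset relative to __bpf_call_base.
 */
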
static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct sock_filter_int **insnp)
{
	struct sock_filter_int *insn = *insnp;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		insn->code = BPF_LDX | BPF_MEM | BPF_H;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, protocol);
		insn++;

		/* A = ntohs(A) [emitting a nop or swap16] */
		insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
		insn->a_reg = A_REG;
		insn->imm = 16;
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		insn->code = BPF_LDX | BPF_MEM | BPF_B;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = pkt_type_offset();
		if (insn->off < 0)
			return false;
		insn++;

		insn->code = BPF_ALU | BPF_AND | BPF_K;
		insn->a_reg = A_REG;
		insn->imm = PKT_TYPE_MAX;
#ifdef __BIG_ENDIAN_BITFIELD
		insn++;

		insn->code = BPF_ALU | BPF_RSH | BPF_K;
		insn->a_reg = A_REG;
		insn->imm = 5;
#endif
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
			insn->code = BPF_LDX | BPF_MEM | BPF_DW;
		else
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
		insn->a_reg = TMP_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, dev);
		insn++;

		insn->code = BPF_JMP | BPF_JNE | BPF_K;
		insn->a_reg = TMP_REG;
		insn->imm = 0;
		insn->off = 1;
		insn++;

		insn->code = BPF_JMP | BPF_EXIT;
		insn++;

		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);

		insn->a_reg = A_REG;
		insn->x_reg = TMP_REG;

		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->off = offsetof(struct net_device, ifindex);
		} else {
			insn->code = BPF_LDX | BPF_MEM | BPF_H;
			insn->off = offsetof(struct net_device, type);
		}
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		insn->code = BPF_LDX | BPF_MEM | BPF_W;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, mark);
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		insn->code = BPF_LDX | BPF_MEM | BPF_W;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, hash);
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		insn->code = BPF_LDX | BPF_MEM | BPF_H;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, queue_mapping);
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);

		insn->code = BPF_LDX | BPF_MEM | BPF_H;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, vlan_tci);
		insn++;

		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
			insn->code = BPF_ALU | BPF_AND | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = ~VLAN_TAG_PRESENT;
		} else {
			insn->code = BPF_ALU | BPF_RSH | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = 12;
			insn++;

			insn->code = BPF_ALU | BPF_AND | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = 1;
		}
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
		/* arg1 = ctx */
		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
		insn->a_reg = ARG1_REG;
		insn->x_reg = CTX_REG;
		insn++;

		/* arg2 = A */
		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
		insn->a_reg = ARG2_REG;
		insn->x_reg = A_REG;
		insn++;

		/* arg3 = X */
		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
		insn->a_reg = ARG3_REG;
		insn->x_reg = X_REG;
		insn++;

		/* Emit call(ctx, arg2=A, arg3=X) */
		insn->code = BPF_JMP | BPF_CALL;
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			insn->imm = __skb_get_pay_offset - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			insn->imm = __skb_get_nlattr - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			insn->imm = __get_raw_cpu_id - __bpf_call_base;
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		insn->code = BPF_ALU | BPF_XOR | BPF_X;
		insn->a_reg = A_REG;
		insn->x_reg = X_REG;
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}
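
/* For illustration (a sketch, not emitted verbatim anywhere): a classic
 * filter doing "ld #protocol", i.e. BPF_LD | BPF_ABS with
 * k = SKF_AD_OFF + SKF_AD_PROTOCOL, is rewritten by convert_bpf_extensions()
 * into two internal insns:
 *
 *	ldx_h  A_REG, [CTX_REG + offsetof(struct sk_buff, protocol)]
 *	endian A_REG, from_be, 16	// byte swap, or a nop on big endian
 */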

/**
 *	sk_convert_filter - convert filter program
 *	@prog: the user-passed filter program
 *	@len: the length of the user-passed filter program
 *	@new_prog: buffer where converted program will be stored
 *	@new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style classic BPF instructions to 'sock_filter_int'
 * style internal ones. Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *	sk_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) Second pass to remap, which itself runs twice: the first iteration
 *    finds the new jump offsets, the second does the actual remapping:
 *	new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len);
 *	sk_convert_filter(old_prog, old_len, new_prog, &new_len);
 *
 * User BPF's register A is mapped to our BPF register 6, user BPF
 * register X is mapped to BPF register 7; the frame pointer is always
 * register 10; the context 'void *ctx' is stored in register 1, that is,
 * for socket filters: ctx == 'struct sk_buff *', for seccomp:
 * ctx == 'struct seccomp_data *'.
 */
int sk_convert_filter(struct sock_filter *prog, int len,
		      struct sock_filter_int *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i;
	struct sock_filter_int *new_insn;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);

	if (len <= 0 || len >= BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = new_prog;
	fp = prog;

	if (new_insn) {
		new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
		new_insn->a_reg = CTX_REG;
		new_insn->x_reg = ARG1_REG;
	}
	new_insn++;

	for (i = 0; i < len; fp++, i++) {
		struct sock_filter_int tmp_insns[6] = { };
		struct sock_filter_int *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - new_prog;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			insn->code = fp->code;
			insn->a_reg = A_REG;
			insn->x_reg = X_REG;
			insn->imm = fp->k;
			break;

		/* Jump opcodes map as-is, but offsets need adjustment. */
		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
#define EMIT_JMP \
	do { \
		if (target >= len || target < 0) \
			goto err; \
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
		/* Adjust pc relative offset for 2nd or 3rd insn. */ \
		insn->off -= insn - tmp_insns; \
	} while (0)

			EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				insn->code = BPF_ALU | BPF_MOV | BPF_K;
				insn->a_reg = TMP_REG;
				insn->imm = fp->k;
				insn++;

				insn->a_reg = A_REG;
				insn->x_reg = TMP_REG;
				bpf_src = BPF_X;
			} else {
				insn->a_reg = A_REG;
				insn->x_reg = X_REG;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				EMIT_JMP;
				break;
			}

			/* Convert JEQ into JNE when 'jump_true' is next insn. */
			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
				insn->code = BPF_JMP | BPF_JNE | bpf_src;
				target = i + fp->jf + 1;
				EMIT_JMP;
				break;
			}

			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			EMIT_JMP;
			break;
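
			/* Illustrative example of this general case: a
			 * classic "jeq #k, jt, jf" with both targets
			 * somewhere else becomes
			 *
			 *	jeq A_REG, #k, +off(jt)
			 *	ja  +off(jf)
			 *
			 * where the offsets come from addrs[] via EMIT_JMP.
			 */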

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = TMP_REG;
			insn->x_reg = A_REG;
			insn++;

			insn->code = BPF_LD | BPF_ABS | BPF_B;
			insn->a_reg = A_REG;
			insn->imm = fp->k;
			insn++;

			insn->code = BPF_ALU | BPF_AND | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = 0xf;
			insn++;

			insn->code = BPF_ALU | BPF_LSH | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = 2;
			insn++;

			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = X_REG;
			insn->x_reg = A_REG;
			insn++;

			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = A_REG;
			insn->x_reg = TMP_REG;
			break;

		/* RET_K, RET_A are remapped into 2 insns. */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			insn->code = BPF_ALU | BPF_MOV |
				     (BPF_RVAL(fp->code) == BPF_K ?
				      BPF_K : BPF_X);
			insn->a_reg = 0;
			insn->x_reg = A_REG;
			insn->imm = fp->k;
			insn++;

			insn->code = BPF_JMP | BPF_EXIT;
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			insn->code = BPF_STX | BPF_MEM | BPF_W;
			insn->a_reg = FP_REG;
			insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
				      A_REG : X_REG;
			insn->x_reg = FP_REG;
			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			insn->code = BPF_ALU | BPF_MOV | BPF_K;
			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
				      A_REG : X_REG;
			insn->imm = fp->k;
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = X_REG;
			insn->x_reg = A_REG;
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = A_REG;
			insn->x_reg = X_REG;
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
				      A_REG : X_REG;
			insn->x_reg = CTX_REG;
			insn->off = offsetof(struct sk_buff, len);
			break;

		/* access seccomp_data fields */
		case BPF_LDX | BPF_ABS | BPF_W:
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->a_reg = A_REG;
			insn->x_reg = CTX_REG;
			insn->off = fp->k;
			break;

		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));

		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - new_prog;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - new_prog) {
		new_flen = new_insn - new_prog;
		if (pass > 2)
			goto err;

		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}

/* Security:
 *
 * A BPF program is able to use 16 cells of memory to store intermediate
 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
 *
 * As we don't want to clear the mem[] array for each packet going through
 * sk_run_filter(), we check that a filter loaded by the user never tries to
 * read a cell that has not previously been written, and we check all
 * branches to be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);
	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;
	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_S_ST:
		case BPF_S_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_S_JMP_JA:
			/* a jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* a jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}
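
/* Example of a filter this check rejects (illustrative): a program whose
 * first reachable instruction is "ld M[3]" reads scratch cell 3 before any
 * "st M[3]"/"stx M[3]" has written it, so memvalid still has bit 3 clear at
 * that point and the load case above returns -EINVAL.
 */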

/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through, kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
	/*
	 * Valid instructions are initialized to non-0.
	 * Invalid instructions are initialized to 0.
	 */
	static const u8 codes[] = {
		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
		[BPF_ALU|BPF_MOD|BPF_K]  = BPF_S_ALU_MOD_K,
		[BPF_ALU|BPF_MOD|BPF_X]  = BPF_S_ALU_MOD_X,
		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
		[BPF_ALU|BPF_XOR|BPF_K]  = BPF_S_ALU_XOR_K,
		[BPF_ALU|BPF_XOR|BPF_X]  = BPF_S_ALU_XOR_X,
		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
		[BPF_RET|BPF_K]          = BPF_S_RET_K,
		[BPF_RET|BPF_A]          = BPF_S_RET_A,
		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
		[BPF_ST]                 = BPF_S_ST,
		[BPF_STX]                = BPF_S_STX,
		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
	};
	int pc;
	bool anc_found;

	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;

		if (code >= ARRAY_SIZE(codes))
			return -EINVAL;
		code = codes[code];
		if (!code)
			return -EINVAL;
		/* Some instructions need special checks */
		switch (code) {
		case BPF_S_ALU_DIV_K:
		case BPF_S_ALU_MOD_K:
			/* check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
		case BPF_S_ST:
		case BPF_S_STX:
			/* check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_S_JMP_JA:
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* for conditionals both must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_S_LD_W_ABS:
		case BPF_S_LD_H_ABS:
		case BPF_S_LD_B_ABS:
			anc_found = false;
#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
				code = BPF_S_ANC_##CODE; \
				anc_found = true; \
				break
			switch (ftest->k) {
			ANCILLARY(PROTOCOL);
			ANCILLARY(PKTTYPE);
			ANCILLARY(IFINDEX);
			ANCILLARY(NLATTR);
			ANCILLARY(NLATTR_NEST);
			ANCILLARY(MARK);
			ANCILLARY(QUEUE);
			ANCILLARY(HATYPE);
			ANCILLARY(RXHASH);
			ANCILLARY(CPU);
			ANCILLARY(ALU_XOR_X);
			ANCILLARY(VLAN_TAG);
			ANCILLARY(VLAN_TAG_PRESENT);
			ANCILLARY(PAY_OFFSET);
			}

			/* ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
		ftest->code = code;
	}

	/* last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_S_RET_K:
	case BPF_S_RET_A:
		return check_load_and_stores(filter, flen);
	}
	return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);

static int sk_store_orig_filter(struct sk_filter *fp,
				const struct sock_fprog *fprog)
{
	unsigned int fsize = sk_filter_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;
	fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void sk_release_orig_filter(struct sk_filter *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	sk_release_orig_filter(fp);
	bpf_jit_free(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (atomic_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	atomic_inc(&fp->refcnt);
	atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
}

static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
					      struct sock *sk,
					      unsigned int len)
{
	struct sk_filter *fp_new;

	if (sk == NULL)
		return krealloc(fp, len, GFP_KERNEL);

	fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
	if (fp_new) {
		memcpy(fp_new, fp, sizeof(struct sk_filter));
		/* As we're keeping orig_prog along in fp_new,
		 * we need to make sure we're not evicting it
		 * from the old fp.
		 */
		fp->orig_prog = NULL;
		sk_filter_uncharge(sk, fp);
	}

	return fp_new;
}

static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
					     struct sock *sk)
{
	struct sock_filter *old_prog;
	struct sk_filter *old_fp;
	int i, err, new_len, old_len = fp->len;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct sock_filter_int));

	/* For now, we need to unfiddle BPF_S_* identifiers in place.
	 * This can sooner or later on be subject to removal, e.g. when
	 * JITs have been converted.
	 */
	for (i = 0; i < fp->len; i++)
		sk_decode_filter(&fp->insns[i], &fp->insns[i]);

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = sk_convert_filter(old_prog, old_len, NULL, &new_len);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len));
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->bpf_func = sk_run_filter_int_skb;
	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
	err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
	if (err)
		/* 2nd sk_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by __sk_migrate_realloc().
		 */
		goto out_err_free;

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	/* Rollback filter setup. */
	if (sk != NULL)
		sk_filter_uncharge(sk, fp);
	else
		kfree(fp);
	return ERR_PTR(err);
}

static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
					     struct sock *sk)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = sk_chk_filter(fp->insns, fp->len);
	if (err) {
		if (sk != NULL)
			sk_filter_uncharge(sk, fp);
		else
			kfree(fp);
		return ERR_PTR(err);
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = __sk_migrate_filter(fp, sk);

	return fp;
}

/**
 *	sk_unattached_filter_create - create an unattached filter
 *	@fprog: the filter program
 *	@pfp: the unattached filter that is created
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int sk_unattached_filter_create(struct sk_filter **pfp,
				struct sock_fprog *fprog)
{
	unsigned int fsize = sk_filter_proglen(fprog);
	struct sk_filter *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* __sk_prepare_filter() already takes care of uncharging
	 * memory in case something goes wrong.
	 */
	fp = __sk_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_create);

void sk_unattached_filter_destroy(struct sk_filter *fp)
{
	sk_filter_release(fp);
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
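
/* Typical in-kernel usage (a sketch, not taken from this file; 'insns' and
 * 'skb' are whatever the caller has at hand):
 *
 *	struct sock_fprog fprog = { .len = ARRAY_SIZE(insns), .filter = insns };
 *	struct sk_filter *fp;
 *
 *	if (sk_unattached_filter_create(&fp, &fprog) == 0) {
 *		unsigned int res = SK_RUN_FILTER(fp, skb);
 *		...
 *		sk_unattached_filter_destroy(fp);
 *	}
 */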

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 *	Attach the user's filter code. We first run some sanity checks on
 *	it to make sure it does not explode on us later. If an error
 *	occurs or there is insufficient memory for the filter, a negative
 *	errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;
	unsigned int fsize = sk_filter_proglen(fprog);
	unsigned int sk_fsize = sk_filter_size(fprog->len);
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	/* Make sure the new filter is present and of a valid size. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		sock_kfree_s(sk, fp, sk_fsize);
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = sk_store_orig_filter(fp, fprog);
	if (err) {
		sk_filter_uncharge(sk, fp);
		return -ENOMEM;
	}

	/* __sk_prepare_filter() already takes care of uncharging
	 * memory in case something goes wrong.
	 */
	fp = __sk_prepare_filter(fp, sk);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
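
/* Illustrative sketch (userspace, not compiled here): sk_attach_filter() is
 * reached via setsockopt(SO_ATTACH_FILTER). The hypothetical program below
 * accepts only IPv4 frames on a packet socket; fd and the function name are
 * assumptions made for the example.
 */
#if 0
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <sys/socket.h>

static int example_attach_ipv4_filter(int fd)
{
	/* Load the EtherType halfword at offset 12, accept the frame if it
	 * is ETH_P_IP, otherwise drop it by returning 0.
	 */
	struct sock_filter insns[] = {
		BPF_STMT(BPF_LD  | BPF_H | BPF_ABS, 12),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
		BPF_STMT(BPF_RET | BPF_K, 0),
	};
	struct sock_fprog fprog = {
		.len	= sizeof(insns) / sizeof(insns[0]),
		.filter	= insns,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &fprog, sizeof(fprog));
}
#endif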

int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
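
/* Illustrative sketch (userspace): sk_detach_filter() is the kernel side of
 * SO_DETACH_FILTER. The option value is ignored, but setsockopt() still
 * wants a readable, int-sized argument; fd and the function name are
 * assumptions.
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>

static void example_detach_filter(int fd)
{
	int dummy = 0;

	if (setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER,
		       &dummy, sizeof(dummy)) < 0)
		perror("SO_DETACH_FILTER");
}
#endif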

void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
{
	static const u16 decodes[] = {
		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
		[BPF_S_RET_K]		= BPF_RET|BPF_K,
		[BPF_S_RET_A]		= BPF_RET|BPF_A,
		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
		[BPF_S_ST]		= BPF_ST,
		[BPF_S_STX]		= BPF_STX,
		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
	};
	u16 code;

	code = filt->code;

	to->code = decodes[code];
	to->jt = filt->jt;
	to->jf = filt->jf;
	to->k = filt->k;
}
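
/* Illustrative sketch: sk_decode_filter() maps a kernel-internal BPF_S_*
 * opcode back to the classic code/jt/jf/k quadruple. The instruction values
 * below are hypothetical and only show the mapping from the table above.
 */
#if 0
static void example_decode_one(void)
{
	/* Hypothetical internal instruction: jump-if-equal against 0x0800. */
	struct sock_filter internal = {
		.code	= BPF_S_JMP_JEQ_K,
		.jt	= 0,
		.jf	= 1,
		.k	= 0x0800,
	};
	struct sock_filter decoded;

	sk_decode_filter(&internal, &decoded);
	/* decoded.code is now BPF_JMP|BPF_JEQ|BPF_K; jt, jf and k are
	 * copied through unchanged.
	 */
}
#endif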

int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
		  unsigned int len)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	int ret = 0;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (!filter)
		goto out;

	/* We're copying the filter that was originally attached,
	 * so no conversion/decode is needed anymore.
	 */
	fprog = filter->orig_prog;

	ret = fprog->len;
	if (!len)
		/* User space only enquires about the number of filter blocks. */
		goto out;

	ret = -EINVAL;
	if (len < fprog->len)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog)))
		goto out;

	/* Instead of bytes, the API expects the number of filter
	 * blocks to be returned.
	 */
	ret = fprog->len;
out:
	release_sock(sk);
	return ret;
}
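
/* Illustrative sketch (userspace): an attached classic BPF program can be
 * read back through getsockopt(SO_GET_FILTER). As the comments above note,
 * the length is exchanged in filter blocks rather than bytes: a first call
 * with a zero length only asks for the block count. fd and the function
 * name are assumptions made for the example.
 */
#if 0
#include <linux/filter.h>
#include <stdlib.h>
#include <sys/socket.h>

static int example_dump_filter(int fd)
{
	struct sock_filter *buf;
	socklen_t nblocks = 0;

	/* First call: learn how many filter blocks are attached. */
	if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &nblocks) < 0)
		return -1;
	if (!nblocks)
		return 0;

	buf = calloc(nblocks, sizeof(*buf));
	if (!buf)
		return -1;

	/* Second call: nblocks is a block count, not a byte count. */
	if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, buf, &nblocks) < 0) {
		free(buf);
		return -1;
	}

	/* ... inspect buf[0..nblocks-1] ... */
	free(buf);
	return 0;
}
#endif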