/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>

/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
		return ptr;
	return NULL;
}

static inline void *load_pointer(const struct sk_buff *skb, int k,
				 unsigned int size, void *buffer)
{
	if (k >= 0)
		return skb_header_pointer(skb, k, size, buffer);
	return bpf_internal_load_pointer_neg_helper(skb, k, size);
}

/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to correct size returned by
 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to sk_run_filter. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
		return -ENOMEM;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);

		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter);
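
/* Note: sk_filter() is the ingress-side entry point used by the socket
 * layer (e.g. from sock_queue_rcv_skb()); a non-zero return tells the
 * caller to drop the skb instead of queueing it.
 */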

/* Base function for offset calculation. Needs to go into .text section,
 * therefore keeping it non-static as well; will also be used by JITs
 * anyway later on, so do not let the compiler omit it.
 */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	return 0;
}

/**
 *	__sk_run_filter - run a filter on a given context
 *	@ctx: buffer to run the filter on
 *	@insn: filter to apply
 *
 * Decode and apply filter instructions to the skb->data. Return length to
 * keep, 0 for none. @ctx is the data we are operating on, @insn is the
 * array of filter instructions.
 */
unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
{
	u64 stack[MAX_BPF_STACK / sizeof(u64)];
	u64 regs[MAX_BPF_REG], tmp;
	void *ptr;
	int off;

#define K	insn->imm
#define A	regs[insn->a_reg]
#define X	regs[insn->x_reg]
#define R0	regs[0]

#define CONT	 ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })

	static const void *jumptable[256] = {
		[0 ... 255] = &&default_label,
		/* Now overwrite non-defaults ... */
#define DL(A, B, C)	[A|B|C] = &&A##_##B##_##C
		DL(BPF_ALU, BPF_ADD, BPF_X),
		DL(BPF_ALU, BPF_ADD, BPF_K),
		DL(BPF_ALU, BPF_SUB, BPF_X),
		DL(BPF_ALU, BPF_SUB, BPF_K),
		DL(BPF_ALU, BPF_AND, BPF_X),
		DL(BPF_ALU, BPF_AND, BPF_K),
		DL(BPF_ALU, BPF_OR, BPF_X),
		DL(BPF_ALU, BPF_OR, BPF_K),
		DL(BPF_ALU, BPF_LSH, BPF_X),
		DL(BPF_ALU, BPF_LSH, BPF_K),
		DL(BPF_ALU, BPF_RSH, BPF_X),
		DL(BPF_ALU, BPF_RSH, BPF_K),
		DL(BPF_ALU, BPF_XOR, BPF_X),
		DL(BPF_ALU, BPF_XOR, BPF_K),
		DL(BPF_ALU, BPF_MUL, BPF_X),
		DL(BPF_ALU, BPF_MUL, BPF_K),
		DL(BPF_ALU, BPF_MOV, BPF_X),
		DL(BPF_ALU, BPF_MOV, BPF_K),
		DL(BPF_ALU, BPF_DIV, BPF_X),
		DL(BPF_ALU, BPF_DIV, BPF_K),
		DL(BPF_ALU, BPF_MOD, BPF_X),
		DL(BPF_ALU, BPF_MOD, BPF_K),
		DL(BPF_ALU, BPF_NEG, 0),
		DL(BPF_ALU, BPF_END, BPF_TO_BE),
		DL(BPF_ALU, BPF_END, BPF_TO_LE),
		DL(BPF_ALU64, BPF_ADD, BPF_X),
		DL(BPF_ALU64, BPF_ADD, BPF_K),
		DL(BPF_ALU64, BPF_SUB, BPF_X),
		DL(BPF_ALU64, BPF_SUB, BPF_K),
		DL(BPF_ALU64, BPF_AND, BPF_X),
		DL(BPF_ALU64, BPF_AND, BPF_K),
		DL(BPF_ALU64, BPF_OR, BPF_X),
		DL(BPF_ALU64, BPF_OR, BPF_K),
		DL(BPF_ALU64, BPF_LSH, BPF_X),
		DL(BPF_ALU64, BPF_LSH, BPF_K),
		DL(BPF_ALU64, BPF_RSH, BPF_X),
		DL(BPF_ALU64, BPF_RSH, BPF_K),
		DL(BPF_ALU64, BPF_XOR, BPF_X),
		DL(BPF_ALU64, BPF_XOR, BPF_K),
		DL(BPF_ALU64, BPF_MUL, BPF_X),
		DL(BPF_ALU64, BPF_MUL, BPF_K),
		DL(BPF_ALU64, BPF_MOV, BPF_X),
		DL(BPF_ALU64, BPF_MOV, BPF_K),
		DL(BPF_ALU64, BPF_ARSH, BPF_X),
		DL(BPF_ALU64, BPF_ARSH, BPF_K),
		DL(BPF_ALU64, BPF_DIV, BPF_X),
		DL(BPF_ALU64, BPF_DIV, BPF_K),
		DL(BPF_ALU64, BPF_MOD, BPF_X),
		DL(BPF_ALU64, BPF_MOD, BPF_K),
		DL(BPF_ALU64, BPF_NEG, 0),
		DL(BPF_JMP, BPF_CALL, 0),
		DL(BPF_JMP, BPF_JA, 0),
		DL(BPF_JMP, BPF_JEQ, BPF_X),
		DL(BPF_JMP, BPF_JEQ, BPF_K),
		DL(BPF_JMP, BPF_JNE, BPF_X),
		DL(BPF_JMP, BPF_JNE, BPF_K),
		DL(BPF_JMP, BPF_JGT, BPF_X),
		DL(BPF_JMP, BPF_JGT, BPF_K),
		DL(BPF_JMP, BPF_JGE, BPF_X),
		DL(BPF_JMP, BPF_JGE, BPF_K),
		DL(BPF_JMP, BPF_JSGT, BPF_X),
		DL(BPF_JMP, BPF_JSGT, BPF_K),
		DL(BPF_JMP, BPF_JSGE, BPF_X),
		DL(BPF_JMP, BPF_JSGE, BPF_K),
		DL(BPF_JMP, BPF_JSET, BPF_X),
		DL(BPF_JMP, BPF_JSET, BPF_K),
		DL(BPF_JMP, BPF_EXIT, 0),
		DL(BPF_STX, BPF_MEM, BPF_B),
		DL(BPF_STX, BPF_MEM, BPF_H),
		DL(BPF_STX, BPF_MEM, BPF_W),
		DL(BPF_STX, BPF_MEM, BPF_DW),
		DL(BPF_STX, BPF_XADD, BPF_W),
		DL(BPF_STX, BPF_XADD, BPF_DW),
		DL(BPF_ST, BPF_MEM, BPF_B),
		DL(BPF_ST, BPF_MEM, BPF_H),
		DL(BPF_ST, BPF_MEM, BPF_W),
		DL(BPF_ST, BPF_MEM, BPF_DW),
		DL(BPF_LDX, BPF_MEM, BPF_B),
		DL(BPF_LDX, BPF_MEM, BPF_H),
		DL(BPF_LDX, BPF_MEM, BPF_W),
		DL(BPF_LDX, BPF_MEM, BPF_DW),
		DL(BPF_LD, BPF_ABS, BPF_W),
		DL(BPF_LD, BPF_ABS, BPF_H),
		DL(BPF_LD, BPF_ABS, BPF_B),
		DL(BPF_LD, BPF_IND, BPF_W),
		DL(BPF_LD, BPF_IND, BPF_H),
		DL(BPF_LD, BPF_IND, BPF_B),
#undef DL
	};
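	/* Dispatch note: the table above holds GCC computed-goto label
	 * addresses, indexed directly by the 8-bit insn->code value. Any
	 * opcode not explicitly listed falls through to default_label.
	 */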

	regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
	regs[ARG1_REG] = (u64) (unsigned long) ctx;

select_insn:
	goto *jumptable[insn->code];

	/* ALU */
#define ALU(OPCODE, OP)			\
	BPF_ALU64_##OPCODE##_BPF_X:	\
		A = A OP X;		\
		CONT;			\
	BPF_ALU_##OPCODE##_BPF_X:	\
		A = (u32) A OP (u32) X;	\
		CONT;			\
	BPF_ALU64_##OPCODE##_BPF_K:	\
		A = A OP K;		\
		CONT;			\
	BPF_ALU_##OPCODE##_BPF_K:	\
		A = (u32) A OP (u32) K;	\
		CONT;

	ALU(BPF_ADD, +)
	ALU(BPF_SUB, -)
	ALU(BPF_AND, &)
	ALU(BPF_OR, |)
	ALU(BPF_LSH, <<)
	ALU(BPF_RSH, >>)
	ALU(BPF_XOR, ^)
	ALU(BPF_MUL, *)
#undef ALU
	BPF_ALU_BPF_NEG_0:
		A = (u32) -A;
		CONT;
	BPF_ALU64_BPF_NEG_0:
		A = -A;
		CONT;
	BPF_ALU_BPF_MOV_BPF_X:
		A = (u32) X;
		CONT;
	BPF_ALU_BPF_MOV_BPF_K:
		A = (u32) K;
		CONT;
	BPF_ALU64_BPF_MOV_BPF_X:
		A = X;
		CONT;
	BPF_ALU64_BPF_MOV_BPF_K:
		A = K;
		CONT;
	BPF_ALU64_BPF_ARSH_BPF_X:
		(*(s64 *) &A) >>= X;
		CONT;
	BPF_ALU64_BPF_ARSH_BPF_K:
		(*(s64 *) &A) >>= K;
		CONT;
	BPF_ALU64_BPF_MOD_BPF_X:
		if (unlikely(X == 0))
			return 0;
		tmp = A;
		A = do_div(tmp, X);
		CONT;
	BPF_ALU_BPF_MOD_BPF_X:
		if (unlikely(X == 0))
			return 0;
		tmp = (u32) A;
		A = do_div(tmp, (u32) X);
		CONT;
	BPF_ALU64_BPF_MOD_BPF_K:
		tmp = A;
		A = do_div(tmp, K);
		CONT;
	BPF_ALU_BPF_MOD_BPF_K:
		tmp = (u32) A;
		A = do_div(tmp, (u32) K);
		CONT;
	BPF_ALU64_BPF_DIV_BPF_X:
		if (unlikely(X == 0))
			return 0;
		do_div(A, X);
		CONT;
	BPF_ALU_BPF_DIV_BPF_X:
		if (unlikely(X == 0))
			return 0;
		tmp = (u32) A;
		do_div(tmp, (u32) X);
		A = (u32) tmp;
		CONT;
	BPF_ALU64_BPF_DIV_BPF_K:
		do_div(A, K);
		CONT;
	BPF_ALU_BPF_DIV_BPF_K:
		tmp = (u32) A;
		do_div(tmp, (u32) K);
		A = (u32) tmp;
		CONT;
	BPF_ALU_BPF_END_BPF_TO_BE:
		switch (K) {
		case 16:
			A = (__force u16) cpu_to_be16(A);
			break;
		case 32:
			A = (__force u32) cpu_to_be32(A);
			break;
		case 64:
			A = (__force u64) cpu_to_be64(A);
			break;
		}
		CONT;
	BPF_ALU_BPF_END_BPF_TO_LE:
		switch (K) {
		case 16:
			A = (__force u16) cpu_to_le16(A);
			break;
		case 32:
			A = (__force u32) cpu_to_le32(A);
			break;
		case 64:
			A = (__force u64) cpu_to_le64(A);
			break;
		}
		CONT;

	/* CALL */
	BPF_JMP_BPF_CALL_0:
		/* Function call scratches R1-R5 registers, preserves R6-R9,
		 * and stores return value into R0.
		 */
		R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
						   regs[4], regs[5]);
		CONT;

	/* JMP */
	BPF_JMP_BPF_JA_0:
		insn += insn->off;
		CONT;
	BPF_JMP_BPF_JEQ_BPF_X:
		if (A == X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JEQ_BPF_K:
		if (A == K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JNE_BPF_X:
		if (A != X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JNE_BPF_K:
		if (A != K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JGT_BPF_X:
		if (A > X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JGT_BPF_K:
		if (A > K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JGE_BPF_X:
		if (A >= X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JGE_BPF_K:
		if (A >= K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSGT_BPF_X:
		if (((s64) A) > ((s64) X)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSGT_BPF_K:
		if (((s64) A) > ((s64) K)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSGE_BPF_X:
		if (((s64) A) >= ((s64) X)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSGE_BPF_K:
		if (((s64) A) >= ((s64) K)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSET_BPF_X:
		if (A & X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_JSET_BPF_K:
		if (A & K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	BPF_JMP_BPF_EXIT_0:
		return R0;

	/* STX and ST and LDX */
#define LDST(SIZEOP, SIZE)					\
	BPF_STX_BPF_MEM_##SIZEOP:				\
		*(SIZE *)(unsigned long) (A + insn->off) = X;	\
		CONT;						\
	BPF_ST_BPF_MEM_##SIZEOP:				\
		*(SIZE *)(unsigned long) (A + insn->off) = K;	\
		CONT;						\
	BPF_LDX_BPF_MEM_##SIZEOP:				\
		A = *(SIZE *)(unsigned long) (X + insn->off);	\
		CONT;

	LDST(BPF_B, u8)
	LDST(BPF_H, u16)
	LDST(BPF_W, u32)
	LDST(BPF_DW, u64)
#undef LDST
	BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
		atomic_add((u32) X, (atomic_t *)(unsigned long)
			   (A + insn->off));
		CONT;
	BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
		atomic64_add((u64) X, (atomic64_t *)(unsigned long)
			     (A + insn->off));
		CONT;
	BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
		off = K;
load_word:
		/* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns only appear
		 * in programs where ctx == skb. All programs keep 'ctx'
		 * in regs[CTX_REG] == R6, sk_convert_filter() saves it
		 * in R6, internal BPF verifier will check that R6 == ctx.
		 *
		 * BPF_ABS and BPF_IND are wrappers of function calls, so
		 * they scratch R1-R5 registers, preserve R6-R9, and store
		 * return value into R0.
		 *
		 * Implicit input:
		 *   ctx
		 *
		 * Explicit input:
		 *   X == any register
		 *   K == 32-bit immediate
		 *
		 * Output:
		 *   R0 - 8/16/32-bit skb data converted to cpu endianness
		 */
		ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
		if (likely(ptr != NULL)) {
			R0 = get_unaligned_be32(ptr);
			CONT;
		}
		return 0;
	BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
		off = K;
load_half:
		ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
		if (likely(ptr != NULL)) {
			R0 = get_unaligned_be16(ptr);
			CONT;
		}
		return 0;
	BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
		off = K;
load_byte:
		ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
		if (likely(ptr != NULL)) {
			R0 = *(u8 *)ptr;
			CONT;
		}
		return 0;
	BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
		off = K + X;
		goto load_word;
	BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
		off = K + X;
		goto load_half;
	BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
		off = K + X;
		goto load_byte;

	default_label:
		/* If we ever reach this, we have a bug somewhere. */
		WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
		return 0;
#undef CONT_JMP
#undef CONT

#undef R0
#undef X
#undef A
#undef K
}

u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
			      const struct sock_filter_int *insni)
    __attribute__ ((alias ("__sk_run_filter")));

u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
			  const struct sock_filter_int *insni)
    __attribute__ ((alias ("__sk_run_filter")));
EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
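
/* The two aliases above give the shared interpreter body a type-correct
 * entry point per context (struct seccomp_data vs. struct sk_buff). For
 * non-JITed socket filters, SK_RUN_FILTER() ends up here via fp->bpf_func,
 * which __sk_migrate_filter() below points at sk_run_filter_int_skb.
 */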

/* Helper to find the offset of pkt_type in the sk_buff structure. We want
 * to make sure it's still a 3-bit field starting at a byte boundary;
 * taken from arch/x86/net/bpf_jit_comp.c.
 */
#define PKT_TYPE_MAX 7
static unsigned int pkt_type_offset(void)
{
	struct sk_buff skb_probe = { .pkt_type = ~0, };
	u8 *ct = (u8 *) &skb_probe;
	unsigned int off;

	for (off = 0; off < sizeof(struct sk_buff); off++) {
		if (ct[off] == PKT_TYPE_MAX)
			return off;
	}

	pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
	return -1;
}

static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(long) ctx;

	return __skb_get_poff(skb);
}

static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (A > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (A > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[A];
	if (nla->nla_len > skb->len - A)
		return 0;

	nla = nla_find_nested(nla, X);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
{
	return raw_smp_processor_id();
}

/* note that this only generates 32-bit random numbers */
static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
{
	return (u64)prandom_u32();
}

/* Register mappings for user programs. */
#define A_REG		0
#define X_REG		7
#define TMP_REG		8
#define ARG2_REG	2
#define ARG3_REG	3

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct sock_filter_int **insnp)
{
	struct sock_filter_int *insn = *insnp;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		insn->code = BPF_LDX | BPF_MEM | BPF_H;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, protocol);
		insn++;

		/* A = ntohs(A) [emitting a nop or swap16] */
		insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
		insn->a_reg = A_REG;
		insn->imm = 16;
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		insn->code = BPF_LDX | BPF_MEM | BPF_B;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = pkt_type_offset();
		if (insn->off < 0)
			return false;
		insn++;

		insn->code = BPF_ALU | BPF_AND | BPF_K;
		insn->a_reg = A_REG;
		insn->imm = PKT_TYPE_MAX;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
			insn->code = BPF_LDX | BPF_MEM | BPF_DW;
		else
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
		insn->a_reg = TMP_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, dev);
		insn++;

		insn->code = BPF_JMP | BPF_JNE | BPF_K;
		insn->a_reg = TMP_REG;
		insn->imm = 0;
		insn->off = 1;
		insn++;

		insn->code = BPF_JMP | BPF_EXIT;
		insn++;

		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);

		insn->a_reg = A_REG;
		insn->x_reg = TMP_REG;

		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->off = offsetof(struct net_device, ifindex);
		} else {
			insn->code = BPF_LDX | BPF_MEM | BPF_H;
			insn->off = offsetof(struct net_device, type);
		}
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		insn->code = BPF_LDX | BPF_MEM | BPF_W;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, mark);
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		insn->code = BPF_LDX | BPF_MEM | BPF_W;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, hash);
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		insn->code = BPF_LDX | BPF_MEM | BPF_H;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, queue_mapping);
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);

		insn->code = BPF_LDX | BPF_MEM | BPF_H;
		insn->a_reg = A_REG;
		insn->x_reg = CTX_REG;
		insn->off = offsetof(struct sk_buff, vlan_tci);
		insn++;

		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
			insn->code = BPF_ALU | BPF_AND | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = ~VLAN_TAG_PRESENT;
		} else {
			insn->code = BPF_ALU | BPF_RSH | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = 12;
			insn++;

			insn->code = BPF_ALU | BPF_AND | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = 1;
		}
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = ctx */
		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
		insn->a_reg = ARG1_REG;
		insn->x_reg = CTX_REG;
		insn++;

		/* arg2 = A */
		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
		insn->a_reg = ARG2_REG;
		insn->x_reg = A_REG;
		insn++;

		/* arg3 = X */
		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
		insn->a_reg = ARG3_REG;
		insn->x_reg = X_REG;
		insn++;

		/* Emit call(ctx, arg2=A, arg3=X) */
		insn->code = BPF_JMP | BPF_CALL;
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			insn->imm = __skb_get_pay_offset - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			insn->imm = __skb_get_nlattr - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			insn->imm = __get_raw_cpu_id - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			insn->imm = __get_random_u32 - __bpf_call_base;
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		insn->code = BPF_ALU | BPF_XOR | BPF_X;
		insn->a_reg = A_REG;
		insn->x_reg = X_REG;
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}

/**
 *	sk_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: buffer where converted program will be stored
 *	@new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style BPF instruction set to 'sock_filter_int' style.
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   sk_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *   new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len);
 *   sk_convert_filter(old_prog, old_len, new_prog, &new_len);
 *
 * User BPF's register A is mapped to our BPF register 6, user BPF
 * register X is mapped to BPF register 7; frame pointer is always
 * register 10; Context 'void *ctx' is stored in register 1, that is,
 * for socket filters: ctx == 'struct sk_buff *', for seccomp:
 * ctx == 'struct seccomp_data *'.
 */
int sk_convert_filter(struct sock_filter *prog, int len,
		      struct sock_filter_int *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i;
	struct sock_filter_int *new_insn;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);

	if (len <= 0 || len >= BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = new_prog;
	fp = prog;

	if (new_insn) {
		new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
		new_insn->a_reg = CTX_REG;
		new_insn->x_reg = ARG1_REG;
	}
	new_insn++;

	for (i = 0; i < len; fp++, i++) {
		struct sock_filter_int tmp_insns[6] = { };
		struct sock_filter_int *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - new_prog;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			insn->code = fp->code;
			insn->a_reg = A_REG;
			insn->x_reg = X_REG;
			insn->imm = fp->k;
			break;

		/* Jump opcodes map as-is, but offsets need adjustment. */
		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
#define EMIT_JMP							\
	do {								\
		if (target >= len || target < 0)			\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)

			EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				insn->code = BPF_ALU | BPF_MOV | BPF_K;
				insn->a_reg = TMP_REG;
				insn->imm = fp->k;
				insn++;

				insn->a_reg = A_REG;
				insn->x_reg = TMP_REG;
				bpf_src = BPF_X;
			} else {
				insn->a_reg = A_REG;
				insn->x_reg = X_REG;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				EMIT_JMP;
				break;
			}

			/* Convert JEQ into JNE when 'jump_true' is next insn. */
			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
				insn->code = BPF_JMP | BPF_JNE | bpf_src;
				target = i + fp->jf + 1;
				EMIT_JMP;
				break;
			}

			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = TMP_REG;
			insn->x_reg = A_REG;
			insn++;

			insn->code = BPF_LD | BPF_ABS | BPF_B;
			insn->a_reg = A_REG;
			insn->imm = fp->k;
			insn++;

			insn->code = BPF_ALU | BPF_AND | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = 0xf;
			insn++;

			insn->code = BPF_ALU | BPF_LSH | BPF_K;
			insn->a_reg = A_REG;
			insn->imm = 2;
			insn++;

			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = X_REG;
			insn->x_reg = A_REG;
			insn++;

			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = A_REG;
			insn->x_reg = TMP_REG;
			break;

		/* RET_K, RET_A are remapped into 2 insns. */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			insn->code = BPF_ALU | BPF_MOV |
				     (BPF_RVAL(fp->code) == BPF_K ?
				      BPF_K : BPF_X);
			insn->a_reg = 0;
			insn->x_reg = A_REG;
			insn->imm = fp->k;
			insn++;

			insn->code = BPF_JMP | BPF_EXIT;
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			insn->code = BPF_STX | BPF_MEM | BPF_W;
			insn->a_reg = FP_REG;
			insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
				      A_REG : X_REG;
			insn->x_reg = FP_REG;
			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			insn->code = BPF_ALU | BPF_MOV | BPF_K;
			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
				      A_REG : X_REG;
			insn->imm = fp->k;
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = X_REG;
			insn->x_reg = A_REG;
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
			insn->a_reg = A_REG;
			insn->x_reg = X_REG;
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
				      A_REG : X_REG;
			insn->x_reg = CTX_REG;
			insn->off = offsetof(struct sk_buff, len);
			break;

		/* access seccomp_data fields */
		case BPF_LDX | BPF_ABS | BPF_W:
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->a_reg = A_REG;
			insn->x_reg = CTX_REG;
			insn->off = fp->k;
			break;

		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));

		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - new_prog;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - new_prog) {
		new_flen = new_insn - new_prog;
		if (pass > 2)
			goto err;

		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}
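
/* Typical calling sequence (this is what __sk_migrate_filter() below does):
 * a first sk_convert_filter() call with new_prog == NULL only computes
 * new_len; the caller then allocates room for new_len instructions and
 * calls it a second time to emit the converted program.
 */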

/* Security:
 *
 * A BPF program is able to use 16 cells of memory to store intermediate
 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
 *
 * As we don't want to clear the mem[] array for each packet going through
 * sk_run_filter(), we check that a filter loaded by the user never tries to
 * read a cell if it was not previously written, and we check all branches
 * to be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);
	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;
	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_S_ST:
		case BPF_S_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_S_JMP_JA:
			/* a jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* a jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}
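
/* For example, a two-insn filter "BPF_LD|BPF_MEM mem[3]; BPF_RET|BPF_A" is
 * rejected here with -EINVAL, because mem[3] is read before any BPF_ST or
 * BPF_STX wrote it on that path.
 */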

/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
	/*
	 * Valid instructions are initialized to non-0.
	 * Invalid instructions are initialized to 0.
	 */
	static const u8 codes[] = {
		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
		[BPF_ALU|BPF_MOD|BPF_K]  = BPF_S_ALU_MOD_K,
		[BPF_ALU|BPF_MOD|BPF_X]  = BPF_S_ALU_MOD_X,
		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
		[BPF_ALU|BPF_XOR|BPF_K]  = BPF_S_ALU_XOR_K,
		[BPF_ALU|BPF_XOR|BPF_X]  = BPF_S_ALU_XOR_X,
		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
		[BPF_RET|BPF_K]          = BPF_S_RET_K,
		[BPF_RET|BPF_A]          = BPF_S_RET_A,
		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
		[BPF_ST]                 = BPF_S_ST,
		[BPF_STX]                = BPF_S_STX,
		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
	};
	int pc;
	bool anc_found;

	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;

		if (code >= ARRAY_SIZE(codes))
			return -EINVAL;
		code = codes[code];
		if (!code)
			return -EINVAL;
		/* Some instructions need special checks */
		switch (code) {
		case BPF_S_ALU_DIV_K:
		case BPF_S_ALU_MOD_K:
			/* check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
		case BPF_S_ST:
		case BPF_S_STX:
			/* check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_S_JMP_JA:
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen-pc-1))
				return -EINVAL;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* for conditionals both must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_S_LD_W_ABS:
		case BPF_S_LD_H_ABS:
		case BPF_S_LD_B_ABS:
			anc_found = false;
#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
				code = BPF_S_ANC_##CODE;	\
				anc_found = true;		\
				break
			switch (ftest->k) {
			ANCILLARY(PROTOCOL);
			ANCILLARY(PKTTYPE);
			ANCILLARY(IFINDEX);
			ANCILLARY(NLATTR);
			ANCILLARY(NLATTR_NEST);
			ANCILLARY(MARK);
			ANCILLARY(QUEUE);
			ANCILLARY(HATYPE);
			ANCILLARY(RXHASH);
			ANCILLARY(CPU);
			ANCILLARY(ALU_XOR_X);
			ANCILLARY(VLAN_TAG);
			ANCILLARY(VLAN_TAG_PRESENT);
			ANCILLARY(PAY_OFFSET);
			ANCILLARY(RANDOM);
			}

			/* ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
		ftest->code = code;
	}

	/* last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_S_RET_K:
	case BPF_S_RET_A:
		return check_load_and_stores(filter, flen);
	}
	return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);

static int sk_store_orig_filter(struct sk_filter *fp,
				const struct sock_fprog *fprog)
{
	unsigned int fsize = sk_filter_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;
	fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void sk_release_orig_filter(struct sk_filter *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	sk_release_orig_filter(fp);
	bpf_jit_free(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (atomic_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	atomic_inc(&fp->refcnt);
	atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
}

static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
					      struct sock *sk,
					      unsigned int len)
{
	struct sk_filter *fp_new;

	if (sk == NULL)
		return krealloc(fp, len, GFP_KERNEL);

	fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
	if (fp_new) {
		memcpy(fp_new, fp, sizeof(struct sk_filter));
		/* As we're keeping orig_prog in fp_new, we need
		 * to make sure we're not evicting it from the
		 * old fp.
		 */
		fp->orig_prog = NULL;
		sk_filter_uncharge(sk, fp);
	}

	return fp_new;
}

static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
					     struct sock *sk)
{
	struct sock_filter *old_prog;
	struct sk_filter *old_fp;
	int i, err, new_len, old_len = fp->len;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct sock_filter_int));

	/* For now, we need to unfiddle BPF_S_* identifiers in place.
	 * This can sooner or later on be subject to removal, e.g. when
	 * JITs have been converted.
	 */
	for (i = 0; i < fp->len; i++)
		sk_decode_filter(&fp->insns[i], &fp->insns[i]);

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = sk_convert_filter(old_prog, old_len, NULL, &new_len);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len));
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->bpf_func = sk_run_filter_int_skb;
	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
	err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
	if (err)
		/* 2nd sk_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by __sk_migrate_realloc().
		 */
		goto out_err_free;

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	/* Rollback filter setup. */
	if (sk != NULL)
		sk_filter_uncharge(sk, fp);
	else
		kfree(fp);
	return ERR_PTR(err);
}

static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
					     struct sock *sk)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = sk_chk_filter(fp->insns, fp->len);
	if (err)
		return ERR_PTR(err);

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = __sk_migrate_filter(fp, sk);

	return fp;
}

/**
 *	sk_unattached_filter_create - create an unattached filter
 *	@fprog: the filter program
 *	@pfp: the unattached filter that is created
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int sk_unattached_filter_create(struct sk_filter **pfp,
				struct sock_fprog *fprog)
{
	unsigned int fsize = sk_filter_proglen(fprog);
	struct sk_filter *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* __sk_prepare_filter() already takes care of uncharging
	 * memory in case something goes wrong.
	 */
	fp = __sk_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_create);

void sk_unattached_filter_destroy(struct sk_filter *fp)
{
	sk_filter_release(fp);
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
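
/* Minimal in-kernel usage sketch (error handling elided; 'insns', 'len' and
 * 'skb' are hypothetical caller-provided values):
 *
 *	struct sock_fprog fprog = { .len = len, .filter = insns };
 *	struct sk_filter *fp;
 *
 *	if (sk_unattached_filter_create(&fp, &fprog) == 0) {
 *		unsigned int res = SK_RUN_FILTER(fp, skb);
 *		...
 *		sk_unattached_filter_destroy(fp);
 *	}
 */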

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;
	unsigned int fsize = sk_filter_proglen(fprog);
	unsigned int sk_fsize = sk_filter_size(fprog->len);
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	/* Make sure the new filter is there and of a valid size. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		sock_kfree_s(sk, fp, sk_fsize);
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = sk_store_orig_filter(fp, fprog);
	if (err) {
		sk_filter_uncharge(sk, fp);
		return -ENOMEM;
	}

	/* __sk_prepare_filter() already takes care of uncharging
	 * memory in case something goes wrong.
	 */
	fp = __sk_prepare_filter(fp, sk);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
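
/* Illustrative sketch only: from user space this path is normally reached
 * through setsockopt(SO_ATTACH_FILTER) on an open socket. The example
 * program below, which accepts IPv4 frames on a packet socket and drops
 * everything else, and the socket descriptor fd are assumptions made up
 * for the sketch.
 *
 *	struct sock_filter code[] = {
 *		BPF_STMT(BPF_LD  | BPF_H   | BPF_ABS, 12),
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *		BPF_STMT(BPF_RET | BPF_K, 0),
 *	};
 *	struct sock_fprog prog = {
 *		.len	= sizeof(code) / sizeof(code[0]),
 *		.filter	= code,
 *	};
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 *	...
 *	setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, NULL, 0);
 *
 * The final setsockopt() call ends up in sk_detach_filter() below.
 */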

int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);

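/**
 *	sk_decode_filter - translate an instruction back to its user-visible form
 *	@filt: source instruction in the kernel-internal BPF_S_* encoding
 *	@to: destination instruction in the original BPF_* encoding
 *
 * Copies the jump offsets and the constant unchanged and maps the opcode
 * back through the decode table below; all ancillary loads (BPF_S_ANC_*)
 * are reported as a plain BPF_LD|BPF_B|BPF_ABS.
 */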
void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
{
	static const u16 decodes[] = {
		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_RANDOM]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
		[BPF_S_RET_K]		= BPF_RET|BPF_K,
		[BPF_S_RET_A]		= BPF_RET|BPF_A,
		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
		[BPF_S_ST]		= BPF_ST,
		[BPF_S_STX]		= BPF_STX,
		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
	};
	u16 code;

	code = filt->code;

	to->code = decodes[code];
	to->jt = filt->jt;
	to->jf = filt->jf;
	to->k = filt->k;
}

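/**
 *	sk_get_filter - report the originally attached filter to user space
 *	@sk: the socket the filter is attached to
 *	@ubuf: user space buffer to copy the filter blocks into
 *	@len: number of filter blocks the buffer can hold
 *
 * Returns the number of blocks of the originally attached program, zero
 * if no filter is attached, or a negative errno code. A @len of zero
 * only queries the block count so that callers can size their buffer
 * for a second call.
 */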
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
		  unsigned int len)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	int ret = 0;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (!filter)
		goto out;

	/* We're copying the filter that was originally attached,
	 * so no conversion or decoding is needed anymore.
	 */
	fprog = filter->orig_prog;

	ret = fprog->len;
	if (!len)
		/* User space only asks for the number of filter blocks. */
		goto out;

	ret = -EINVAL;
	if (len < fprog->len)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog)))
		goto out;

	/* The API expects the number of filter blocks to be
	 * returned here, not the number of bytes.
	 */
	ret = fprog->len;
out:
	release_sock(sk);
	return ret;
}
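
/* Illustrative sketch only (assumptions: a socket fd with a filter already
 * attached; the actual getsockopt() plumbing lives in sock_getsockopt(),
 * not in this file): user space typically retrieves a filter in two steps,
 * first asking only for the block count and then for the program itself.
 * Note that optlen is counted in filter blocks here, not in bytes.
 *
 *	struct sock_filter buf[BPF_MAXINSNS];
 *	socklen_t optlen = 0;
 *
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &optlen);
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, buf, &optlen);
 */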