blob: d4ce2dc712e34b7b1cb974c5e938313f58e9a8aa [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux Socket Filter - Kernel level socket filtering
3 *
4 * Author:
5 * Jay Schulist <jschlst@samba.org>
6 *
7 * Based on the design of:
8 * - The Berkeley Packet Filter
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Andi Kleen - Fix a few bad bugs and races.
Kris Katterjohn93699862006-01-04 13:58:36 -080016 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
18
19#include <linux/module.h>
20#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/mm.h>
22#include <linux/fcntl.h>
23#include <linux/socket.h>
24#include <linux/in.h>
25#include <linux/inet.h>
26#include <linux/netdevice.h>
27#include <linux/if_packet.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090028#include <linux/gfp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <net/ip.h>
30#include <net/protocol.h>
Patrick McHardy4738c1d2008-04-10 02:02:28 -070031#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/skbuff.h>
33#include <net/sock.h>
34#include <linux/errno.h>
35#include <linux/timer.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <asm/uaccess.h>
Dmitry Mishin40daafc2006-04-18 14:50:10 -070037#include <asm/unaligned.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/filter.h>
Eric Dumazetc26aed42010-11-18 22:04:46 +000039#include <linux/reciprocal_div.h>
David S. Miller86e4ca62011-05-26 15:00:31 -040040#include <linux/ratelimit.h>
Will Drewry46b325c2012-04-12 16:47:52 -050041#include <linux/seccomp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042
Jan Seiffertf03fb3f2012-03-30 05:08:19 +000043/* No hurry in this branch
44 *
45 * Exported for the bpf jit load helper.
46 */
47void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
Linus Torvalds1da177e2005-04-16 15:20:36 -070048{
49 u8 *ptr = NULL;
50
51 if (k >= SKF_NET_OFF)
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -070052 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
Linus Torvalds1da177e2005-04-16 15:20:36 -070053 else if (k >= SKF_LL_OFF)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -070054 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
Linus Torvalds1da177e2005-04-16 15:20:36 -070055
Eric Dumazet4bc65dd2010-12-07 22:26:15 +000056 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -070057 return ptr;
58 return NULL;
59}
60
/*
 * Return a pointer to @size bytes of packet data at offset @k, or NULL.
 * Negative offsets go through the header-relative slow path; non-negative
 * offsets are handed to skb_header_pointer() together with the caller's
 * scratch @buffer.
 */
static inline void *load_pointer(const struct sk_buff *skb, int k,
				 unsigned int size, void *buffer)
{
	if (k < 0)
		return bpf_internal_load_pointer_neg_helper(skb, k, size);
	return skb_header_pointer(skb, k, size, buffer);
}
68
Linus Torvalds1da177e2005-04-16 15:20:36 -070069/**
Stephen Hemminger43db6d62008-04-10 01:43:09 -070070 * sk_filter - run a packet through a socket filter
71 * @sk: sock associated with &sk_buff
72 * @skb: buffer to filter
Stephen Hemminger43db6d62008-04-10 01:43:09 -070073 *
74 * Run the filter code and then cut skb->data to correct size returned by
75 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
76 * than pkt_len we keep whole skb->data. This is the socket level
77 * wrapper to sk_run_filter. It returns 0 if the packet should
78 * be accepted or -EPERM if the packet should be tossed.
79 *
80 */
81int sk_filter(struct sock *sk, struct sk_buff *skb)
82{
83 int err;
84 struct sk_filter *filter;
85
86 err = security_sock_rcv_skb(sk, skb);
87 if (err)
88 return err;
89
Eric Dumazet80f8f102011-01-18 07:46:52 +000090 rcu_read_lock();
91 filter = rcu_dereference(sk->sk_filter);
Stephen Hemminger43db6d62008-04-10 01:43:09 -070092 if (filter) {
Eric Dumazet0a148422011-04-20 09:27:32 +000093 unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
Eric Dumazet0d7da9d2010-10-25 03:47:05 +000094
Stephen Hemminger43db6d62008-04-10 01:43:09 -070095 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
96 }
Eric Dumazet80f8f102011-01-18 07:46:52 +000097 rcu_read_unlock();
Stephen Hemminger43db6d62008-04-10 01:43:09 -070098
99 return err;
100}
101EXPORT_SYMBOL(sk_filter);
102
103/**
Kris Katterjohn2966b662006-01-23 16:26:16 -0800104 * sk_run_filter - run a filter on a socket
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105 * @skb: buffer to run the filter on
Randy Dunlap697d0e32011-01-08 17:41:42 +0000106 * @fentry: filter to apply
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 *
108 * Decode and apply filter instructions to the skb->data.
Eric Dumazet93aaae22010-11-19 09:49:59 -0800109 * Return length to keep, 0 for none. @skb is the data we are
110 * filtering, @filter is the array of filter instructions.
111 * Because all jumps are guaranteed to be before last instruction,
112 * and last instruction guaranteed to be a RET, we dont need to check
113 * flen. (We used to pass to this function the length of filter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114 */
Eric Dumazet62ab0812010-12-06 20:50:09 +0000115unsigned int sk_run_filter(const struct sk_buff *skb,
116 const struct sock_filter *fentry)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117{
Patrick McHardy0b05b2a2005-07-05 14:10:21 -0700118 void *ptr;
Kris Katterjohn2966b662006-01-23 16:26:16 -0800119 u32 A = 0; /* Accumulator */
120 u32 X = 0; /* Index Register */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
Patrick McHardy0b05b2a2005-07-05 14:10:21 -0700122 u32 tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123 int k;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124
125 /*
126 * Process array of filter instructions.
127 */
Eric Dumazet93aaae22010-11-19 09:49:59 -0800128 for (;; fentry++) {
129#if defined(CONFIG_X86_32)
130#define K (fentry->k)
131#else
132 const u32 K = fentry->k;
133#endif
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 switch (fentry->code) {
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000136 case BPF_S_ALU_ADD_X:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 A += X;
138 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000139 case BPF_S_ALU_ADD_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800140 A += K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000142 case BPF_S_ALU_SUB_X:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 A -= X;
144 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000145 case BPF_S_ALU_SUB_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800146 A -= K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000148 case BPF_S_ALU_MUL_X:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149 A *= X;
150 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000151 case BPF_S_ALU_MUL_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800152 A *= K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000154 case BPF_S_ALU_DIV_X:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 if (X == 0)
156 return 0;
157 A /= X;
158 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000159 case BPF_S_ALU_DIV_K:
Eric Dumazetc26aed42010-11-18 22:04:46 +0000160 A = reciprocal_divide(A, K);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000162 case BPF_S_ALU_AND_X:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 A &= X;
164 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000165 case BPF_S_ALU_AND_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800166 A &= K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000168 case BPF_S_ALU_OR_X:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 A |= X;
170 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000171 case BPF_S_ALU_OR_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800172 A |= K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000174 case BPF_S_ALU_LSH_X:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 A <<= X;
176 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000177 case BPF_S_ALU_LSH_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800178 A <<= K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000180 case BPF_S_ALU_RSH_X:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 A >>= X;
182 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000183 case BPF_S_ALU_RSH_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800184 A >>= K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000186 case BPF_S_ALU_NEG:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 A = -A;
188 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000189 case BPF_S_JMP_JA:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800190 fentry += K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000192 case BPF_S_JMP_JGT_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800193 fentry += (A > K) ? fentry->jt : fentry->jf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000195 case BPF_S_JMP_JGE_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800196 fentry += (A >= K) ? fentry->jt : fentry->jf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000198 case BPF_S_JMP_JEQ_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800199 fentry += (A == K) ? fentry->jt : fentry->jf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000201 case BPF_S_JMP_JSET_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800202 fentry += (A & K) ? fentry->jt : fentry->jf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000204 case BPF_S_JMP_JGT_X:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800205 fentry += (A > X) ? fentry->jt : fentry->jf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000207 case BPF_S_JMP_JGE_X:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800208 fentry += (A >= X) ? fentry->jt : fentry->jf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000210 case BPF_S_JMP_JEQ_X:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800211 fentry += (A == X) ? fentry->jt : fentry->jf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000213 case BPF_S_JMP_JSET_X:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800214 fentry += (A & X) ? fentry->jt : fentry->jf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000216 case BPF_S_LD_W_ABS:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800217 k = K;
Kris Katterjohne35bedf2006-01-17 02:25:52 -0800218load_w:
Patrick McHardy0b05b2a2005-07-05 14:10:21 -0700219 ptr = load_pointer(skb, k, 4, &tmp);
220 if (ptr != NULL) {
Harvey Harrisond3e2ce32008-05-02 16:26:16 -0700221 A = get_unaligned_be32(ptr);
Patrick McHardy0b05b2a2005-07-05 14:10:21 -0700222 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 }
Eric Dumazet12b16da2010-12-15 19:45:28 +0000224 return 0;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000225 case BPF_S_LD_H_ABS:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800226 k = K;
Kris Katterjohne35bedf2006-01-17 02:25:52 -0800227load_h:
Patrick McHardy0b05b2a2005-07-05 14:10:21 -0700228 ptr = load_pointer(skb, k, 2, &tmp);
229 if (ptr != NULL) {
Harvey Harrisond3e2ce32008-05-02 16:26:16 -0700230 A = get_unaligned_be16(ptr);
Patrick McHardy0b05b2a2005-07-05 14:10:21 -0700231 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232 }
Eric Dumazet12b16da2010-12-15 19:45:28 +0000233 return 0;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000234 case BPF_S_LD_B_ABS:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800235 k = K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236load_b:
Patrick McHardy0b05b2a2005-07-05 14:10:21 -0700237 ptr = load_pointer(skb, k, 1, &tmp);
238 if (ptr != NULL) {
239 A = *(u8 *)ptr;
240 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 }
Eric Dumazet12b16da2010-12-15 19:45:28 +0000242 return 0;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000243 case BPF_S_LD_W_LEN:
Patrick McHardy3154e542005-07-05 14:10:40 -0700244 A = skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000246 case BPF_S_LDX_W_LEN:
Patrick McHardy3154e542005-07-05 14:10:40 -0700247 X = skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000249 case BPF_S_LD_W_IND:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800250 k = X + K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 goto load_w;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000252 case BPF_S_LD_H_IND:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800253 k = X + K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 goto load_h;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000255 case BPF_S_LD_B_IND:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800256 k = X + K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 goto load_b;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000258 case BPF_S_LDX_B_MSH:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800259 ptr = load_pointer(skb, K, 1, &tmp);
Patrick McHardy0b05b2a2005-07-05 14:10:21 -0700260 if (ptr != NULL) {
261 X = (*(u8 *)ptr & 0xf) << 2;
262 continue;
263 }
264 return 0;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000265 case BPF_S_LD_IMM:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800266 A = K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000268 case BPF_S_LDX_IMM:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800269 X = K;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000271 case BPF_S_LD_MEM:
Eric Dumazet2d5311e2010-12-01 20:46:24 +0000272 A = mem[K];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000274 case BPF_S_LDX_MEM:
Eric Dumazet2d5311e2010-12-01 20:46:24 +0000275 X = mem[K];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000277 case BPF_S_MISC_TAX:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 X = A;
279 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000280 case BPF_S_MISC_TXA:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 A = X;
282 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000283 case BPF_S_RET_K:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800284 return K;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000285 case BPF_S_RET_A:
Kris Katterjohn4bad4dc2006-01-06 13:08:20 -0800286 return A;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000287 case BPF_S_ST:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800288 mem[K] = A;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 continue;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000290 case BPF_S_STX:
Eric Dumazet93aaae22010-11-19 09:49:59 -0800291 mem[K] = X;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 continue;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000293 case BPF_S_ANC_PROTOCOL:
Al Viro252e3342006-11-14 20:48:11 -0800294 A = ntohs(skb->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 continue;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000296 case BPF_S_ANC_PKTTYPE:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 A = skb->pkt_type;
298 continue;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000299 case BPF_S_ANC_IFINDEX:
Paul LeoNerd Evans40eaf962010-04-22 03:32:22 +0000300 if (!skb->dev)
301 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 A = skb->dev->ifindex;
303 continue;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000304 case BPF_S_ANC_MARK:
jamal7e75f932009-10-19 02:17:56 +0000305 A = skb->mark;
306 continue;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000307 case BPF_S_ANC_QUEUE:
Eric Dumazetd19742f2009-10-20 01:06:22 -0700308 A = skb->queue_mapping;
309 continue;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000310 case BPF_S_ANC_HATYPE:
Paul LeoNerd Evans40eaf962010-04-22 03:32:22 +0000311 if (!skb->dev)
312 return 0;
313 A = skb->dev->type;
314 continue;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000315 case BPF_S_ANC_RXHASH:
Eric Dumazetda2033c2010-11-30 21:45:56 +0000316 A = skb->rxhash;
317 continue;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000318 case BPF_S_ANC_CPU:
Eric Dumazetda2033c2010-11-30 21:45:56 +0000319 A = raw_smp_processor_id();
320 continue;
Jiri Pirkoffe06c12012-03-31 11:01:20 +0000321 case BPF_S_ANC_ALU_XOR_X:
322 A ^= X;
323 continue;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000324 case BPF_S_ANC_NLATTR: {
Patrick McHardy4738c1d2008-04-10 02:02:28 -0700325 struct nlattr *nla;
326
327 if (skb_is_nonlinear(skb))
328 return 0;
329 if (A > skb->len - sizeof(struct nlattr))
330 return 0;
331
332 nla = nla_find((struct nlattr *)&skb->data[A],
333 skb->len - A, X);
334 if (nla)
335 A = (void *)nla - (void *)skb->data;
336 else
337 A = 0;
338 continue;
339 }
Eric Dumazet12b16da2010-12-15 19:45:28 +0000340 case BPF_S_ANC_NLATTR_NEST: {
Pablo Neira Ayusod214c752008-11-20 00:49:27 -0800341 struct nlattr *nla;
342
343 if (skb_is_nonlinear(skb))
344 return 0;
345 if (A > skb->len - sizeof(struct nlattr))
346 return 0;
347
348 nla = (struct nlattr *)&skb->data[A];
349 if (nla->nla_len > A - skb->len)
350 return 0;
351
352 nla = nla_find_nested(nla, X);
353 if (nla)
354 A = (void *)nla - (void *)skb->data;
355 else
356 A = 0;
357 continue;
358 }
Will Drewry46b325c2012-04-12 16:47:52 -0500359#ifdef CONFIG_SECCOMP_FILTER
360 case BPF_S_ANC_SECCOMP_LD_W:
361 A = seccomp_bpf_load(fentry->k);
362 continue;
363#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 default:
Joe Perches6c4a5cb2011-05-21 07:48:40 +0000365 WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n",
366 fentry->code, fentry->jt,
367 fentry->jf, fentry->k);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 return 0;
369 }
370 }
371
372 return 0;
373}
Stephen Hemmingerb7156312008-04-10 01:33:47 -0700374EXPORT_SYMBOL(sk_run_filter);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375
Eric Dumazet2d5311e2010-12-01 20:46:24 +0000376/*
377 * Security :
378 * A BPF program is able to use 16 cells of memory to store intermediate
379 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
380 * As we dont want to clear mem[] array for each packet going through
381 * sk_run_filter(), we check that filter loaded by user never try to read
382 * a cell if not previously written, and we check all branches to be sure
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300383 * a malicious user doesn't try to abuse us.
Eric Dumazet2d5311e2010-12-01 20:46:24 +0000384 */
385static int check_load_and_stores(struct sock_filter *filter, int flen)
386{
387 u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
388 int pc, ret = 0;
389
390 BUILD_BUG_ON(BPF_MEMWORDS > 16);
391 masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
392 if (!masks)
393 return -ENOMEM;
394 memset(masks, 0xff, flen * sizeof(*masks));
395
396 for (pc = 0; pc < flen; pc++) {
397 memvalid &= masks[pc];
398
399 switch (filter[pc].code) {
400 case BPF_S_ST:
401 case BPF_S_STX:
402 memvalid |= (1 << filter[pc].k);
403 break;
404 case BPF_S_LD_MEM:
405 case BPF_S_LDX_MEM:
406 if (!(memvalid & (1 << filter[pc].k))) {
407 ret = -EINVAL;
408 goto error;
409 }
410 break;
411 case BPF_S_JMP_JA:
412 /* a jump must set masks on target */
413 masks[pc + 1 + filter[pc].k] &= memvalid;
414 memvalid = ~0;
415 break;
416 case BPF_S_JMP_JEQ_K:
417 case BPF_S_JMP_JEQ_X:
418 case BPF_S_JMP_JGE_K:
419 case BPF_S_JMP_JGE_X:
420 case BPF_S_JMP_JGT_K:
421 case BPF_S_JMP_JGT_X:
422 case BPF_S_JMP_JSET_X:
423 case BPF_S_JMP_JSET_K:
424 /* a jump must set masks on targets */
425 masks[pc + 1 + filter[pc].jt] &= memvalid;
426 masks[pc + 1 + filter[pc].jf] &= memvalid;
427 memvalid = ~0;
428 break;
429 }
430 }
431error:
432 kfree(masks);
433 return ret;
434}
435
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436/**
437 * sk_chk_filter - verify socket filter code
438 * @filter: filter to verify
439 * @flen: length of filter
440 *
441 * Check the user's filter code. If we let some ugly
442 * filter code slip through kaboom! The filter must contain
Kris Katterjohn93699862006-01-04 13:58:36 -0800443 * no references or jumps that are out of range, no illegal
444 * instructions, and must end with a RET instruction.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445 *
Kris Katterjohn7b11f692006-01-13 14:33:06 -0800446 * All jumps are forward as they are not signed.
447 *
448 * Returns 0 if the rule set is legal or -EINVAL if not.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449 */
Dan Carpenter4f25af22011-10-17 21:04:20 +0000450int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451{
Tetsuo Handacba328f2010-11-16 15:19:51 +0000452 /*
453 * Valid instructions are initialized to non-0.
454 * Invalid instructions are initialized to 0.
455 */
456 static const u8 codes[] = {
Eric Dumazet8c1592d2010-11-18 21:56:38 +0000457 [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
458 [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
459 [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
460 [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
461 [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
462 [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
463 [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
464 [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
465 [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
466 [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
467 [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
468 [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
469 [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
470 [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
471 [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
472 [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
473 [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
474 [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
475 [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
476 [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
477 [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
478 [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
479 [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
480 [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
481 [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
482 [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
483 [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
484 [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
485 [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
486 [BPF_RET|BPF_K] = BPF_S_RET_K,
487 [BPF_RET|BPF_A] = BPF_S_RET_A,
488 [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
489 [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
490 [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
491 [BPF_ST] = BPF_S_ST,
492 [BPF_STX] = BPF_S_STX,
493 [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
494 [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
495 [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
496 [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
497 [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
498 [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
499 [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
500 [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
501 [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
Tetsuo Handacba328f2010-11-16 15:19:51 +0000502 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 int pc;
504
David S. Miller1b93ae642005-12-27 13:57:59 -0800505 if (flen == 0 || flen > BPF_MAXINSNS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 return -EINVAL;
507
508 /* check the filter code now */
509 for (pc = 0; pc < flen; pc++) {
Tetsuo Handacba328f2010-11-16 15:19:51 +0000510 struct sock_filter *ftest = &filter[pc];
511 u16 code = ftest->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512
Tetsuo Handacba328f2010-11-16 15:19:51 +0000513 if (code >= ARRAY_SIZE(codes))
514 return -EINVAL;
515 code = codes[code];
Eric Dumazet8c1592d2010-11-18 21:56:38 +0000516 if (!code)
Tetsuo Handacba328f2010-11-16 15:19:51 +0000517 return -EINVAL;
Kris Katterjohn93699862006-01-04 13:58:36 -0800518 /* Some instructions need special checks */
Tetsuo Handacba328f2010-11-16 15:19:51 +0000519 switch (code) {
520 case BPF_S_ALU_DIV_K:
Kris Katterjohn93699862006-01-04 13:58:36 -0800521 /* check for division by zero */
522 if (ftest->k == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523 return -EINVAL;
Eric Dumazetc26aed42010-11-18 22:04:46 +0000524 ftest->k = reciprocal_value(ftest->k);
Kris Katterjohn93699862006-01-04 13:58:36 -0800525 break;
Tetsuo Handacba328f2010-11-16 15:19:51 +0000526 case BPF_S_LD_MEM:
527 case BPF_S_LDX_MEM:
528 case BPF_S_ST:
529 case BPF_S_STX:
530 /* check for invalid memory addresses */
Kris Katterjohn93699862006-01-04 13:58:36 -0800531 if (ftest->k >= BPF_MEMWORDS)
532 return -EINVAL;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000533 break;
Tetsuo Handacba328f2010-11-16 15:19:51 +0000534 case BPF_S_JMP_JA:
Kris Katterjohn93699862006-01-04 13:58:36 -0800535 /*
536 * Note, the large ftest->k might cause loops.
537 * Compare this with conditional jumps below,
538 * where offsets are limited. --ANK (981016)
539 */
Eric Dumazet95c96172012-04-15 05:58:06 +0000540 if (ftest->k >= (unsigned int)(flen-pc-1))
Kris Katterjohn93699862006-01-04 13:58:36 -0800541 return -EINVAL;
542 break;
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000543 case BPF_S_JMP_JEQ_K:
544 case BPF_S_JMP_JEQ_X:
545 case BPF_S_JMP_JGE_K:
546 case BPF_S_JMP_JGE_X:
547 case BPF_S_JMP_JGT_K:
548 case BPF_S_JMP_JGT_X:
549 case BPF_S_JMP_JSET_X:
550 case BPF_S_JMP_JSET_K:
Tetsuo Handacba328f2010-11-16 15:19:51 +0000551 /* for conditionals both must be safe */
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000552 if (pc + ftest->jt + 1 >= flen ||
553 pc + ftest->jf + 1 >= flen)
554 return -EINVAL;
Tetsuo Handacba328f2010-11-16 15:19:51 +0000555 break;
Eric Dumazet12b16da2010-12-15 19:45:28 +0000556 case BPF_S_LD_W_ABS:
557 case BPF_S_LD_H_ABS:
558 case BPF_S_LD_B_ABS:
559#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
560 code = BPF_S_ANC_##CODE; \
561 break
562 switch (ftest->k) {
563 ANCILLARY(PROTOCOL);
564 ANCILLARY(PKTTYPE);
565 ANCILLARY(IFINDEX);
566 ANCILLARY(NLATTR);
567 ANCILLARY(NLATTR_NEST);
568 ANCILLARY(MARK);
569 ANCILLARY(QUEUE);
570 ANCILLARY(HATYPE);
571 ANCILLARY(RXHASH);
572 ANCILLARY(CPU);
Jiri Pirkoffe06c12012-03-31 11:01:20 +0000573 ANCILLARY(ALU_XOR_X);
Eric Dumazet12b16da2010-12-15 19:45:28 +0000574 }
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000575 }
Tetsuo Handacba328f2010-11-16 15:19:51 +0000576 ftest->code = code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 }
578
Hagen Paul Pfeifer01f2f3f2010-06-19 17:05:36 +0000579 /* last instruction must be a RET code */
580 switch (filter[flen - 1].code) {
581 case BPF_S_RET_K:
582 case BPF_S_RET_A:
Eric Dumazet2d5311e2010-12-01 20:46:24 +0000583 return check_load_and_stores(filter, flen);
Tetsuo Handacba328f2010-11-16 15:19:51 +0000584 }
585 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586}
Stephen Hemmingerb7156312008-04-10 01:33:47 -0700587EXPORT_SYMBOL(sk_chk_filter);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588
589/**
Eric Dumazet46bcf142010-12-06 09:29:43 -0800590 * sk_filter_release_rcu - Release a socket filter by rcu_head
Pavel Emelyanov47e958e2007-10-17 21:22:42 -0700591 * @rcu: rcu_head that contains the sk_filter to free
592 */
Eric Dumazet46bcf142010-12-06 09:29:43 -0800593void sk_filter_release_rcu(struct rcu_head *rcu)
Pavel Emelyanov47e958e2007-10-17 21:22:42 -0700594{
595 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
596
Eric Dumazet0a148422011-04-20 09:27:32 +0000597 bpf_jit_free(fp);
Eric Dumazet46bcf142010-12-06 09:29:43 -0800598 kfree(fp);
Pavel Emelyanov47e958e2007-10-17 21:22:42 -0700599}
Eric Dumazet46bcf142010-12-06 09:29:43 -0800600EXPORT_SYMBOL(sk_filter_release_rcu);
Pavel Emelyanov47e958e2007-10-17 21:22:42 -0700601
Jiri Pirko302d6632012-03-31 11:01:19 +0000602static int __sk_prepare_filter(struct sk_filter *fp)
603{
604 int err;
605
606 fp->bpf_func = sk_run_filter;
607
608 err = sk_chk_filter(fp->insns, fp->len);
609 if (err)
610 return err;
611
612 bpf_jit_compile(fp);
613 return 0;
614}
615
616/**
617 * sk_unattached_filter_create - create an unattached filter
618 * @fprog: the filter program
Randy Dunlapc6c4b972012-06-08 14:01:44 +0000619 * @pfp: the unattached filter that is created
Jiri Pirko302d6632012-03-31 11:01:19 +0000620 *
Randy Dunlapc6c4b972012-06-08 14:01:44 +0000621 * Create a filter independent of any socket. We first run some
Jiri Pirko302d6632012-03-31 11:01:19 +0000622 * sanity checks on it to make sure it does not explode on us later.
623 * If an error occurs or there is insufficient memory for the filter
624 * a negative errno code is returned. On success the return is zero.
625 */
626int sk_unattached_filter_create(struct sk_filter **pfp,
627 struct sock_fprog *fprog)
628{
629 struct sk_filter *fp;
630 unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
631 int err;
632
633 /* Make sure new filter is there and in the right amounts. */
634 if (fprog->filter == NULL)
635 return -EINVAL;
636
637 fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL);
638 if (!fp)
639 return -ENOMEM;
640 memcpy(fp->insns, fprog->filter, fsize);
641
642 atomic_set(&fp->refcnt, 1);
643 fp->len = fprog->len;
644
645 err = __sk_prepare_filter(fp);
646 if (err)
647 goto free_mem;
648
649 *pfp = fp;
650 return 0;
651free_mem:
652 kfree(fp);
653 return err;
654}
655EXPORT_SYMBOL_GPL(sk_unattached_filter_create);
656
/**
 *	sk_unattached_filter_destroy - dispose of an unattached filter
 *	@fp: the filter to release
 *
 * Counterpart of sk_unattached_filter_create(); simply passes @fp on to
 * sk_filter_release().
 */
void sk_unattached_filter_destroy(struct sk_filter *fp)
{
	sk_filter_release(fp);
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
662
Pavel Emelyanov47e958e2007-10-17 21:22:42 -0700663/**
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 * sk_attach_filter - attach a socket filter
665 * @fprog: the filter program
666 * @sk: the socket to use
667 *
668 * Attach the user's filter code. We first run some sanity checks on
669 * it to make sure it does not explode on us later. If an error
670 * occurs or there is insufficient memory for the filter a negative
671 * errno code is returned. On success the return is zero.
672 */
673int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
674{
Pavel Emelyanovd3904b72007-10-17 21:22:17 -0700675 struct sk_filter *fp, *old_fp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
677 int err;
678
679 /* Make sure new filter is there and in the right amounts. */
Kris Katterjohne35bedf2006-01-17 02:25:52 -0800680 if (fprog->filter == NULL)
681 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
683 fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
684 if (!fp)
685 return -ENOMEM;
686 if (copy_from_user(fp->insns, fprog->filter, fsize)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900687 sock_kfree_s(sk, fp, fsize+sizeof(*fp));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 return -EFAULT;
689 }
690
691 atomic_set(&fp->refcnt, 1);
692 fp->len = fprog->len;
693
Jiri Pirko302d6632012-03-31 11:01:19 +0000694 err = __sk_prepare_filter(fp);
Pavel Emelyanovd3904b72007-10-17 21:22:17 -0700695 if (err) {
696 sk_filter_uncharge(sk, fp);
697 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 }
699
Eric Dumazetf91ff5b2010-09-27 06:07:30 +0000700 old_fp = rcu_dereference_protected(sk->sk_filter,
701 sock_owned_by_user(sk));
Pavel Emelyanovd3904b72007-10-17 21:22:17 -0700702 rcu_assign_pointer(sk->sk_filter, fp);
Pavel Emelyanovd3904b72007-10-17 21:22:17 -0700703
Olof Johansson9b013e02007-10-18 21:48:39 -0700704 if (old_fp)
Eric Dumazet46bcf142010-12-06 09:29:43 -0800705 sk_filter_uncharge(sk, old_fp);
Pavel Emelyanovd3904b72007-10-17 21:22:17 -0700706 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707}
Michael S. Tsirkin5ff3f072010-02-14 01:01:00 +0000708EXPORT_SYMBOL_GPL(sk_attach_filter);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709
Pavel Emelyanov55b33322007-10-17 21:21:26 -0700710int sk_detach_filter(struct sock *sk)
711{
712 int ret = -ENOENT;
713 struct sk_filter *filter;
714
Eric Dumazetf91ff5b2010-09-27 06:07:30 +0000715 filter = rcu_dereference_protected(sk->sk_filter,
716 sock_owned_by_user(sk));
Pavel Emelyanov55b33322007-10-17 21:21:26 -0700717 if (filter) {
Stephen Hemmingera9b3cd72011-08-01 16:19:00 +0000718 RCU_INIT_POINTER(sk->sk_filter, NULL);
Eric Dumazet46bcf142010-12-06 09:29:43 -0800719 sk_filter_uncharge(sk, filter);
Pavel Emelyanov55b33322007-10-17 21:21:26 -0700720 ret = 0;
721 }
Pavel Emelyanov55b33322007-10-17 21:21:26 -0700722 return ret;
723}
Michael S. Tsirkin5ff3f072010-02-14 01:01:00 +0000724EXPORT_SYMBOL_GPL(sk_detach_filter);