/* bpf_jit_comp.c: BPF JIT compiler for PPC64
 *
 * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
 *
 * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>

#include "bpf_jit.h"

int bpf_jit_enable __read_mostly;

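/*
 * PPC has split instruction and data caches: before newly written
 * instructions may be executed, the stores must be made visible and the
 * corresponding icache lines invalidated, which flush_icache_range() does.
 */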
Matt Evans0ca87f02011-07-20 15:51:00 +000022static inline void bpf_flush_icache(void *start, void *end)
23{
24 smp_wmb();
25 flush_icache_range((unsigned long)start, (unsigned long)end);
26}
27
28static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
29 struct codegen_context *ctx)
30{
31 int i;
32 const struct sock_filter *filter = fp->insns;
33
34 if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
35 /* Make stackframe */
36 if (ctx->seen & SEEN_DATAREF) {
37 /* If we call any helpers (for loads), save LR */
Michael Neulingc75df6f2012-06-25 13:33:10 +000038 EMIT(PPC_INST_MFLR | __PPC_RT(R0));
Matt Evans0ca87f02011-07-20 15:51:00 +000039 PPC_STD(0, 1, 16);
40
41 /* Back up non-volatile regs. */
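			/*
			 * Non-volatile GPR n is saved 8*(32-n) bytes below the
			 * current stack pointer (r31 at -8, r30 at -16, ...);
			 * the epilogue reloads from the same offsets.
			 */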
			PPC_STD(r_D, 1, -(8*(32-r_D)));
			PPC_STD(r_HL, 1, -(8*(32-r_HL)));
		}
		if (ctx->seen & SEEN_MEM) {
			/*
			 * Conditionally save regs r15-r31 as some will be used
			 * for M[] data.
			 */
			for (i = r_M; i < (r_M+16); i++) {
				if (ctx->seen & (1 << (i-r_M)))
					PPC_STD(i, 1, -(8*(32-i)));
			}
		}
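		/*
		 * stdu r1,-BPF_PPC_STACKFRAME(r1): allocate the frame and
		 * store the back chain in one instruction.  The DS-form
		 * offset field is 16 bits with the low two bits zero, hence
		 * the & 0xfffc.
		 */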
		EMIT(PPC_INST_STDU | __PPC_RS(R1) | __PPC_RA(R1) |
		     (-BPF_PPC_STACKFRAME & 0xfffc));
	}

	if (ctx->seen & SEEN_DATAREF) {
		/*
		 * If this filter needs to access skb data,
		 * prepare r_D and r_HL:
		 *  r_HL = skb->len - skb->data_len
		 *  r_D  = skb->data
		 */
		PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
							 data_len));
		PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
		PPC_SUB(r_HL, r_HL, r_scratch1);
		PPC_LD_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
	}

	if (ctx->seen & SEEN_XREG) {
		/*
		 * TODO: Could also detect whether first instr. sets X and
		 * avoid this (as below, with A).
		 */
		PPC_LI(r_X, 0);
	}

	switch (filter[0].code) {
	case BPF_S_RET_K:
	case BPF_S_LD_W_LEN:
	case BPF_S_ANC_PROTOCOL:
	case BPF_S_ANC_IFINDEX:
	case BPF_S_ANC_MARK:
	case BPF_S_ANC_RXHASH:
	case BPF_S_ANC_VLAN_TAG:
	case BPF_S_ANC_VLAN_TAG_PRESENT:
	case BPF_S_ANC_CPU:
	case BPF_S_ANC_QUEUE:
	case BPF_S_LD_W_ABS:
	case BPF_S_LD_H_ABS:
	case BPF_S_LD_B_ABS:
		/* first instruction sets A register (or is RET 'constant') */
		break;
	default:
		/* make sure we don't leak kernel information to user */
		PPC_LI(r_A, 0);
	}
}

static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
		if (ctx->seen & SEEN_DATAREF) {
			PPC_LD(0, 1, 16);
			PPC_MTLR(0);
			PPC_LD(r_D, 1, -(8*(32-r_D)));
			PPC_LD(r_HL, 1, -(8*(32-r_HL)));
		}
		if (ctx->seen & SEEN_MEM) {
			/* Restore any saved non-vol registers */
			for (i = r_M; i < (r_M+16); i++) {
				if (ctx->seen & (1 << (i-r_M)))
					PPC_LD(i, 1, -(8*(32-i)));
			}
		}
	}
	/* The RETs have left a return value in R3. */

	PPC_BLR();
}

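/*
 * K is known at compile time, so the load helper can be picked up front:
 * non-negative offsets take the positive-offset fast path, negative offsets
 * down to SKF_LL_OFF use the negative-offset variant, and anything below
 * that falls back to the generic helper.
 */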
#define CHOOSE_LOAD_FUNC(K, func) \
	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)

/* Assemble the body code between the prologue & epilogue. */
static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
			      struct codegen_context *ctx,
			      unsigned int *addrs)
{
	const struct sock_filter *filter = fp->insns;
	int flen = fp->len;
	u8 *func;
	unsigned int true_cond;
	int i;

	/* Start of epilogue code */
	unsigned int exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		unsigned int K = filter[i].k;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		switch (filter[i].code) {
			/*** ALU ops ***/
		case BPF_S_ALU_ADD_X: /* A += X; */
			ctx->seen |= SEEN_XREG;
			PPC_ADD(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_ADD_K: /* A += K; */
			if (!K)
				break;
			PPC_ADDI(r_A, r_A, IMM_L(K));
			if (K >= 32768)
				PPC_ADDIS(r_A, r_A, IMM_HA(K));
			break;
		case BPF_S_ALU_SUB_X: /* A -= X; */
			ctx->seen |= SEEN_XREG;
			PPC_SUB(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_SUB_K: /* A -= K */
			if (!K)
				break;
			PPC_ADDI(r_A, r_A, IMM_L(-K));
			if (K >= 32768)
				PPC_ADDIS(r_A, r_A, IMM_HA(-K));
			break;
		case BPF_S_ALU_MUL_X: /* A *= X; */
			ctx->seen |= SEEN_XREG;
			PPC_MUL(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_MUL_K: /* A *= K */
			if (K < 32768)
				PPC_MULI(r_A, r_A, K);
			else {
				PPC_LI32(r_scratch1, K);
				PPC_MUL(r_A, r_A, r_scratch1);
			}
			break;
		case BPF_S_ALU_MOD_X: /* A %= X; */
			ctx->seen |= SEEN_XREG;
			PPC_CMPWI(r_X, 0);
			if (ctx->pc_ret0 != -1) {
				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
			} else {
				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
				PPC_LI(r_ret, 0);
				PPC_JMP(exit_addr);
			}
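			/*
			 * No remainder instruction is available here, so
			 * A % X is computed as A - (A / X) * X.
			 */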
			PPC_DIVWU(r_scratch1, r_A, r_X);
			PPC_MUL(r_scratch1, r_X, r_scratch1);
			PPC_SUB(r_A, r_A, r_scratch1);
			break;
		case BPF_S_ALU_MOD_K: /* A %= K; */
			PPC_LI32(r_scratch2, K);
			PPC_DIVWU(r_scratch1, r_A, r_scratch2);
			PPC_MUL(r_scratch1, r_scratch2, r_scratch1);
			PPC_SUB(r_A, r_A, r_scratch1);
			break;
		case BPF_S_ALU_DIV_X: /* A /= X; */
			ctx->seen |= SEEN_XREG;
			PPC_CMPWI(r_X, 0);
			if (ctx->pc_ret0 != -1) {
				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
			} else {
				/*
				 * Exit, returning 0; first pass hits here
				 * (longer worst-case code size).
				 */
				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
				PPC_LI(r_ret, 0);
				PPC_JMP(exit_addr);
			}
			PPC_DIVWU(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
			PPC_LI32(r_scratch1, K);
			/* Top 32 bits of 64bit result -> A */
			PPC_MULHWU(r_A, r_A, r_scratch1);
			break;
		case BPF_S_ALU_AND_X:
			ctx->seen |= SEEN_XREG;
			PPC_AND(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_AND_K:
			if (!IMM_H(K))
				PPC_ANDI(r_A, r_A, K);
			else {
				PPC_LI32(r_scratch1, K);
				PPC_AND(r_A, r_A, r_scratch1);
			}
			break;
		case BPF_S_ALU_OR_X:
			ctx->seen |= SEEN_XREG;
			PPC_OR(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_OR_K:
			if (IMM_L(K))
				PPC_ORI(r_A, r_A, IMM_L(K));
			if (K >= 65536)
				PPC_ORIS(r_A, r_A, IMM_H(K));
			break;
		case BPF_S_ANC_ALU_XOR_X:
		case BPF_S_ALU_XOR_X: /* A ^= X */
			ctx->seen |= SEEN_XREG;
			PPC_XOR(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_XOR_K: /* A ^= K */
			if (IMM_L(K))
				PPC_XORI(r_A, r_A, IMM_L(K));
			if (K >= 65536)
				PPC_XORIS(r_A, r_A, IMM_H(K));
			break;
		case BPF_S_ALU_LSH_X: /* A <<= X; */
			ctx->seen |= SEEN_XREG;
			PPC_SLW(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_LSH_K:
			if (K == 0)
				break;
			else
				PPC_SLWI(r_A, r_A, K);
			break;
		case BPF_S_ALU_RSH_X: /* A >>= X; */
			ctx->seen |= SEEN_XREG;
			PPC_SRW(r_A, r_A, r_X);
			break;
		case BPF_S_ALU_RSH_K: /* A >>= K; */
			if (K == 0)
				break;
			else
				PPC_SRWI(r_A, r_A, K);
			break;
		case BPF_S_ALU_NEG:
			PPC_NEG(r_A, r_A);
			break;
		case BPF_S_RET_K:
			PPC_LI32(r_ret, K);
			if (!K) {
				if (ctx->pc_ret0 == -1)
					ctx->pc_ret0 = i;
			}
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue if we've stuff to clean up.  Otherwise,
			 * if there's nothing to tidy, just return.  If we /are/
			 * the last instruction, we're about to fall through to
			 * the epilogue to return.
			 */
			if (i != flen - 1) {
				/*
				 * Note: 'seen' is properly valid only on pass
				 * #2.  Both parts of this conditional are the
				 * same instruction size though, meaning the
				 * first pass will still correctly determine the
				 * code size/addresses.
				 */
				if (ctx->seen)
					PPC_JMP(exit_addr);
				else
					PPC_BLR();
			}
			break;
		case BPF_S_RET_A:
			PPC_MR(r_ret, r_A);
			if (i != flen - 1) {
				if (ctx->seen)
					PPC_JMP(exit_addr);
				else
					PPC_BLR();
			}
			break;
		case BPF_S_MISC_TAX: /* X = A */
			PPC_MR(r_X, r_A);
			break;
		case BPF_S_MISC_TXA: /* A = X */
			ctx->seen |= SEEN_XREG;
			PPC_MR(r_A, r_X);
			break;

			/*** Constant loads/M[] access ***/
		case BPF_S_LD_IMM: /* A = K */
			PPC_LI32(r_A, K);
			break;
		case BPF_S_LDX_IMM: /* X = K */
			PPC_LI32(r_X, K);
			break;
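		/*
		 * M[] words live in the non-volatile GPRs starting at r_M;
		 * the low bits of ctx->seen record which slots are touched so
		 * the prologue/epilogue only save and restore those registers.
		 */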
		case BPF_S_LD_MEM: /* A = mem[K] */
			PPC_MR(r_A, r_M + (K & 0xf));
			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_S_LDX_MEM: /* X = mem[K] */
			PPC_MR(r_X, r_M + (K & 0xf));
			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_S_ST: /* mem[K] = A */
			PPC_MR(r_M + (K & 0xf), r_A);
			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_S_STX: /* mem[K] = X */
			PPC_MR(r_M + (K & 0xf), r_X);
			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
			break;
		case BPF_S_LD_W_LEN: /* A = skb->len; */
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
			break;
		case BPF_S_LDX_W_LEN: /* X = skb->len; */
			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
			break;

			/*** Ancillary info loads ***/
		case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  protocol) != 2);
			PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							    protocol));
			break;
		case BPF_S_ANC_IFINDEX:
			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
								dev));
			PPC_CMPDI(r_scratch1, 0);
			if (ctx->pc_ret0 != -1) {
				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
			} else {
				/* Exit, returning 0; first pass hits here. */
				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
				PPC_LI(r_ret, 0);
				PPC_JMP(exit_addr);
			}
			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
						  ifindex) != 4);
			PPC_LWZ_OFFS(r_A, r_scratch1,
				     offsetof(struct net_device, ifindex));
			break;
		case BPF_S_ANC_MARK:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  mark));
			break;
		case BPF_S_ANC_RXHASH:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  rxhash));
			break;
		case BPF_S_ANC_VLAN_TAG:
		case BPF_S_ANC_VLAN_TAG_PRESENT:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  vlan_tci));
			if (filter[i].code == BPF_S_ANC_VLAN_TAG)
				PPC_ANDI(r_A, r_A, VLAN_VID_MASK);
			else
				PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT);
			break;
		case BPF_S_ANC_QUEUE:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  queue_mapping) != 2);
			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
							  queue_mapping));
			break;
		case BPF_S_ANC_CPU:
#ifdef CONFIG_SMP
			/*
			 * PACA ptr is r13:
			 * raw_smp_processor_id() = local_paca->paca_index
			 */
			BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct,
						  paca_index) != 2);
			PPC_LHZ_OFFS(r_A, 13,
				     offsetof(struct paca_struct, paca_index));
#else
			PPC_LI(r_A, 0);
#endif
			break;

			/*** Absolute loads from packet header/data ***/
		case BPF_S_LD_W_ABS:
			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
			goto common_load;
		case BPF_S_LD_H_ABS:
			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
			goto common_load;
		case BPF_S_LD_B_ABS:
			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
		common_load:
			/* Load from [K]. */
			ctx->seen |= SEEN_DATAREF;
			PPC_LI64(r_scratch1, func);
			PPC_MTLR(r_scratch1);
			PPC_LI32(r_addr, K);
			PPC_BLRL();
			/*
			 * Helper returns 'lt' condition on error, and an
			 * appropriate return value in r3
			 */
			PPC_BCC(COND_LT, exit_addr);
			break;

			/*** Indirect loads from packet header/data ***/
		case BPF_S_LD_W_IND:
			func = sk_load_word;
			goto common_load_ind;
		case BPF_S_LD_H_IND:
			func = sk_load_half;
			goto common_load_ind;
		case BPF_S_LD_B_IND:
			func = sk_load_byte;
		common_load_ind:
			/*
			 * Load from [X + K].  Negative offsets are tested for
			 * in the helper functions.
			 */
			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
			PPC_LI64(r_scratch1, func);
			PPC_MTLR(r_scratch1);
			PPC_ADDI(r_addr, r_X, IMM_L(K));
			if (K >= 32768)
				PPC_ADDIS(r_addr, r_addr, IMM_HA(K));
			PPC_BLRL();
			/* If error, cr0.LT set */
			PPC_BCC(COND_LT, exit_addr);
			break;

		case BPF_S_LDX_B_MSH:
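			/*
			 * X = 4 * (P[K] & 0xf): reuses the common absolute-load
			 * call sequence, with the _msh helper expected to leave
			 * its result in r_X rather than r_A.
			 */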
			func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
			goto common_load;
			break;

			/*** Jump and branches ***/
		case BPF_S_JMP_JA:
			if (K != 0)
				PPC_JMP(addrs[i + 1 + K]);
			break;

		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_S_JMP_JSET_K:
		case BPF_S_JMP_JSET_X:
			true_cond = COND_NE;
			/* Fall through */
		cond_branch:
			/* same targets, can avoid doing the test :) */
			if (filter[i].jt == filter[i].jf) {
				if (filter[i].jt > 0)
					PPC_JMP(addrs[i + 1 + filter[i].jt]);
				break;
			}

			switch (filter[i].code) {
			case BPF_S_JMP_JGT_X:
			case BPF_S_JMP_JGE_X:
			case BPF_S_JMP_JEQ_X:
				ctx->seen |= SEEN_XREG;
				PPC_CMPLW(r_A, r_X);
				break;
			case BPF_S_JMP_JSET_X:
				ctx->seen |= SEEN_XREG;
				PPC_AND_DOT(r_scratch1, r_A, r_X);
				break;
			case BPF_S_JMP_JEQ_K:
			case BPF_S_JMP_JGT_K:
			case BPF_S_JMP_JGE_K:
				if (K < 32768)
					PPC_CMPLWI(r_A, K);
				else {
					PPC_LI32(r_scratch1, K);
					PPC_CMPLW(r_A, r_scratch1);
				}
				break;
			case BPF_S_JMP_JSET_K:
				if (K < 32768)
					/* PPC_ANDI is /only/ dot-form */
					PPC_ANDI(r_scratch1, r_A, K);
				else {
					PPC_LI32(r_scratch1, K);
					PPC_AND_DOT(r_scratch1, r_A,
						    r_scratch1);
				}
				break;
			}
			/* Sometimes branches are constructed "backward", with
			 * the false path being the branch and true path being
			 * a fallthrough to the next instruction.
			 */
			if (filter[i].jt == 0)
				/* Swap the sense of the branch */
				PPC_BCC(true_cond ^ COND_CMP_TRUE,
					addrs[i + 1 + filter[i].jf]);
			else {
				PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
				if (filter[i].jf != 0)
					PPC_JMP(addrs[i + 1 + filter[i].jf]);
			}
			break;
		default:
			/* The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			if (printk_ratelimit())
				pr_err("BPF filter opcode %04x (@%d) unsupported\n",
				       filter[i].code, i);
			return -ENOTSUPP;
		}

	}
	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}

void bpf_jit_compile(struct sk_filter *fp)
{
	unsigned int proglen;
	unsigned int alloclen;
	u32 *image = NULL;
	u32 *code_base;
	unsigned int *addrs;
	struct codegen_context cgctx;
	int pass;
	int flen = fp->len;

	if (!bpf_jit_enable)
		return;

	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL)
		return;

	/*
	 * There are multiple assembly passes as the generated code will change
	 * size as it settles down, figuring out the max branch offsets/exit
	 * paths required.
	 *
	 * The range of standard conditional branches is +/- 32Kbytes.  Since
	 * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
	 * finish with 8 bytes/instruction.  Not feasible, so long jumps are
	 * used, distinct from short branches.
	 *
	 * Current:
	 *
	 * For now, both branch types assemble to 2 words (short branches padded
	 * with a NOP); this is less efficient, but assembly will always complete
	 * after exactly 3 passes:
	 *
	 * First pass: No code buffer; Program is "faux-generated" -- no code
	 * emitted but maximum size of output determined (and addrs[] filled
	 * in).  Also, we note whether we use M[], whether we use skb data, etc.
	 * All generation choices assumed to be 'worst-case', e.g. branches all
	 * far (2 instructions), return path code reduction not available, etc.
	 *
	 * Second pass: Code buffer allocated with size determined previously.
	 * Prologue generated to support features we have seen used.  Exit paths
	 * determined and addrs[] is filled in again, as code may be slightly
	 * smaller as a result.
	 *
	 * Third pass: Code generated 'for real', and branch destinations
	 * determined from now-accurate addrs[] map.
	 *
	 * Ideal:
	 *
	 * If we optimise this, near branches will be shorter.  On the
	 * first assembly pass, we should err on the side of caution and
	 * generate the biggest code.  On subsequent passes, branches will be
	 * generated short or long and code size will reduce.  With smaller
	 * code, more branches may fall into the short category, and code will
	 * reduce more.
	 *
	 * Finally, if we see one pass generate code the same size as the
	 * previous pass we have converged and should now generate code for
	 * real.  Allocating at the end will also save the memory that would
	 * otherwise be wasted by the (small) current code shrinkage.
	 * Preferably, we should do a small number of passes (e.g. 5) and if we
	 * haven't converged by then, get impatient and force code to generate
	 * as-is, even if the odd branch would be left long.  The chances of a
	 * long jump are tiny with all but the most enormous of BPF filter
	 * inputs, so we should usually converge on the third pass.
	 */

	cgctx.idx = 0;
	cgctx.seen = 0;
	cgctx.pc_ret0 = -1;
	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
		/* We hit something illegal or unsupported. */
		goto out;

	/*
	 * Pretend to build prologue, given the features we've seen.  This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(fp, 0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;
	image = module_alloc(alloclen);
	if (!image)
		goto out;

	code_base = image + (FUNCTION_DESCR_SIZE/4);

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(fp, code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

	if (bpf_jit_enable > 1)
		/* Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

	if (image) {
		bpf_flush_icache(code_base, code_base + (proglen/4));
		/* Function descriptor nastiness: Address + TOC */
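		/*
		 * On PPC64 a function pointer refers to a function descriptor
		 * rather than to the first instruction, so store the entry
		 * address and the kernel TOC at the start of the allocation
		 * and hand that out as bpf_func.
		 */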
		((u64 *)image)[0] = (u64)code_base;
		((u64 *)image)[1] = local_paca->kernel_toc;
		fp->bpf_func = (void *)image;
	}
out:
	kfree(addrs);
	return;
}

void bpf_jit_free(struct sk_filter *fp)
{
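	/*
	 * bpf_func still pointing at sk_run_filter means no JIT image was
	 * ever produced for this filter, so there is nothing extra to free.
	 */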
	if (fp->bpf_func != sk_run_filter)
		module_free(NULL, fp->bpf_func);
	kfree(fp);
}