Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 1 | /* |
| 2 | * eBPF kernel space program part |
| 3 | * |
| 4 | * Toy eBPF program for demonstration purposes, some parts derived from |
| 5 | * kernel tree's samples/bpf/sockex2_kern.c example. |
| 6 | * |
| 7 | * More background on eBPF, kernel tree: Documentation/networking/filter.txt |
| 8 | * |
| 9 | * Note, this file is rather large, and most classifier and actions are |
| 10 | * likely smaller to accomplish one specific use-case and are tailored |
| 11 | * for high performance. For performance reasons, you might also have the |
| 12 | * classifier and action already merged inside the classifier. |
| 13 | * |
| 14 | * In order to show various features it serves as a bigger programming |
| 15 | * example, which you should feel free to rip apart and experiment with. |
| 16 | * |
| 17 | * Compilation, configuration example: |
| 18 | * |
| 19 | * Note: as long as the BPF backend in LLVM is still experimental, |
 * you need to build LLVM with --enable-experimental-targets=BPF
| 21 | * Also, make sure your 4.1+ kernel is compiled with CONFIG_BPF_SYSCALL=y, |
| 22 | * and you have libelf.h and gelf.h headers and can link tc against -lelf. |
| 23 | * |
| 24 | * In case you need to sync kernel headers, go to your kernel source tree: |
| 25 | * # make headers_install INSTALL_HDR_PATH=/usr/ |
| 26 | * |
| 27 | * $ export PATH=/home/<...>/llvm/Debug+Asserts/bin/:$PATH |
| 28 | * $ clang -O2 -emit-llvm -c bpf_prog.c -o - | llc -march=bpf -filetype=obj -o bpf.o |
| 29 | * $ objdump -h bpf.o |
| 30 | * [...] |
| 31 | * 3 classifier 000007f8 0000000000000000 0000000000000000 00000040 2**3 |
| 32 | * CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE |
| 33 | * 4 action-mark 00000088 0000000000000000 0000000000000000 00000838 2**3 |
| 34 | * CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE |
| 35 | * 5 action-rand 00000098 0000000000000000 0000000000000000 000008c0 2**3 |
| 36 | * CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE |
| 37 | * 6 maps 00000030 0000000000000000 0000000000000000 00000958 2**2 |
| 38 | * CONTENTS, ALLOC, LOAD, DATA |
| 39 | * 7 license 00000004 0000000000000000 0000000000000000 00000988 2**0 |
| 40 | * CONTENTS, ALLOC, LOAD, DATA |
| 41 | * [...] |
| 42 | * # echo 1 > /proc/sys/net/core/bpf_jit_enable |
| 43 | * $ gcc bpf_agent.c -o bpf_agent -Wall -O2 |
| 44 | * # ./bpf_agent /tmp/bpf-uds (e.g. on a different terminal) |
| 45 | * # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \ |
| 46 | * action bpf obj bpf.o sec action-mark \ |
| 47 | * action bpf obj bpf.o sec action-rand ok |
| 48 | * # tc filter show dev em1 |
| 49 | * filter parent 1: protocol all pref 49152 bpf |
| 50 | * filter parent 1: protocol all pref 49152 bpf handle 0x1 flowid 1:1 bpf.o:[classifier] |
| 51 | * action order 1: bpf bpf.o:[action-mark] default-action pipe |
| 52 | * index 52 ref 1 bind 1 |
| 53 | * |
| 54 | * action order 2: bpf bpf.o:[action-rand] default-action pipe |
| 55 | * index 53 ref 1 bind 1 |
| 56 | * |
| 57 | * action order 3: gact action pass |
| 58 | * random type none pass val 0 |
| 59 | * index 38 ref 1 bind 1 |
| 60 | * |
Daniel Borkmann | 279d6a8 | 2015-04-20 13:48:54 +0200 | [diff] [blame] | 61 | * The same program can also be installed on ingress side (as opposed to above |
| 62 | * egress configuration), e.g.: |
| 63 | * |
| 64 | * # tc qdisc add dev em1 handle ffff: ingress |
| 65 | * # tc filter add dev em1 parent ffff: bpf obj ... |
| 66 | * |
Daniel Borkmann | 4bd6244 | 2015-04-16 21:20:06 +0200 | [diff] [blame] | 67 | * Notes on BPF agent: |
| 68 | * |
| 69 | * In the above example, the bpf_agent creates the unix domain socket |
 * natively. "tc exec" can also spawn a shell and hold the sockets there:
| 71 | * |
| 72 | * # tc exec bpf imp /tmp/bpf-uds |
| 73 | * # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \ |
| 74 | * action bpf obj bpf.o sec action-mark \ |
| 75 | * action bpf obj bpf.o sec action-rand ok |
| 76 | * sh-4.2# (shell spawned from tc exec) |
| 77 | * sh-4.2# bpf_agent |
| 78 | * [...] |
| 79 | * |
| 80 | * This will read out fds over environment and produce the same data dump |
| 81 | * as below. This has the advantage that the spawned shell owns the fds |
| 82 | * and thus if the agent is restarted, it can reattach to the same fds, also |
| 83 | * various programs can easily read/modify the data simultaneously from user |
| 84 | * space side. |
| 85 | * |
| 86 | * If the shell is unnecessary, the agent can also just be spawned directly |
| 87 | * via tc exec: |
| 88 | * |
| 89 | * # tc exec bpf imp /tmp/bpf-uds run bpf_agent |
| 90 | * # tc filter add dev em1 parent 1: bpf obj bpf.o exp /tmp/bpf-uds flowid 1:1 \ |
| 91 | * action bpf obj bpf.o sec action-mark \ |
| 92 | * action bpf obj bpf.o sec action-rand ok |
| 93 | * |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 94 | * BPF agent example output: |
| 95 | * |
| 96 | * ver: 1 |
| 97 | * obj: bpf.o |
| 98 | * dev: 64770 |
| 99 | * ino: 6045133 |
| 100 | * maps: 3 |
| 101 | * map0: |
| 102 | * `- fd: 4 |
| 103 | * | serial: 1 |
| 104 | * | type: 1 |
| 105 | * | max elem: 256 |
| 106 | * | size key: 1 |
| 107 | * ` size val: 16 |
| 108 | * map1: |
| 109 | * `- fd: 5 |
| 110 | * | serial: 2 |
| 111 | * | type: 1 |
| 112 | * | max elem: 1024 |
| 113 | * | size key: 4 |
| 114 | * ` size val: 16 |
| 115 | * map2: |
| 116 | * `- fd: 6 |
| 117 | * | serial: 3 |
| 118 | * | type: 2 |
| 119 | * | max elem: 64 |
| 120 | * | size key: 4 |
| 121 | * ` size val: 8 |
| 122 | * data, period: 5sec |
| 123 | * `- number of drops: cpu0: 0 cpu1: 0 cpu2: 0 cpu3: 0 |
| 124 | * | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 0, mis: 0] q3:[pkts: 0, mis: 0] |
| 125 | * ` protos: tcp:[pkts: 0, bytes: 0] udp:[pkts: 0, bytes: 0] icmp:[pkts: 0, bytes: 0] |
| 126 | * data, period: 5sec |
| 127 | * `- number of drops: cpu0: 5 cpu1: 0 cpu2: 0 cpu3: 1 |
| 128 | * | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 24, mis: 14] q3:[pkts: 0, mis: 0] |
| 129 | * ` protos: tcp:[pkts: 13, bytes: 1989] udp:[pkts: 10, bytes: 710] icmp:[pkts: 0, bytes: 0] |
| 130 | * data, period: 5sec |
| 131 | * `- number of drops: cpu0: 5 cpu1: 0 cpu2: 3 cpu3: 3 |
| 132 | * | nic queues: q0:[pkts: 0, mis: 0] q1:[pkts: 0, mis: 0] q2:[pkts: 39, mis: 21] q3:[pkts: 0, mis: 0] |
| 133 | * ` protos: tcp:[pkts: 20, bytes: 3549] udp:[pkts: 18, bytes: 1278] icmp:[pkts: 0, bytes: 0] |
| 134 | * [...] |
| 135 | * |
| 136 | * This now means, the below classifier and action pipeline has been loaded |
| 137 | * as eBPF bytecode into the kernel, the kernel has verified that the |
| 138 | * execution of the bytecode is "safe", and it has JITed the programs |
| 139 | * afterwards, so that upon invocation they're running on native speed. tc |
| 140 | * has transferred all map file descriptors to the bpf_agent via IPC and |
| 141 | * even after tc exits, the agent can read out or modify all map data. |
| 142 | * |
| 143 | * Note that the export to the uds is done only once in the classifier and |
| 144 | * not in the action. It's enough to export the (here) shared descriptors |
| 145 | * once. |
| 146 | * |
| 147 | * If you need to disassemble the generated JIT image (echo with 2), the |
| 148 | * kernel tree has under tools/net/ a small helper, you can invoke e.g. |
| 149 | * `bpf_jit_disasm -o`. |
| 150 | * |
| 151 | * Please find in the code below further comments. |
| 152 | * |
| 153 | * -- Happy eBPF hacking! ;) |
| 154 | */ |
| 155 | #include <stdint.h> |
| 156 | #include <stdbool.h> |
| 157 | #include <sys/types.h> |
| 158 | #include <sys/socket.h> |
| 159 | #include <asm/types.h> |
| 160 | #include <linux/in.h> |
| 161 | #include <linux/if.h> |
| 162 | #include <linux/if_ether.h> |
| 163 | #include <linux/ip.h> |
| 164 | #include <linux/ipv6.h> |
| 165 | #include <linux/if_tunnel.h> |
Daniel Borkmann | 279d6a8 | 2015-04-20 13:48:54 +0200 | [diff] [blame] | 166 | #include <linux/filter.h> |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 167 | #include <linux/bpf.h> |
| 168 | |
| 169 | /* Common, shared definitions with ebpf_agent.c. */ |
| 170 | #include "bpf_shared.h" |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 171 | /* BPF helper functions for our example. */ |
| 172 | #include "../../include/bpf_api.h" |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 173 | |
| 174 | /* Could be defined here as well, or included from the header. */ |
| 175 | #define TC_ACT_UNSPEC (-1) |
| 176 | #define TC_ACT_OK 0 |
| 177 | #define TC_ACT_RECLASSIFY 1 |
| 178 | #define TC_ACT_SHOT 2 |
| 179 | #define TC_ACT_PIPE 3 |
| 180 | #define TC_ACT_STOLEN 4 |
| 181 | #define TC_ACT_QUEUED 5 |
| 182 | #define TC_ACT_REPEAT 6 |
| 183 | |
| 184 | /* Other, misc stuff. */ |
| 185 | #define IP_MF 0x2000 |
| 186 | #define IP_OFFSET 0x1FFF |
| 187 | |
/* eBPF map definitions, all placed in section "maps". */

/* Per-IP-protocol traffic statistics: hash map keyed by the 8-bit IP
 * protocol number, value is a packets/bytes pair (struct count_tuple,
 * shared with the user space agent via bpf_shared.h). Updated from
 * cls_update_proto_map(), read out by the agent over the exported fds.
 */
struct bpf_elf_map __section("maps") map_proto = {
	.type = BPF_MAP_TYPE_HASH,
	.id = BPF_MAP_ID_PROTO,
	.size_key = sizeof(uint8_t),
	.size_value = sizeof(struct count_tuple),
	.max_elem = 256,
};
| 196 | |
/* Per-NIC-queue statistics: hash map keyed by skb->queue_mapping,
 * value counts total packets and queue/CPU mismatches per queue
 * (struct count_queue from bpf_shared.h). Updated from
 * cls_update_queue_map().
 */
struct bpf_elf_map __section("maps") map_queue = {
	.type = BPF_MAP_TYPE_HASH,
	.id = BPF_MAP_ID_QUEUE,
	.size_key = sizeof(uint32_t),
	.size_value = sizeof(struct count_queue),
	.max_elem = 1024,
};
| 204 | |
/* Per-CPU drop counters: plain array indexed by CPU id, bumped from
 * act_update_drop_map() whenever an action drops a packet. max_elem
 * of 64 caps the number of addressable CPUs.
 */
struct bpf_elf_map __section("maps") map_drops = {
	.type = BPF_MAP_TYPE_ARRAY,
	.id = BPF_MAP_ID_DROPS,
	.size_key = sizeof(uint32_t),
	.size_value = sizeof(long),
	.max_elem = 64,
};
| 212 | |
| 213 | /* Helper functions and definitions for the flow dissector used by the |
| 214 | * example classifier. This resembles the kernel's flow dissector to |
 * some extent and is just used as an example to show what's possible
| 216 | * with eBPF. |
| 217 | */ |
/* Forward declaration only; presumably referenced by one of the
 * included uapi headers -- TODO confirm it is still needed.
 */
struct sockaddr;

/* 802.1Q/802.1ad VLAN tag, directly following the Ethernet header. */
struct vlan_hdr {
	__be16 h_vlan_TCI;			/* priority / CFI / VLAN id */
	__be16 h_vlan_encapsulated_proto;	/* ethertype of inner payload */
};

/* Output of flow_dissector(): addresses, ports, transport header
 * offset and innermost IP protocol of the parsed packet.
 */
struct flow_keys {
	__u32 src;		/* IPv4 saddr, or IPv6 saddr folded to 32 bit */
	__u32 dst;		/* IPv4 daddr, or IPv6 daddr folded to 32 bit */
	union {
		__u32 ports;	/* both L4 ports as one 32 bit word */
		__u16 port16[2];	/* the two 16 bit halves of ports */
	};
	__s32 th_off;		/* transport header offset within the packet */
	__u8 ip_proto;		/* innermost IP protocol (0 for IPv4 fragments) */
};
| 235 | |
/* Offset of the L4 port pair relative to the transport header start.
 * All common transports (TCP, UDP, DCCP, ESP, SCTP, UDPLITE, ...)
 * carry the ports right at the beginning; AH places its SPI there
 * instead, so the dissector skips the first 4 bytes for it.
 */
static __inline__ int flow_ports_offset(__u8 ip_proto)
{
	return ip_proto == IPPROTO_AH ? 4 : 0;
}
| 251 | |
Daniel Borkmann | 92a3699 | 2016-02-07 02:11:50 +0100 | [diff] [blame^] | 252 | static __inline__ bool flow_is_frag(struct __sk_buff *skb, int nh_off) |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 253 | { |
| 254 | return !!(load_half(skb, nh_off + offsetof(struct iphdr, frag_off)) & |
| 255 | (IP_MF | IP_OFFSET)); |
| 256 | } |
| 257 | |
/* Parse the IPv4 header at @nh_off: store src/dst address and the L4
 * protocol into @flow/@ip_proto and return the offset of the next
 * (transport or encapsulated) header.
 */
static __inline__ int flow_parse_ipv4(struct __sk_buff *skb, int nh_off,
				      __u8 *ip_proto, struct flow_keys *flow)
{
	__u8 ip_ver_len;

	/* Fragments have no L4 header to parse; flag them with proto 0. */
	if (unlikely(flow_is_frag(skb, nh_off)))
		*ip_proto = 0;
	else
		*ip_proto = load_byte(skb, nh_off + offsetof(struct iphdr,
							     protocol));
	/* For GRE, keep the addresses of the encapsulated header instead
	 * of the outer tunnel endpoints (filled in by a later parse call).
	 */
	if (*ip_proto != IPPROTO_GRE) {
		flow->src = load_word(skb, nh_off + offsetof(struct iphdr, saddr));
		flow->dst = load_word(skb, nh_off + offsetof(struct iphdr, daddr));
	}

	/* Fast path: version 4 with no options is a fixed 20 byte header;
	 * otherwise derive the length from the IHL nibble.
	 */
	ip_ver_len = load_byte(skb, nh_off + 0 /* offsetof(struct iphdr, ihl) */);
	if (likely(ip_ver_len == 0x45))
		nh_off += 20;
	else
		nh_off += (ip_ver_len & 0xF) << 2;

	return nh_off;
}
| 281 | |
Daniel Borkmann | 92a3699 | 2016-02-07 02:11:50 +0100 | [diff] [blame^] | 282 | static __inline__ __u32 flow_addr_hash_ipv6(struct __sk_buff *skb, int off) |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 283 | { |
| 284 | __u32 w0 = load_word(skb, off); |
| 285 | __u32 w1 = load_word(skb, off + sizeof(w0)); |
| 286 | __u32 w2 = load_word(skb, off + sizeof(w0) * 2); |
| 287 | __u32 w3 = load_word(skb, off + sizeof(w0) * 3); |
| 288 | |
Daniel Borkmann | 279d6a8 | 2015-04-20 13:48:54 +0200 | [diff] [blame] | 289 | return w0 ^ w1 ^ w2 ^ w3; |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 290 | } |
| 291 | |
Daniel Borkmann | 92a3699 | 2016-02-07 02:11:50 +0100 | [diff] [blame^] | 292 | static __inline__ int flow_parse_ipv6(struct __sk_buff *skb, int nh_off, |
| 293 | __u8 *ip_proto, struct flow_keys *flow) |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 294 | { |
| 295 | *ip_proto = load_byte(skb, nh_off + offsetof(struct ipv6hdr, nexthdr)); |
| 296 | |
| 297 | flow->src = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, saddr)); |
| 298 | flow->dst = flow_addr_hash_ipv6(skb, nh_off + offsetof(struct ipv6hdr, daddr)); |
| 299 | |
| 300 | return nh_off + sizeof(struct ipv6hdr); |
| 301 | } |
| 302 | |
Daniel Borkmann | 92a3699 | 2016-02-07 02:11:50 +0100 | [diff] [blame^] | 303 | static __inline__ bool flow_dissector(struct __sk_buff *skb, |
| 304 | struct flow_keys *flow) |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 305 | { |
Daniel Borkmann | 279d6a8 | 2015-04-20 13:48:54 +0200 | [diff] [blame] | 306 | int poff, nh_off = BPF_LL_OFF + ETH_HLEN; |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 307 | __be16 proto = skb->protocol; |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 308 | __u8 ip_proto; |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 309 | |
| 310 | /* TODO: check for skb->vlan_tci, skb->vlan_proto first */ |
| 311 | if (proto == htons(ETH_P_8021AD)) { |
| 312 | proto = load_half(skb, nh_off + |
| 313 | offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); |
| 314 | nh_off += sizeof(struct vlan_hdr); |
| 315 | } |
| 316 | if (proto == htons(ETH_P_8021Q)) { |
| 317 | proto = load_half(skb, nh_off + |
| 318 | offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); |
| 319 | nh_off += sizeof(struct vlan_hdr); |
| 320 | } |
| 321 | |
| 322 | if (likely(proto == htons(ETH_P_IP))) |
| 323 | nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow); |
| 324 | else if (proto == htons(ETH_P_IPV6)) |
| 325 | nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow); |
| 326 | else |
| 327 | return false; |
| 328 | |
| 329 | switch (ip_proto) { |
| 330 | case IPPROTO_GRE: { |
| 331 | struct gre_hdr { |
| 332 | __be16 flags; |
| 333 | __be16 proto; |
| 334 | }; |
| 335 | |
| 336 | __u16 gre_flags = load_half(skb, nh_off + |
| 337 | offsetof(struct gre_hdr, flags)); |
| 338 | __u16 gre_proto = load_half(skb, nh_off + |
| 339 | offsetof(struct gre_hdr, proto)); |
| 340 | |
| 341 | if (gre_flags & (GRE_VERSION | GRE_ROUTING)) |
| 342 | break; |
| 343 | |
| 344 | nh_off += 4; |
| 345 | if (gre_flags & GRE_CSUM) |
| 346 | nh_off += 4; |
| 347 | if (gre_flags & GRE_KEY) |
| 348 | nh_off += 4; |
| 349 | if (gre_flags & GRE_SEQ) |
| 350 | nh_off += 4; |
| 351 | |
| 352 | if (gre_proto == ETH_P_8021Q) { |
| 353 | gre_proto = load_half(skb, nh_off + |
| 354 | offsetof(struct vlan_hdr, |
| 355 | h_vlan_encapsulated_proto)); |
| 356 | nh_off += sizeof(struct vlan_hdr); |
| 357 | } |
| 358 | if (gre_proto == ETH_P_IP) |
| 359 | nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow); |
| 360 | else if (gre_proto == ETH_P_IPV6) |
| 361 | nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow); |
| 362 | else |
| 363 | return false; |
| 364 | break; |
| 365 | } |
| 366 | case IPPROTO_IPIP: |
| 367 | nh_off = flow_parse_ipv4(skb, nh_off, &ip_proto, flow); |
| 368 | break; |
| 369 | case IPPROTO_IPV6: |
| 370 | nh_off = flow_parse_ipv6(skb, nh_off, &ip_proto, flow); |
| 371 | default: |
| 372 | break; |
| 373 | } |
| 374 | |
| 375 | nh_off += flow_ports_offset(ip_proto); |
| 376 | |
| 377 | flow->ports = load_word(skb, nh_off); |
Daniel Borkmann | 279d6a8 | 2015-04-20 13:48:54 +0200 | [diff] [blame] | 378 | flow->th_off = nh_off; |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 379 | flow->ip_proto = ip_proto; |
| 380 | |
| 381 | return true; |
| 382 | } |
| 383 | |
Daniel Borkmann | 92a3699 | 2016-02-07 02:11:50 +0100 | [diff] [blame^] | 384 | static __inline__ void cls_update_proto_map(const struct __sk_buff *skb, |
| 385 | const struct flow_keys *flow) |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 386 | { |
| 387 | uint8_t proto = flow->ip_proto; |
| 388 | struct count_tuple *ct, _ct; |
| 389 | |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 390 | ct = map_lookup_elem(&map_proto, &proto); |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 391 | if (likely(ct)) { |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 392 | lock_xadd(&ct->packets, 1); |
| 393 | lock_xadd(&ct->bytes, skb->len); |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 394 | return; |
| 395 | } |
| 396 | |
| 397 | /* No hit yet, we need to create a new entry. */ |
| 398 | _ct.packets = 1; |
| 399 | _ct.bytes = skb->len; |
| 400 | |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 401 | map_update_elem(&map_proto, &proto, &_ct, BPF_ANY); |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 402 | } |
| 403 | |
Daniel Borkmann | 92a3699 | 2016-02-07 02:11:50 +0100 | [diff] [blame^] | 404 | static __inline__ void cls_update_queue_map(const struct __sk_buff *skb) |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 405 | { |
| 406 | uint32_t queue = skb->queue_mapping; |
| 407 | struct count_queue *cq, _cq; |
| 408 | bool mismatch; |
| 409 | |
| 410 | mismatch = skb->queue_mapping != get_smp_processor_id(); |
| 411 | |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 412 | cq = map_lookup_elem(&map_queue, &queue); |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 413 | if (likely(cq)) { |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 414 | lock_xadd(&cq->total, 1); |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 415 | if (mismatch) |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 416 | lock_xadd(&cq->mismatch, 1); |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 417 | return; |
| 418 | } |
| 419 | |
| 420 | /* No hit yet, we need to create a new entry. */ |
| 421 | _cq.total = 1; |
| 422 | _cq.mismatch = mismatch ? 1 : 0; |
| 423 | |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 424 | map_update_elem(&map_queue, &queue, &_cq, BPF_ANY); |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 425 | } |
| 426 | |
| 427 | /* eBPF program definitions, placed in various sections, which can |
| 428 | * have custom section names. If custom names are in use, it's |
| 429 | * required to point tc to the correct section, e.g. |
| 430 | * |
| 431 | * tc filter add [...] bpf obj cls.o sec cls-tos [...] |
| 432 | * |
| 433 | * in case the program resides in __section("cls-tos"). |
| 434 | * |
| 435 | * Default section for cls_bpf is: "classifier", for act_bpf is: |
| 436 | * "action". Naturally, if for example multiple actions are present |
| 437 | * in the same file, they need to have distinct section names. |
| 438 | * |
| 439 | * It is however not required to have multiple programs sharing |
| 440 | * a file. |
| 441 | */ |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 442 | __section("classifier") |
| 443 | int cls_main(struct __sk_buff *skb) |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 444 | { |
| 445 | struct flow_keys flow; |
| 446 | |
| 447 | if (!flow_dissector(skb, &flow)) |
| 448 | return 0; /* No match in cls_bpf. */ |
| 449 | |
| 450 | cls_update_proto_map(skb, &flow); |
| 451 | cls_update_queue_map(skb); |
| 452 | |
| 453 | return flow.ip_proto; |
| 454 | } |
| 455 | |
/* Bump this CPU's slot in the per-CPU drop counter array map_drops.
 * NOTE(review): map_drops declares size_value = sizeof(long) (8 bytes
 * on 64 bit), while the value is accessed as uint32_t here; the
 * increment then only touches the low word -- verify against the
 * agent's reader side in bpf_agent.c.
 */
static __inline__ void act_update_drop_map(void)
{
	uint32_t *count, cpu = get_smp_processor_id();

	count = map_lookup_elem(&map_drops, &cpu);
	if (count)
		/* Only this cpu is accessing this element. */
		(*count)++;
}
| 465 | |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 466 | __section("action-mark") |
| 467 | int act_mark_main(struct __sk_buff *skb) |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 468 | { |
| 469 | /* You could also mangle skb data here with the helper function |
| 470 | * BPF_FUNC_skb_store_bytes, etc. Or, alternatively you could |
| 471 | * do that already in the classifier itself as a merged combination |
| 472 | * of classifier'n'action model. |
| 473 | */ |
| 474 | |
| 475 | if (skb->mark == 0xcafe) { |
| 476 | act_update_drop_map(); |
| 477 | return TC_ACT_SHOT; |
| 478 | } |
| 479 | |
| 480 | /* Default configured tc opcode. */ |
| 481 | return TC_ACT_UNSPEC; |
| 482 | } |
| 483 | |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 484 | __section("action-rand") |
| 485 | int act_rand_main(struct __sk_buff *skb) |
Daniel Borkmann | 6256f8c | 2015-04-01 17:57:44 +0200 | [diff] [blame] | 486 | { |
| 487 | /* Sorry, we're near event horizon ... */ |
| 488 | if ((get_prandom_u32() & 3) == 0) { |
| 489 | act_update_drop_map(); |
| 490 | return TC_ACT_SHOT; |
| 491 | } |
| 492 | |
| 493 | return TC_ACT_UNSPEC; |
| 494 | } |
| 495 | |
| 496 | /* Last but not least, the file contains a license. Some future helper |
| 497 | * functions may only be available with a GPL license. |
| 498 | */ |
Daniel Borkmann | 41d6e33 | 2015-12-02 00:25:36 +0100 | [diff] [blame] | 499 | BPF_LICENSE("GPL"); |