Rodrigo Manyari | e3b59b3 | 2018-03-02 19:19:14 -0500 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # @lint-avoid-python-3-compatibility-imports |
| 3 | # |
| 4 | # tcpsubnet Summarize TCP bytes sent to different subnets. |
| 5 | # For Linux, uses BCC, eBPF. Embedded C. |
| 6 | # |
| 7 | # USAGE: tcpsubnet [-h] [-v] [--ebpf] [-J] [-f FORMAT] [-i INTERVAL] [subnets] |
| 8 | # |
| 9 | # This uses dynamic tracing of kernel functions, and will need to be updated |
| 10 | # to match kernel changes. |
| 11 | # |
| 12 | # This is an adaptation of tcptop, written by Brendan Gregg. |
| 13 | # |
| 14 | # WARNING: This traces all sends at the TCP level, and while it |
| 15 | # summarizes data in-kernel to reduce overhead, there may still be some |
| 16 | # overhead at high TCP send/receive rates (eg, ~13% of one CPU at 100k TCP |
| 17 | # events/sec. This is not the same as packet rate: funccount can be used to |
| 18 | # count the kprobes below to find out the TCP rate). Test in a lab environment |
| 19 | # first. If your send rate is low (eg, <1k/sec) then the overhead is |
| 20 | # expected to be negligible. |
| 21 | # |
| 22 | # Copyright 2017 Rodrigo Manyari |
| 23 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 24 | # |
| 25 | # 03-Oct-2017 Rodrigo Manyari Created this based on tcptop. |
| 26 | # 13-Feb-2018 Rodrigo Manyari Fix pep8 errors, some refactoring. |
Rodrigo Manyari | efcb30f | 2018-03-05 22:55:17 -0500 | [diff] [blame] | 27 | # 05-Mar-2018 Rodrigo Manyari Add date time to output. |
Rodrigo Manyari | e3b59b3 | 2018-03-02 19:19:14 -0500 | [diff] [blame] | 28 | |
| 29 | import argparse |
| 30 | import json |
| 31 | import logging |
| 32 | import struct |
| 33 | import socket |
| 34 | from bcc import BPF |
Rodrigo Manyari | efcb30f | 2018-03-05 22:55:17 -0500 | [diff] [blame] | 35 | from datetime import datetime as dt |
Rodrigo Manyari | e3b59b3 | 2018-03-02 19:19:14 -0500 | [diff] [blame] | 36 | from time import sleep |
| 37 | |
# command-line interface
examples = """examples:
    ./tcpsubnet                 # Trace TCP sent to the default subnets:
                                # 127.0.0.1/32,10.0.0.0/8,172.16.0.0/12,
                                # 192.168.0.0/16,0.0.0.0/0
    ./tcpsubnet -f K            # Trace TCP sent to the default subnets
                                # aggregated in KBytes.
    ./tcpsubnet 10.80.0.0/24    # Trace TCP sent to 10.80.0.0/24 only
    ./tcpsubnet -J              # Format the output in JSON.
"""

# Used when no subnet list is given on the command line. The generated BPF
# code tests subnets in list order and stops at the first match, so the
# catch-all 0.0.0.0/0 entry is kept last.
default_subnets = ",".join([
    "127.0.0.1/32",
    "10.0.0.0/8",
    "172.16.0.0/12",
    "192.168.0.0/16",
    "0.0.0.0/0",
])

parser = argparse.ArgumentParser(
    description="Summarize TCP send and aggregate by subnet",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("subnets", nargs="?", type=str,
    default=default_subnets,
    help="comma separated list of subnets")
parser.add_argument("-v", "--verbose", action="store_true",
    help="output debug statements")
parser.add_argument("-J", "--json", action="store_true",
    help="format output in JSON")
# --ebpf is hidden from --help; it prints the generated BPF program and exits
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
parser.add_argument("-f", "--format", default="B",
    choices=["b", "k", "m", "B", "K", "M"],
    help="[bkmBKM] format to report: bits, Kbits, Mbits, bytes, " +
    "KBytes, MBytes (default B)")
parser.add_argument("-i", "--interval", default=1, type=int,
    help="output interval, in seconds (default 1)")
args = parser.parse_args()
| 70 | |
# -v/--verbose lowers the root logger threshold from INFO to DEBUG
level = logging.DEBUG if args.verbose else logging.INFO

logging.basicConfig(level=level)

logging.debug("Starting with the following args:")
logging.debug(args)

# args checking
# argparse already converted the interval to an int (type=int), so only its
# range has to be validated here.
if args.interval <= 0:
    logging.error("Invalid interval, must be > 0. Exiting.")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # site module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)
| 86 | |
# Converters from a raw byte count to each unit accepted by -f/--format.
# Lower-case keys report bits (bytes * 8), upper-case keys report bytes;
# k/K and m/M scale down by 1024 and 1024^2 respectively.
formats = {
    "b": lambda x: x * 8,
    "k": lambda x: x * 8 / 1024,
    "m": lambda x: x * 8 / 1024 ** 2,
    "B": lambda x: x,
    "K": lambda x: x / 1024,
    "M": lambda x: x / 1024 ** 2,
}

# Resolve the converter for the requested unit once, up front, so the
# output loop does not repeat the dictionary lookup on every interval.
formatFn = formats[args.format]
| 100 | |
# Skeleton of the BPF program attached to tcp_sendmsg. The __SUBNETS__
# placeholder is substituted later with one generated if-block per subnet;
# each of those blocks adds the size of the send to the matching counter.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

struct index_key_t {
  u32 index;
};

BPF_HASH(ipv4_send_bytes, struct index_key_t);

int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
    struct msghdr *msg, size_t size)
{
    u16 family = sk->__sk_common.skc_family;
    u64 *val, zero = 0;

    if (family == AF_INET) {
        u32 dst = sk->__sk_common.skc_daddr;
        unsigned categorized = 0;
        __SUBNETS__
    }
    return 0;
}
"""
| 127 | |
| 128 | |
| 129 | # Takes in a mask and returns the integer equivalent |
| 130 | # e.g. |
| 131 | # mask_to_int(8) returns 4278190080 |
def mask_to_int(n):
    """Return the 32-bit netmask for a prefix length n.

    The n most significant bits of the result are set and the remaining
    32 - n bits are clear, e.g. mask_to_int(8) == 0xFF000000 == 4278190080.
    """
    ones = (1 << n) - 1
    host_bits = 32 - n
    return ones << host_bits
| 134 | |
| 135 | # Takes in a list of subnets and returns a list |
| 136 | # of 3-element lists containing: |
| 137 | # - The subnet info at index 0 |
| 138 | # - The addr portion as an int at index 1 |
| 139 | # - The mask portion as an int at index 2 |
| 140 | # |
| 141 | # e.g. |
| 142 | # parse_subnets(['10.10.0.0/24']) returns |
| 143 | # [ |
| 144 | # ['10.10.0.0/24', 168427520, 4294967040], |
| 145 | # ] |
def parse_subnets(subnets):
    """Parse "address/prefix" strings into [text, address, netmask] entries.

    Args:
        subnets: iterable of strings of the form "a.b.c.d/len", where len
            is a prefix length between 0 and 32.

    Returns:
        A list holding one 3-element list per input, in input order: the
        original string, the address as an unsigned 32-bit integer (first
        octet most significant) and the netmask as an integer. For
        example, ["10.10.0.0/24"] yields
        [["10.10.0.0/24", 168427520, 4294967040]].

    Raises:
        ValueError: if an entry does not contain exactly one "/", if its
            address is rejected by socket.inet_aton, or if its prefix
            length is not an integer between 0 and 32.
    """
    parsed = []
    for s in subnets:
        parts = s.split("/")
        if len(parts) != 2:
            msg = "Subnet [%s] is invalid, please refer to the examples." % s
            raise ValueError(msg)
        addr_text, prefix_text = parts

        # Only catch what the conversions raise for bad input; a bare
        # except would also swallow KeyboardInterrupt and SystemExit.
        try:
            netaddr_int = struct.unpack("!I", socket.inet_aton(addr_text))[0]
        except (socket.error, ValueError):
            msg = ("Invalid net address in subnet [%s], " +
                "please refer to the examples.") % s
            raise ValueError(msg)

        try:
            prefix_len = int(prefix_text)
        except ValueError:
            msg = "Invalid mask in subnet [%s]. Mask must be an int" % s
            raise ValueError(msg)

        if prefix_len < 0 or prefix_len > 32:
            msg = ("Invalid mask in subnet [%s]. Must be an " +
                "int between 0 and 32.") % s
            raise ValueError(msg)

        parsed.append([s, netaddr_int, mask_to_int(prefix_len)])
    return parsed
| 173 | |
def generate_bpf_subnets(subnets):
    """Build the C if-blocks that replace __SUBNETS__ in the BPF program.

    subnets is a list of entries as returned by parse_subnets; only the
    address (index 1) and netmask (index 2) of each entry are read. One
    block is emitted per entry, in order, keyed by the entry's position in
    the list. Each block is guarded by !categorized, so at most one block
    accounts for a given send. Returns an empty string for an empty list.
    """
    template = """
        if (!categorized && (__NET_ADDR__ & __NET_MASK__) ==
            (dst & __NET_MASK__)) {
          struct index_key_t key = {.index = __POS__};
          val = ipv4_send_bytes.lookup_or_init(&key, &zero);
          categorized = 1;
          (*val) += size;
        }
    """

    def render(pos, entry):
        # Address and mask go through socket.htonl before being embedded,
        # presumably to match the byte order of skc_daddr -- confirm.
        return (template
                .replace("__NET_ADDR__", str(socket.htonl(entry[1])))
                .replace("__NET_MASK__", str(socket.htonl(entry[2])))
                .replace("__POS__", str(pos)))

    return ''.join(render(i, s) for i, s in enumerate(subnets))
| 192 | |
# Split the comma separated command-line value into individual subnets
subnets = []
if args.subnets:
    subnets = args.subnets.split(",")

try:
    subnets = parse_subnets(subnets)
except ValueError as e:
    # Report bad user input as a one-line error instead of a traceback;
    # the exit status stays 1, as it is for an uncaught exception.
    logging.error(e)
    raise SystemExit(1)

logging.debug("Packets are going to be categorized in the following subnets:")
logging.debug(subnets)

bpf_subnets = generate_bpf_subnets(subnets)

# initialize BPF
bpf_text = bpf_text.replace("__SUBNETS__", bpf_subnets)

logging.debug("Done preprocessing the BPF program, " +
        "this is what will actually get executed:")
logging.debug(bpf_text)

# --ebpf: dump the generated program and stop before loading it
if args.ebpf:
    print(bpf_text)
    # SystemExit rather than exit(): the latter is a site-module helper
    # meant for interactive sessions and may be absent.
    raise SystemExit(0)

b = BPF(text=bpf_text)

ipv4_send_bytes = b["ipv4_send_bytes"]

# The banner is suppressed in JSON mode so that stdout only carries JSON
if not args.json:
    print("Tracing... Output every %d secs. Hit Ctrl-C to end" % args.interval)
Rodrigo Manyari | e3b59b3 | 2018-03-02 19:19:14 -0500 | [diff] [blame] | 221 | |
# output
# Every interval: print the per-subnet totals accumulated by the BPF
# program, then reset the table so the next report starts from zero.
exiting = False
while True:

    try:
        sleep(args.interval)
    except KeyboardInterrupt:
        # still report the interval in progress, then exit further down
        exiting = True

    # to hold json data
    now = dt.now()
    data = {}
    data['date'] = now.strftime('%x')
    data['time'] = now.strftime('%X')
    data['entries'] = {}
    if not args.json:
        print(now.strftime('[%x %X]'))

    # Busiest subnets first. The counters are read once, so the value
    # shown for a subnet is the same value it was sorted by.
    by_bytes = sorted(ipv4_send_bytes.items(), key=lambda kv: kv[1].value)
    for k, v in reversed(by_bytes):
        # k.index is the position of the subnet in the parsed subnet list
        subnet = subnets[k.index][0]
        send = formatFn(int(v.value))
        if args.json:
            data['entries'][subnet] = send
        else:
            print("%-21s %6d" % (subnet, send))

    if args.json:
        print(json.dumps(data))

    ipv4_send_bytes.clear()

    if exiting:
        # SystemExit rather than exit(): the latter is a site-module
        # helper meant for interactive sessions and may be absent.
        raise SystemExit(0)