blob: 50d1f415bd34169bbe92ca178e85e31d6f75788a [file] [log] [blame]
#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# tcpretrans    Trace or count TCP retransmits and TLPs.
#               For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: tcpretrans [-c] [-h] [-l]
#
# This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Feb-2016   Brendan Gregg   Created this.
# 03-Nov-2017   Matthias Tafelmeier Extended this.
Brendan Gregg553f2aa2016-02-14 18:15:24 -080017
18from __future__ import print_function
19from bcc import BPF
20import argparse
21from time import strftime
Mark Drayton11de2982016-06-26 21:14:44 +010022from socket import inet_ntop, AF_INET, AF_INET6
23from struct import pack
Brendan Gregg553f2aa2016-02-14 18:15:24 -080024import ctypes as ct
Matthias Tafelmeier1e9467f2017-12-13 18:50:22 +010025from time import sleep
Brendan Gregg553f2aa2016-02-14 18:15:24 -080026
# arguments
# Epilog shown verbatim under --help (RawDescriptionHelpFormatter keeps the
# line breaks).
examples = """examples:
    ./tcpretrans           # trace TCP retransmits
    ./tcpretrans -l        # include TLP attempts
"""
parser = argparse.ArgumentParser(
    description="Trace TCP retransmits",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
# Both options are plain on/off switches.
for _short, _long, _help in (
        ("-l", "--lossprobe", "include tail loss probe attempts"),
        ("-c", "--count", "count occurred retransmits per flow")):
    parser.add_argument(_short, _long, action="store_true", help=_help)
args = parser.parse_args()
# Set to a true value to dump the generated BPF C source before loading it.
debug = 0
Brendan Gregg553f2aa2016-02-14 18:15:24 -080042
# define BPF program
#
# Embedded C that BCC compiles and loads. The four upper-case placeholders
# inside trace_event() (the *_INIT / *_CORE pairs for IPv4 and IPv6) are not
# C: they are replaced textually by this script before compilation, either
# with per-event tracing code or with per-flow counting code.
#
# trace_retransmit() and trace_tlp() are the kprobe entry points; both
# delegate to trace_event() with the matching event type.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

#define RETRANSMIT  1
#define TLP         2

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    // XXX: switch some to u32's when supported
    u64 pid;
    u64 ip;
    u64 saddr;
    u64 daddr;
    u64 lport;
    u64 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 pid;
    u64 ip;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u64 lport;
    u64 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv6_events);

// separate flow keys per address family
struct ipv4_flow_key_t {
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv4_count, struct ipv4_flow_key_t);

struct ipv6_flow_key_t {
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv6_count, struct ipv6_flow_key_t);

static int trace_event(struct pt_regs *ctx, struct sock *skp, int type)
{
    if (skp == NULL)
        return 0;
    // the helper returns tgid << 32 | thread id; the user-visible PID is
    // the tgid, so keep the upper half
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // pull in details
    u16 family = skp->__sk_common.skc_family;
    u16 lport = skp->__sk_common.skc_num;
    u16 dport = skp->__sk_common.skc_dport;
    char state = skp->__sk_common.skc_state;

    if (family == AF_INET) {
        IPV4_INIT
        IPV4_CORE
    } else if (family == AF_INET6) {
        IPV6_INIT
        IPV6_CORE
    }
    // else drop

    return 0;
}

int trace_retransmit(struct pt_regs *ctx, struct sock *sk)
{
    trace_event(ctx, sk, RETRANSMIT);
    return 0;
}

int trace_tlp(struct pt_regs *ctx, struct sock *sk)
{
    trace_event(ctx, sk, TLP);
    return 0;
}
"""
131
# C snippets that populate the per-family record inside trace_event().
# First key: address family ('ipv4' / 'ipv6').
# Second key: output mode -- 'count' fills a flow key named flow_key,
# 'trace' fills a full event record named data4 / data6.
# The snippets rely on the locals skp, pid, type, lport, dport and state
# declared in trace_event().
struct_init = {
    'ipv4': {
        'count': """
               struct ipv4_flow_key_t flow_key = {};
               flow_key.saddr = skp->__sk_common.skc_rcv_saddr;
               flow_key.daddr = skp->__sk_common.skc_daddr;
               // lport is host order
               flow_key.lport = lport;
               flow_key.dport = ntohs(dport);""",
        'trace': """
               struct ipv4_data_t data4 = {.pid = pid, .ip = 4, .type = type};
               data4.saddr = skp->__sk_common.skc_rcv_saddr;
               data4.daddr = skp->__sk_common.skc_daddr;
               // lport is host order
               data4.lport = lport;
               data4.dport = ntohs(dport);
               data4.state = state; """,
    },
    'ipv6': {
        'count': """
               struct ipv6_flow_key_t flow_key = {};
               bpf_probe_read(&flow_key.saddr, sizeof(flow_key.saddr),
                   skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
               bpf_probe_read(&flow_key.daddr, sizeof(flow_key.daddr),
                   skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
               // lport is host order
               flow_key.lport = lport;
               flow_key.dport = ntohs(dport);""",
        'trace': """
               struct ipv6_data_t data6 = {.pid = pid, .ip = 6, .type = type};
               bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
                   skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
               bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
                   skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
               // lport is host order
               data6.lport = lport;
               data6.dport = ntohs(dport);
               data6.state = state;""",
    },
}
174
# C snippet used in count mode: bump the per-flow counter for flow_key.
# COUNT_STRUCT is a placeholder for the BPF hash name (ipv4_count or
# ipv6_count) and is substituted before the program is compiled.
# increment() creates the entry on first use, so no explicit
# lookup-or-initialise step (and no pointer dereference) is needed here.
count_core_base = """
        COUNT_STRUCT.increment(flow_key);
"""
180
# Specialise the BPF program for the selected mode by filling in the
# placeholders of trace_event(): -c installs flow-key setup plus a counter
# update, the default installs event-record setup plus a perf_submit().
if args.count:
    placeholder_code = [
        ("IPV4_INIT", struct_init['ipv4']['count']),
        ("IPV6_INIT", struct_init['ipv6']['count']),
        ("IPV4_CORE", count_core_base.replace("COUNT_STRUCT", 'ipv4_count')),
        ("IPV6_CORE", count_core_base.replace("COUNT_STRUCT", 'ipv6_count')),
    ]
else:
    placeholder_code = [
        ("IPV4_INIT", struct_init['ipv4']['trace']),
        ("IPV6_INIT", struct_init['ipv6']['trace']),
        ("IPV4_CORE", "ipv4_events.perf_submit(ctx, &data4, sizeof(data4));"),
        ("IPV6_CORE", "ipv6_events.perf_submit(ctx, &data6, sizeof(data6));"),
    ]
for placeholder, c_code in placeholder_code:
    bpf_text = bpf_text.replace(placeholder, c_code)

# Optionally dump the final C source that will be handed to BCC.
if debug:
    print(bpf_text)
194
# event data
class Data_ipv4(ct.Structure):
    """ctypes view of one IPv4 event record.

    Field order and widths mirror struct ipv4_data_t in the BPF program
    (eight consecutive unsigned 64-bit values).
    """
    _fields_ = [
        ("pid", ct.c_ulonglong),    # process id captured by the BPF program
        ("ip", ct.c_ulonglong),     # IP version of the record
        ("saddr", ct.c_ulonglong),  # local address (raw integer)
        ("daddr", ct.c_ulonglong),  # remote address (raw integer)
        ("lport", ct.c_ulonglong),  # local port
        ("dport", ct.c_ulonglong),  # remote port
        ("state", ct.c_ulonglong),  # TCP state code
        ("type", ct.c_ulonglong),   # event type code
    ]
Mark Drayton11de2982016-06-26 21:14:44 +0100207
class Data_ipv6(ct.Structure):
    """ctypes view of one IPv6 event record.

    Field order mirrors struct ipv6_data_t in the BPF program; each
    128-bit address is exposed as an array of two unsigned 64-bit values.
    """
    _fields_ = [
        ("pid", ct.c_ulonglong),          # process id captured by the BPF program
        ("ip", ct.c_ulonglong),           # IP version of the record
        ("saddr", (ct.c_ulonglong * 2)),  # local address, 16 raw bytes
        ("daddr", (ct.c_ulonglong * 2)),  # remote address, 16 raw bytes
        ("lport", ct.c_ulonglong),        # local port
        ("dport", ct.c_ulonglong),        # remote port
        ("state", ct.c_ulonglong),        # TCP state code
        ("type", ct.c_ulonglong),         # event type code
    ]
219
# from bpf_text:
# Event type code (RETRANSMIT = 1, TLP = 2) -> single-letter tag printed in
# the "T" column. NOTE: this name shadows the builtin type(); it is kept
# because the event printers look it up under this name.
type = {1: 'R', 2: 'L'}
224
# from include/net/tcp_states.h:
# Numeric TCP state -> printable name. The kernel numbering starts at 1, so
# the names are listed in order and numbered from 1.
tcpstate = dict(enumerate((
    'ESTABLISHED',
    'SYN_SENT',
    'SYN_RECV',
    'FIN_WAIT1',
    'FIN_WAIT2',
    'TIME_WAIT',
    'CLOSE',
    'CLOSE_WAIT',
    'LAST_ACK',
    'LISTEN',
    'CLOSING',
    'NEW_SYN_RECV',
), 1))
239
# process event
def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback: print one IPv4 retransmit/TLP event line.

    cpu and size are part of the callback signature and are not used;
    data is the raw record pointer, interpreted as a Data_ipv4.

    Type or TCP state codes missing from the lookup tables are printed as
    their numeric value instead of raising KeyError.
    """
    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    # pack('I', ...) turns the stored integer back into the 4 raw address
    # bytes that inet_ntop() expects.
    laddr = inet_ntop(AF_INET, pack('I', event.saddr))
    raddr = inet_ntop(AF_INET, pack('I', event.daddr))
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
        strftime("%H:%M:%S"), event.pid, event.ip,
        "%s:%d" % (laddr, event.lport),
        type.get(event.type, str(event.type)),
        "%s:%d" % (raddr, event.dport),
        tcpstate.get(event.state, str(event.state))))
Mark Drayton11de2982016-06-26 21:14:44 +0100249
def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: print one IPv6 retransmit/TLP event line.

    cpu and size are part of the callback signature and are not used;
    data is the raw record pointer, interpreted as a Data_ipv6.

    Type or TCP state codes missing from the lookup tables are printed as
    their numeric value instead of raising KeyError.
    """
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    # The address fields are 16-byte ctypes arrays and are handed to
    # inet_ntop() as-is.
    laddr = inet_ntop(AF_INET6, event.saddr)
    raddr = inet_ntop(AF_INET6, event.daddr)
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
        strftime("%H:%M:%S"), event.pid, event.ip,
        "%s:%d" % (laddr, event.lport),
        type.get(event.type, str(event.type)),
        "%s:%d" % (raddr, event.dport),
        tcpstate.get(event.state, str(event.state))))
258
def depict_cnt(counts_tab, l3prot='ipv4'):
    """Print one line per flow with its retransmit count, lowest count first.

    counts_tab -- table whose items() yields (key, counter) pairs; each key
                  provides saddr, daddr, lport and dport, each counter
                  provides .value.
    l3prot     -- 'ipv4' formats addresses as IPv4 (integer keys are packed
                  to 4 bytes first); any other value formats them as IPv6.
    """
    is_v4 = l3prot == 'ipv4'
    family = AF_INET if is_v4 else AF_INET6

    def endpoint(addr, port):
        # IPv4 keys hold the address as an integer; IPv6 keys are passed on
        # unchanged.
        raw = pack('I', addr) if is_v4 else addr
        return "[%s]#%d" % (inet_ntop(family, raw), port)

    ranked = sorted(counts_tab.items(), key=lambda entry: entry[1].value)
    for flow, counter in ranked:
        flow_text = "%-20s <-> %-20s" % (
            endpoint(flow.saddr, flow.lport),
            endpoint(flow.daddr, flow.dport))
        print("%s %10d" % (flow_text, counter.value))
271
# initialize BPF
# Compile/load the program, then hook the kernel functions. The retransmit
# probe is always attached; the tail-loss-probe hook only with -l.
b = BPF(text=bpf_text)
probes = [("tcp_retransmit_skb", "trace_retransmit")]
if args.lossprobe:
    probes.append(("tcp_send_loss_probe", "trace_tlp"))
for kernel_fn, handler in probes:
    b.attach_kprobe(event=kernel_fn, fn_name=handler)
Brendan Gregg553f2aa2016-02-14 18:15:24 -0800277
print("Tracing retransmits ... Hit Ctrl-C to end")
if args.count:
    # Counting happens inside the BPF maps; just idle until the user
    # interrupts. Only Ctrl-C ends the wait -- other exceptions (and
    # SystemExit) are no longer swallowed.
    try:
        while 1:
            sleep(99999999)
    except KeyboardInterrupt:
        pass

    # header
    print("\n%-25s %-25s %-10s" % (
        "LADDR:LPORT", "RADDR:RPORT", "RETRANSMITS"))
    depict_cnt(b.get_table("ipv4_count"))
    depict_cnt(b.get_table("ipv6_count"), l3prot='ipv6')
# read events
else:
    # header
    print("%-8s %-6s %-2s %-20s %1s> %-20s %-4s" % ("TIME", "PID", "IP",
        "LADDR:LPORT", "T", "RADDR:RPORT", "STATE"))
    b["ipv4_events"].open_perf_buffer(print_ipv4_event)
    b["ipv6_events"].open_perf_buffer(print_ipv6_event)
    # Poll until Ctrl-C, then fall off the end of the script quietly
    # instead of dumping a traceback.
    try:
        while 1:
            b.kprobe_poll()
    except KeyboardInterrupt:
        pass