blob: 7785d9b343f280cb144a5ba2f192c38cc9636531 [file] [log] [blame]
#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# tcpretrans    Trace or count TCP retransmits and TLPs.
#               For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: tcpretrans [-c] [-h] [-l]
#
# This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Feb-2016   Brendan Gregg   Created this.
# 03-Nov-2017   Matthias Tafelmeier Extended this.

18from __future__ import print_function
19from bcc import BPF
20import argparse
21from time import strftime
Mark Drayton11de2982016-06-26 21:14:44 +010022from socket import inet_ntop, AF_INET, AF_INET6
23from struct import pack
Matthias Tafelmeier1e9467f2017-12-13 18:50:22 +010024from time import sleep
Brendan Gregg553f2aa2016-02-14 18:15:24 -080025
# arguments
# The epilog is printed as-is by --help because the parser uses
# RawDescriptionHelpFormatter.
examples = """examples:
    ./tcpretrans           # trace TCP retransmits
    ./tcpretrans -l        # include TLP attempts
"""
parser = argparse.ArgumentParser(
    description="Trace TCP retransmits",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
# Every option is a plain on/off switch; --ebpf carries no help text
# (argparse.SUPPRESS) and only dumps the generated program before exiting.
for _flags, _help in (
        (("-l", "--lossprobe"), "include tail loss probe attempts"),
        (("-c", "--count"), "count occurred retransmits per flow"),
        (("--ebpf",), argparse.SUPPRESS)):
    parser.add_argument(*_flags, action="store_true", help=_help)
args = parser.parse_args()
# Set to a non-zero value to print the generated BPF program before loading.
debug = 0
Brendan Gregg553f2aa2016-02-14 18:15:24 -080043
# define BPF program
# Shared C prelude. It declares the per-event records pushed through the
# ipv4_events / ipv6_events perf outputs and the per-flow keys of the
# ipv4_count / ipv6_count hash maps that the counting mode increments.
# Probe code is appended to this text further down, depending on the
# command-line options and on whether the tcp:tcp_retransmit_skb tracepoint
# exists.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

#define RETRANSMIT 1
#define TLP 2

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u32 pid;
    u64 ip;
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u32 pid;
    u64 ip;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv6_events);

// separate flow keys per address family
struct ipv4_flow_key_t {
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv4_count, struct ipv4_flow_key_t);

struct ipv6_flow_key_t {
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv6_count, struct ipv6_flow_key_t);
"""
Matthias Tafelmeier1e9467f2017-12-13 18:50:22 +010095
# kprobe-based probes. trace_event() is the shared helper: it reads the
# address family, ports and state from the socket, then runs whatever is
# substituted for the IPV4_INIT/IPV4_CORE or IPV6_INIT/IPV6_CORE
# placeholders. Sockets of any other family are ignored.
bpf_text_kprobe = """
static int trace_event(struct pt_regs *ctx, struct sock *skp, int type)
{
    if (skp == NULL)
        return 0;
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // pull in details
    u16 family = skp->__sk_common.skc_family;
    u16 lport = skp->__sk_common.skc_num;
    u16 dport = skp->__sk_common.skc_dport;
    char state = skp->__sk_common.skc_state;

    if (family == AF_INET) {
        IPV4_INIT
        IPV4_CORE
    } else if (family == AF_INET6) {
        IPV6_INIT
        IPV6_CORE
    }
    // else drop

    return 0;
}
"""

# Entry point that reports an event of type RETRANSMIT through trace_event().
bpf_text_kprobe_retransmit = """
int trace_retransmit(struct pt_regs *ctx, struct sock *sk)
{
    trace_event(ctx, sk, RETRANSMIT);
    return 0;
}
"""

# Entry point that reports an event of type TLP through trace_event().
bpf_text_kprobe_tlp = """
int trace_tlp(struct pt_regs *ctx, struct sock *sk)
{
    trace_event(ctx, sk, TLP);
    return 0;
}
"""
137
# Tracepoint-based retransmit probe. Ports are taken from the tracepoint
# arguments, while state and address family are read from the socket that
# args->skaddr points to. IPV4_CODE / IPV6_CODE are placeholders that get
# replaced with the per-family counting or tracing snippet.
bpf_text_tracepoint = """
TRACEPOINT_PROBE(tcp, tcp_retransmit_skb)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    const struct sock *skp = (const struct sock *)args->skaddr;
    u16 lport = args->sport;
    u16 dport = args->dport;
    char state = skp->__sk_common.skc_state;
    u16 family = skp->__sk_common.skc_family;

    if (family == AF_INET) {
        IPV4_CODE
    } else if (family == AF_INET6) {
        IPV6_CODE
    }
    return 0;
}
"""
156
# C snippets for the kprobe path, indexed by address family and then by
# output mode ('count' fills a flow key, 'trace' fills an event record).
# They rely on the skp, pid, type, lport, dport and state variables of
# trace_event(). dport is passed through ntohs() here; lport is used as-is.
_kprobe_ipv4_count_init = """
        struct ipv4_flow_key_t flow_key = {};
        flow_key.saddr = skp->__sk_common.skc_rcv_saddr;
        flow_key.daddr = skp->__sk_common.skc_daddr;
        // lport is host order
        flow_key.lport = lport;
        flow_key.dport = ntohs(dport);"""

_kprobe_ipv4_trace_init = """
        struct ipv4_data_t data4 = {};
        data4.pid = pid;
        data4.ip = 4;
        data4.type = type;
        data4.saddr = skp->__sk_common.skc_rcv_saddr;
        data4.daddr = skp->__sk_common.skc_daddr;
        // lport is host order
        data4.lport = lport;
        data4.dport = ntohs(dport);
        data4.state = state; """

_kprobe_ipv6_count_init = """
        struct ipv6_flow_key_t flow_key = {};
        bpf_probe_read_kernel(&flow_key.saddr, sizeof(flow_key.saddr),
            skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read_kernel(&flow_key.daddr, sizeof(flow_key.daddr),
            skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        // lport is host order
        flow_key.lport = lport;
        flow_key.dport = ntohs(dport);"""

_kprobe_ipv6_trace_init = """
        struct ipv6_data_t data6 = {};
        data6.pid = pid;
        data6.ip = 6;
        data6.type = type;
        bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr),
            skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
            skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        // lport is host order
        data6.lport = lport;
        data6.dport = ntohs(dport);
        data6.state = state;"""

struct_init = {
    'ipv4': {
        'count': _kprobe_ipv4_count_init,
        'trace': _kprobe_ipv4_trace_init,
    },
    'ipv6': {
        'count': _kprobe_ipv6_count_init,
        'trace': _kprobe_ipv6_trace_init,
    },
}
205
# C snippets for the tracepoint path, indexed by address family and then by
# output mode. Unlike the kprobe snippets these are complete: the 'count'
# variants increment the flow map themselves and the 'trace' variants submit
# the event themselves. Addresses are copied from the tracepoint arguments
# and lport/dport are stored without conversion.
_tracepoint_ipv4_count = """
        struct ipv4_flow_key_t flow_key = {};
        __builtin_memcpy(&flow_key.saddr, args->saddr, sizeof(flow_key.saddr));
        __builtin_memcpy(&flow_key.daddr, args->daddr, sizeof(flow_key.daddr));
        flow_key.lport = lport;
        flow_key.dport = dport;
        ipv4_count.increment(flow_key);
        """

_tracepoint_ipv4_trace = """
        struct ipv4_data_t data4 = {};
        data4.pid = pid;
        data4.lport = lport;
        data4.dport = dport;
        data4.type = RETRANSMIT;
        data4.ip = 4;
        data4.state = state;
        __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
        __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
        ipv4_events.perf_submit(args, &data4, sizeof(data4));
        """

_tracepoint_ipv6_count = """
        struct ipv6_flow_key_t flow_key = {};
        __builtin_memcpy(&flow_key.saddr, args->saddr_v6, sizeof(flow_key.saddr));
        __builtin_memcpy(&flow_key.daddr, args->daddr_v6, sizeof(flow_key.daddr));
        flow_key.lport = lport;
        flow_key.dport = dport;
        ipv6_count.increment(flow_key);
        """

_tracepoint_ipv6_trace = """
        struct ipv6_data_t data6 = {};
        data6.pid = pid;
        data6.lport = lport;
        data6.dport = dport;
        data6.type = RETRANSMIT;
        data6.ip = 6;
        data6.state = state;
        __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
        __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
        ipv6_events.perf_submit(args, &data6, sizeof(data6));
        """

struct_init_tracepoint = {
    'ipv4': {
        'count': _tracepoint_ipv4_count,
        'trace': _tracepoint_ipv4_trace,
    },
    'ipv6': {
        'count': _tracepoint_ipv6_count,
        'trace': _tracepoint_ipv6_trace,
    },
}
251
# Counting statement for the kprobe path. COUNT_STRUCT is a placeholder that
# is replaced with the name of the per-family hash map (ipv4_count or
# ipv6_count) before the text is substituted for IPV4_CORE / IPV6_CORE.
count_core_base = """
        COUNT_STRUCT.increment(flow_key);
"""
Matthias Tafelmeier1e9467f2017-12-13 18:50:22 +0100255
# Assemble the final BPF program from the pieces defined above.
_have_tracepoint = BPF.tracepoint_exists("tcp", "tcp_retransmit_skb")
# The same key selects the snippet variant in both struct tables.
_mode = 'count' if args.count else 'trace'

# Retransmits are taken from the tracepoint whenever it exists.
if _have_tracepoint:
    for _marker, _family in (("IPV4_CODE", 'ipv4'), ("IPV6_CODE", 'ipv6')):
        bpf_text_tracepoint = bpf_text_tracepoint.replace(
            _marker, struct_init_tracepoint[_family][_mode])
    bpf_text += bpf_text_tracepoint

# The kprobe helper is needed for TLP tracing (-l) and as the retransmit
# fallback when the tracepoint does not exist.
if args.lossprobe or not _have_tracepoint:
    bpf_text += bpf_text_kprobe
    if args.count:
        _core4 = count_core_base.replace("COUNT_STRUCT", 'ipv4_count')
        _core6 = count_core_base.replace("COUNT_STRUCT", 'ipv6_count')
    else:
        _core4 = "ipv4_events.perf_submit(ctx, &data4, sizeof(data4));"
        _core6 = "ipv6_events.perf_submit(ctx, &data6, sizeof(data6));"
    # Placeholders are expanded in this fixed order over the whole text.
    for _marker, _code in (
            ("IPV4_INIT", struct_init['ipv4'][_mode]),
            ("IPV6_INIT", struct_init['ipv6'][_mode]),
            ("IPV4_CORE", _core4),
            ("IPV6_CORE", _core6)):
        bpf_text = bpf_text.replace(_marker, _code)
    if args.lossprobe:
        bpf_text += bpf_text_kprobe_tlp
    if not _have_tracepoint:
        bpf_text += bpf_text_kprobe_retransmit

# Optionally dump the generated program; --ebpf stops right after the dump.
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()
Brendan Gregg73b54012017-12-18 20:37:11 -0800286
# from bpf_text:
# event type code (RETRANSMIT = 1, TLP = 2) -> one-letter column value
type = {
    1: 'R',
    2: 'L',
}

# from include/net/tcp_states.h:
# numeric socket state -> printable name
tcpstate = {
    1: 'ESTABLISHED',
    2: 'SYN_SENT',
    3: 'SYN_RECV',
    4: 'FIN_WAIT1',
    5: 'FIN_WAIT2',
    6: 'TIME_WAIT',
    7: 'CLOSE',
    8: 'CLOSE_WAIT',
    9: 'LAST_ACK',
    10: 'LISTEN',
    11: 'CLOSING',
    12: 'NEW_SYN_RECV',
}
306
# process event
def print_ipv4_event(cpu, data, size):
    """Perf-buffer callback: print one line for an IPv4 event.

    cpu and size are accepted for the callback signature but not used;
    data is decoded through the "ipv4_events" table of the loaded program.
    Raises KeyError if the event type or state is missing from the
    type / tcpstate lookup tables.
    """
    ev = b["ipv4_events"].event(data)
    stamp = strftime("%H:%M:%S")
    # Addresses are stored as 32-bit integers; repack them for inet_ntop.
    local = "%s:%d" % (inet_ntop(AF_INET, pack('I', ev.saddr)), ev.lport)
    kind = type[ev.type]
    remote = "%s:%s" % (inet_ntop(AF_INET, pack('I', ev.daddr)), ev.dport)
    state = tcpstate[ev.state]
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
        stamp, ev.pid, ev.ip, local, kind, remote, state))
Mark Drayton11de2982016-06-26 21:14:44 +0100316
def print_ipv6_event(cpu, data, size):
    """Perf-buffer callback: print one line for an IPv6 event.

    cpu and size are accepted for the callback signature but not used;
    data is decoded through the "ipv6_events" table of the loaded program.
    Raises KeyError if the event type or state is missing from the
    type / tcpstate lookup tables.
    """
    ev = b["ipv6_events"].event(data)
    stamp = strftime("%H:%M:%S")
    # The address fields are handed to inet_ntop without repacking.
    local = "%s:%d" % (inet_ntop(AF_INET6, ev.saddr), ev.lport)
    kind = type[ev.type]
    remote = "%s:%d" % (inet_ntop(AF_INET6, ev.daddr), ev.dport)
    state = tcpstate[ev.state]
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
        stamp, ev.pid, ev.ip, local, kind, remote, state))
325
def depict_cnt(counts_tab, l3prot='ipv4'):
    """Print one line per flow of counts_tab, lowest count first.

    counts_tab must provide items() yielding (key, value) pairs where the
    key exposes saddr, daddr, lport and dport and the value exposes .value.
    With l3prot == 'ipv4' the addresses are packed as 32-bit integers and
    formatted as AF_INET; any other l3prot formats them as AF_INET6.
    """
    def addr_text(raw):
        # Render one address according to the selected protocol family.
        if l3prot == 'ipv4':
            return inet_ntop(AF_INET, pack('I', raw))
        return inet_ntop(AF_INET6, raw)

    entries = list(counts_tab.items())
    entries.sort(key=lambda entry: entry[1].value)
    for flow, counter in entries:
        local = "[%s]#%d" % (addr_text(flow.saddr), flow.lport)
        remote = "[%s]#%d" % (addr_text(flow.daddr), flow.dport)
        flow_text = "%-20s <-> %-20s" % (local, remote)
        print("%s %10d" % (flow_text, counter.value))
338
# initialize BPF
b = BPF(text=bpf_text)
# The retransmit kprobe is only attached when the tracepoint is unavailable.
if not BPF.tracepoint_exists("tcp", "tcp_retransmit_skb"):
    b.attach_kprobe(event="tcp_retransmit_skb", fn_name="trace_retransmit")
if args.lossprobe:
    b.attach_kprobe(event="tcp_send_loss_probe", fn_name="trace_tlp")

print("Tracing retransmits ... Hit Ctrl-C to end")
if args.count:
    # Counting happens in the BPF program; just wait here until Ctrl-C.
    # Only KeyboardInterrupt ends the wait, so that any other error is
    # reported instead of being silently turned into a summary.
    try:
        while 1:
            sleep(99999999)
    except KeyboardInterrupt:
        pass

    # header
    print("\n%-25s %-25s %-10s" % (
        "LADDR:LPORT", "RADDR:RPORT", "RETRANSMITS"))
    depict_cnt(b.get_table("ipv4_count"))
    depict_cnt(b.get_table("ipv6_count"), l3prot='ipv6')
else:
    # read events
    # header
    print("%-8s %-6s %-2s %-20s %1s> %-20s %-4s" % ("TIME", "PID", "IP",
        "LADDR:LPORT", "T", "RADDR:RPORT", "STATE"))
    b["ipv4_events"].open_perf_buffer(print_ipv4_event)
    b["ipv6_events"].open_perf_buffer(print_ipv6_event)
    # Poll until interrupted; each event is printed by its callback.
    while 1:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            exit()