#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# tcpretrans    Trace or count TCP retransmits and TLPs.
#               For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: tcpretrans [-c] [-h] [-l]
#
# This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Feb-2016   Brendan Gregg       Created this.
# 03-Nov-2017   Matthias Tafelmeier Extended this.
Brendan Gregg553f2aa2016-02-14 18:15:24 -080017
18from __future__ import print_function
19from bcc import BPF
20import argparse
21from time import strftime
Mark Drayton11de2982016-06-26 21:14:44 +010022from socket import inet_ntop, AF_INET, AF_INET6
23from struct import pack
Brendan Gregg553f2aa2016-02-14 18:15:24 -080024import ctypes as ct
Matthias Tafelmeier1e9467f2017-12-13 18:50:22 +010025from time import sleep
Brendan Gregg553f2aa2016-02-14 18:15:24 -080026
# arguments
# The epilog lists one example per user-visible mode so that --help matches
# the USAGE line in the file header.
examples = """examples:
    ./tcpretrans           # trace TCP retransmits
    ./tcpretrans -l        # include TLP attempts
    ./tcpretrans -c        # count retransmits per flow
"""
parser = argparse.ArgumentParser(
    description="Trace TCP retransmits",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-l", "--lossprobe", action="store_true",
    help="include tail loss probe attempts")
parser.add_argument("-c", "--count", action="store_true",
    help="count occurred retransmits per flow")
# --ebpf is hidden from --help (help=argparse.SUPPRESS)
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
# set to a true value to print the generated BPF program before loading it
debug = 0
Brendan Gregg553f2aa2016-02-14 18:15:24 -080044
# define BPF program
#
# The IPV4_INIT / IPV4_CORE / IPV6_INIT / IPV6_CORE tokens inside
# trace_event() are placeholders: they are textually replaced with C snippets
# before the program is compiled, so they must not appear anywhere else in
# this string.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

#define RETRANSMIT  1
#define TLP         2

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    // XXX: switch some to u32's when supported
    u32 pid;
    u64 ip;
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u32 pid;
    u64 ip;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv6_events);

// separate flow keys per address family
struct ipv4_flow_key_t {
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv4_count, struct ipv4_flow_key_t);

struct ipv6_flow_key_t {
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv6_count, struct ipv6_flow_key_t);

static int trace_event(struct pt_regs *ctx, struct sock *skp, int type)
{
    if (skp == NULL)
        return 0;
    // bpf_get_current_pid_tgid() returns tgid << 32 | pid; the upper half
    // (tgid) is what user space calls the PID, the lower half is the
    // thread id.
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // pull in details
    u16 family = skp->__sk_common.skc_family;
    u16 lport = skp->__sk_common.skc_num;
    u16 dport = skp->__sk_common.skc_dport;
    char state = skp->__sk_common.skc_state;

    if (family == AF_INET) {
        IPV4_INIT
        IPV4_CORE
    } else if (family == AF_INET6) {
        IPV6_INIT
        IPV6_CORE
    }
    // else drop

    return 0;
}

int trace_retransmit(struct pt_regs *ctx, struct sock *sk)
{
    trace_event(ctx, sk, RETRANSMIT);
    return 0;
}

int trace_tlp(struct pt_regs *ctx, struct sock *sk)
{
    trace_event(ctx, sk, TLP);
    return 0;
}
"""
133
# C snippets substituted for the IPV4_INIT / IPV6_INIT placeholders of the
# BPF program.  For each address family:
#   'count' declares and fills `flow_key`, the key of the per-flow hash;
#   'trace' declares and fills `data4` / `data6`, the per-event record.
# The snippets rely on the locals skp, pid, type, lport, dport and state
# declared in trace_event().
struct_init = {
    'ipv4': {
        'count': """
        struct ipv4_flow_key_t flow_key = {};
        flow_key.saddr = skp->__sk_common.skc_rcv_saddr;
        flow_key.daddr = skp->__sk_common.skc_daddr;
        // lport is host order
        flow_key.lport = lport;
        flow_key.dport = ntohs(dport);""",
        'trace': """
        struct ipv4_data_t data4 = {};
        data4.pid = pid;
        data4.ip = 4;
        data4.type = type;
        data4.saddr = skp->__sk_common.skc_rcv_saddr;
        data4.daddr = skp->__sk_common.skc_daddr;
        // lport is host order
        data4.lport = lport;
        data4.dport = ntohs(dport);
        data4.state = state; """
    },
    'ipv6': {
        'count': """
        struct ipv6_flow_key_t flow_key = {};
        bpf_probe_read(&flow_key.saddr, sizeof(flow_key.saddr),
            skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read(&flow_key.daddr, sizeof(flow_key.daddr),
            skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        // lport is host order
        flow_key.lport = lport;
        flow_key.dport = ntohs(dport);""",
        'trace': """
        struct ipv6_data_t data6 = {.pid = pid, .ip = 6, .type = type};
        bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
            skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
            skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        // lport is host order
        data6.lport = lport;
        data6.dport = ntohs(dport);
        data6.state = state;"""
    }
}
179
# Template for the *_CORE placeholder in count mode; COUNT_STRUCT is replaced
# with the name of the per-family BPF hash.
count_core_base = """
        COUNT_STRUCT.increment(flow_key);
"""

# Specialise the BPF program: count mode bumps a per-flow counter, trace mode
# submits one perf event per retransmit.
if args.count:
    bpf_text = bpf_text.replace("IPV4_INIT", struct_init['ipv4']['count'])
    bpf_text = bpf_text.replace("IPV6_INIT", struct_init['ipv6']['count'])
    bpf_text = bpf_text.replace("IPV4_CORE",
        count_core_base.replace("COUNT_STRUCT", 'ipv4_count'))
    bpf_text = bpf_text.replace("IPV6_CORE",
        count_core_base.replace("COUNT_STRUCT", 'ipv6_count'))
else:
    bpf_text = bpf_text.replace("IPV4_INIT", struct_init['ipv4']['trace'])
    bpf_text = bpf_text.replace("IPV6_INIT", struct_init['ipv6']['trace'])
    bpf_text = bpf_text.replace("IPV4_CORE",
        "ipv4_events.perf_submit(ctx, &data4, sizeof(data4));")
    bpf_text = bpf_text.replace("IPV6_CORE",
        "ipv6_events.perf_submit(ctx, &data6, sizeof(data6));")

# Dump the final program text; with --ebpf, stop before loading it.
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()
Brendan Gregg73b54012017-12-18 20:37:11 -0800199
# event data
class Data_ipv4(ct.Structure):
    """ctypes mirror of struct ipv4_data_t in the BPF program.

    Perf buffer records are cast directly to this structure, so the field
    order and widths must stay in step with the C definition.
    """
    _fields_ = [
        ("pid", ct.c_uint),
        ("ip", ct.c_ulonglong),
        ("saddr", ct.c_uint),
        ("daddr", ct.c_uint),
        ("lport", ct.c_ushort),
        ("dport", ct.c_ushort),
        ("state", ct.c_ulonglong),
        ("type", ct.c_ulonglong)
    ]
Mark Drayton11de2982016-06-26 21:14:44 +0100212
class Data_ipv6(ct.Structure):
    """ctypes mirror of struct ipv6_data_t in the BPF program.

    Each 128-bit address is exposed as an array of two 64-bit words; the
    field order and widths must stay in step with the C definition.
    """
    _fields_ = [
        ("pid", ct.c_uint),
        ("ip", ct.c_ulonglong),
        ("saddr", (ct.c_ulonglong * 2)),
        ("daddr", (ct.c_ulonglong * 2)),
        ("lport", ct.c_ushort),
        ("dport", ct.c_ushort),
        ("state", ct.c_ulonglong),
        ("type", ct.c_ulonglong)
    ]
224
# from bpf_text:
# event type (RETRANSMIT = 1, TLP = 2) -> one-letter code for the "T" column.
# NOTE: this name shadows the builtin type(); it is kept because the event
# printers look it up by this name.
type = {
    1: 'R',
    2: 'L',
}

# from include/net/tcp_states.h:
tcpstate = {
    1: 'ESTABLISHED',
    2: 'SYN_SENT',
    3: 'SYN_RECV',
    4: 'FIN_WAIT1',
    5: 'FIN_WAIT2',
    6: 'TIME_WAIT',
    7: 'CLOSE',
    8: 'CLOSE_WAIT',
    9: 'LAST_ACK',
    10: 'LISTEN',
    11: 'CLOSING',
    12: 'NEW_SYN_RECV',
}
244
# process event
def print_ipv4_event(cpu, data, size):
    """Print one IPv4 event delivered through the ipv4_events perf buffer.

    cpu, size: passed in by the perf buffer callback; not used here.
    data: pointer to the raw record, interpreted as a Data_ipv4 structure.

    Unknown type or state codes are printed as '?' / the raw number instead
    of raising KeyError, so a new kernel state cannot drop the event line.
    """
    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    laddr = inet_ntop(AF_INET, pack('I', event.saddr))
    raddr = inet_ntop(AF_INET, pack('I', event.daddr))
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
        strftime("%H:%M:%S"), event.pid, event.ip,
        "%s:%d" % (laddr, event.lport),
        type.get(event.type, '?'),
        "%s:%d" % (raddr, event.dport),
        tcpstate.get(event.state, str(event.state))))
Mark Drayton11de2982016-06-26 21:14:44 +0100254
def print_ipv6_event(cpu, data, size):
    """Print one IPv6 event delivered through the ipv6_events perf buffer.

    cpu, size: passed in by the perf buffer callback; not used here.
    data: pointer to the raw record, interpreted as a Data_ipv6 structure.

    Unknown type or state codes are printed as '?' / the raw number instead
    of raising KeyError, so a new kernel state cannot drop the event line.
    """
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    # the 16-byte address arrays are handed to inet_ntop as-is
    laddr = inet_ntop(AF_INET6, event.saddr)
    raddr = inet_ntop(AF_INET6, event.daddr)
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
        strftime("%H:%M:%S"), event.pid, event.ip,
        "%s:%d" % (laddr, event.lport),
        type.get(event.type, '?'),
        "%s:%d" % (raddr, event.dport),
        tcpstate.get(event.state, str(event.state))))
263
def depict_cnt(counts_tab, l3prot='ipv4'):
    """Print one line per flow with its retransmit count, lowest count first.

    counts_tab: mapping whose keys expose saddr, daddr, lport and dport
        attributes and whose values expose the count as .value.
    l3prot: 'ipv4' packs each address as a 32-bit integer before formatting;
        any other value passes the address to inet_ntop as an IPv6 buffer.
    """
    ep_fmt = "[%s]#%d"

    # The address family is fixed for the whole table, so choose the
    # formatter once instead of on every row.
    if l3prot == 'ipv4':
        def addr_str(addr):
            return inet_ntop(AF_INET, pack('I', addr))
    else:
        def addr_str(addr):
            return inet_ntop(AF_INET6, addr)

    for k, v in sorted(counts_tab.items(), key=lambda kv: kv[1].value):
        depict_key = "%-20s <-> %-20s" % (
            ep_fmt % (addr_str(k.saddr), k.lport),
            ep_fmt % (addr_str(k.daddr), k.dport))
        print("%s %10d" % (depict_key, v.value))
276
# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="tcp_retransmit_skb", fn_name="trace_retransmit")
if args.lossprobe:
    b.attach_kprobe(event="tcp_send_loss_probe", fn_name="trace_tlp")

print("Tracing retransmits ... Hit Ctrl-C to end")
if args.count:
    # Nothing to do in user space while counting: block until the user
    # interrupts.  Only Ctrl-C is treated as the signal to report, so other
    # exceptions are no longer silently swallowed.
    try:
        while 1:
            sleep(99999999)
    except KeyboardInterrupt:
        pass

    # header
    print("\n%-25s %-25s %-10s" % (
        "LADDR:LPORT", "RADDR:RPORT", "RETRANSMITS"))
    depict_cnt(b.get_table("ipv4_count"))
    depict_cnt(b.get_table("ipv6_count"), l3prot='ipv6')
else:
    # read events
    # header
    print("%-8s %-6s %-2s %-20s %1s> %-20s %-4s" % ("TIME", "PID", "IP",
        "LADDR:LPORT", "T", "RADDR:RPORT", "STATE"))
    b["ipv4_events"].open_perf_buffer(print_ipv4_event)
    b["ipv6_events"].open_perf_buffer(print_ipv6_event)
    while 1:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            # leave quietly on Ctrl-C instead of dumping a traceback
            exit()
Teng Qindbf00292018-02-28 21:47:50 -0800304 b.perf_buffer_poll()