blob: 8bfeb1e9bbfb2d116e30b59486d4351fa57d7723 [file] [log] [blame]
Brendan Greggafc97252016-02-19 16:07:36 -08001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# tcpconnlat Trace TCP active connection latency (connect).
5# For Linux, uses BCC, eBPF. Embedded C.
6#
7# USAGE: tcpconnlat [-h] [-t] [-p PID]
8#
9# This uses dynamic tracing of kernel functions, and will need to be updated
10# to match kernel changes.
11#
Brendan Greggafc97252016-02-19 16:07:36 -080012# Copyright 2016 Netflix, Inc.
13# Licensed under the Apache License, Version 2.0 (the "License")
14#
15# 19-Feb-2016 Brendan Gregg Created this.
16
17from __future__ import print_function
18from bcc import BPF
Mark Drayton11de2982016-06-26 21:14:44 +010019from socket import inet_ntop, AF_INET, AF_INET6
20from struct import pack
Brendan Greggafc97252016-02-19 16:07:36 -080021import argparse
22import ctypes as ct
23
dpaynefc507992018-01-18 14:09:28 -080024# arg validation
25def positive_float(val):
26 try:
27 ival = float(val)
28 except ValueError:
29 raise argparse.ArgumentTypeError("must be a float")
30
31 if ival < 0:
32 raise argparse.ArgumentTypeError("must be positive")
33 return ival
34
Brendan Greggafc97252016-02-19 16:07:36 -080035# arguments
36examples = """examples:
37 ./tcpconnlat # trace all TCP connect()s
dpaynefc507992018-01-18 14:09:28 -080038 ./tcpconnlat 1 # trace connection latency slower than 1 ms
39 ./tcpconnlat 0.1 # trace connection latency slower than 100 us
Brendan Greggafc97252016-02-19 16:07:36 -080040 ./tcpconnlat -t # include timestamps
41 ./tcpconnlat -p 181 # only trace PID 181
42"""
43parser = argparse.ArgumentParser(
44 description="Trace TCP connects and show connection latency",
45 formatter_class=argparse.RawDescriptionHelpFormatter,
46 epilog=examples)
47parser.add_argument("-t", "--timestamp", action="store_true",
48 help="include timestamp on output")
49parser.add_argument("-p", "--pid",
50 help="trace this PID only")
dpaynefc507992018-01-18 14:09:28 -080051parser.add_argument("duration_ms", nargs="?", default=0,
52 type=positive_float,
53 help="minimum duration to trace (ms)")
dpayne3aed4fb2018-01-17 13:56:49 -080054parser.add_argument("-v", "--verbose", action="store_true",
55 help="print the BPF program for debugging purposes")
Nathan Scottcf0792f2018-02-02 16:56:50 +110056parser.add_argument("--ebpf", action="store_true",
57 help=argparse.SUPPRESS)
Brendan Greggafc97252016-02-19 16:07:36 -080058args = parser.parse_args()
dpayne3aed4fb2018-01-17 13:56:49 -080059
dpaynefc507992018-01-18 14:09:28 -080060if args.duration_ms:
61 # support fractions but round to nearest microsecond
62 duration_us = int(args.duration_ms * 1000)
dpayne3aed4fb2018-01-17 13:56:49 -080063else:
64 duration_us = 0 # default is show all
65
Brendan Greggafc97252016-02-19 16:07:36 -080066debug = 0
67
68# define BPF program
69bpf_text = """
70#include <uapi/linux/ptrace.h>
71#include <net/sock.h>
Mark Drayton11de2982016-06-26 21:14:44 +010072#include <net/tcp_states.h>
Brendan Greggafc97252016-02-19 16:07:36 -080073#include <bcc/proto.h>
74
75struct info_t {
76 u64 ts;
77 u64 pid;
78 char task[TASK_COMM_LEN];
79};
80BPF_HASH(start, struct sock *, struct info_t);
81
82// separate data structs for ipv4 and ipv6
83struct ipv4_data_t {
84 // XXX: switch some to u32's when supported
85 u64 ts_us;
86 u64 pid;
Brendan Greggafc97252016-02-19 16:07:36 -080087 u64 saddr;
88 u64 daddr;
Mark Drayton11de2982016-06-26 21:14:44 +010089 u64 ip;
Brendan Greggafc97252016-02-19 16:07:36 -080090 u64 dport;
91 u64 delta_us;
92 char task[TASK_COMM_LEN];
93};
94BPF_PERF_OUTPUT(ipv4_events);
95
96struct ipv6_data_t {
Brendan Greggafc97252016-02-19 16:07:36 -080097 u64 ts_us;
98 u64 pid;
Mark Drayton11de2982016-06-26 21:14:44 +010099 unsigned __int128 saddr;
100 unsigned __int128 daddr;
Brendan Greggafc97252016-02-19 16:07:36 -0800101 u64 ip;
Brendan Greggafc97252016-02-19 16:07:36 -0800102 u64 dport;
103 u64 delta_us;
104 char task[TASK_COMM_LEN];
105};
106BPF_PERF_OUTPUT(ipv6_events);
107
108int trace_connect(struct pt_regs *ctx, struct sock *sk)
109{
110 u32 pid = bpf_get_current_pid_tgid();
111 FILTER
112 struct info_t info = {.pid = pid};
113 info.ts = bpf_ktime_get_ns();
114 bpf_get_current_comm(&info.task, sizeof(info.task));
115 start.update(&sk, &info);
116 return 0;
117};
118
119// See tcp_v4_do_rcv() and tcp_v6_do_rcv(). So TCP_ESTBALISHED and TCP_LISTEN
120// are fast path and processed elsewhere, and leftovers are processed by
Brendan Gregg2482c1c2016-02-19 16:14:01 -0800121// tcp_rcv_state_process(). We can trace this for handshake completion.
122// This should all be switched to static tracepoints when available.
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200123int trace_tcp_rcv_state_process(struct pt_regs *ctx, struct sock *skp)
Brendan Greggafc97252016-02-19 16:07:36 -0800124{
Brendan Gregg4db6d442016-02-19 16:56:47 -0800125 // will be in TCP_SYN_SENT for handshake
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200126 if (skp->__sk_common.skc_state != TCP_SYN_SENT)
Brendan Gregg4db6d442016-02-19 16:56:47 -0800127 return 0;
Brendan Greggafc97252016-02-19 16:07:36 -0800128
129 // check start and calculate delta
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200130 struct info_t *infop = start.lookup(&skp);
Brendan Greggafc97252016-02-19 16:07:36 -0800131 if (infop == 0) {
132 return 0; // missed entry or filtered
133 }
dpayne3aed4fb2018-01-17 13:56:49 -0800134
Brendan Greggafc97252016-02-19 16:07:36 -0800135 u64 ts = infop->ts;
136 u64 now = bpf_ktime_get_ns();
137
dpayne3aed4fb2018-01-17 13:56:49 -0800138 u64 delta_us = (now - ts) / 1000ul;
139
140#ifdef MIN_LATENCY
141 if ( delta_us < DURATION_US ) {
142 return 0; // connect latency is below latency filter minimum
143 }
144#endif
145
Brendan Greggafc97252016-02-19 16:07:36 -0800146 // pull in details
147 u16 family = 0, dport = 0;
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200148 family = skp->__sk_common.skc_family;
149 dport = skp->__sk_common.skc_dport;
Brendan Greggafc97252016-02-19 16:07:36 -0800150
151 // emit to appropriate data path
152 if (family == AF_INET) {
153 struct ipv4_data_t data4 = {.pid = infop->pid, .ip = 4};
154 data4.ts_us = now / 1000;
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200155 data4.saddr = skp->__sk_common.skc_rcv_saddr;
156 data4.daddr = skp->__sk_common.skc_daddr;
Brendan Greggafc97252016-02-19 16:07:36 -0800157 data4.dport = ntohs(dport);
dpaynece0cf922018-01-17 14:02:29 -0800158 data4.delta_us = delta_us;
Brendan Greggafc97252016-02-19 16:07:36 -0800159 __builtin_memcpy(&data4.task, infop->task, sizeof(data4.task));
160 ipv4_events.perf_submit(ctx, &data4, sizeof(data4));
161
162 } else /* AF_INET6 */ {
163 struct ipv6_data_t data6 = {.pid = infop->pid, .ip = 6};
164 data6.ts_us = now / 1000;
Mark Drayton11de2982016-06-26 21:14:44 +0100165 bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200166 skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
Mark Drayton11de2982016-06-26 21:14:44 +0100167 bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200168 skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
Brendan Greggafc97252016-02-19 16:07:36 -0800169 data6.dport = ntohs(dport);
dpaynece0cf922018-01-17 14:02:29 -0800170 data6.delta_us = delta_us;
Brendan Greggafc97252016-02-19 16:07:36 -0800171 __builtin_memcpy(&data6.task, infop->task, sizeof(data6.task));
172 ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
173 }
174
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200175 start.delete(&skp);
Brendan Greggafc97252016-02-19 16:07:36 -0800176
177 return 0;
178}
179"""
180
dpayne3aed4fb2018-01-17 13:56:49 -0800181if duration_us > 0:
182 bpf_text = "#define MIN_LATENCY\n" + bpf_text
183 bpf_text = bpf_text.replace('DURATION_US', str(duration_us))
184
Brendan Greggafc97252016-02-19 16:07:36 -0800185# code substitutions
186if args.pid:
187 bpf_text = bpf_text.replace('FILTER',
188 'if (pid != %s) { return 0; }' % args.pid)
189else:
190 bpf_text = bpf_text.replace('FILTER', '')
Nathan Scottcf0792f2018-02-02 16:56:50 +1100191if debug or args.verbose or args.ebpf:
Brendan Greggafc97252016-02-19 16:07:36 -0800192 print(bpf_text)
Nathan Scottcf0792f2018-02-02 16:56:50 +1100193 if args.ebpf:
194 exit()
Brendan Greggafc97252016-02-19 16:07:36 -0800195
196# initialize BPF
197b = BPF(text=bpf_text)
198b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect")
199b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect")
200b.attach_kprobe(event="tcp_rcv_state_process",
201 fn_name="trace_tcp_rcv_state_process")
202
203# event data
204TASK_COMM_LEN = 16 # linux/sched.h
Mark Drayton11de2982016-06-26 21:14:44 +0100205
Brendan Greggafc97252016-02-19 16:07:36 -0800206class Data_ipv4(ct.Structure):
207 _fields_ = [
208 ("ts_us", ct.c_ulonglong),
209 ("pid", ct.c_ulonglong),
Brendan Greggafc97252016-02-19 16:07:36 -0800210 ("saddr", ct.c_ulonglong),
211 ("daddr", ct.c_ulonglong),
Mark Drayton11de2982016-06-26 21:14:44 +0100212 ("ip", ct.c_ulonglong),
Brendan Greggafc97252016-02-19 16:07:36 -0800213 ("dport", ct.c_ulonglong),
214 ("delta_us", ct.c_ulonglong),
215 ("task", ct.c_char * TASK_COMM_LEN)
216 ]
Mark Drayton11de2982016-06-26 21:14:44 +0100217
Brendan Greggafc97252016-02-19 16:07:36 -0800218class Data_ipv6(ct.Structure):
219 _fields_ = [
220 ("ts_us", ct.c_ulonglong),
221 ("pid", ct.c_ulonglong),
Mark Drayton11de2982016-06-26 21:14:44 +0100222 ("saddr", (ct.c_ulonglong * 2)),
223 ("daddr", (ct.c_ulonglong * 2)),
Brendan Greggafc97252016-02-19 16:07:36 -0800224 ("ip", ct.c_ulonglong),
Brendan Greggafc97252016-02-19 16:07:36 -0800225 ("dport", ct.c_ulonglong),
226 ("delta_us", ct.c_ulonglong),
227 ("task", ct.c_char * TASK_COMM_LEN)
228 ]
229
Brendan Greggafc97252016-02-19 16:07:36 -0800230# process event
231start_ts = 0
Mark Drayton11de2982016-06-26 21:14:44 +0100232
Brendan Greggafc97252016-02-19 16:07:36 -0800233def print_ipv4_event(cpu, data, size):
234 event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
235 global start_ts
236 if args.timestamp:
237 if start_ts == 0:
238 start_ts = event.ts_us
Mark Drayton11de2982016-06-26 21:14:44 +0100239 print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
Rafael F78948e42017-03-26 14:54:25 +0200240 print("%-6d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
241 event.task.decode(), event.ip,
242 inet_ntop(AF_INET, pack("I", event.saddr)),
Mark Drayton11de2982016-06-26 21:14:44 +0100243 inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
244 float(event.delta_us) / 1000))
245
Brendan Greggafc97252016-02-19 16:07:36 -0800246def print_ipv6_event(cpu, data, size):
247 event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
248 global start_ts
249 if args.timestamp:
250 if start_ts == 0:
251 start_ts = event.ts_us
Mark Drayton11de2982016-06-26 21:14:44 +0100252 print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
Rafael F78948e42017-03-26 14:54:25 +0200253 print("%-6d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
254 event.task.decode(), event.ip, inet_ntop(AF_INET6, event.saddr),
Mark Drayton11de2982016-06-26 21:14:44 +0100255 inet_ntop(AF_INET6, event.daddr), event.dport,
Brendan Greggafc97252016-02-19 16:07:36 -0800256 float(event.delta_us) / 1000))
257
258# header
259if args.timestamp:
260 print("%-9s" % ("TIME(s)"), end="")
261print("%-6s %-12s %-2s %-16s %-16s %-5s %s" % ("PID", "COMM", "IP", "SADDR",
262 "DADDR", "DPORT", "LAT(ms)"))
263
264# read events
265b["ipv4_events"].open_perf_buffer(print_ipv4_event)
266b["ipv6_events"].open_perf_buffer(print_ipv6_event)
267while 1:
Teng Qindbf00292018-02-28 21:47:50 -0800268 b.perf_buffer_poll()