blob: 0d21b837c5f78bac2ffbf7b0a6a250641cfb0579 [file] [log] [blame]
Brendan Greggafc97252016-02-19 16:07:36 -08001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# tcpconnlat Trace TCP active connection latency (connect).
5# For Linux, uses BCC, eBPF. Embedded C.
6#
7# USAGE: tcpconnlat [-h] [-t] [-p PID]
8#
9# This uses dynamic tracing of kernel functions, and will need to be updated
10# to match kernel changes.
11#
Brendan Greggafc97252016-02-19 16:07:36 -080012# Copyright 2016 Netflix, Inc.
13# Licensed under the Apache License, Version 2.0 (the "License")
14#
15# 19-Feb-2016 Brendan Gregg Created this.
16
17from __future__ import print_function
18from bcc import BPF
Mark Drayton11de2982016-06-26 21:14:44 +010019from socket import inet_ntop, AF_INET, AF_INET6
20from struct import pack
Brendan Greggafc97252016-02-19 16:07:36 -080021import argparse
22import ctypes as ct
23
dpaynefc507992018-01-18 14:09:28 -080024# arg validation
25def positive_float(val):
26 try:
27 ival = float(val)
28 except ValueError:
29 raise argparse.ArgumentTypeError("must be a float")
30
31 if ival < 0:
32 raise argparse.ArgumentTypeError("must be positive")
33 return ival
34
Brendan Greggafc97252016-02-19 16:07:36 -080035# arguments
36examples = """examples:
37 ./tcpconnlat # trace all TCP connect()s
dpaynefc507992018-01-18 14:09:28 -080038 ./tcpconnlat 1 # trace connection latency slower than 1 ms
39 ./tcpconnlat 0.1 # trace connection latency slower than 100 us
Brendan Greggafc97252016-02-19 16:07:36 -080040 ./tcpconnlat -t # include timestamps
41 ./tcpconnlat -p 181 # only trace PID 181
42"""
43parser = argparse.ArgumentParser(
44 description="Trace TCP connects and show connection latency",
45 formatter_class=argparse.RawDescriptionHelpFormatter,
46 epilog=examples)
47parser.add_argument("-t", "--timestamp", action="store_true",
48 help="include timestamp on output")
49parser.add_argument("-p", "--pid",
50 help="trace this PID only")
dpaynefc507992018-01-18 14:09:28 -080051parser.add_argument("duration_ms", nargs="?", default=0,
52 type=positive_float,
53 help="minimum duration to trace (ms)")
dpayne3aed4fb2018-01-17 13:56:49 -080054parser.add_argument("-v", "--verbose", action="store_true",
55 help="print the BPF program for debugging purposes")
Nathan Scottcf0792f2018-02-02 16:56:50 +110056parser.add_argument("--ebpf", action="store_true",
57 help=argparse.SUPPRESS)
Brendan Greggafc97252016-02-19 16:07:36 -080058args = parser.parse_args()
dpayne3aed4fb2018-01-17 13:56:49 -080059
dpaynefc507992018-01-18 14:09:28 -080060if args.duration_ms:
61 # support fractions but round to nearest microsecond
62 duration_us = int(args.duration_ms * 1000)
dpayne3aed4fb2018-01-17 13:56:49 -080063else:
64 duration_us = 0 # default is show all
65
Brendan Greggafc97252016-02-19 16:07:36 -080066debug = 0
67
68# define BPF program
69bpf_text = """
70#include <uapi/linux/ptrace.h>
71#include <net/sock.h>
Mark Drayton11de2982016-06-26 21:14:44 +010072#include <net/tcp_states.h>
Brendan Greggafc97252016-02-19 16:07:36 -080073#include <bcc/proto.h>
74
75struct info_t {
76 u64 ts;
Joe Yin44c28bf2018-06-27 23:34:59 +080077 u32 pid;
Brendan Greggafc97252016-02-19 16:07:36 -080078 char task[TASK_COMM_LEN];
79};
80BPF_HASH(start, struct sock *, struct info_t);
81
82// separate data structs for ipv4 and ipv6
83struct ipv4_data_t {
Brendan Greggafc97252016-02-19 16:07:36 -080084 u64 ts_us;
Joe Yin44c28bf2018-06-27 23:34:59 +080085 u32 pid;
86 u32 saddr;
87 u32 daddr;
Mark Drayton11de2982016-06-26 21:14:44 +010088 u64 ip;
Joe Yin44c28bf2018-06-27 23:34:59 +080089 u16 dport;
Brendan Greggafc97252016-02-19 16:07:36 -080090 u64 delta_us;
91 char task[TASK_COMM_LEN];
92};
93BPF_PERF_OUTPUT(ipv4_events);
94
95struct ipv6_data_t {
Brendan Greggafc97252016-02-19 16:07:36 -080096 u64 ts_us;
Joe Yin44c28bf2018-06-27 23:34:59 +080097 u32 pid;
Mark Drayton11de2982016-06-26 21:14:44 +010098 unsigned __int128 saddr;
99 unsigned __int128 daddr;
Brendan Greggafc97252016-02-19 16:07:36 -0800100 u64 ip;
Joe Yin44c28bf2018-06-27 23:34:59 +0800101 u16 dport;
Brendan Greggafc97252016-02-19 16:07:36 -0800102 u64 delta_us;
103 char task[TASK_COMM_LEN];
104};
105BPF_PERF_OUTPUT(ipv6_events);
106
107int trace_connect(struct pt_regs *ctx, struct sock *sk)
108{
109 u32 pid = bpf_get_current_pid_tgid();
110 FILTER
111 struct info_t info = {.pid = pid};
112 info.ts = bpf_ktime_get_ns();
113 bpf_get_current_comm(&info.task, sizeof(info.task));
114 start.update(&sk, &info);
115 return 0;
116};
117
118// See tcp_v4_do_rcv() and tcp_v6_do_rcv(). So TCP_ESTBALISHED and TCP_LISTEN
119// are fast path and processed elsewhere, and leftovers are processed by
Brendan Gregg2482c1c2016-02-19 16:14:01 -0800120// tcp_rcv_state_process(). We can trace this for handshake completion.
121// This should all be switched to static tracepoints when available.
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200122int trace_tcp_rcv_state_process(struct pt_regs *ctx, struct sock *skp)
Brendan Greggafc97252016-02-19 16:07:36 -0800123{
Brendan Gregg4db6d442016-02-19 16:56:47 -0800124 // will be in TCP_SYN_SENT for handshake
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200125 if (skp->__sk_common.skc_state != TCP_SYN_SENT)
Brendan Gregg4db6d442016-02-19 16:56:47 -0800126 return 0;
Brendan Greggafc97252016-02-19 16:07:36 -0800127
128 // check start and calculate delta
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200129 struct info_t *infop = start.lookup(&skp);
Brendan Greggafc97252016-02-19 16:07:36 -0800130 if (infop == 0) {
131 return 0; // missed entry or filtered
132 }
dpayne3aed4fb2018-01-17 13:56:49 -0800133
Brendan Greggafc97252016-02-19 16:07:36 -0800134 u64 ts = infop->ts;
135 u64 now = bpf_ktime_get_ns();
136
dpayne3aed4fb2018-01-17 13:56:49 -0800137 u64 delta_us = (now - ts) / 1000ul;
138
139#ifdef MIN_LATENCY
140 if ( delta_us < DURATION_US ) {
141 return 0; // connect latency is below latency filter minimum
142 }
143#endif
144
Brendan Greggafc97252016-02-19 16:07:36 -0800145 // pull in details
146 u16 family = 0, dport = 0;
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200147 family = skp->__sk_common.skc_family;
148 dport = skp->__sk_common.skc_dport;
Brendan Greggafc97252016-02-19 16:07:36 -0800149
150 // emit to appropriate data path
151 if (family == AF_INET) {
152 struct ipv4_data_t data4 = {.pid = infop->pid, .ip = 4};
153 data4.ts_us = now / 1000;
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200154 data4.saddr = skp->__sk_common.skc_rcv_saddr;
155 data4.daddr = skp->__sk_common.skc_daddr;
Brendan Greggafc97252016-02-19 16:07:36 -0800156 data4.dport = ntohs(dport);
dpaynece0cf922018-01-17 14:02:29 -0800157 data4.delta_us = delta_us;
Brendan Greggafc97252016-02-19 16:07:36 -0800158 __builtin_memcpy(&data4.task, infop->task, sizeof(data4.task));
159 ipv4_events.perf_submit(ctx, &data4, sizeof(data4));
160
161 } else /* AF_INET6 */ {
162 struct ipv6_data_t data6 = {.pid = infop->pid, .ip = 6};
163 data6.ts_us = now / 1000;
Mark Drayton11de2982016-06-26 21:14:44 +0100164 bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200165 skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
Mark Drayton11de2982016-06-26 21:14:44 +0100166 bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200167 skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
Brendan Greggafc97252016-02-19 16:07:36 -0800168 data6.dport = ntohs(dport);
dpaynece0cf922018-01-17 14:02:29 -0800169 data6.delta_us = delta_us;
Brendan Greggafc97252016-02-19 16:07:36 -0800170 __builtin_memcpy(&data6.task, infop->task, sizeof(data6.task));
171 ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
172 }
173
Paul Chaignon7e1cf0a2017-08-06 11:14:12 +0200174 start.delete(&skp);
Brendan Greggafc97252016-02-19 16:07:36 -0800175
176 return 0;
177}
178"""
179
dpayne3aed4fb2018-01-17 13:56:49 -0800180if duration_us > 0:
181 bpf_text = "#define MIN_LATENCY\n" + bpf_text
182 bpf_text = bpf_text.replace('DURATION_US', str(duration_us))
183
Brendan Greggafc97252016-02-19 16:07:36 -0800184# code substitutions
185if args.pid:
186 bpf_text = bpf_text.replace('FILTER',
187 'if (pid != %s) { return 0; }' % args.pid)
188else:
189 bpf_text = bpf_text.replace('FILTER', '')
Nathan Scottcf0792f2018-02-02 16:56:50 +1100190if debug or args.verbose or args.ebpf:
Brendan Greggafc97252016-02-19 16:07:36 -0800191 print(bpf_text)
Nathan Scottcf0792f2018-02-02 16:56:50 +1100192 if args.ebpf:
193 exit()
Brendan Greggafc97252016-02-19 16:07:36 -0800194
195# initialize BPF
196b = BPF(text=bpf_text)
197b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect")
198b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect")
199b.attach_kprobe(event="tcp_rcv_state_process",
200 fn_name="trace_tcp_rcv_state_process")
201
202# event data
203TASK_COMM_LEN = 16 # linux/sched.h
Mark Drayton11de2982016-06-26 21:14:44 +0100204
Brendan Greggafc97252016-02-19 16:07:36 -0800205class Data_ipv4(ct.Structure):
206 _fields_ = [
207 ("ts_us", ct.c_ulonglong),
Joe Yin44c28bf2018-06-27 23:34:59 +0800208 ("pid", ct.c_uint),
209 ("saddr", ct.c_uint),
210 ("daddr", ct.c_uint),
Mark Drayton11de2982016-06-26 21:14:44 +0100211 ("ip", ct.c_ulonglong),
Joe Yin44c28bf2018-06-27 23:34:59 +0800212 ("dport", ct.c_ushort),
Brendan Greggafc97252016-02-19 16:07:36 -0800213 ("delta_us", ct.c_ulonglong),
214 ("task", ct.c_char * TASK_COMM_LEN)
215 ]
Mark Drayton11de2982016-06-26 21:14:44 +0100216
Brendan Greggafc97252016-02-19 16:07:36 -0800217class Data_ipv6(ct.Structure):
218 _fields_ = [
219 ("ts_us", ct.c_ulonglong),
Joe Yin44c28bf2018-06-27 23:34:59 +0800220 ("pid", ct.c_uint),
Mark Drayton11de2982016-06-26 21:14:44 +0100221 ("saddr", (ct.c_ulonglong * 2)),
222 ("daddr", (ct.c_ulonglong * 2)),
Brendan Greggafc97252016-02-19 16:07:36 -0800223 ("ip", ct.c_ulonglong),
Joe Yin44c28bf2018-06-27 23:34:59 +0800224 ("dport", ct.c_ushort),
Brendan Greggafc97252016-02-19 16:07:36 -0800225 ("delta_us", ct.c_ulonglong),
226 ("task", ct.c_char * TASK_COMM_LEN)
227 ]
228
Brendan Greggafc97252016-02-19 16:07:36 -0800229# process event
230start_ts = 0
Mark Drayton11de2982016-06-26 21:14:44 +0100231
Brendan Greggafc97252016-02-19 16:07:36 -0800232def print_ipv4_event(cpu, data, size):
233 event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
234 global start_ts
235 if args.timestamp:
236 if start_ts == 0:
237 start_ts = event.ts_us
Mark Drayton11de2982016-06-26 21:14:44 +0100238 print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
Rafael F78948e42017-03-26 14:54:25 +0200239 print("%-6d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
jeromemarchandb96ebcd2018-10-10 01:58:15 +0200240 event.task.decode('utf-8', 'replace'), event.ip,
Rafael F78948e42017-03-26 14:54:25 +0200241 inet_ntop(AF_INET, pack("I", event.saddr)),
Mark Drayton11de2982016-06-26 21:14:44 +0100242 inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
243 float(event.delta_us) / 1000))
244
Brendan Greggafc97252016-02-19 16:07:36 -0800245def print_ipv6_event(cpu, data, size):
246 event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
247 global start_ts
248 if args.timestamp:
249 if start_ts == 0:
250 start_ts = event.ts_us
Mark Drayton11de2982016-06-26 21:14:44 +0100251 print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
Rafael F78948e42017-03-26 14:54:25 +0200252 print("%-6d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
jeromemarchandb96ebcd2018-10-10 01:58:15 +0200253 event.task.decode('utf-8', 'replace'), event.ip,
254 inet_ntop(AF_INET6, event.saddr), inet_ntop(AF_INET6, event.daddr),
255 event.dport, float(event.delta_us) / 1000))
Brendan Greggafc97252016-02-19 16:07:36 -0800256
257# header
258if args.timestamp:
259 print("%-9s" % ("TIME(s)"), end="")
260print("%-6s %-12s %-2s %-16s %-16s %-5s %s" % ("PID", "COMM", "IP", "SADDR",
261 "DADDR", "DPORT", "LAT(ms)"))
262
263# read events
264b["ipv4_events"].open_perf_buffer(print_ipv4_event)
265b["ipv6_events"].open_perf_buffer(print_ipv6_event)
266while 1:
Teng Qindbf00292018-02-28 21:47:50 -0800267 b.perf_buffer_poll()