samples/bpf: extend xdp_rxq_info to read packet payload
There is a cost associated with reading the packet data payload
that this test ignored. Add option --readmem to allow enabling
reading part of the payload.
This sample/tool helps us analyse an issue observed with an mlx5
NIC (ConnectX-5 Ex) and an Intel(R) Xeon(R) CPU E5-1650 v4.
With no_touch of data:
Running XDP on dev:mlx5p1 (ifindex:8) action:XDP_DROP options:no_touch
XDP stats CPU pps issue-pps
XDP-RX CPU 0 14,465,157 0
XDP-RX CPU 1 14,464,728 0
XDP-RX CPU 2 14,465,283 0
XDP-RX CPU 3 14,465,282 0
XDP-RX CPU 4 14,464,159 0
XDP-RX CPU 5 14,465,379 0
XDP-RX CPU total 86,789,992
When not touching data, we observe that the CPUs have idle cycles.
When reading data the CPUs are 100% busy in softirq.
With reading data:
Running XDP on dev:mlx5p1 (ifindex:8) action:XDP_DROP options:read
XDP stats CPU pps issue-pps
XDP-RX CPU 0 9,620,639 0
XDP-RX CPU 1 9,489,843 0
XDP-RX CPU 2 9,407,854 0
XDP-RX CPU 3 9,422,289 0
XDP-RX CPU 4 9,321,959 0
XDP-RX CPU 5 9,395,242 0
XDP-RX CPU total 56,657,828
The effect seen above is a result of cache-misses occurring when
more RXQs are being used. Based on perf-event observations, our
conclusion is that the CPU's DDIO (Data Direct I/O) chooses to
deliver packets into main memory, instead of L3-cache. We also
found that this can be mitigated by either using fewer RXQs or by
reducing the NIC's RX-ring size.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
index 3fd2092..61af621 100644
--- a/samples/bpf/xdp_rxq_info_kern.c
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -4,6 +4,8 @@
* Example howto extract XDP RX-queue info
*/
#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/in.h>
#include "bpf_helpers.h"
/* Config setup from with userspace
@@ -14,6 +16,11 @@
struct config {
__u32 action;
int ifindex;
+ __u32 options; /* Bitmask of enum cfg_options_flags, tested with '&' by the XDP program */
+};
+enum cfg_options_flags {
+ NO_TOUCH = 0x0U, /* Default: packet data is not read, packets are only counted */
+ READ_MEM = 0x1U, /* Read the Ethernet header (h_proto) of every packet */
};
struct bpf_map_def SEC("maps") config_map = {
.type = BPF_MAP_TYPE_ARRAY,
@@ -90,6 +97,18 @@
if (key == MAX_RXQs)
rxq_rec->issue++;
+ /* Default: Don't touch packet data, only count packets */
+ if (unlikely(config->options & READ_MEM)) {
+ struct ethhdr *eth = data;
+
+ if (eth + 1 > data_end)
+ return XDP_ABORTED;
+
+ /* Avoid compiler removing this: Drop non 802.3 Ethertypes */
+ if (ntohs(eth->h_proto) < ETH_P_802_3_MIN)
+ return XDP_ABORTED;
+ }
+
return config->action;
}
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index e4e9ba52..435485d 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -50,6 +50,7 @@
{"sec", required_argument, NULL, 's' },
{"no-separators", no_argument, NULL, 'z' },
{"action", required_argument, NULL, 'a' },
+ {"readmem", no_argument, NULL, 'r' },
{0, 0, NULL, 0 }
};
@@ -66,6 +67,11 @@
struct config {
__u32 action;
int ifindex;
+ __u32 options; /* Bitmask of enum cfg_options_flags; filled from cfg_options in main() */
+};
+enum cfg_options_flags {
+ NO_TOUCH = 0x0U, /* Default when --readmem is not given; printed as "no_touch" */
+ READ_MEM = 0x1U, /* Set by --readmem / 'r'; printed as "read" */
};
#define XDP_ACTION_MAX (XDP_TX + 1)
#define XDP_ACTION_MAX_STRLEN 11
@@ -109,6 +115,16 @@
printf("\n");
}
+/* Translate the config option flags into the short name shown in the
+ * "options:" field of the stats header.
+ *
+ * Returns "no_touch" when no flag is set and "read" when READ_MEM is set.
+ * Any other value is treated as a programming error: a message is written
+ * to stderr and the process exits with EXIT_FAIL.
+ */
+static char *options2str(enum cfg_options_flags flag)
+{
+	if (flag == NO_TOUCH)
+		return "no_touch";
+	if (flag & READ_MEM)
+		return "read";
+	/* Newline-terminate so the error does not run into later output */
+	fprintf(stderr, "ERR: Unknown config option flags\n");
+	exit(EXIT_FAIL);
+}
+
static void usage(char *argv[])
{
int i;
@@ -305,7 +321,7 @@
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
- int action)
+ int action, __u32 cfg_opt)
{
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -316,8 +332,8 @@
int i;
/* Header */
- printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
- ifname, ifindex, action2str(action));
+ printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n",
+ ifname, ifindex, action2str(action), options2str(cfg_opt));
/* stats_global_map */
{
@@ -399,7 +415,7 @@
*b = tmp;
}
-static void stats_poll(int interval, int action)
+static void stats_poll(int interval, int action, __u32 cfg_opt)
{
struct stats_record *record, *prev;
@@ -410,7 +426,7 @@
while (1) {
swap(&prev, &record);
stats_collect(record);
- stats_print(record, prev, action);
+ stats_print(record, prev, action, cfg_opt);
sleep(interval);
}
@@ -421,6 +437,7 @@
int main(int argc, char **argv)
{
+ __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
struct bpf_prog_load_attr prog_load_attr = {
.prog_type = BPF_PROG_TYPE_XDP,
@@ -435,6 +452,7 @@
int interval = 2;
__u32 key = 0;
+
char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
int action = XDP_PASS; /* Default action */
char *action_str = NULL;
@@ -496,6 +514,9 @@
action_str = (char *)&action_str_buf;
strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
break;
+ case 'r':
+ cfg_options |= READ_MEM;
+ break;
case 'h':
error:
default:
@@ -522,6 +543,7 @@
}
}
cfg.action = action;
+ cfg.options = cfg_options;
/* Trick to pretty printf with thousands separators use %' */
if (use_separators)
@@ -542,6 +564,6 @@
return EXIT_FAIL_XDP;
}
- stats_poll(interval, action);
+ stats_poll(interval, action, cfg_options);
return EXIT_OK;
}