Merge branch 'master' into net-next
diff --git a/include/libnetlink.h b/include/libnetlink.h
index 407440d..2280c39 100644
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@@ -42,6 +42,8 @@
int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req,
int len)
__attribute__((warn_unused_result));
+int rtnl_dump_request_n(struct rtnl_handle *rth, struct nlmsghdr *n)
+ __attribute__((warn_unused_result));
struct rtnl_ctrl_data {
int nsid;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3d6d00b..37ba601 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -272,6 +272,32 @@
BPF_FUNC_skb_get_tunnel_key,
BPF_FUNC_skb_set_tunnel_key,
BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */
+ /**
+ * bpf_redirect(ifindex, flags) - redirect to another netdev
+ * @ifindex: ifindex of the net device
+ * @flags: bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * Return: TC_ACT_REDIRECT
+ */
+ BPF_FUNC_redirect,
+
+ /**
+ * bpf_get_route_realm(skb) - retrieve a dst's tclassid
+ * @skb: pointer to skb
+ * Return: realm if != 0
+ */
+ BPF_FUNC_get_route_realm,
+
+ /**
+ * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample
+ * @ctx: struct pt_regs*
+ * @map: pointer to perf_event_array map
+ * @index: index of event in the map
+ * @data: data on stack to be output as raw data
+ * @size: size of data
+ * Return: 0 on success
+ */
+ BPF_FUNC_perf_event_output,
__BPF_FUNC_MAX_ID,
};
@@ -293,6 +319,7 @@
__u32 tc_index;
__u32 cb[5];
__u32 hash;
+ __u32 tc_classid;
};
struct bpf_tunnel_key {
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index f24050b..ee197a3 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -127,6 +127,7 @@
#define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */
#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */
#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */
+#define BRIDGE_VLAN_INFO_BRENTRY (1<<5) /* Global bridge VLAN entry */
struct bridge_vlan_info {
__u16 flags;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 1934566..288d3cd 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -230,11 +230,47 @@
IFLA_BR_PRIORITY,
IFLA_BR_VLAN_FILTERING,
IFLA_BR_VLAN_PROTOCOL,
+ IFLA_BR_GROUP_FWD_MASK,
+ IFLA_BR_ROOT_ID,
+ IFLA_BR_BRIDGE_ID,
+ IFLA_BR_ROOT_PORT,
+ IFLA_BR_ROOT_PATH_COST,
+ IFLA_BR_TOPOLOGY_CHANGE,
+ IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ IFLA_BR_HELLO_TIMER,
+ IFLA_BR_TCN_TIMER,
+ IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+ IFLA_BR_GC_TIMER,
+ IFLA_BR_GROUP_ADDR,
+ IFLA_BR_FDB_FLUSH,
+ IFLA_BR_MCAST_ROUTER,
+ IFLA_BR_MCAST_SNOOPING,
+ IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ IFLA_BR_MCAST_QUERIER,
+ IFLA_BR_MCAST_HASH_ELASTICITY,
+ IFLA_BR_MCAST_HASH_MAX,
+ IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+ IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+ IFLA_BR_MCAST_QUERIER_INTVL,
+ IFLA_BR_MCAST_QUERY_INTVL,
+ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+ IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+ IFLA_BR_NF_CALL_IPTABLES,
+ IFLA_BR_NF_CALL_IP6TABLES,
+ IFLA_BR_NF_CALL_ARPTABLES,
+ IFLA_BR_VLAN_DEFAULT_PVID,
__IFLA_BR_MAX,
};
#define IFLA_BR_MAX (__IFLA_BR_MAX - 1)
+/*
+ * Bridge identifier: two priority bytes followed by a six-byte address.
+ * NOTE(review): presumably the payload of the *_ROOT_ID / *_BRIDGE_ID
+ * attributes declared in this header - confirm against the kernel.
+ */
+struct ifla_bridge_id {
+ __u8 prio[2];
+ __u8 addr[6]; /* ETH_ALEN */
+};
+
enum {
BRIDGE_MODE_UNSPEC,
BRIDGE_MODE_HAIRPIN,
@@ -254,6 +290,19 @@
IFLA_BRPORT_PROXYARP, /* proxy ARP */
IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
+ IFLA_BRPORT_ROOT_ID, /* designated root */
+ IFLA_BRPORT_BRIDGE_ID, /* designated bridge */
+ IFLA_BRPORT_DESIGNATED_PORT,
+ IFLA_BRPORT_DESIGNATED_COST,
+ IFLA_BRPORT_ID,
+ IFLA_BRPORT_NO,
+ IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
+ IFLA_BRPORT_CONFIG_PENDING,
+ IFLA_BRPORT_MESSAGE_AGE_TIMER,
+ IFLA_BRPORT_FORWARD_DELAY_TIMER,
+ IFLA_BRPORT_HOLD_TIMER,
+ IFLA_BRPORT_FLUSH,
+ IFLA_BRPORT_MULTICAST_ROUTER,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h
new file mode 100644
index 0000000..1d2f4f6
--- /dev/null
+++ b/include/linux/lwtunnel.h
@@ -0,0 +1,43 @@
+#ifndef _LWTUNNEL_H_
+#define _LWTUNNEL_H_
+
+#include <linux/types.h>
+
+/*
+ * Lightweight tunnel encapsulation types.
+ * __LWTUNNEL_ENCAP_MAX is a sentinel; LWTUNNEL_ENCAP_MAX is derived from it.
+ */
+enum lwtunnel_encap_types {
+ LWTUNNEL_ENCAP_NONE,
+ LWTUNNEL_ENCAP_MPLS,
+ LWTUNNEL_ENCAP_IP,
+ LWTUNNEL_ENCAP_ILA,
+ LWTUNNEL_ENCAP_IP6,
+ __LWTUNNEL_ENCAP_MAX,
+};
+
+#define LWTUNNEL_ENCAP_MAX (__LWTUNNEL_ENCAP_MAX - 1)
+
+/*
+ * Attribute types for the LWTUNNEL_ENCAP_IP encapsulation header.
+ * __LWTUNNEL_IP_MAX is a sentinel; LWTUNNEL_IP_MAX is derived from it.
+ */
+enum lwtunnel_ip_t {
+ LWTUNNEL_IP_UNSPEC,
+ LWTUNNEL_IP_ID,
+ LWTUNNEL_IP_DST,
+ LWTUNNEL_IP_SRC,
+ LWTUNNEL_IP_TTL,
+ LWTUNNEL_IP_TOS,
+ LWTUNNEL_IP_FLAGS,
+ __LWTUNNEL_IP_MAX,
+};
+
+#define LWTUNNEL_IP_MAX (__LWTUNNEL_IP_MAX - 1)
+
+/*
+ * NOTE(review): presumably the attribute types for the LWTUNNEL_ENCAP_IP6
+ * encapsulation header - confirm against the kernel.
+ * __LWTUNNEL_IP6_MAX is a sentinel; LWTUNNEL_IP6_MAX is derived from it.
+ */
+enum lwtunnel_ip6_t {
+ LWTUNNEL_IP6_UNSPEC,
+ LWTUNNEL_IP6_ID,
+ LWTUNNEL_IP6_DST,
+ LWTUNNEL_IP6_SRC,
+ LWTUNNEL_IP6_HOPLIMIT,
+ LWTUNNEL_IP6_TC,
+ LWTUNNEL_IP6_FLAGS,
+ __LWTUNNEL_IP6_MAX,
+};
+
+#define LWTUNNEL_IP6_MAX (__LWTUNNEL_IP6_MAX - 1)
+
+#endif /* _LWTUNNEL_H_ */
diff --git a/include/linux/mpls_iptunnel.h b/include/linux/mpls_iptunnel.h
new file mode 100644
index 0000000..4132c3c
--- /dev/null
+++ b/include/linux/mpls_iptunnel.h
@@ -0,0 +1,28 @@
+/*
+ * mpls tunnel api
+ *
+ * Authors:
+ * Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_MPLS_IPTUNNEL_H
+#define _LINUX_MPLS_IPTUNNEL_H
+
+/* MPLS tunnel attributes
+ * [RTA_ENCAP] = {
+ * [MPLS_IPTUNNEL_DST]
+ * }
+ */
+enum {
+ MPLS_IPTUNNEL_UNSPEC,
+ MPLS_IPTUNNEL_DST, /* nested inside RTA_ENCAP, see layout above */
+ __MPLS_IPTUNNEL_MAX, /* sentinel; MPLS_IPTUNNEL_MAX is derived from it */
+};
+#define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1)
+
+#endif /* _LINUX_MPLS_IPTUNNEL_H */
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 352b5b8..8a7ca5c 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -54,6 +54,7 @@
#define NLM_F_ACK 4 /* Reply with ack, with zero or error code */
#define NLM_F_ECHO 8 /* Echo this request */
#define NLM_F_DUMP_INTR 16 /* Dump was inconsistent due to sequence change */
+#define NLM_F_DUMP_FILTERED 32 /* Dump was filtered as requested */
/* Modifiers to GET request */
#define NLM_F_ROOT 0x100 /* specify tree root */
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 25af89f..a323146 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -33,6 +33,7 @@
#define TC_ACT_STOLEN 4
#define TC_ACT_QUEUED 5
#define TC_ACT_REPEAT 6
+#define TC_ACT_REDIRECT 7
#define TC_ACT_JUMP 0x10000000
/* Action type identifiers*/
@@ -319,6 +320,8 @@
/* BPF classifier */
+#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0)
+
enum {
TCA_BPF_UNSPEC,
TCA_BPF_ACT,
@@ -328,6 +331,7 @@
TCA_BPF_OPS,
TCA_BPF_FD,
TCA_BPF_NAME,
+ TCA_BPF_FLAGS,
__TCA_BPF_MAX,
};
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 3244947..18c543a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -270,6 +270,7 @@
#define RTM_F_CLONED 0x200 /* This route is cloned */
#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
#define RTM_F_PREFIX 0x800 /* Prefix addresses */
+#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */
/* Reserved table identifiers */
@@ -664,6 +665,7 @@
#define RTEXT_FILTER_VF (1 << 0)
#define RTEXT_FILTER_BRVLAN (1 << 1)
#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2)
+#define RTEXT_FILTER_SKIP_STATS (1 << 3)
/* End of information exported to user level */
diff --git a/include/utils.h b/include/utils.h
index f77edeb..1d35149 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -47,6 +47,7 @@
#define NEXT_ARG() do { argv++; if (--argc <= 0) incomplete_command(); } while(0)
#define NEXT_ARG_OK() (argc - 1 > 0)
+#define NEXT_ARG_FWD() do { argv++; argc--; } while(0)
#define PREV_ARG() do { argv--; argc++; } while(0)
typedef struct
@@ -191,6 +192,9 @@
__attribute__ ((format (printf, (pos_str), (pos_args))))
#endif
+#define htonll(x) ((1==htonl(1)) ? (x) : ((uint64_t)htonl((x) & 0xFFFFFFFF) << 32) | htonl((x) >> 32))
+#define ntohll(x) ((1==ntohl(1)) ? (x) : ((uint64_t)ntohl((x) & 0xFFFFFFFF) << 32) | ntohl((x) >> 32))
+
extern int cmdlineno;
ssize_t getcmdline(char **line, size_t *len, FILE *in);
int makeargs(char *line, char *argv[], int maxargs);
diff --git a/ip/Makefile b/ip/Makefile
index 52b76ef..f3d2987 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -7,7 +7,7 @@
iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \
link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \
iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \
- iplink_geneve.o iplink_vrf.o
+ iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o
RTMONOBJ=rtmon.o
diff --git a/ip/ipneigh.c b/ip/ipneigh.c
index ce57ede..5465584 100644
--- a/ip/ipneigh.c
+++ b/ip/ipneigh.c
@@ -39,6 +39,7 @@
char *flushb;
int flushp;
int flushe;
+ int master;
} filter;
static void usage(void) __attribute__((noreturn));
@@ -193,6 +194,7 @@
int len = n->nlmsg_len;
struct rtattr * tb[NDA_MAX+1];
char abuf[256];
+ static int logit = 1;
if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH &&
n->nlmsg_type != RTM_GETNEIGH) {
@@ -220,6 +222,14 @@
(r->ndm_family != AF_DECnet))
return 0;
+ if (filter.master && !(n->nlmsg_flags & NLM_F_DUMP_FILTERED)) {
+ if (logit) {
+ logit = 0;
+ fprintf(fp,
+ "\nWARNING: Kernel does not support filtering by master device\n\n");
+ }
+ }
+
parse_rtattr(tb, NDA_MAX, NDA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
if (tb[NDA_DST]) {
@@ -327,9 +337,18 @@
static int do_show_or_flush(int argc, char **argv, int flush)
{
+ struct {
+ struct nlmsghdr n;
+ struct ndmsg ndm;
+ char buf[256];
+ } req;
char *filter_dev = NULL;
int state_given = 0;
- struct ndmsg ndm = { 0 };
+
+ memset(&req, 0, sizeof(req));
+
+ req.n.nlmsg_type = RTM_GETNEIGH;
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
ipneigh_reset_filter(0);
@@ -351,6 +370,14 @@
if (filter_dev)
duparg("dev", *argv);
filter_dev = *argv;
+ } else if (strcmp(*argv, "master") == 0) {
+ int ifindex;
+ NEXT_ARG();
+ ifindex = ll_name_to_index(*argv);
+ if (!ifindex)
+ invarg("Device does not exist\n", *argv);
+ addattr32(&req.n, sizeof(req), NDA_MASTER, ifindex);
+ filter.master = ifindex;
} else if (strcmp(*argv, "unused") == 0) {
filter.unused_only = 1;
} else if (strcmp(*argv, "nud") == 0) {
@@ -371,7 +398,7 @@
state = 0x100;
filter.state |= state;
} else if (strcmp(*argv, "proxy") == 0)
- ndm.ndm_flags = NTF_PROXY;
+ req.ndm.ndm_flags = NTF_PROXY;
else {
if (strcmp(*argv, "to") == 0) {
NEXT_ARG();
@@ -392,6 +419,7 @@
fprintf(stderr, "Cannot find device \"%s\"\n", filter_dev);
return -1;
}
+ addattr32(&req.n, sizeof(req), NDA_IFINDEX, filter.index);
}
if (flush) {
@@ -436,9 +464,9 @@
return 1;
}
- ndm.ndm_family = filter.family;
+ req.ndm.ndm_family = filter.family;
- if (rtnl_dump_request(&rth, RTM_GETNEIGH, &ndm, sizeof(struct ndmsg)) < 0) {
+ if (rtnl_dump_request_n(&rth, &req.n) < 0) {
perror("Cannot send dump request");
exit(1);
}
diff --git a/ip/iproute.c b/ip/iproute.c
index eab512d..c0ef7bf 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -29,6 +29,7 @@
#include "rt_names.h"
#include "utils.h"
#include "ip_common.h"
+#include "iproute_lwtunnel.h"
#ifndef RTAX_RTTVAR
#define RTAX_RTTVAR RTAX_HOPS
@@ -76,7 +77,8 @@
fprintf(stderr, " [ table TABLE_ID ] [ proto RTPROTO ]\n");
fprintf(stderr, " [ scope SCOPE ] [ metric METRIC ]\n");
fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n");
- fprintf(stderr, "NH := [ via [ FAMILY ] ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
+ fprintf(stderr, "NH := [ encap ENCAPTYPE ENCAPHDR ] [ via [ FAMILY ] ADDRESS ]\n");
+ fprintf(stderr, " [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | mpls | bridge | link ]\n");
fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n");
fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n");
@@ -95,6 +97,8 @@
fprintf(stderr, "TIME := NUMBER[s|ms]\n");
fprintf(stderr, "BOOL := [1|0]\n");
fprintf(stderr, "FEATURES := ecn\n");
+ fprintf(stderr, "ENCAPTYPE := [ mpls | ip | ip6 ]\n");
+ fprintf(stderr, "ENCAPHDR := [ MPLSLABEL ]\n");
exit(-1);
}
@@ -401,6 +405,10 @@
abuf, sizeof(abuf))
);
}
+
+ if (tb[RTA_ENCAP])
+ lwt_print_encap(fp, tb[RTA_ENCAP_TYPE], tb[RTA_ENCAP]);
+
if (r->rtm_tos && filter.tosmask != -1) {
SPRINT_BUF(b1);
fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
@@ -424,9 +432,9 @@
if (tb[RTA_OIF] && filter.oifmask != -1)
fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF])));
+ if (table && (table != RT_TABLE_MAIN || show_details > 0) && !filter.tb)
+ fprintf(fp, " table %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
if (!(r->rtm_flags&RTM_F_CLONED)) {
- if ((table != RT_TABLE_MAIN || show_details > 0) && !filter.tb)
- fprintf(fp, " table %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
if ((r->rtm_protocol != RTPROT_BOOT || show_details > 0) && filter.protocolmask != -1)
fprintf(fp, " proto %s ", rtnl_rtprot_n2a(r->rtm_protocol, b1, sizeof(b1)));
if ((r->rtm_scope != RT_SCOPE_UNIVERSE || show_details > 0) && filter.scopemask != -1)
@@ -633,6 +641,12 @@
fprintf(fp, "%s\tnexthop", _SL_);
if (nh->rtnh_len > sizeof(*nh)) {
parse_rtattr(tb, RTA_MAX, RTNH_DATA(nh), nh->rtnh_len - sizeof(*nh));
+
+ if (tb[RTA_ENCAP])
+ lwt_print_encap(fp,
+ tb[RTA_ENCAP_TYPE],
+ tb[RTA_ENCAP]);
+
if (tb[RTA_GATEWAY]) {
fprintf(fp, " via %s ",
format_host(r->rtm_family,
@@ -704,9 +718,8 @@
return 0;
}
-
-static int parse_one_nh(struct rtmsg *r, struct rtattr *rta,
- struct rtnexthop *rtnh,
+static int parse_one_nh(struct nlmsghdr *n, struct rtmsg *r,
+ struct rtattr *rta, struct rtnexthop *rtnh,
int *argcp, char ***argvp)
{
int argc = *argcp;
@@ -753,6 +766,11 @@
invarg("\"realm\" value is invalid\n", *argv);
rta_addattr32(rta, 4096, RTA_FLOW, realm);
rtnh->rtnh_len += sizeof(struct rtattr) + 4;
+ } else if (strcmp(*argv, "encap") == 0) {
+ int len = rta->rta_len;
+
+ lwt_parse_encap(rta, 4096, &argc, &argv);
+ rtnh->rtnh_len += rta->rta_len - len;
} else
break;
}
@@ -784,7 +802,7 @@
memset(rtnh, 0, sizeof(*rtnh));
rtnh->rtnh_len = sizeof(*rtnh);
rta->rta_len += rtnh->rtnh_len;
- parse_one_nh(r, rta, rtnh, &argc, &argv);
+ parse_one_nh(n, r, rta, rtnh, &argc, &argv);
rtnh = RTNH_NEXT(rtnh);
}
@@ -1092,6 +1110,17 @@
else if (get_u8(&pref, *argv, 0))
invarg("\"pref\" value is invalid\n", *argv);
addattr8(&req.n, sizeof(req), RTA_PREF, pref);
+ } else if (strcmp(*argv, "encap") == 0) {
+ char buf[1024];
+ struct rtattr *rta = (void*)buf;
+
+ rta->rta_type = RTA_ENCAP;
+ rta->rta_len = RTA_LENGTH(0);
+
+ lwt_parse_encap(rta, sizeof(buf), &argc, &argv);
+
+ if (rta->rta_len > RTA_LENGTH(0))
+ addraw_l(&req.n, 1024, RTA_DATA(rta), RTA_PAYLOAD(rta));
} else {
int type;
inet_prefix dst;
@@ -1642,6 +1671,8 @@
if (req.r.rtm_family == AF_UNSPEC)
req.r.rtm_family = AF_INET;
+ req.r.rtm_flags |= RTM_F_LOOKUP_TABLE;
+
if (rtnl_talk(&rth, &req.n, &req.n, sizeof(req)) < 0)
return -2;
diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c
new file mode 100644
index 0000000..63322a1
--- /dev/null
+++ b/ip/iproute_lwtunnel.c
@@ -0,0 +1,228 @@
+/*
+ * iproute_lwtunnel.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
+ * Thomas Graf <tgraf@suug.ch>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <linux/lwtunnel.h>
+#include <linux/mpls_iptunnel.h>
+#include <errno.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "iproute_lwtunnel.h"
+
+/*
+ * Map an encap type keyword ("mpls", "ip" or "ip6") to its LWTUNNEL_ENCAP_*
+ * value.  Any other string yields LWTUNNEL_ENCAP_NONE.
+ */
+static int read_encap_type(const char *name)
+{
+ if (strcmp(name, "mpls") == 0)
+ return LWTUNNEL_ENCAP_MPLS;
+ else if (strcmp(name, "ip") == 0)
+ return LWTUNNEL_ENCAP_IP;
+ else if (strcmp(name, "ip6") == 0)
+ return LWTUNNEL_ENCAP_IP6;
+ else
+ return LWTUNNEL_ENCAP_NONE;
+}
+
+/*
+ * Return the keyword for an LWTUNNEL_ENCAP_* value.  Values without a case
+ * below (LWTUNNEL_ENCAP_NONE and LWTUNNEL_ENCAP_ILA among them) give
+ * "unknown".
+ */
+static const char *format_encap_type(int type)
+{
+ switch (type) {
+ case LWTUNNEL_ENCAP_MPLS:
+ return "mpls";
+ case LWTUNNEL_ENCAP_IP:
+ return "ip";
+ case LWTUNNEL_ENCAP_IP6:
+ return "ip6";
+ default:
+ return "unknown";
+ }
+}
+
+/*
+ * Print the MPLS_IPTUNNEL_DST attribute nested inside encap, formatted by
+ * format_host() with family AF_MPLS.  Prints nothing if it is absent.
+ */
+static void print_encap_mpls(FILE *fp, struct rtattr *encap)
+{
+ struct rtattr *tb[MPLS_IPTUNNEL_MAX+1];
+ char abuf[256];
+
+ parse_rtattr_nested(tb, MPLS_IPTUNNEL_MAX, encap);
+
+ if (tb[MPLS_IPTUNNEL_DST])
+ fprintf(fp, " %s ", format_host(AF_MPLS,
+ RTA_PAYLOAD(tb[MPLS_IPTUNNEL_DST]),
+ RTA_DATA(tb[MPLS_IPTUNNEL_DST]),
+ abuf, sizeof(abuf)));
+}
+
+/*
+ * Print the attributes nested inside an "ip" encap: id, src, dst, ttl and
+ * tos, each only if present.  Addresses are formatted as AF_INET.
+ * LWTUNNEL_IP_FLAGS is parsed into tb[] but not printed.
+ */
+static void print_encap_ip(FILE *fp, struct rtattr *encap)
+{
+ struct rtattr *tb[LWTUNNEL_IP_MAX+1];
+ char abuf[256];
+
+ parse_rtattr_nested(tb, LWTUNNEL_IP_MAX, encap);
+
+ /* the id is converted with ntohll() before it is printed */
+ if (tb[LWTUNNEL_IP_ID])
+ fprintf(fp, "id %llu ", ntohll(rta_getattr_u64(tb[LWTUNNEL_IP_ID])));
+
+ if (tb[LWTUNNEL_IP_SRC])
+ fprintf(fp, "src %s ",
+ rt_addr_n2a(AF_INET,
+ RTA_PAYLOAD(tb[LWTUNNEL_IP_SRC]),
+ RTA_DATA(tb[LWTUNNEL_IP_SRC]),
+ abuf, sizeof(abuf)));
+
+ if (tb[LWTUNNEL_IP_DST])
+ fprintf(fp, "dst %s ",
+ rt_addr_n2a(AF_INET,
+ RTA_PAYLOAD(tb[LWTUNNEL_IP_DST]),
+ RTA_DATA(tb[LWTUNNEL_IP_DST]),
+ abuf, sizeof(abuf)));
+
+ if (tb[LWTUNNEL_IP_TTL])
+ fprintf(fp, "ttl %d ", rta_getattr_u8(tb[LWTUNNEL_IP_TTL]));
+
+ if (tb[LWTUNNEL_IP_TOS])
+ fprintf(fp, "tos %d ", rta_getattr_u8(tb[LWTUNNEL_IP_TOS]));
+}
+
+/*
+ * Print " encap TYPE" followed by the type-specific attributes.
+ *
+ * encap_type: attribute holding the 16-bit encap type; if NULL nothing at
+ *             all is printed.
+ * encap:      attribute holding the nested encap header.
+ *
+ * Only the MPLS and IP types have an attribute printer; for any other type
+ * just the type name is printed.
+ */
+void lwt_print_encap(FILE *fp, struct rtattr *encap_type,
+ struct rtattr *encap)
+{
+ int et;
+
+ if (!encap_type)
+ return;
+
+ et = rta_getattr_u16(encap_type);
+
+ fprintf(fp, " encap %s", format_encap_type(et));
+
+ switch (et) {
+ case LWTUNNEL_ENCAP_MPLS:
+ print_encap_mpls(fp, encap);
+ break;
+ case LWTUNNEL_ENCAP_IP:
+ print_encap_ip(fp, encap);
+ break;
+ }
+}
+
+/*
+ * Parse the "mpls" encap header: the current word (*argv) is read with
+ * get_addr() for family AF_MPLS and appended to rta as MPLS_IPTUNNEL_DST,
+ * bounded by len.
+ *
+ * Exactly one word is consumed and argc/argv are written back unchanged,
+ * i.e. they still refer to that word on return.  Always returns 0; a parse
+ * failure terminates the program.
+ */
+static int parse_encap_mpls(struct rtattr *rta, size_t len, int *argcp, char ***argvp)
+{
+ inet_prefix addr;
+ int argc = *argcp;
+ char **argv = *argvp;
+
+ /* NOTE(review): the message says "inet address" although the word is parsed as AF_MPLS */
+ if (get_addr(&addr, *argv, AF_MPLS)) {
+ fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", *argv);
+ exit(1);
+ }
+
+ rta_addattr_l(rta, len, MPLS_IPTUNNEL_DST, &addr.data,
+ addr.bytelen);
+
+ *argcp = argc;
+ *argvp = argv;
+
+ return 0;
+}
+
+/*
+ * Parse the "ip" encap header: any of "id ID", "dst ADDR", "tos TOS" and
+ * "ttl TTL", each at most once and in any order, appending the matching
+ * LWTUNNEL_IP_* attribute to rta (bounded by len).  Parsing stops at the
+ * first word that is not one of these keywords.
+ *
+ * On return *argcp / *argvp refer to the last word consumed, which is the
+ * convention parse_encap_mpls() follows as well.
+ * NOTE(review): this presumes the route parser advances by one word after
+ * lwt_parse_encap() returns - confirm against the callers.
+ *
+ * Always returns 0; bad or duplicate values are reported through invarg()
+ * and duparg2().
+ */
+static int parse_encap_ip(struct rtattr *rta, size_t len, int *argcp, char ***argvp)
+{
+	int id_ok = 0, dst_ok = 0, tos_ok = 0, ttl_ok = 0;
+	char **argv = *argvp;
+	int argc = *argcp;
+
+	while (argc > 0) {
+		if (strcmp(*argv, "id") == 0) {
+			__u64 id;
+
+			NEXT_ARG();
+			if (id_ok++)
+				duparg2("id", *argv);
+			if (get_u64(&id, *argv, 0))
+				invarg("\"id\" value is invalid\n", *argv);
+			/* the id goes on the wire converted by htonll() */
+			rta_addattr64(rta, len, LWTUNNEL_IP_ID, htonll(id));
+		} else if (strcmp(*argv, "dst") == 0) {
+			inet_prefix addr;
+
+			NEXT_ARG();
+			if (dst_ok++)
+				duparg2("dst", *argv);
+			/* never append an address that failed to parse */
+			if (get_addr(&addr, *argv, AF_INET))
+				invarg("\"dst\" value is invalid\n", *argv);
+			rta_addattr_l(rta, len, LWTUNNEL_IP_DST, &addr.data, addr.bytelen);
+		} else if (strcmp(*argv, "tos") == 0) {
+			__u32 tos;
+
+			NEXT_ARG();
+			if (tos_ok++)
+				duparg2("tos", *argv);
+			if (rtnl_dsfield_a2n(&tos, *argv))
+				invarg("\"tos\" value is invalid\n", *argv);
+			rta_addattr8(rta, len, LWTUNNEL_IP_TOS, tos);
+		} else if (strcmp(*argv, "ttl") == 0) {
+			__u8 ttl;
+
+			NEXT_ARG();
+			if (ttl_ok++)
+				duparg2("ttl", *argv);
+			if (get_u8(&ttl, *argv, 0))
+				invarg("\"ttl\" value is invalid\n", *argv);
+			rta_addattr8(rta, len, LWTUNNEL_IP_TTL, ttl);
+		} else {
+			break;
+		}
+
+		/*
+		 * Step past the value just consumed.  Without this the next
+		 * iteration would compare the value itself against the
+		 * keywords and stop after the very first option.
+		 */
+		argc--;
+		argv++;
+	}
+
+	/*
+	 * argv is now the first unparsed word; step back onto the last
+	 * consumed one so the result matches parse_encap_mpls().
+	 */
+	*argcp = argc + 1;
+	*argvp = argv - 1;
+
+	return 0;
+}
+
+
+/*
+ * Parse "encap TYPE HDR" from the command line and append it to rta as a
+ * nested RTA_ENCAP attribute followed by a 16-bit RTA_ENCAP_TYPE.
+ *
+ * rta:          attribute buffer being built.
+ * len:          total size of that buffer; every append is bounded by it.
+ * argcp, argvp: refer to the "encap" keyword on entry; on return they hold
+ *               whatever position the type-specific parser left.
+ *
+ * Returns 0.  An unrecognised TYPE is reported through invarg(); a type
+ * that is recognised but has no header parser only prints an error and
+ * still emits an empty RTA_ENCAP.
+ */
+int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp)
+{
+	struct rtattr *nest;
+	int argc = *argcp;
+	char **argv = *argvp;
+	__u16 type;
+
+	NEXT_ARG();
+	type = read_encap_type(*argv);
+	if (!type)
+		invarg("\"encap type\" value is invalid\n", *argv);
+
+	NEXT_ARG();
+	if (argc <= 1) {
+		fprintf(stderr, "Error: unexpected end of line after \"encap\"\n");
+		exit(-1);
+	}
+
+	/*
+	 * Use the caller's buffer size for every append: callers pass
+	 * differently sized buffers, so a fixed 1024 here did not match the
+	 * bound used for the nested attributes.
+	 */
+	nest = rta_nest(rta, len, RTA_ENCAP);
+	switch (type) {
+	case LWTUNNEL_ENCAP_MPLS:
+		parse_encap_mpls(rta, len, &argc, &argv);
+		break;
+	case LWTUNNEL_ENCAP_IP:
+		parse_encap_ip(rta, len, &argc, &argv);
+		break;
+	default:
+		fprintf(stderr, "Error: unsupported encap type\n");
+		break;
+	}
+	rta_nest_end(rta, nest);
+
+	rta_addattr16(rta, len, RTA_ENCAP_TYPE, type);
+
+	*argcp = argc;
+	*argvp = argv;
+
+	return 0;
+}
diff --git a/ip/iproute_lwtunnel.h b/ip/iproute_lwtunnel.h
new file mode 100644
index 0000000..b82b58a
--- /dev/null
+++ b/ip/iproute_lwtunnel.h
@@ -0,0 +1,8 @@
+#ifndef __LWTUNNEL_H__
+#define __LWTUNNEL_H__ 1
+/* Parse and print helpers for the "encap" (lightweight tunnel) route attributes. */
+int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp);
+void lwt_print_encap(FILE *fp, struct rtattr *encap_type,
+		     struct rtattr *encap);
+
+#endif /* __LWTUNNEL_H__ */
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index 8430033..09b0e91 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -191,6 +191,27 @@
return sendmsg(rth->fd, &msg, 0);
}
+/*
+ * Send a caller-built netlink message as a dump request.
+ *
+ * rth: netlink handle; its sequence counter is incremented and the new
+ *      value is also stored in rth->dump.
+ * n:   complete message.  n->nlmsg_len must already be final because the
+ *      send length is taken from it before the header is touched.
+ *      nlmsg_flags, nlmsg_pid and nlmsg_seq are overwritten here.
+ *
+ * Returns the result of sendmsg().
+ */
+int rtnl_dump_request_n(struct rtnl_handle *rth, struct nlmsghdr *n)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct iovec iov = {
+ .iov_base = (void*) n,
+ .iov_len = n->nlmsg_len
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ /* any flags the caller set are replaced, not OR-ed */
+ n->nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST;
+ n->nlmsg_pid = 0;
+ n->nlmsg_seq = rth->dump = ++rth->seq;
+
+ return sendmsg(rth->fd, &msg, 0);
+}
+
int rtnl_dump_filter_l(struct rtnl_handle *rth,
const struct rtnl_dump_filter_arg *arg)
{
diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in
index 72d8d77..9934a1e 100644
--- a/man/man8/ip-route.8.in
+++ b/man/man8/ip-route.8.in
@@ -80,6 +80,8 @@
.ti -8
.IR NH " := [ "
+.B encap
+.IR ENCAP " ] [ "
.B via
[
.IR FAMILY " ] " ADDRESS " ] [ "
@@ -164,6 +166,26 @@
.IR PREF " := [ "
.BR low " | " medium " | " high " ]"
+.ti -8
+.IR ENCAP " := [ "
+.IR MPLS " | " IP " ]"
+
+.ti -8
+.IR ENCAP_MPLS " := "
+.BR mpls " [ "
+.IR LABEL " ]"
+
+.ti -8
+.IR ENCAP_IP " := "
+.B ip
+.B id
+.IR TUNNEL_ID
+.B dst
+.IR REMOTE_IP " [ "
+.B tos
+.IR TOS " ] ["
+.B ttl
+.IR TTL " ]"
.SH DESCRIPTION
.B ip route
@@ -589,6 +611,48 @@
- the route has a highest priority
.sp
+.TP
+.BI encap " ENCAPTYPE ENCAPHDR"
+attach tunnel encapsulation attributes to this route.
+.sp
+.I ENCAPTYPE
+is a string specifying the supported encapsulation type. Namely:
+
+.in +8
+.BI mpls
+- encapsulation type MPLS
+.sp
+.BI ip
+- IP encapsulation (Geneve, GRE, VXLAN, ...)
+.sp
+
+.in -8
+.I ENCAPHDR
+is a set of encapsulation attributes specific to the
+.I ENCAPTYPE.
+
+.in +8
+.B mpls
+.in +2
+.I MPLSLABEL
+- mpls label stack with labels separated by
+.I "/"
+.in -2
+.sp
+
+.B ip
+.in +2
+.B id
+.I TUNNEL_ID
+.B dst
+.IR REMOTE_IP " [ "
+.B tos
+.IR TOS " ] ["
+.B ttl
+.IR TTL " ]"
+.in -2
+.sp
+
.in -8
.RE
@@ -847,7 +911,11 @@
Adds a default route (for all addresses) via the local gateway 192.168.1.1 that can
be reached on device eth0.
.RE
-
+.PP
+ip route add 10.1.1.0/30 encap mpls 200/300 via 10.1.1.1 dev eth0
+.RS 4
+Adds an ipv4 route with mpls encapsulation attributes attached to it.
+.RE
.SH SEE ALSO
.br
.BR ip (8)
diff --git a/tc/f_bpf.c b/tc/f_bpf.c
index 490dc6b..ac77af5 100644
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -41,7 +41,7 @@
fprintf(stderr, "\n");
fprintf(stderr, "eBPF use case:\n");
fprintf(stderr, " object-file FILE [ section CLS_NAME ] [ export UDS_FILE ]");
- fprintf(stderr, " [ verbose ]\n");
+ fprintf(stderr, " [ verbose ] [ direct-action ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Common remaining options:\n");
fprintf(stderr, " [ action ACTION_SPEC ]\n");
@@ -69,6 +69,7 @@
struct tcmsg *t = NLMSG_DATA(n);
const char *bpf_uds_name = NULL;
const char *bpf_sec_name = NULL;
+ unsigned int bpf_flags = 0;
char *bpf_obj = NULL;
struct rtattr *tail;
bool seen_run = false;
@@ -124,25 +125,28 @@
if (ebpf) {
bpf_uds_name = getenv(BPF_ENV_UDS);
bpf_obj = *argv;
- NEXT_ARG();
- if (strcmp(*argv, "section") == 0 ||
- strcmp(*argv, "sec") == 0) {
+ NEXT_ARG_FWD();
+
+ if (argc > 0 &&
+ (strcmp(*argv, "section") == 0 ||
+ strcmp(*argv, "sec") == 0)) {
NEXT_ARG();
bpf_sec_name = *argv;
- NEXT_ARG();
+ NEXT_ARG_FWD();
}
- if (!bpf_uds_name &&
+ if (argc > 0 && !bpf_uds_name &&
(strcmp(*argv, "export") == 0 ||
strcmp(*argv, "exp") == 0)) {
NEXT_ARG();
bpf_uds_name = *argv;
- NEXT_ARG();
+ NEXT_ARG_FWD();
}
- if (strcmp(*argv, "verbose") == 0 ||
- strcmp(*argv, "verb") == 0) {
+ if (argc > 0 &&
+ (strcmp(*argv, "verbose") == 0 ||
+ strcmp(*argv, "verb") == 0)) {
bpf_verbose = true;
- NEXT_ARG();
+ NEXT_ARG_FWD();
}
PREV_ARG();
@@ -182,7 +186,10 @@
fprintf(stderr, "Illegal \"classid\"\n");
return -1;
}
- addattr_l(n, MAX_MSG, TCA_BPF_CLASSID, &handle, 4);
+ addattr32(n, MAX_MSG, TCA_BPF_CLASSID, handle);
+ } else if (matches(*argv, "direct-action") == 0 ||
+ matches(*argv, "da") == 0) {
+ bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
} else if (matches(*argv, "action") == 0) {
NEXT_ARG();
if (parse_action(&argc, &argv, TCA_BPF_ACT, n)) {
@@ -208,10 +215,13 @@
explain();
return -1;
}
- argc--;
- argv++;
+
+ NEXT_ARG_FWD();
}
+ if (bpf_obj && bpf_flags)
+ addattr32(n, MAX_MSG, TCA_BPF_FLAGS, bpf_flags);
+
tail->rta_len = (((void *)n) + n->nlmsg_len) - (void *)tail;
if (bpf_uds_name)
@@ -244,6 +254,13 @@
else if (tb[TCA_BPF_FD])
fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_BPF_FD]));
+ if (tb[TCA_BPF_FLAGS]) {
+ unsigned int flags = rta_getattr_u32(tb[TCA_BPF_FLAGS]);
+
+ if (flags & TCA_BPF_FLAG_ACT_DIRECT)
+ fprintf(f, "direct-action ");
+ }
+
if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN]) {
bpf_print_ops(f, tb[TCA_BPF_OPS],
rta_getattr_u16(tb[TCA_BPF_OPS_LEN]));
diff --git a/tc/m_bpf.c b/tc/m_bpf.c
index e1bb6a4..fb4c3c7 100644
--- a/tc/m_bpf.c
+++ b/tc/m_bpf.c
@@ -111,25 +111,28 @@
if (ebpf) {
bpf_uds_name = getenv(BPF_ENV_UDS);
bpf_obj = *argv;
- NEXT_ARG();
- if (strcmp(*argv, "section") == 0 ||
- strcmp(*argv, "sec") == 0) {
+ NEXT_ARG_FWD();
+
+ if (argc > 0 &&
+ (strcmp(*argv, "section") == 0 ||
+ strcmp(*argv, "sec") == 0)) {
NEXT_ARG();
bpf_sec_name = *argv;
- NEXT_ARG();
+ NEXT_ARG_FWD();
}
- if (!bpf_uds_name &&
+ if (argc > 0 && !bpf_uds_name &&
(strcmp(*argv, "export") == 0 ||
strcmp(*argv, "exp") == 0)) {
NEXT_ARG();
bpf_uds_name = *argv;
- NEXT_ARG();
+ NEXT_ARG_FWD();
}
- if (strcmp(*argv, "verbose") == 0 ||
- strcmp(*argv, "verb") == 0) {
+ if (argc > 0 &&
+ (strcmp(*argv, "verbose") == 0 ||
+ strcmp(*argv, "verb") == 0)) {
bpf_verbose = true;
- NEXT_ARG();
+ NEXT_ARG_FWD();
}
PREV_ARG();
@@ -166,33 +169,29 @@
goto opt_bpf;
break;
}
- argc--;
- argv++;
+
+ NEXT_ARG_FWD();
}
parm.action = TC_ACT_PIPE;
if (argc) {
if (matches(*argv, "reclassify") == 0) {
parm.action = TC_ACT_RECLASSIFY;
- argc--;
- argv++;
+ NEXT_ARG_FWD();
} else if (matches(*argv, "pipe") == 0) {
parm.action = TC_ACT_PIPE;
- argc--;
- argv++;
+ NEXT_ARG_FWD();
} else if (matches(*argv, "drop") == 0 ||
matches(*argv, "shot") == 0) {
parm.action = TC_ACT_SHOT;
- argc--;
- argv++;
+ NEXT_ARG_FWD();
} else if (matches(*argv, "continue") == 0) {
parm.action = TC_ACT_UNSPEC;
- argc--;
- argv++;
- } else if (matches(*argv, "pass") == 0) {
+ NEXT_ARG_FWD();
+ } else if (matches(*argv, "pass") == 0 ||
+ matches(*argv, "ok") == 0) {
parm.action = TC_ACT_OK;
- argc--;
- argv++;
+ NEXT_ARG_FWD();
}
}
@@ -203,8 +202,8 @@
fprintf(stderr, "bpf: Illegal \"index\"\n");
return -1;
}
- argc--;
- argv++;
+
+ NEXT_ARG_FWD();
}
}