Merge branch 'master' into net-next
Conflicts:
man/man8/ip-route.8.in
diff --git a/Makefile b/Makefile
index 9dbb29f..ca6c2e1 100644
--- a/Makefile
+++ b/Makefile
@@ -26,6 +26,9 @@
#options for ipx
ADDLIB+=ipx_ntop.o ipx_pton.o
+#options for mpls
+ADDLIB+=mpls_ntop.o mpls_pton.o
+
CC = gcc
HOSTCC = gcc
DEFINES += -D_GNU_SOURCE
diff --git a/configure b/configure
index 631938e..7bec8a9 100755
--- a/configure
+++ b/configure
@@ -266,6 +266,29 @@
rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest
}
+check_elf()
+{
+ cat >$TMPDIR/elftest.c <<EOF
+#include <libelf.h>
+#include <gelf.h>
+int main(void)
+{
+ Elf_Scn *scn;
+ GElf_Shdr shdr;
+ return elf_version(EV_CURRENT);
+}
+EOF
+
+ if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1
+ then
+ echo "TC_CONFIG_ELF:=y" >>Config
+ echo "yes"
+ else
+ echo "no"
+ fi
+ rm -f $TMPDIR/elftest.c $TMPDIR/elftest
+}
+
check_selinux()
# SELinux is a compile time option in the ss utility
{
@@ -306,5 +329,8 @@
echo -n "SELinux support: "
check_selinux
+echo -n "ELF support: "
+check_elf
+
echo -e "\nDocs"
check_docs
diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex
index e7a79a5..ea14795 100644
--- a/doc/ip-cref.tex
+++ b/doc/ip-cref.tex
@@ -1432,6 +1432,17 @@
even if it does not match any interface prefix. One application of this
option may be found in~\cite{IP-TUNNELS}.
+\item \verb|pref PREF|
+
+--- the IPv6 route preference.
+\verb|PREF| is a string specifying the route preference as defined in
+RFC4191 for Router Discovery messages. Namely:
+\begin{itemize}
+\item \verb|low| --- the route has the lowest priority.
+\item \verb|medium| --- the route has the default priority.
+\item \verb|high| --- the route has the highest priority.
+\end{itemize}
+
\end{itemize}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
new file mode 100644
index 0000000..77f33a6
--- /dev/null
+++ b/include/linux/bpf.h
@@ -0,0 +1,188 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __LINUX_BPF_H__
+#define __LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* BPF syscall commands */
+enum bpf_cmd {
+ /* create a map with given type and attributes
+ * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size)
+ * returns fd or negative error
+ * map is deleted when fd is closed
+ */
+ BPF_MAP_CREATE,
+
+ /* lookup key in a given map
+ * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key, attr->value
+ * returns zero and stores found elem into value
+ * or negative error
+ */
+ BPF_MAP_LOOKUP_ELEM,
+
+ /* create or update key/value pair in a given map
+ * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key, attr->value, attr->flags
+ * returns zero or negative error
+ */
+ BPF_MAP_UPDATE_ELEM,
+
+ /* find and delete elem by key in a given map
+ * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key
+ * returns zero or negative error
+ */
+ BPF_MAP_DELETE_ELEM,
+
+ /* lookup key in a given map and return next key
+ * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key, attr->next_key
+ * returns zero and stores next key or negative error
+ */
+ BPF_MAP_GET_NEXT_KEY,
+
+ /* verify and load eBPF program
+ * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
+ * Using attr->prog_type, attr->insns, attr->license
+ * returns fd or negative error
+ */
+ BPF_PROG_LOAD,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+};
+
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+};
+
+#define BPF_PSEUDO_MAP_FD 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ };
+} __attribute__((aligned(8)));
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+enum bpf_func_id {
+ BPF_FUNC_unspec,
+ BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
+ BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
+ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
+ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
+ __BPF_FUNC_MAX_ID,
+};
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+};
+
+#endif /* __LINUX_BPF_H__ */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 8688a98..344781d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -77,7 +77,8 @@
#define SKF_AD_VLAN_TAG_PRESENT 48
#define SKF_AD_PAY_OFFSET 52
#define SKF_AD_RANDOM 56
-#define SKF_AD_MAX 60
+#define SKF_AD_VLAN_TPID 60
+#define SKF_AD_MAX 64
#define SKF_NET_OFF (-0x100000)
#define SKF_LL_OFF (-0x200000)
diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
index cc375e4..26f0ecf 100644
--- a/include/linux/if_addr.h
+++ b/include/linux/if_addr.h
@@ -50,6 +50,8 @@
#define IFA_F_PERMANENT 0x80
#define IFA_F_MANAGETEMPADDR 0x100
#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
struct ifa_cacheinfo {
__u32 ifa_prefered;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 3450c3f..6689e8f 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -147,6 +147,7 @@
IFLA_CARRIER_CHANGES,
IFLA_PHYS_SWITCH_ID,
IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
__IFLA_MAX
};
@@ -213,6 +214,7 @@
enum in6_addr_gen_mode {
IN6_ADDR_GEN_MODE_EUI64,
IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
};
/* Bridge section */
@@ -222,6 +224,9 @@
IFLA_BR_FORWARD_DELAY,
IFLA_BR_HELLO_TIME,
IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
__IFLA_BR_MAX,
};
@@ -245,6 +250,7 @@
IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
IFLA_BRPORT_PROXYARP, /* proxy ARP */
IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+ IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/include/linux/mpls.h b/include/linux/mpls.h
new file mode 100644
index 0000000..0893902
--- /dev/null
+++ b/include/linux/mpls.h
@@ -0,0 +1,34 @@
+#ifndef _MPLS_H
+#define _MPLS_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/* Reference: RFC 5462, RFC 3032
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Label | TC |S| TTL |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Label: Label Value, 20 bits
+ * TC: Traffic Class field, 3 bits
+ * S: Bottom of Stack, 1 bit
+ * TTL: Time to Live, 8 bits
+ */
+
+struct mpls_label {
+ __be32 entry;
+};
+
+#define MPLS_LS_LABEL_MASK 0xFFFFF000
+#define MPLS_LS_LABEL_SHIFT 12
+#define MPLS_LS_TC_MASK 0x00000E00
+#define MPLS_LS_TC_SHIFT 9
+#define MPLS_LS_S_MASK 0x00000100
+#define MPLS_LS_S_SHIFT 8
+#define MPLS_LS_TTL_MASK 0x000000FF
+#define MPLS_LS_TTL_SHIFT 0
+
+#endif /* _MPLS_H */
diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
index 3873a35..2e35c61 100644
--- a/include/linux/neighbour.h
+++ b/include/linux/neighbour.h
@@ -126,6 +126,7 @@
NDTPA_PROXY_QLEN, /* u32 */
NDTPA_LOCKTIME, /* u64, msecs */
NDTPA_QUEUE_LENBYTES, /* u32 */
+ NDTPA_MCAST_REPROBES, /* u32 */
__NDTPA_MAX
};
#define NDTPA_MAX (__NDTPA_MAX - 1)
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 25731df..bf08e76 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -397,6 +397,8 @@
TCA_BPF_CLASSID,
TCA_BPF_OPS_LEN,
TCA_BPF_OPS,
+ TCA_BPF_FD,
+ TCA_BPF_NAME,
__TCA_BPF_MAX,
};
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 3eb7810..28650a3 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -303,6 +303,9 @@
RTA_TABLE,
RTA_MARK,
RTA_MFC_STATS,
+ RTA_VIA,
+ RTA_NEWDST,
+ RTA_PREF,
__RTA_MAX
};
@@ -332,6 +335,7 @@
#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_EXTERNAL 8 /* Route installed externally */
/* Macros to handle hexthops */
@@ -344,6 +348,12 @@
#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len))
#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
+/* RTA_VIA */
+struct rtvia {
+ __kernel_sa_family_t rtvia_family;
+ __u8 rtvia_addr[0];
+};
+
/* RTM_CACHEINFO */
struct rta_cacheinfo {
@@ -621,6 +631,8 @@
#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF
RTNLGRP_MDB,
#define RTNLGRP_MDB RTNLGRP_MDB
+ RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE
__RTNLGRP_MAX
};
#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
diff --git a/include/linux/tc_act/tc_bpf.h b/include/linux/tc_act/tc_bpf.h
index 5288bd7..07f17cc 100644
--- a/include/linux/tc_act/tc_bpf.h
+++ b/include/linux/tc_act/tc_bpf.h
@@ -24,6 +24,8 @@
TCA_ACT_BPF_PARMS,
TCA_ACT_BPF_OPS_LEN,
TCA_ACT_BPF_OPS,
+ TCA_ACT_BPF_FD,
+ TCA_ACT_BPF_NAME,
__TCA_ACT_BPF_MAX,
};
#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
diff --git a/include/rt_names.h b/include/rt_names.h
index c0ea4f9..921be06 100644
--- a/include/rt_names.h
+++ b/include/rt_names.h
@@ -22,7 +22,7 @@
const char * ll_type_n2a(int type, char *buf, int len);
-const char *ll_addr_n2a(unsigned char *addr, int alen,
+const char *ll_addr_n2a(const unsigned char *addr, int alen,
int type, char *buf, int blen);
int ll_addr_a2n(char *lladdr, int len, const char *arg);
diff --git a/include/utils.h b/include/utils.h
index 9151c4f..c21b59c 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -50,10 +50,11 @@
typedef struct
{
- __u8 family;
- __u8 bytelen;
+ __u16 flags;
+ __u16 bytelen;
__s16 bitlen;
- __u32 flags;
+ /* These next two fields match rtvia */
+ __u16 family;
__u32 data[8];
} inet_prefix;
@@ -77,6 +78,13 @@
u_int8_t ipx_node[IPX_NODE_LEN];
};
+#ifndef AF_MPLS
+# define AF_MPLS 28
+#endif
+
+/* Maximum number of labels the mpls helpers support */
+#define MPLS_MAX_LABELS 8
+
extern __u32 get_addr32(const char *name);
extern int get_addr_1(inet_prefix *dst, const char *arg, int family);
extern int get_prefix_1(inet_prefix *dst, char *arg, int family);
@@ -106,9 +114,12 @@
extern const char *format_host(int af, int len, const void *addr,
char *buf, int buflen);
-extern const char *rt_addr_n2a(int af, const void *addr,
+extern const char *rt_addr_n2a(int af, int len, const void *addr,
char *buf, int buflen);
+extern int read_family(const char *name);
+extern const char *family_name(int family);
+
void missarg(const char *) __attribute__((noreturn));
void invarg(const char *, const char *) __attribute__((noreturn));
void duparg(const char *, const char *) __attribute__((noreturn));
@@ -122,6 +133,9 @@
const char *ipx_ntop(int af, const void *addr, char *str, size_t len);
int ipx_pton(int af, const char *src, void *addr);
+const char *mpls_ntop(int af, const void *addr, char *str, size_t len);
+int mpls_pton(int af, const char *src, void *addr);
+
extern int __iproute2_hz_internal;
extern int __get_hz(void);
@@ -157,6 +171,11 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#ifndef __check_format_string
+# define __check_format_string(pos_str, pos_args) \
+ __attribute__ ((format (printf, (pos_str), (pos_args))))
+#endif
+
extern int cmdlineno;
extern ssize_t getcmdline(char **line, size_t *len, FILE *in);
extern int makeargs(char *line, char *argv[], int maxargs);
diff --git a/ip/ip.c b/ip/ip.c
index da16b15..f7f214b 100644
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -52,7 +52,7 @@
" netns | l2tp | fou | tcp_metrics | token | netconf }\n"
" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n"
" -h[uman-readable] | -iec |\n"
-" -f[amily] { inet | inet6 | ipx | dnet | bridge | link } |\n"
+" -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } |\n"
" -4 | -6 | -I | -D | -B | -0 |\n"
" -l[oops] { maximum-addr-flush-attempts } |\n"
" -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n"
@@ -190,21 +190,11 @@
argv++;
if (argc <= 1)
usage();
- if (strcmp(argv[1], "inet") == 0)
- preferred_family = AF_INET;
- else if (strcmp(argv[1], "inet6") == 0)
- preferred_family = AF_INET6;
- else if (strcmp(argv[1], "dnet") == 0)
- preferred_family = AF_DECnet;
- else if (strcmp(argv[1], "link") == 0)
- preferred_family = AF_PACKET;
- else if (strcmp(argv[1], "ipx") == 0)
- preferred_family = AF_IPX;
- else if (strcmp(argv[1], "bridge") == 0)
- preferred_family = AF_BRIDGE;
- else if (strcmp(argv[1], "help") == 0)
+ if (strcmp(argv[1], "help") == 0)
usage();
else
+ preferred_family = read_family(argv[1]);
+ if (preferred_family == AF_UNSPEC)
invarg("invalid protocol family", argv[1]);
} else if (strcmp(opt, "-4") == 0) {
preferred_family = AF_INET;
@@ -216,6 +206,8 @@
preferred_family = AF_IPX;
} else if (strcmp(opt, "-D") == 0) {
preferred_family = AF_DECnet;
+ } else if (strcmp(opt, "-M") == 0) {
+ preferred_family = AF_MPLS;
} else if (strcmp(opt, "-B") == 0) {
preferred_family = AF_BRIDGE;
} else if (matches(opt, "-human") == 0 ||
diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 99a6ab5..e582da0 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -85,7 +85,7 @@
fprintf(stderr, " [-]tentative | [-]deprecated | [-]dadfailed | temporary |\n");
fprintf(stderr, " CONFFLAG-LIST ]\n");
fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n");
- fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute ]\n");
+ fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute | autojoin ]\n");
fprintf(stderr, "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n");
fprintf(stderr, "LFT := forever | SECONDS\n");
@@ -915,6 +915,10 @@
ifa_flags &= ~IFA_F_NOPREFIXROUTE;
fprintf(fp, "noprefixroute ");
}
+ if (ifa_flags & IFA_F_MCAUTOJOIN) {
+ ifa_flags &= ~IFA_F_MCAUTOJOIN;
+ fprintf(fp, "autojoin ");
+ }
if (!(ifa_flags & IFA_F_PERMANENT)) {
fprintf(fp, "dynamic ");
} else
@@ -1354,6 +1358,9 @@
} else if (strcmp(*argv, "noprefixroute") == 0) {
filter.flags |= IFA_F_NOPREFIXROUTE;
filter.flagmask |= IFA_F_NOPREFIXROUTE;
+ } else if (strcmp(*argv, "autojoin") == 0) {
+ filter.flags |= IFA_F_MCAUTOJOIN;
+ filter.flagmask |= IFA_F_MCAUTOJOIN;
} else if (strcmp(*argv, "dadfailed") == 0) {
filter.flags |= IFA_F_DADFAILED;
filter.flagmask |= IFA_F_DADFAILED;
@@ -1558,6 +1565,16 @@
return 0;
}
+static bool ipaddr_is_multicast(inet_prefix *a)
+{
+ if (a->family == AF_INET)
+ return IN_MULTICAST(ntohl(a->data[0]));
+ else if (a->family == AF_INET6)
+ return IN6_IS_ADDR_MULTICAST(a->data);
+ else
+ return false;
+}
+
static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
{
struct {
@@ -1665,6 +1682,8 @@
ifa_flags |= IFA_F_MANAGETEMPADDR;
} else if (strcmp(*argv, "noprefixroute") == 0) {
ifa_flags |= IFA_F_NOPREFIXROUTE;
+ } else if (strcmp(*argv, "autojoin") == 0) {
+ ifa_flags |= IFA_F_MCAUTOJOIN;
} else {
if (strcmp(*argv, "local") == 0) {
NEXT_ARG();
@@ -1755,6 +1774,11 @@
sizeof(cinfo));
}
+ if ((ifa_flags & IFA_F_MCAUTOJOIN) && !ipaddr_is_multicast(&lcl)) {
+ fprintf(stderr, "autojoin needs multicast address\n");
+ return -1;
+ }
+
if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
return -2;
diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c
index 3009ec9..a573f92 100644
--- a/ip/iplink_bond.c
+++ b/ip/iplink_bond.c
@@ -415,6 +415,7 @@
if (iptb[i])
fprintf(f, "%s",
rt_addr_n2a(AF_INET,
+ RTA_PAYLOAD(iptb[i]),
RTA_DATA(iptb[i]),
buf,
INET_ADDRSTRLEN));
diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c
index 6b5e665..7833a26 100644
--- a/ip/ipmonitor.c
+++ b/ip/ipmonitor.c
@@ -158,6 +158,7 @@
groups |= nl_mgrp(RTNLGRP_IPV6_IFADDR);
groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE);
groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE);
+ groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE);
groups |= nl_mgrp(RTNLGRP_IPV4_MROUTE);
groups |= nl_mgrp(RTNLGRP_IPV6_MROUTE);
groups |= nl_mgrp(RTNLGRP_IPV6_PREFIX);
@@ -235,6 +236,8 @@
groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE);
if (!preferred_family || preferred_family == AF_INET6)
groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE);
+ if (!preferred_family || preferred_family == AF_MPLS)
+ groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE);
}
if (lmroute) {
if (!preferred_family || preferred_family == AF_INET)
diff --git a/ip/ipmroute.c b/ip/ipmroute.c
index b4ed9f1..13ac892 100644
--- a/ip/ipmroute.c
+++ b/ip/ipmroute.c
@@ -116,6 +116,7 @@
if (tb[RTA_SRC])
len = snprintf(obuf, sizeof(obuf),
"(%s, ", rt_addr_n2a(family,
+ RTA_PAYLOAD(tb[RTA_SRC]),
RTA_DATA(tb[RTA_SRC]),
abuf, sizeof(abuf)));
else
@@ -123,6 +124,7 @@
if (tb[RTA_DST])
snprintf(obuf + len, sizeof(obuf) - len,
"%s)", rt_addr_n2a(family,
+ RTA_PAYLOAD(tb[RTA_DST]),
RTA_DATA(tb[RTA_DST]),
abuf, sizeof(abuf)));
else
diff --git a/ip/ipprefix.c b/ip/ipprefix.c
index 02c0efc..26b5961 100644
--- a/ip/ipprefix.c
+++ b/ip/ipprefix.c
@@ -80,7 +80,9 @@
pfx = (struct in6_addr *)RTA_DATA(tb[PREFIX_ADDRESS]);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "%s", rt_addr_n2a(family, pfx,
+ fprintf(fp, "%s", rt_addr_n2a(family,
+ RTA_PAYLOAD(tb[PREFIX_ADDRESS]),
+ pfx,
abuf, sizeof(abuf)));
}
fprintf(fp, "/%u ", prefix->prefix_len);
diff --git a/ip/iproute.c b/ip/iproute.c
index 024d401..132a83a 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -23,6 +23,7 @@
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <linux/in_route.h>
+#include <linux/icmpv6.h>
#include <errno.h>
#include "rt_names.h"
@@ -75,19 +76,22 @@
fprintf(stderr, " [ table TABLE_ID ] [ proto RTPROTO ]\n");
fprintf(stderr, " [ scope SCOPE ] [ metric METRIC ]\n");
fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n");
- fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
- fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n");
+ fprintf(stderr, "NH := [ via [ FAMILY ] ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
+ fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | mpls | bridge | link ]\n");
+ fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n");
fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n");
fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n");
fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n");
fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n");
fprintf(stderr, " [ features FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n");
+ fprintf(stderr, " [ pref PREF ]\n");
fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n");
fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n");
fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n");
fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n");
fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n");
fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n");
+ fprintf(stderr, "PREF := [ low | medium | high ]\n");
fprintf(stderr, "TIME := NUMBER[s|ms]\n");
fprintf(stderr, "BOOL := [1|0]\n");
fprintf(stderr, "FEATURES := ecn\n");
@@ -185,8 +189,15 @@
(r->rtm_family != filter.msrc.family ||
(filter.msrc.bitlen >= 0 && filter.msrc.bitlen < r->rtm_src_len)))
return 0;
- if (filter.rvia.family && r->rtm_family != filter.rvia.family)
- return 0;
+ if (filter.rvia.family) {
+ int family = r->rtm_family;
+ if (tb[RTA_VIA]) {
+ struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+ family = via->rtvia_family;
+ }
+ if (family != filter.rvia.family)
+ return 0;
+ }
if (filter.rprefsrc.family && r->rtm_family != filter.rprefsrc.family)
return 0;
@@ -205,6 +216,12 @@
via.family = r->rtm_family;
if (tb[RTA_GATEWAY])
memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len/8);
+ if (tb[RTA_VIA]) {
+ size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+ struct rtvia *rtvia = RTA_DATA(tb[RTA_VIA]);
+ via.family = rtvia->rtvia_family;
+ memcpy(&via.data, rtvia->rtvia_addr, len);
+ }
}
if (filter.rprefsrc.bitlen>0) {
memset(&prefsrc, 0, sizeof(prefsrc));
@@ -339,8 +356,9 @@
if (tb[RTA_DST]) {
if (r->rtm_dst_len != host_len) {
fprintf(fp, "%s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[RTA_DST]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[RTA_DST]),
+ RTA_DATA(tb[RTA_DST]),
+ abuf, sizeof(abuf)),
r->rtm_dst_len
);
} else {
@@ -358,8 +376,9 @@
if (tb[RTA_SRC]) {
if (r->rtm_src_len != host_len) {
fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[RTA_SRC]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[RTA_SRC]),
+ RTA_DATA(tb[RTA_SRC]),
+ abuf, sizeof(abuf)),
r->rtm_src_len
);
} else {
@@ -372,6 +391,13 @@
} else if (r->rtm_src_len) {
fprintf(fp, "from 0/%u ", r->rtm_src_len);
}
+ if (tb[RTA_NEWDST]) {
+ fprintf(fp, "as to %s ", format_host(r->rtm_family,
+ RTA_PAYLOAD(tb[RTA_NEWDST]),
+ RTA_DATA(tb[RTA_NEWDST]),
+ abuf, sizeof(abuf))
+ );
+ }
if (r->rtm_tos && filter.tosmask != -1) {
SPRINT_BUF(b1);
fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
@@ -384,6 +410,14 @@
RTA_DATA(tb[RTA_GATEWAY]),
abuf, sizeof(abuf)));
}
+ if (tb[RTA_VIA]) {
+ size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+ struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+ fprintf(fp, "via %s %s ",
+ family_name(via->rtvia_family),
+ format_host(via->rtvia_family, len, via->rtvia_addr,
+ abuf, sizeof(abuf)));
+ }
if (tb[RTA_OIF] && filter.oifmask != -1)
fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF])));
@@ -401,6 +435,7 @@
*/
fprintf(fp, " src %s ",
rt_addr_n2a(r->rtm_family,
+ RTA_PAYLOAD(tb[RTA_PREFSRC]),
RTA_DATA(tb[RTA_PREFSRC]),
abuf, sizeof(abuf)));
}
@@ -412,6 +447,8 @@
fprintf(fp, "onlink ");
if (r->rtm_flags & RTNH_F_PERVASIVE)
fprintf(fp, "pervasive ");
+ if (r->rtm_flags & RTNH_F_EXTERNAL)
+ fprintf(fp, "external ");
if (r->rtm_flags & RTM_F_NOTIFY)
fprintf(fp, "notify ");
if (tb[RTA_MARK]) {
@@ -598,6 +635,14 @@
RTA_DATA(tb[RTA_GATEWAY]),
abuf, sizeof(abuf)));
}
+ if (tb[RTA_VIA]) {
+ size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+ struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+ fprintf(fp, "via %s %s ",
+ family_name(via->rtvia_family),
+ format_host(via->rtvia_family, len, via->rtvia_addr,
+ abuf, sizeof(abuf)));
+ }
if (tb[RTA_FLOW]) {
__u32 to = rta_getattr_u32(tb[RTA_FLOW]);
__u32 from = to>>16;
@@ -629,6 +674,24 @@
nh = RTNH_NEXT(nh);
}
}
+ if (tb[RTA_PREF]) {
+ unsigned int pref = rta_getattr_u8(tb[RTA_PREF]);
+ fprintf(fp, " pref ");
+
+ switch (pref) {
+ case ICMPV6_ROUTER_PREF_LOW:
+ fprintf(fp, "low");
+ break;
+ case ICMPV6_ROUTER_PREF_MEDIUM:
+ fprintf(fp, "medium");
+ break;
+ case ICMPV6_ROUTER_PREF_HIGH:
+ fprintf(fp, "high");
+ break;
+ default:
+ fprintf(fp, "%u", pref);
+ }
+ }
fprintf(fp, "\n");
fflush(fp);
return 0;
@@ -645,12 +708,23 @@
while (++argv, --argc > 0) {
if (strcmp(*argv, "via") == 0) {
inet_prefix addr;
+ int family;
NEXT_ARG();
- get_addr(&addr, *argv, r->rtm_family);
+ family = read_family(*argv);
+ if (family == AF_UNSPEC)
+ family = r->rtm_family;
+ else
+ NEXT_ARG();
+ get_addr(&addr, *argv, family);
if (r->rtm_family == AF_UNSPEC)
r->rtm_family = addr.family;
- rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen);
- rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen;
+ if (addr.family == r->rtm_family) {
+ rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen);
+ rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen;
+ } else {
+ rta_addattr_l(rta, 4096, RTA_VIA, &addr.family, addr.bytelen+2);
+ rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen+2;
+ }
} else if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
if ((rtnh->rtnh_ifindex = ll_name_to_index(*argv)) == 0) {
@@ -756,14 +830,33 @@
if (req.r.rtm_family == AF_UNSPEC)
req.r.rtm_family = addr.family;
addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &addr.data, addr.bytelen);
- } else if (strcmp(*argv, "via") == 0) {
+ } else if (strcmp(*argv, "as") == 0) {
inet_prefix addr;
- gw_ok = 1;
NEXT_ARG();
+ if (strcmp(*argv, "to") == 0) {
+ NEXT_ARG();
+ }
get_addr(&addr, *argv, req.r.rtm_family);
if (req.r.rtm_family == AF_UNSPEC)
req.r.rtm_family = addr.family;
- addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen);
+ addattr_l(&req.n, sizeof(req), RTA_NEWDST, &addr.data, addr.bytelen);
+ } else if (strcmp(*argv, "via") == 0) {
+ inet_prefix addr;
+ int family;
+ gw_ok = 1;
+ NEXT_ARG();
+ family = read_family(*argv);
+ if (family == AF_UNSPEC)
+ family = req.r.rtm_family;
+ else
+ NEXT_ARG();
+ get_addr(&addr, *argv, family);
+ if (req.r.rtm_family == AF_UNSPEC)
+ req.r.rtm_family = addr.family;
+ if (addr.family == req.r.rtm_family)
+ addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen);
+ else
+ addattr_l(&req.n, sizeof(req), RTA_VIA, &addr.family, addr.bytelen+2);
} else if (strcmp(*argv, "from") == 0) {
inet_prefix addr;
NEXT_ARG();
@@ -782,7 +875,7 @@
req.r.rtm_tos = tos;
} else if (matches(*argv, "metric") == 0 ||
matches(*argv, "priority") == 0 ||
- matches(*argv, "preference") == 0) {
+ strcmp(*argv, "preference") == 0) {
__u32 metric;
NEXT_ARG();
if (get_u32(&metric, *argv, 0))
@@ -979,6 +1072,18 @@
strcmp(*argv, "oif") == 0) {
NEXT_ARG();
d = *argv;
+ } else if (matches(*argv, "pref") == 0) {
+ __u8 pref;
+ NEXT_ARG();
+ if (strcmp(*argv, "low") == 0)
+ pref = ICMPV6_ROUTER_PREF_LOW;
+ else if (strcmp(*argv, "medium") == 0)
+ pref = ICMPV6_ROUTER_PREF_MEDIUM;
+ else if (strcmp(*argv, "high") == 0)
+ pref = ICMPV6_ROUTER_PREF_HIGH;
+ else if (get_u8(&pref, *argv, 0))
+ invarg("\"pref\" value is invalid\n", *argv);
+ addattr8(&req.n, sizeof(req), RTA_PREF, pref);
} else {
int type;
inet_prefix dst;
@@ -1248,8 +1353,14 @@
get_unsigned(&mark, *argv, 0);
filter.markmask = -1;
} else if (strcmp(*argv, "via") == 0) {
+ int family;
NEXT_ARG();
- get_prefix(&filter.rvia, *argv, do_ipv6);
+ family = read_family(*argv);
+ if (family == AF_UNSPEC)
+ family = do_ipv6;
+ else
+ NEXT_ARG();
+ get_prefix(&filter.rvia, *argv, family);
} else if (strcmp(*argv, "src") == 0) {
NEXT_ARG();
get_prefix(&filter.rprefsrc, *argv, do_ipv6);
@@ -1551,6 +1662,8 @@
tb[RTA_OIF]->rta_type = 0;
if (tb[RTA_GATEWAY])
tb[RTA_GATEWAY]->rta_type = 0;
+ if (tb[RTA_VIA])
+ tb[RTA_VIA]->rta_type = 0;
if (!idev && tb[RTA_IIF])
tb[RTA_IIF]->rta_type = 0;
req.n.nlmsg_flags = NLM_F_REQUEST;
diff --git a/ip/iprule.c b/ip/iprule.c
index 54ed753..967969c 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -82,8 +82,9 @@
if (tb[FRA_SRC]) {
if (r->rtm_src_len != host_len) {
fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[FRA_SRC]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[FRA_SRC]),
+ RTA_DATA(tb[FRA_SRC]),
+ abuf, sizeof(abuf)),
r->rtm_src_len
);
} else {
@@ -102,8 +103,9 @@
if (tb[FRA_DST]) {
if (r->rtm_dst_len != host_len) {
fprintf(fp, "to %s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[FRA_DST]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[FRA_DST]),
+ RTA_DATA(tb[FRA_DST]),
+ abuf, sizeof(abuf)),
r->rtm_dst_len
);
} else {
diff --git a/ip/iptunnel.c b/ip/iptunnel.c
index caf8a28..be84b83 100644
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -342,8 +342,8 @@
printf("%s: %s/ip remote %s local %s ",
p->name,
tnl_strproto(p->iph.protocol),
- p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any",
- p->iph.saddr ? rt_addr_n2a(AF_INET, &p->iph.saddr, s2, sizeof(s2)) : "any");
+ p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any",
+ p->iph.saddr ? rt_addr_n2a(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any");
if (p->iph.protocol == IPPROTO_IPV6 && (p->i_flags & SIT_ISATAP)) {
struct ip_tunnel_prl prl[16];
diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c
index 95f91a5..9aaf58d 100644
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -288,10 +288,10 @@
fputs(title, fp);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "src %s ", rt_addr_n2a(family,
+ fprintf(fp, "src %s ", rt_addr_n2a(family, sizeof(*saddr),
saddr, abuf, sizeof(abuf)));
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "dst %s", rt_addr_n2a(family,
+ fprintf(fp, "dst %s", rt_addr_n2a(family, sizeof(id->daddr),
&id->daddr, abuf, sizeof(abuf)));
fprintf(fp, "%s", _SL_);
@@ -455,11 +455,15 @@
fputs(prefix, fp);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "src %s/%u ", rt_addr_n2a(f, &sel->saddr, abuf, sizeof(abuf)),
+ fprintf(fp, "src %s/%u ",
+ rt_addr_n2a(f, sizeof(sel->saddr), &sel->saddr,
+ abuf, sizeof(abuf)),
sel->prefixlen_s);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "dst %s/%u ", rt_addr_n2a(f, &sel->daddr, abuf, sizeof(abuf)),
+ fprintf(fp, "dst %s/%u ",
+ rt_addr_n2a(f, sizeof(sel->daddr), &sel->daddr,
+ abuf, sizeof(abuf)),
sel->prefixlen_d);
if (sel->proto)
@@ -755,7 +759,8 @@
memset(abuf, '\0', sizeof(abuf));
fprintf(fp, "addr %s",
- rt_addr_n2a(family, &e->encap_oa, abuf, sizeof(abuf)));
+ rt_addr_n2a(family, sizeof(e->encap_oa), &e->encap_oa,
+ abuf, sizeof(abuf)));
fprintf(fp, "%s", _SL_);
}
@@ -783,7 +788,7 @@
memset(abuf, '\0', sizeof(abuf));
fprintf(fp, "%s",
- rt_addr_n2a(family, coa,
+ rt_addr_n2a(family, sizeof(*coa), coa,
abuf, sizeof(abuf)));
fprintf(fp, "%s", _SL_);
}
diff --git a/ip/link_ip6tnl.c b/ip/link_ip6tnl.c
index 5ed3d5a..cf59a93 100644
--- a/ip/link_ip6tnl.c
+++ b/ip/link_ip6tnl.c
@@ -285,6 +285,7 @@
if (tb[IFLA_IPTUN_REMOTE]) {
fprintf(f, "remote %s ",
rt_addr_n2a(AF_INET6,
+ RTA_PAYLOAD(tb[IFLA_IPTUN_REMOTE]),
RTA_DATA(tb[IFLA_IPTUN_REMOTE]),
s1, sizeof(s1)));
}
@@ -292,6 +293,7 @@
if (tb[IFLA_IPTUN_LOCAL]) {
fprintf(f, "local %s ",
rt_addr_n2a(AF_INET6,
+ RTA_PAYLOAD(tb[IFLA_IPTUN_LOCAL]),
RTA_DATA(tb[IFLA_IPTUN_LOCAL]),
s1, sizeof(s1)));
}
diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c
index 50116a7..b2b2d6e 100644
--- a/ip/xfrm_monitor.c
+++ b/ip/xfrm_monitor.c
@@ -227,7 +227,8 @@
buf[0] = 0;
fprintf(fp, "dst %s ",
- rt_addr_n2a(sa_id->family, &sa_id->daddr, buf, sizeof(buf)));
+ rt_addr_n2a(sa_id->family, sizeof(sa_id->daddr), &sa_id->daddr,
+ buf, sizeof(buf)));
fprintf(fp, " reqid 0x%x", reqid);
@@ -246,7 +247,8 @@
xfrm_ae_flags_print(id->flags, arg);
fprintf(fp,"\n\t");
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family, &id->saddr,
+ fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family,
+ sizeof(id->saddr), &id->saddr,
abuf, sizeof(abuf)));
xfrm_usersa_print(&id->sa_id, id->reqid, fp);
@@ -262,7 +264,7 @@
char buf[256];
buf[0] = 0;
- fprintf(fp, "%s", rt_addr_n2a(family, a, buf, sizeof(buf)));
+ fprintf(fp, "%s", rt_addr_n2a(family, sizeof(*a), a, buf, sizeof(buf)));
}
static int xfrm_mapping_print(const struct sockaddr_nl *who,
diff --git a/lib/ll_addr.c b/lib/ll_addr.c
index c12ab07..2ce9abf 100644
--- a/lib/ll_addr.c
+++ b/lib/ll_addr.c
@@ -29,7 +29,7 @@
#include "utils.h"
-const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen)
+const char *ll_addr_n2a(const unsigned char *addr, int alen, int type, char *buf, int blen)
{
int i;
int l;
diff --git a/lib/mpls_ntop.c b/lib/mpls_ntop.c
new file mode 100644
index 0000000..945d6d5
--- /dev/null
+++ b/lib/mpls_ntop.c
@@ -0,0 +1,48 @@
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <linux/mpls.h>
+
+#include "utils.h"
+
+/*
+ * Format an MPLS label stack as "label/label/...".
+ *
+ * Entries of addr[] are printed in order until one with the
+ * bottom-of-stack bit set has been written; at most MPLS_MAX_LABELS
+ * entries are read.
+ *
+ * Returns buf on success. Returns NULL with errno set to E2BIG when the
+ * text does not fit into buflen bytes or when no bottom-of-stack entry
+ * is found within MPLS_MAX_LABELS entries.
+ */
+static const char *mpls_ntop1(const struct mpls_label *addr, char *buf, size_t buflen)
+{
+	size_t destlen = buflen;
+	char *dest = buf;
+	int count;
+
+	for (count = 0; count < MPLS_MAX_LABELS; count++) {
+		uint32_t entry = ntohl(addr[count].entry);
+		uint32_t label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+		int len = snprintf(dest, destlen, "%u", label);
+
+		/* snprintf() returns the length it wanted to write; stop on
+		 * error or truncation so the unsigned destlen cannot wrap.
+		 */
+		if (len < 0 || (size_t)len >= destlen)
+			break;
+
+		/* Is this the end? */
+		if (entry & MPLS_LS_S_MASK)
+			return buf;
+
+		dest += len;
+		destlen -= len;
+		/* len < destlen was checked above, so one byte is left for
+		 * the separator (it replaces the terminating NUL).
+		 */
+		*dest = '/';
+		dest++;
+		destlen--;
+	}
+	/* errno values are positive */
+	errno = E2BIG;
+	return NULL;
+}
+
+/*
+ * inet_ntop()-style front end for MPLS label stacks.
+ *
+ * For AF_MPLS, errno is cleared and the label stack at addr is formatted
+ * into buf by mpls_ntop1(). Any other family yields NULL with errno set
+ * to EAFNOSUPPORT.
+ */
+const char *mpls_ntop(int af, const void *addr, char *buf, size_t buflen)
+{
+	switch (af) {
+	case AF_MPLS:
+		errno = 0;
+		/* const void * converts implicitly; no cast, so const is kept */
+		return mpls_ntop1(addr, buf, buflen);
+	default:
+		errno = EAFNOSUPPORT;
+	}
+
+	return NULL;
+}
diff --git a/lib/mpls_pton.c b/lib/mpls_pton.c
new file mode 100644
index 0000000..bd448cf
--- /dev/null
+++ b/lib/mpls_pton.c
@@ -0,0 +1,58 @@
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <linux/mpls.h>
+
+#include "utils.h"
+
+
+/*
+ * Parse "label/label/..." into consecutive entries of addr[].
+ *
+ * Each label is read with strtoul() (base auto-detected) and must be
+ * below 2^20. The entry written for the last label gets the
+ * bottom-of-stack bit. Returns 1 on success and 0 on any parse error or
+ * when more than MPLS_MAX_LABELS labels are given.
+ */
+static int mpls_pton1(const char *name, struct mpls_label *addr)
+{
+	unsigned idx = 0;
+
+	while (idx < MPLS_MAX_LABELS) {
+		struct mpls_label *slot = &addr[idx];
+		char *stop;
+		unsigned long value = strtoul(name, &stop, 0);
+
+		/* Fail when no digits were consumed or the label value is
+		 * out of range.
+		 */
+		if (stop == name || value >= (1 << 20))
+			return 0;
+
+		slot->entry = htonl(value << MPLS_LS_LABEL_SHIFT);
+
+		switch (*stop) {
+		case '\0':
+			/* Last label: mark bottom of stack */
+			slot->entry |= htonl(1 << MPLS_LS_S_SHIFT);
+			return 1;
+		case '/':
+			name = stop + 1;
+			idx++;
+			break;
+		default:
+			/* Bad character in the address */
+			return 0;
+		}
+	}
+
+	/* The address was too long */
+	return 0;
+}
+
+/*
+ * inet_pton()-style front end for MPLS label stacks.
+ *
+ * For AF_MPLS, errno is cleared and the result of mpls_pton1() is
+ * returned. Any other family returns -1 with errno set to EAFNOSUPPORT.
+ */
+int mpls_pton(int af, const char *src, void *addr)
+{
+	if (af != AF_MPLS) {
+		errno = EAFNOSUPPORT;
+		return -1;
+	}
+
+	errno = 0;
+	return mpls_pton1(src, (struct mpls_label *)addr);
+}
diff --git a/lib/utils.c b/lib/utils.c
index 0d08a86..428ad8f 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -25,11 +25,13 @@
#include <asm/types.h>
#include <linux/pkt_sched.h>
#include <linux/param.h>
+#include <linux/if_arp.h>
+#include <linux/mpls.h>
#include <time.h>
#include <sys/time.h>
#include <errno.h>
-
+#include "rt_names.h"
#include "utils.h"
#include "namespace.h"
@@ -389,7 +391,7 @@
if (strcmp(name, "default") == 0 ||
strcmp(name, "all") == 0 ||
strcmp(name, "any") == 0) {
- if (family == AF_DECnet)
+ if ((family == AF_DECnet) || (family == AF_MPLS))
return -1;
addr->family = family;
addr->bytelen = (family == AF_INET6 ? 16 : 4);
@@ -397,6 +399,18 @@
return 0;
}
+ if (family == AF_PACKET) {
+ int len;
+ len = ll_addr_a2n((char *)&addr->data, sizeof(addr->data), name);
+ if (len < 0)
+ return -1;
+
+ addr->family = AF_PACKET;
+ addr->bytelen = len;
+ addr->bitlen = len * 8;
+ return 0;
+ }
+
if (strchr(name, ':')) {
addr->family = AF_INET6;
if (family != AF_UNSPEC && family != AF_INET6)
@@ -419,6 +433,23 @@
return 0;
}
+ if (family == AF_MPLS) {
+ int i;
+ addr->family = AF_MPLS;
+ if (mpls_pton(AF_MPLS, name, addr->data) <= 0)
+ return -1;
+ addr->bytelen = 4;
+ addr->bitlen = 20;
+ /* How many bytes do I need? */
+ for (i = 0; i < 8; i++) {
+ if (ntohl(addr->data[i]) & MPLS_LS_S_MASK) {
+ addr->bytelen = (i + 1)*4;
+ break;
+ }
+ }
+ return 0;
+ }
+
addr->family = AF_INET;
if (family != AF_UNSPEC && family != AF_INET)
return -1;
@@ -442,6 +473,8 @@
return 16;
case AF_IPX:
return 80;
+ case AF_MPLS:
+ return 20;
}
return 0;
@@ -463,7 +496,7 @@
if (strcmp(arg, "default") == 0 ||
strcmp(arg, "any") == 0 ||
strcmp(arg, "all") == 0) {
- if (family == AF_DECnet)
+		if ((family == AF_DECnet) || (family == AF_MPLS))
return -1;
dst->family = family;
dst->bytelen = 0;
@@ -497,10 +530,6 @@
int get_addr(inet_prefix *dst, const char *arg, int family)
{
- if (family == AF_PACKET) {
- fprintf(stderr, "Error: \"%s\" may be inet address, but it is not allowed in this context.\n", arg);
- exit(1);
- }
if (get_addr_1(dst, arg, family)) {
fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", arg);
exit(1);
@@ -636,12 +665,14 @@
return sysconf(_SC_CLK_TCK);
}
-const char *rt_addr_n2a(int af, const void *addr, char *buf, int buflen)
+const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen)
{
switch (af) {
case AF_INET:
case AF_INET6:
return inet_ntop(af, addr, buf, buflen);
+ case AF_MPLS:
+ return mpls_ntop(af, addr, buf, buflen);
case AF_IPX:
return ipx_ntop(af, addr, buf, buflen);
case AF_DECnet:
@@ -650,11 +681,52 @@
memcpy(dna.a_addr, addr, 2);
return dnet_ntop(af, &dna, buf, buflen);
}
+ case AF_PACKET:
+ return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen);
default:
return "???";
}
}
+/*
+ * Translate an address family keyword into its AF_* constant.
+ * Names that are not recognized map to AF_UNSPEC.
+ */
+int read_family(const char *name)
+{
+	static const struct {
+		const char *keyword;
+		int af;
+	} known[] = {
+		{ "inet",   AF_INET },
+		{ "inet6",  AF_INET6 },
+		{ "dnet",   AF_DECnet },
+		{ "link",   AF_PACKET },
+		{ "ipx",    AF_IPX },
+		{ "mpls",   AF_MPLS },
+		{ "bridge", AF_BRIDGE },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		if (strcmp(name, known[i].keyword) == 0)
+			return known[i].af;
+	}
+
+	return AF_UNSPEC;
+}
+
+/*
+ * Translate an AF_* constant into the keyword read_family() accepts.
+ * Families without a keyword are reported as "???".
+ */
+const char *family_name(int family)
+{
+	switch (family) {
+	case AF_INET:
+		return "inet";
+	case AF_INET6:
+		return "inet6";
+	case AF_DECnet:
+		return "dnet";
+	case AF_PACKET:
+		return "link";
+	case AF_IPX:
+		return "ipx";
+	case AF_MPLS:
+		return "mpls";
+	case AF_BRIDGE:
+		return "bridge";
+	default:
+		return "???";
+	}
+}
+
#ifdef RESOLVE_HOSTNAMES
struct namerec
{
@@ -723,7 +795,7 @@
return n;
}
#endif
- return rt_addr_n2a(af, addr, buf, buflen);
+ return rt_addr_n2a(af, len, addr, buf, buflen);
}
diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in
index d53cc76..72d8d77 100644
--- a/man/man8/ip-route.8.in
+++ b/man/man8/ip-route.8.in
@@ -81,18 +81,28 @@
.ti -8
.IR NH " := [ "
.B via
-.IR ADDRESS " ] [ "
+[
+.IR FAMILY " ] " ADDRESS " ] [ "
.B dev
.IR STRING " ] [ "
.B weight
.IR NUMBER " ] " NHFLAGS
.ti -8
+.IR FAMILY " := [ "
+.BR inet " | " inet6 " | " ipx " | " dnet " | " mpls " | " bridge " | " link " ]"
+
+.ti -8
.IR OPTIONS " := " FLAGS " [ "
.B mtu
.IR NUMBER " ] [ "
.B advmss
.IR NUMBER " ] [ "
+.B as
+[
+.B to
+]
+.IR ADDRESS " ] [ "
.B rtt
.IR TIME " ] [ "
.B rttvar
@@ -119,6 +129,8 @@
.IR BOOL " ] [ "
.B congctl
.IR NAME " ]"
+.RB "[ " pref
+.IR PREF " ]"
.ti -8
.IR TYPE " := [ "
@@ -148,6 +160,10 @@
.IR FEATURES " := [ "
.BR ecn " | ]"
+.ti -8
+.IR PREF " := [ "
+.BR low " | " medium " | " high " ]"
+
.SH DESCRIPTION
.B ip route
@@ -333,9 +349,10 @@
the output device name.
.TP
-.BI via " ADDRESS"
-the address of the nexthop router. Actually, the sense of this field
-depends on the route type. For normal
+.BI via " [ FAMILY ] ADDRESS"
+the address of the nexthop router, in the address family FAMILY.
+Actually, the sense of this field depends on the route type. For
+normal
.B unicast
routes it is either the true next hop router or, if it is a direct
route installed in BSD compatibility mode, it can be a local address
@@ -472,7 +489,7 @@
argument lists:
.in +8
-.BI via " ADDRESS"
+.BI via " [ FAMILY ] ADDRESS"
- is the nexthop router.
.sp
@@ -551,6 +568,28 @@
.B onlink
pretend that the nexthop is directly attached to this link,
even if it does not match any interface prefix.
+
+.TP
+.BI pref " PREF"
+the IPv6 route preference.
+.I PREF
+is a string specifying the route preference as defined in RFC4191 for Router
+Discovery messages. Namely:
+
+.in +8
+.B low
+- the route has the lowest priority
+.sp
+
+.B medium
+- the route has the default priority
+.sp
+
+.B high
+- the route has the highest priority
+.sp
+
+.in -8
.RE
.TP
@@ -669,7 +708,7 @@
only list routes going via this device.
.TP
-.BI via " PREFIX"
+.BI via " [ FAMILY ] PREFIX"
only list routes going via the nexthop routers selected by
.IR PREFIX "."
diff --git a/man/man8/ip.8 b/man/man8/ip.8
index 4cd71de..44d1ee6 100644
--- a/man/man8/ip.8
+++ b/man/man8/ip.8
@@ -73,7 +73,7 @@
.TP
.BR "\-f" , " \-family " <FAMILY>
Specifies the protocol family to use. The protocol family identifier can be one of
-.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet"
+.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet" , " mpls"
or
.BR link .
If this option is not present,
@@ -115,6 +115,11 @@
.BR "\-family ipx" .
.TP
+.B \-M
+shortcut for
+.BR "\-family mpls" .
+
+.TP
.B \-0
shortcut for
.BR "\-family link" .
diff --git a/tc/Makefile b/tc/Makefile
index d831a15..2eff082 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -89,6 +89,11 @@
endif
endif
+ifeq ($(TC_CONFIG_ELF),y)
+ CFLAGS += -DHAVE_ELF
+ LDLIBS += -lelf
+endif
+
TCOBJ += $(TCMODULES)
LDLIBS += -L. -ltc -lm
diff --git a/tc/f_bpf.c b/tc/f_bpf.c
index e2af94e..6d76580 100644
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -34,13 +34,15 @@
fprintf(stderr, "\n");
fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n");
fprintf(stderr, " [from file]: run bytecode-file FILE\n");
+ fprintf(stderr, " [from file]: run object-file FILE\n");
fprintf(stderr, "\n");
fprintf(stderr, " [ action ACTION_SPEC ]\n");
fprintf(stderr, " [ classid CLASSID ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
- fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n");
+ fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
+ fprintf(stderr, "or an ELF file containing eBPF map definitions and bytecode.\n");
fprintf(stderr, "\nACTION_SPEC := ... look at individual actions\n");
fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n");
}
@@ -71,31 +73,40 @@
while (argc > 0) {
if (matches(*argv, "run") == 0) {
- bool from_file;
+ bool from_file = true, ebpf;
struct sock_filter bpf_ops[BPF_MAXINSNS];
- __u16 bpf_len;
int ret;
NEXT_ARG();
if (strcmp(*argv, "bytecode-file") == 0) {
- from_file = true;
+ ebpf = false;
} else if (strcmp(*argv, "bytecode") == 0) {
from_file = false;
+ ebpf = false;
+ } else if (strcmp(*argv, "object-file") == 0) {
+ ebpf = true;
} else {
fprintf(stderr, "What is \"%s\"?\n", *argv);
explain();
return -1;
}
NEXT_ARG();
- ret = bpf_parse_ops(argc, argv, bpf_ops, from_file);
+ ret = ebpf ? bpf_open_object(*argv, BPF_PROG_TYPE_SCHED_CLS) :
+ bpf_parse_ops(argc, argv, bpf_ops, from_file);
if (ret < 0) {
- fprintf(stderr, "Illegal \"bytecode\"\n");
+ fprintf(stderr, "%s\n", ebpf ?
+ "Could not load object" :
+ "Illegal \"bytecode\"");
return -1;
}
- bpf_len = ret;
- addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, bpf_len);
- addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
- bpf_len * sizeof(struct sock_filter));
+ if (ebpf) {
+ addattr32(n, MAX_MSG, TCA_BPF_FD, ret);
+ addattrstrz(n, MAX_MSG, TCA_BPF_NAME, *argv);
+ } else {
+ addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, ret);
+ addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
+ ret * sizeof(struct sock_filter));
+ }
} else if (matches(*argv, "classid") == 0 ||
strcmp(*argv, "flowid") == 0) {
unsigned handle;
@@ -153,6 +164,11 @@
sprint_tc_classid(rta_getattr_u32(tb[TCA_BPF_CLASSID]), b1));
}
+ if (tb[TCA_BPF_NAME])
+ fprintf(f, "%s ", rta_getattr_str(tb[TCA_BPF_NAME]));
+ else if (tb[TCA_BPF_FD])
+ fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_BPF_FD]));
+
if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN])
bpf_print_ops(f, tb[TCA_BPF_OPS],
rta_getattr_u16(tb[TCA_BPF_OPS_LEN]));
diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c
index c6901d6..3778d6b 100644
--- a/tc/tc_bpf.c
+++ b/tc/tc_bpf.c
@@ -8,6 +8,7 @@
*
* Authors: Daniel Borkmann <dborkman@redhat.com>
* Jiri Pirko <jiri@resnulli.us>
+ * Alexei Starovoitov <ast@plumgrid.com>
*/
#include <stdio.h>
@@ -16,10 +17,19 @@
#include <string.h>
#include <stdbool.h>
#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
#include <linux/filter.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
+#ifdef HAVE_ELF
+#include <libelf.h>
+#include <gelf.h>
+#endif
+
#include "utils.h"
#include "tc_util.h"
#include "tc_bpf.h"
@@ -144,3 +154,385 @@
fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt,
ops[i].jf, ops[i].k);
}
+
+#ifdef HAVE_ELF
+/* View of one ELF section, filled in by bpf_fill_section_data(). */
+struct bpf_elf_sec_data {
+ GElf_Shdr sec_hdr; /* copy of the section header */
+ char *sec_name; /* section name, as returned by elf_strptr() */
+ Elf_Data *sec_data; /* section contents, as returned by elf_getdata() */
+};
+
+/* Log buffer handed to the kernel by bpf_prog_load(); printed and then
+ * cleared by bpf_dump_error().
+ */
+static char bpf_log_buf[8192];
+
+/*
+ * Name of the ELF section that holds the program for the given type,
+ * or NULL when no section is assigned to that type.
+ */
+static const char *prog_type_section(enum bpf_prog_type type)
+{
+	/* BPF_PROG_TYPE_SCHED_ACT / ELF_SECTION_ACTION is left disabled */
+	if (type == BPF_PROG_TYPE_SCHED_CLS)
+		return ELF_SECTION_CLASSIFIER;
+
+	return NULL;
+}
+
+static void bpf_dump_error(const char *format, ...) __check_format_string(1, 2);
+
+/*
+ * Print a printf-style message to stderr, followed by the current
+ * contents of bpf_log_buf, then clear bpf_log_buf.
+ */
+static void bpf_dump_error(const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+
+	fputs(bpf_log_buf, stderr);
+	memset(bpf_log_buf, '\0', sizeof(bpf_log_buf));
+}
+
+/*
+ * Issue BPF_MAP_CREATE for a map with the given type, key size, value
+ * size and maximum number of entries. Returns whatever bpf() returns.
+ */
+static int bpf_create_map(enum bpf_map_type type, unsigned int size_key,
+			  unsigned int size_value, unsigned int max_elem)
+{
+	union bpf_attr attr;
+
+	/* All sizeof(attr) bytes are handed to the syscall. A designated
+	 * initializer only guarantees the named union member, so clear
+	 * the whole union explicitly before filling it in.
+	 */
+	memset(&attr, 0, sizeof(attr));
+	attr.map_type = type;
+	attr.key_size = size_key;
+	attr.value_size = size_value;
+	attr.max_entries = max_elem;
+
+	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+/*
+ * Issue BPF_PROG_LOAD for the instructions at insns.
+ *
+ * len is the size of the instruction array in bytes; it is converted to
+ * an instruction count here. The kernel is given bpf_log_buf as its log
+ * buffer with log_level 1. Returns whatever bpf() returns.
+ */
+static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+			 unsigned int len, const char *license)
+{
+	union bpf_attr attr;
+
+	/* All sizeof(attr) bytes are handed to the syscall. A designated
+	 * initializer only guarantees the named union member, so clear
+	 * the whole union explicitly before filling it in.
+	 */
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = type;
+	attr.insns = bpf_ptr_to_u64(insns);
+	attr.insn_cnt = len / sizeof(struct bpf_insn);
+	attr.license = bpf_ptr_to_u64(license);
+	attr.log_buf = bpf_ptr_to_u64(bpf_log_buf);
+	attr.log_size = sizeof(bpf_log_buf);
+	attr.log_level = 1;
+
+	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+/*
+ * Load a program through bpf_prog_load() and report a rejection on
+ * stderr via bpf_dump_error(). Returns bpf_prog_load()'s result.
+ */
+static int bpf_prog_attach(enum bpf_prog_type type, const struct bpf_insn *insns,
+			   unsigned int size, const char *license)
+{
+	int fd = bpf_prog_load(type, insns, size, license);
+
+	if (fd >= 0)
+		return fd;
+
+	bpf_dump_error("BPF program rejected: %s\n", strerror(errno));
+	return fd;
+}
+
+/*
+ * Create a map through bpf_create_map() and report a rejection on
+ * stderr via bpf_dump_error(). Returns bpf_create_map()'s result.
+ */
+static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key,
+			  unsigned int size_value, unsigned int max_elem)
+{
+	int fd = bpf_create_map(type, size_key, size_value, max_elem);
+
+	if (fd >= 0)
+		return fd;
+
+	bpf_dump_error("BPF map rejected: %s\n", strerror(errno));
+	return fd;
+}
+
+/* Mark the first max_fds slots of map_fds as unused (-1). */
+static void bpf_maps_init(int *map_fds, unsigned int max_fds)
+{
+	int *slot = map_fds;
+	int *end = map_fds + max_fds;
+
+	while (slot != end)
+		*slot++ = -1;
+}
+
+/*
+ * Close every descriptor >= 0 among the first max_fds slots of map_fds.
+ * The table itself is not modified.
+ */
+static void bpf_maps_destroy(const int *map_fds, unsigned int max_fds)
+{
+	unsigned int idx = 0;
+
+	while (idx < max_fds) {
+		int fd = map_fds[idx++];
+
+		if (fd < 0)
+			continue;
+		close(fd);
+	}
+}
+
+/*
+ * Create one kernel map per entry of maps[] and store the resulting
+ * descriptors in map_fds[0..num_maps-1].
+ *
+ * Returns 0 on success. If a map cannot be created, the maps created so
+ * far are closed, their slots are reset to -1 and the negative result
+ * of bpf_map_attach() is returned.
+ *
+ * NOTE(review): when num_maps exceeds max_fds nothing is created and 0
+ * is still returned (unchanged from the original loop condition) --
+ * confirm this silent skip is intended.
+ */
+static int bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps,
+			   int *map_fds, unsigned int max_fds)
+{
+	unsigned int i;
+	int ret;
+
+	if (num_maps > max_fds)
+		return 0;
+
+	for (i = 0; i < num_maps; i++) {
+		struct bpf_elf_map *map = &maps[i];
+
+		ret = bpf_map_attach(map->type, map->size_key,
+				     map->size_value, map->max_elem);
+		if (ret < 0)
+			goto err_unwind;
+
+		map_fds[i] = ret;
+	}
+
+	return 0;
+
+err_unwind:
+	bpf_maps_destroy(map_fds, i);
+	/* Forget the descriptors just closed, so that a later
+	 * bpf_maps_destroy() over the whole table cannot close them twice.
+	 */
+	bpf_maps_init(map_fds, i);
+	return ret;
+}
+
+/*
+ * Look up section sec_index of the ELF object and describe it in
+ * *sec_data (header copy, name, data descriptor).
+ *
+ * *sec_data is zeroed first and only filled in on success. Returns 0 on
+ * success, -EINVAL when the section cannot be found, -EIO when its
+ * header or data cannot be read or a second data descriptor exists, and
+ * -ENOENT when it has no name or a size of zero.
+ */
+static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index,
+				 struct bpf_elf_sec_data *sec_data)
+{
+	GElf_Shdr hdr;
+	Elf_Scn *scn;
+	Elf_Data *payload;
+	char *name;
+
+	memset(sec_data, 0, sizeof(*sec_data));
+
+	scn = elf_getscn(elf_fd, sec_index);
+	if (scn == NULL)
+		return -EINVAL;
+
+	if (gelf_getshdr(scn, &hdr) != &hdr)
+		return -EIO;
+
+	name = elf_strptr(elf_fd, elf_hdr->e_shstrndx, hdr.sh_name);
+	if (name == NULL)
+		return -ENOENT;
+	if (hdr.sh_size == 0)
+		return -ENOENT;
+
+	payload = elf_getdata(scn, NULL);
+	if (payload == NULL)
+		return -EIO;
+	/* Only sections with exactly one data descriptor are accepted */
+	if (elf_getdata(scn, payload) != NULL)
+		return -EIO;
+
+	sec_data->sec_hdr = hdr;
+	sec_data->sec_name = name;
+	sec_data->sec_data = payload;
+
+	return 0;
+}
+
+/*
+ * Apply the relocations in data_relo to the instructions in data_insn.
+ *
+ * Every relocation must point at a BPF_LD | BPF_IMM | BPF_DW
+ * instruction. The referenced symbol's value, divided by
+ * sizeof(struct bpf_elf_map), selects the slot of map_fds whose
+ * descriptor is patched into the instruction (src_reg is set to
+ * BPF_PSEUDO_MAP_FD, imm to the descriptor).
+ *
+ * Returns 0 on success, -EIO when a relocation or symbol cannot be
+ * read, and -EINVAL for a zero relocation entry size, an out-of-range
+ * instruction or map index, or an unexpected instruction.
+ */
+static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
+			       struct bpf_elf_sec_data *data_insn,
+			       Elf_Data *sym_tab, int *map_fds, int max_fds)
+{
+	Elf_Data *idata = data_insn->sec_data;
+	GElf_Shdr *rhdr = &data_relo->sec_hdr;
+	int relo_ent, relo_num;
+	struct bpf_insn *insns = idata->d_buf;
+	unsigned int num_insns = idata->d_size / sizeof(*insns);
+
+	/* sh_entsize comes from the object file; never divide by zero */
+	if (rhdr->sh_entsize == 0)
+		return -EINVAL;
+	relo_num = rhdr->sh_size / rhdr->sh_entsize;
+
+	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
+		unsigned int ioff, fnum;
+		GElf_Rel relo;
+		GElf_Sym sym;
+
+		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
+			return -EIO;
+
+		ioff = relo.r_offset / sizeof(struct bpf_insn);
+		if (ioff >= num_insns)
+			return -EINVAL;
+		if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
+			return -EINVAL;
+
+		if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
+			return -EIO;
+
+		fnum = sym.st_value / sizeof(struct bpf_elf_map);
+		if (fnum >= max_fds)
+			return -EINVAL;
+
+		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
+		insns[ioff].imm = map_fds[fnum];
+	}
+
+	return 0;
+}
+
+/*
+ * Scan all sections (index 1 and up) for the data needed before a
+ * program can be loaded:
+ *   - ELF_SECTION_MAPS:    maps are created, descriptors go to map_fds
+ *   - ELF_SECTION_LICENSE: contents are copied into license
+ *   - SHT_SYMTAB:          its data descriptor is stored in *sym_tab
+ * Each section handled this way is flagged in sec_seen[].
+ *
+ * Sections that bpf_fill_section_data() cannot describe are skipped;
+ * their status is deliberately not returned, so an unreadable or empty
+ * section that happens to come last no longer fails the whole load.
+ *
+ * Returns 0 on success, the negative result of bpf_maps_attach() when
+ * map creation fails, or -ENOMEM when the license section is larger
+ * than lic_len.
+ */
+static int bpf_fetch_ancillary(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
+			       int *map_fds, unsigned int max_fds,
+			       char *license, unsigned int lic_len,
+			       Elf_Data **sym_tab)
+{
+	int sec_index, ret;
+
+	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
+		struct bpf_elf_sec_data data_anc;
+
+		if (bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
+					  &data_anc) < 0)
+			continue;
+
+		/* Extract and load eBPF map fds. */
+		if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS)) {
+			struct bpf_elf_map *maps = data_anc.sec_data->d_buf;
+			unsigned int maps_num = data_anc.sec_data->d_size /
+						sizeof(*maps);
+
+			sec_seen[sec_index] = true;
+			ret = bpf_maps_attach(maps, maps_num, map_fds,
+					      max_fds);
+			if (ret < 0)
+				return ret;
+		}
+		/* Extract eBPF license. */
+		else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) {
+			if (data_anc.sec_data->d_size > lic_len)
+				return -ENOMEM;
+
+			sec_seen[sec_index] = true;
+			memcpy(license, data_anc.sec_data->d_buf,
+			       data_anc.sec_data->d_size);
+		}
+		/* Extract symbol table for relocations (map fd fixups). */
+		else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) {
+			sec_seen[sec_index] = true;
+			*sym_tab = data_anc.sec_data;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Load the program for the given type from a section that has a
+ * relocation section (SHT_REL) attached.
+ *
+ * For every SHT_REL section, the section named by its sh_info is
+ * looked up; if that is the program section for the requested type,
+ * both are flagged in sec_seen[], the relocations are applied and the
+ * program is loaded. The first successful load ends the search.
+ *
+ * Returns the program fd, or a negative value when nothing was loaded
+ * (including when the program type has no section assigned).
+ */
+static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
+			       enum bpf_prog_type type, char *license,
+			       Elf_Data *sym_tab, int *map_fds, unsigned int max_fds)
+{
+	const char *wanted = prog_type_section(type);
+	int sec_index, prog_fd = -1;
+
+	/* prog_type_section() returns NULL for unsupported types; do not
+	 * hand that to strcmp().
+	 */
+	if (!wanted)
+		return -1;
+
+	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
+		struct bpf_elf_sec_data data_relo, data_insn;
+		int ins_index, ret;
+
+		/* Attach eBPF programs with relocation data (maps). */
+		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
+					    &data_relo);
+		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
+			continue;
+
+		ins_index = data_relo.sec_hdr.sh_info;
+
+		ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index,
+					    &data_insn);
+		if (ret < 0)
+			continue;
+		if (strcmp(data_insn.sec_name, wanted))
+			continue;
+
+		sec_seen[sec_index] = true;
+		sec_seen[ins_index] = true;
+
+		ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab,
+					  map_fds, max_fds);
+		if (ret < 0)
+			continue;
+
+		prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
+					  data_insn.sec_data->d_size, license);
+		if (prog_fd < 0)
+			continue;
+
+		break;
+	}
+
+	return prog_fd;
+}
+
+/*
+ * Load the program for the given type from a section that has not been
+ * flagged in sec_seen[], i.e. one without relocation data.
+ *
+ * Returns the program fd of the first section that loads, or a negative
+ * value when nothing was loaded (including when the program type has no
+ * section assigned).
+ */
+static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
+			  enum bpf_prog_type type, char *license)
+{
+	const char *wanted = prog_type_section(type);
+	int sec_index, prog_fd = -1;
+
+	/* prog_type_section() returns NULL for unsupported types; do not
+	 * hand that to strcmp().
+	 */
+	if (!wanted)
+		return -1;
+
+	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
+		struct bpf_elf_sec_data data_insn;
+		int ret;
+
+		/* Attach eBPF programs without relocation data. */
+		if (sec_seen[sec_index])
+			continue;
+
+		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
+					    &data_insn);
+		if (ret < 0)
+			continue;
+		if (strcmp(data_insn.sec_name, wanted))
+			continue;
+
+		prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
+					  data_insn.sec_data->d_size, license);
+		if (prog_fd < 0)
+			continue;
+
+		break;
+	}
+
+	return prog_fd;
+}
+
+/*
+ * Load an eBPF program of the given type from the ELF object at path.
+ *
+ * Maps, license and symbol table are collected first
+ * (bpf_fetch_ancillary()). The program is then loaded from a section
+ * with relocations (bpf_fetch_prog_relo(), only tried when a symbol
+ * table was found) or, failing that, from one without
+ * (bpf_fetch_prog()).
+ *
+ * Returns the program fd on success. The two exits taken before the ELF
+ * handle exists return -EINVAL or -errno. Every later failure returns
+ * the negative prog_fd (initially -1): the code stored in ret is used
+ * only to pick the cleanup path and is not propagated.
+ */
+int bpf_open_object(const char *path, enum bpf_prog_type type)
+{
+ int map_fds[ELF_MAX_MAPS], max_fds = ARRAY_SIZE(map_fds);
+ char license[ELF_MAX_LICENSE_LEN];
+ int file_fd, prog_fd = -1, ret;
+ Elf_Data *sym_tab = NULL;
+ GElf_Ehdr elf_hdr;
+ bool *sec_seen;
+ Elf *elf_fd;
+
+ if (elf_version(EV_CURRENT) == EV_NONE)
+ return -EINVAL;
+
+ file_fd = open(path, O_RDONLY, 0);
+ if (file_fd < 0)
+ return -errno;
+
+ elf_fd = elf_begin(file_fd, ELF_C_READ, NULL);
+ if (!elf_fd) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) {
+ ret = -EIO;
+ goto out_elf;
+ }
+
+ /* One flag per section: set once a section has been consumed */
+ sec_seen = calloc(elf_hdr.e_shnum, sizeof(*sec_seen));
+ if (!sec_seen) {
+ ret = -ENOMEM;
+ goto out_elf;
+ }
+
+ memset(license, 0, sizeof(license));
+ bpf_maps_init(map_fds, max_fds);
+
+ ret = bpf_fetch_ancillary(elf_fd, &elf_hdr, sec_seen, map_fds, max_fds,
+ license, sizeof(license), &sym_tab);
+ if (ret < 0)
+ goto out_maps;
+ if (sym_tab)
+ prog_fd = bpf_fetch_prog_relo(elf_fd, &elf_hdr, sec_seen, type,
+ license, sym_tab, map_fds, max_fds);
+ if (prog_fd < 0)
+ prog_fd = bpf_fetch_prog(elf_fd, &elf_hdr, sec_seen, type,
+ license);
+ if (prog_fd < 0)
+ goto out_maps;
+ /* Success falls through: the map fds are left open here */
+out_sec:
+ free(sec_seen);
+out_elf:
+ elf_end(elf_fd);
+out:
+ close(file_fd);
+ return prog_fd;
+
+ /* Failure: close the map fds, then share the cleanup above */
+out_maps:
+ bpf_maps_destroy(map_fds, max_fds);
+ goto out_sec;
+}
+
+#endif /* HAVE_ELF */
diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h
index 08cca92..ce64747 100644
--- a/tc/tc_bpf.h
+++ b/tc/tc_bpf.h
@@ -13,10 +13,42 @@
#ifndef _TC_BPF_H_
#define _TC_BPF_H_ 1
-#include <stdio.h>
#include <linux/filter.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
+#include <linux/bpf.h>
+#include <sys/syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include "utils.h"
+
+/* Note:
+ *
+ * Below ELF section names and bpf_elf_map structure definition
+ * are not (!) kernel ABI. It's rather a "contract" between the
+ * application and the BPF loader in tc. For compatibility, the
+ * section names should stay as-is. Introducing aliases, if
+ * needed, is a possibility, though.
+ */
+
+/* ELF section names, etc */
+#define ELF_SECTION_LICENSE "license"
+#define ELF_SECTION_MAPS "maps"
+#define ELF_SECTION_CLASSIFIER "classifier"
+#define ELF_SECTION_ACTION "action"
+
+#define ELF_MAX_MAPS 64
+#define ELF_MAX_LICENSE_LEN 128
+
+/* ELF map definition: layout of one entry in the ELF_SECTION_MAPS
+ * section. The loader passes these fields to BPF_MAP_CREATE.
+ */
+struct bpf_elf_map {
+ __u32 type; /* used as map_type */
+ __u32 size_key; /* used as key_size */
+ __u32 size_value; /* used as value_size */
+ __u32 max_elem; /* used as max_entries */
+};
int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
char **bpf_string, bool *need_release,
@@ -25,4 +57,28 @@
bool from_file);
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+/* Widen a pointer to the 64-bit integer form used in union bpf_attr. */
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+	unsigned long raw = (unsigned long) ptr;
+
+	return (__u64) raw;
+}
+
+#ifdef HAVE_ELF
+int bpf_open_object(const char *path, enum bpf_prog_type type);
+
+/* Thin wrapper around the bpf system call. When the build headers do
+ * not define __NR_bpf it fails with errno set to ENOSYS.
+ */
+static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+#ifdef __NR_bpf
+ return syscall(__NR_bpf, cmd, attr, size);
+#else
+ errno = ENOSYS;
+ return -1;
#endif
+}
+#else
+/* Built without HAVE_ELF: loading object files always fails with
+ * errno set to ENOSYS.
+ */
+static inline int bpf_open_object(const char *path, enum bpf_prog_type type)
+{
+ errno = ENOSYS;
+ return -1;
+}
+#endif /* HAVE_ELF */
+#endif /* _TC_BPF_H_ */