Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains Netfilter updates for net-next, they are:
1) A couple of cleanups for the netfilter core hook from Eric Biederman.
2) Net namespace hook registration, also from Eric. This adds a dependency with
the rtnl_lock. This should be fine by now but we have to keep an eye on this
because if we ever get the per-subsys nfnl_lock before rtnl we have may
problems in the future. But we have room to remove this in the future by
propagating the complexity to the clients, by registering hooks for the init
netns functions.
3) Update nf_tables to use the new net namespace hook infrastructure, also from
Eric.
4) Three patches to refine and to address problems from the new net namespace
hook infrastructure.
5) Switch to alternate jumpstack in xtables iff the packet is reentering. This
only applies to a very special case, the TEE target, but Eric Dumazet
reports that this is slowing down things for everyone else. So let's only
switch to the alternate jumpstack if the tee target is in used through a
static key. This batch also comes with offline precalculation of the
jumpstack based on the callchain depth. From Florian Westphal.
6) Minimal SCTP multihoming support for our conntrack helper, from Michal
Kubecek.
7) Reduce nf_bridge_info per skbuff scratchpad area to 32 bytes, from Florian
Westphal.
8) Fix several checkpatch errors in bridge netfilter, from Bernhard Thaler.
9) Get rid of useless debug message in ip6t_REJECT, from Subash Abhinov.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 00050df..d788ce6 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -11,6 +11,8 @@
#include <linux/list.h>
#include <linux/static_key.h>
#include <linux/netfilter_defs.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
#ifdef CONFIG_NETFILTER
static inline int NF_DROP_GETERR(int verdict)
@@ -118,6 +120,13 @@
};
/* Function to register/unregister hook points. */
+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops);
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops);
+int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n);
+void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n);
+
int nf_register_hook(struct nf_hook_ops *reg);
void nf_unregister_hook(struct nf_hook_ops *reg);
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
@@ -128,33 +137,26 @@
int nf_register_sockopt(struct nf_sockopt_ops *reg);
void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
-extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
-
#ifdef HAVE_JUMP_LABEL
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
-static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
+static inline bool nf_hook_list_active(struct list_head *hook_list,
u_int8_t pf, unsigned int hook)
{
if (__builtin_constant_p(pf) &&
__builtin_constant_p(hook))
return static_key_false(&nf_hooks_needed[pf][hook]);
- return !list_empty(nf_hook_list);
+ return !list_empty(hook_list);
}
#else
-static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
+static inline bool nf_hook_list_active(struct list_head *hook_list,
u_int8_t pf, unsigned int hook)
{
- return !list_empty(nf_hook_list);
+ return !list_empty(hook_list);
}
#endif
-static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
-{
- return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook);
-}
-
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
/**
@@ -172,10 +174,13 @@
int (*okfn)(struct sock *, struct sk_buff *),
int thresh)
{
- if (nf_hooks_active(pf, hook)) {
+ struct net *net = dev_net(indev ? indev : outdev);
+ struct list_head *hook_list = &net->nf.hooks[pf][hook];
+
+ if (nf_hook_list_active(hook_list, pf, hook)) {
struct nf_hook_state state;
- nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh,
+ nf_hook_state_init(&state, hook_list, hook, thresh,
pf, indev, outdev, sk, okfn);
return nf_hook_slow(skb, &state);
}
@@ -385,4 +390,15 @@
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
#endif
+/**
+ * nf_skb_duplicated - TEE target has sent a packet
+ *
+ * When a xtables target sends a packet, the OUTPUT and POSTROUTING
+ * hooks are traversed again, i.e. nft and xtables are invoked recursively.
+ *
+ * This is used by xtables TEE target to prevent the duplicated skb from
+ * being duplicated again.
+ */
+DECLARE_PER_CPU(bool, nf_skb_duplicated);
+
#endif /*__LINUX_NETFILTER_H*/
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 286098a..b006b71 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -3,6 +3,7 @@
#include <linux/netdevice.h>
+#include <linux/static_key.h>
#include <uapi/linux/netfilter/x_tables.h>
/**
@@ -222,7 +223,6 @@
* @stacksize jumps (number of user chains) can possibly be made.
*/
unsigned int stacksize;
- unsigned int __percpu *stackptr;
void ***jumpstack;
unsigned char entries[0] __aligned(8);
@@ -281,6 +281,12 @@
*/
DECLARE_PER_CPU(seqcount_t, xt_recseq);
+/* xt_tee_enabled - true if x_tables needs to handle reentrancy
+ *
+ * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
+ */
+extern struct static_key xt_tee_enabled;
+
/**
* xt_write_recseq_begin - start of a write section
*
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 6d80fc6..2437b8a 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -17,9 +17,6 @@
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-#define BRNF_BRIDGED_DNAT 0x02
-#define BRNF_NF_BRIDGE_PREROUTING 0x08
-
int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
static inline void br_drop_fake_rtable(struct sk_buff *skb)
@@ -63,8 +60,17 @@
{
return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL;
}
+
+static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
+{
+ return skb->nf_bridge && skb->nf_bridge->in_prerouting;
+}
#else
#define br_drop_fake_rtable(skb) do { } while (0)
+static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
+{
+ return false;
+}
#endif /* CONFIG_BRIDGE_NETFILTER */
#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b7c1286..df9fdf5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -174,17 +174,24 @@
BRNF_PROTO_8021Q,
BRNF_PROTO_PPPOE
} orig_proto:8;
- bool pkt_otherhost;
+ u8 pkt_otherhost:1;
+ u8 in_prerouting:1;
+ u8 bridged_dnat:1;
__u16 frag_max_size;
- unsigned int mask;
struct net_device *physindev;
union {
- struct net_device *physoutdev;
- char neigh_header[8];
- };
- union {
+ /* prerouting: detect dnat in orig/reply direction */
__be32 ipv4_daddr;
struct in6_addr ipv6_daddr;
+
+ /* after prerouting + nat detected: store original source
+ * mac since neigh resolution overwrites it, only used while
+ * skb is out in neigh layer.
+ */
+ char neigh_header[8];
+
+ /* always valid & non-NULL from FORWARD on, for physdev match */
+ struct net_device *physoutdev;
};
};
#endif
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 532e4ba6..38aa498 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -14,5 +14,6 @@
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
#endif
+ struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
};
#endif
diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
index ceeefe6..ed4e776 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
@@ -13,6 +13,8 @@
SCTP_CONNTRACK_SHUTDOWN_SENT,
SCTP_CONNTRACK_SHUTDOWN_RECD,
SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
+ SCTP_CONNTRACK_HEARTBEAT_SENT,
+ SCTP_CONNTRACK_HEARTBEAT_ACKED,
SCTP_CONNTRACK_MAX
};
diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
index 1ab0b97..f2c10dc 100644
--- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
+++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h
@@ -92,6 +92,8 @@
CTA_TIMEOUT_SCTP_SHUTDOWN_SENT,
CTA_TIMEOUT_SCTP_SHUTDOWN_RECD,
CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
+ CTA_TIMEOUT_SCTP_HEARTBEAT_SENT,
+ CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED,
__CTA_TIMEOUT_SCTP_MAX
};
#define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index c8b9bcf..0a6f095 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -49,9 +49,9 @@
static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
-static int brnf_filter_vlan_tagged __read_mostly = 0;
-static int brnf_filter_pppoe_tagged __read_mostly = 0;
-static int brnf_pass_vlan_indev __read_mostly = 0;
+static int brnf_filter_vlan_tagged __read_mostly;
+static int brnf_filter_pppoe_tagged __read_mostly;
+static int brnf_pass_vlan_indev __read_mostly;
#else
#define brnf_call_iptables 1
#define brnf_call_ip6tables 1
@@ -284,7 +284,7 @@
nf_bridge->neigh_header,
ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */
- nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+ nf_bridge->bridged_dnat = 1;
/* FIXME Need to refragment */
ret = neigh->output(neigh, skb);
}
@@ -356,7 +356,7 @@
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
- nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -444,7 +444,7 @@
nf_bridge->pkt_otherhost = true;
}
- nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->in_prerouting = 1;
nf_bridge->physindev = skb->dev;
skb->dev = brnf_get_logical_dev(skb, skb->dev);
@@ -850,10 +850,8 @@
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (skb->nf_bridge &&
- !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+ if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
return NF_STOP;
- }
return NF_ACCEPT;
}
@@ -872,7 +870,7 @@
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
skb_pull(skb, ETH_HLEN);
- nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
+ nf_bridge->bridged_dnat = 0;
BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
@@ -887,7 +885,7 @@
static int br_nf_dev_xmit(struct sk_buff *skb)
{
- if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+ if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
br_nf_pre_routing_finish_bridge_slow(skb);
return 1;
}
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 13b7d1e..77383bf 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -174,7 +174,7 @@
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
- nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->in_prerouting = 0;
if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) {
skb_dst_drop(skb);
v6ops->route_input(skb);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 92305a1..c416cb3 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -240,7 +240,7 @@
return (struct arpt_entry *)(base + offset);
}
-static inline __pure
+static inline
struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -280,6 +280,9 @@
table_base = private->entries;
jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
+ /* No TEE support for arptables, so no need to switch to alternate
+ * stack. All targets that reenter must return absolute verdicts.
+ */
e = get_entry(table_base, private->hook_entry[hook]);
acpar.in = state->in;
@@ -325,11 +328,6 @@
}
if (table_base + v
!= arpt_next_entry(e)) {
-
- if (stackidx >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
jumpstack[stackidx++] = e;
}
@@ -337,9 +335,6 @@
continue;
}
- /* Targets which reenter must return
- * abs. verdicts
- */
acpar.target = t->u.kernel.target;
acpar.targinfo = t->data;
verdict = t->u.kernel.target->target(skb, &acpar);
@@ -372,10 +367,13 @@
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/
-static int mark_source_chains(const struct xt_table_info *newinfo,
+static int mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -391,6 +389,7 @@
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -445,6 +444,8 @@
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -459,6 +460,10 @@
return 0;
}
+ if (entry0 + newpos != arpt_next_entry(e) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -475,6 +480,7 @@
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -664,9 +670,6 @@
if (ret != 0)
break;
++i;
- if (strcmp(arpt_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
@@ -1439,9 +1442,6 @@
break;
}
++i;
- if (strcmp(arpt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6c72fbb..787f99e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -276,7 +276,7 @@
}
#endif
-static inline __pure
+static inline
struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -296,12 +296,13 @@
const char *indev, *outdev;
const void *table_base;
struct ipt_entry *e, **jumpstack;
- unsigned int *stackptr, origptr, cpu;
+ unsigned int stackidx, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
/* Initialization */
+ stackidx = 0;
ip = ip_hdr(skb);
indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname;
@@ -331,13 +332,21 @@
smp_read_barrier_depends();
table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
- stackptr = per_cpu_ptr(private->stackptr, cpu);
- origptr = *stackptr;
+
+ /* Switch to alternate jumpstack if we're being invoked via TEE.
+ * TEE issues XT_CONTINUE verdict on original skb so we must not
+ * clobber the jumpstack.
+ *
+ * For recursion via REJECT or SYNPROXY the stack will be clobbered
+ * but it is no problem since absolute verdict is issued by these.
+ */
+ if (static_key_false(&xt_tee_enabled))
+ jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]);
- pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
- table->name, hook, origptr,
+ pr_debug("Entering %s(hook %u), UF %p\n",
+ table->name, hook,
get_entry(table_base, private->underflow[hook]));
do {
@@ -383,28 +392,24 @@
verdict = (unsigned int)(-v) - 1;
break;
}
- if (*stackptr <= origptr) {
+ if (stackidx == 0) {
e = get_entry(table_base,
private->underflow[hook]);
pr_debug("Underflow (this is normal) "
"to %p\n", e);
} else {
- e = jumpstack[--*stackptr];
+ e = jumpstack[--stackidx];
pr_debug("Pulled %p out from pos %u\n",
- e, *stackptr);
+ e, stackidx);
e = ipt_next_entry(e);
}
continue;
}
if (table_base + v != ipt_next_entry(e) &&
!(e->ip.flags & IPT_F_GOTO)) {
- if (*stackptr >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
- jumpstack[(*stackptr)++] = e;
+ jumpstack[stackidx++] = e;
pr_debug("Pushed %p into pos %u\n",
- e, *stackptr - 1);
+ e, stackidx - 1);
}
e = get_entry(table_base, v);
@@ -423,9 +428,8 @@
/* Verdict */
break;
} while (!acpar.hotdrop);
- pr_debug("Exiting %s; resetting sp from %u to %u\n",
- __func__, *stackptr, origptr);
- *stackptr = origptr;
+ pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
+
xt_write_recseq_end(addend);
local_bh_enable();
@@ -439,11 +443,15 @@
}
/* Figures out from what hook each rule can be called: returns 0 if
- there are loops. Puts hook bitmask in comefrom. */
+ * there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
+ */
static int
-mark_source_chains(const struct xt_table_info *newinfo,
+mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -457,6 +465,7 @@
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -518,6 +527,9 @@
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ WARN_ON_ONCE(calldepth == 0);
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -531,9 +543,14 @@
newpos);
return 0;
}
+ if (entry0 + newpos != ipt_next_entry(e) &&
+ !(e->ip.flags & IPT_F_GOTO) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
+ duprintf("Jump rule %u -> %u, calldepth %d\n",
+ pos, newpos, calldepth);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -547,6 +564,7 @@
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -826,9 +844,6 @@
if (ret != 0)
return ret;
++i;
- if (strcmp(ipt_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (i != repl->num_entries) {
@@ -1744,9 +1759,6 @@
if (ret != 0)
break;
++i;
- if (strcmp(ipt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index c88b7d4..b69e82b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -49,12 +49,9 @@
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif
-
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+ if (nf_bridge_in_prerouting(skb))
return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
-#endif
+
if (hooknum == NF_INET_PRE_ROUTING)
return IP_DEFRAG_CONNTRACK_IN + zone;
else
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 3c35ced..4e21f80 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -305,7 +305,7 @@
}
#endif
-static inline __pure struct ip6t_entry *
+static inline struct ip6t_entry *
ip6t_next_entry(const struct ip6t_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -324,12 +324,13 @@
const char *indev, *outdev;
const void *table_base;
struct ip6t_entry *e, **jumpstack;
- unsigned int *stackptr, origptr, cpu;
+ unsigned int stackidx, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
/* Initialization */
+ stackidx = 0;
indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
@@ -357,8 +358,16 @@
cpu = smp_processor_id();
table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
- stackptr = per_cpu_ptr(private->stackptr, cpu);
- origptr = *stackptr;
+
+ /* Switch to alternate jumpstack if we're being invoked via TEE.
+ * TEE issues XT_CONTINUE verdict on original skb so we must not
+ * clobber the jumpstack.
+ *
+ * For recursion via REJECT or SYNPROXY the stack will be clobbered
+ * but it is no problem since absolute verdict is issued by these.
+ */
+ if (static_key_false(&xt_tee_enabled))
+ jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]);
@@ -406,20 +415,16 @@
verdict = (unsigned int)(-v) - 1;
break;
}
- if (*stackptr <= origptr)
+ if (stackidx == 0)
e = get_entry(table_base,
private->underflow[hook]);
else
- e = ip6t_next_entry(jumpstack[--*stackptr]);
+ e = ip6t_next_entry(jumpstack[--stackidx]);
continue;
}
if (table_base + v != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO)) {
- if (*stackptr >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
- jumpstack[(*stackptr)++] = e;
+ jumpstack[stackidx++] = e;
}
e = get_entry(table_base, v);
@@ -437,8 +442,6 @@
break;
} while (!acpar.hotdrop);
- *stackptr = origptr;
-
xt_write_recseq_end(addend);
local_bh_enable();
@@ -452,11 +455,15 @@
}
/* Figures out from what hook each rule can be called: returns 0 if
- there are loops. Puts hook bitmask in comefrom. */
+ * there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
+ */
static int
-mark_source_chains(const struct xt_table_info *newinfo,
+mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -470,6 +477,7 @@
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -531,6 +539,8 @@
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -544,6 +554,11 @@
newpos);
return 0;
}
+ if (entry0 + newpos != ip6t_next_entry(e) &&
+ !(e->ipv6.flags & IP6T_F_GOTO) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -560,6 +575,7 @@
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -839,9 +855,6 @@
if (ret != 0)
return ret;
++i;
- if (strcmp(ip6t_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (i != repl->num_entries) {
@@ -1754,9 +1767,6 @@
if (ret != 0)
break;
++i;
- if (strcmp(ip6t_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 12331ef..567367a 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -35,14 +35,12 @@
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
MODULE_LICENSE("GPL");
-
static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ip6t_reject_info *reject = par->targinfo;
struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
- pr_debug("%s: medium point\n", __func__);
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
@@ -65,9 +63,6 @@
case IP6T_TCP_RESET:
nf_send_reset6(net, skb, par->hooknum);
break;
- default:
- net_info_ratelimited("case %u not handled yet\n", reject->with);
- break;
}
return NF_DROP;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index a45db0b..267fb8d 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -39,12 +39,9 @@
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif
-
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+ if (nf_bridge_in_prerouting(skb))
return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
-#endif
+
if (hooknum == NF_INET_PRE_ROUTING)
return IP6_DEFRAG_CONNTRACK_IN + zone;
else
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a0e5497..2a5a070 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -34,6 +34,9 @@
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);
+DEFINE_PER_CPU(bool, nf_skb_duplicated);
+EXPORT_SYMBOL_GPL(nf_skb_duplicated);
+
int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
mutex_lock(&afinfo_mutex);
@@ -52,9 +55,6 @@
}
EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
-struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
-EXPORT_SYMBOL(nf_hooks);
-
#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
@@ -62,63 +62,166 @@
static DEFINE_MUTEX(nf_hook_mutex);
-int nf_register_hook(struct nf_hook_ops *reg)
+static struct list_head *nf_find_hook_list(struct net *net,
+ const struct nf_hook_ops *reg)
{
- struct list_head *nf_hook_list;
+ struct list_head *hook_list = NULL;
+
+ if (reg->pf != NFPROTO_NETDEV)
+ hook_list = &net->nf.hooks[reg->pf][reg->hooknum];
+ else if (reg->hooknum == NF_NETDEV_INGRESS) {
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->dev && dev_net(reg->dev) == net)
+ hook_list = ®->dev->nf_hooks_ingress;
+#endif
+ }
+ return hook_list;
+}
+
+struct nf_hook_entry {
+ const struct nf_hook_ops *orig_ops;
+ struct nf_hook_ops ops;
+};
+
+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ struct list_head *hook_list;
+ struct nf_hook_entry *entry;
struct nf_hook_ops *elem;
- mutex_lock(&nf_hook_mutex);
- switch (reg->pf) {
- case NFPROTO_NETDEV:
-#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->hooknum == NF_NETDEV_INGRESS) {
- BUG_ON(reg->dev == NULL);
- nf_hook_list = ®->dev->nf_hooks_ingress;
- net_inc_ingress_queue();
- break;
- }
-#endif
- /* Fall through. */
- default:
- nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
- break;
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->orig_ops = reg;
+ entry->ops = *reg;
+
+ hook_list = nf_find_hook_list(net, reg);
+ if (!hook_list) {
+ kfree(entry);
+ return -ENOENT;
}
- list_for_each_entry(elem, nf_hook_list, list) {
+ mutex_lock(&nf_hook_mutex);
+ list_for_each_entry(elem, hook_list, list) {
if (reg->priority < elem->priority)
break;
}
- list_add_rcu(®->list, elem->list.prev);
+ list_add_rcu(&entry->ops.list, elem->list.prev);
mutex_unlock(&nf_hook_mutex);
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+ net_inc_ingress_queue();
+#endif
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
return 0;
}
-EXPORT_SYMBOL(nf_register_hook);
+EXPORT_SYMBOL(nf_register_net_hook);
-void nf_unregister_hook(struct nf_hook_ops *reg)
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
+ struct list_head *hook_list;
+ struct nf_hook_entry *entry;
+ struct nf_hook_ops *elem;
+
+ hook_list = nf_find_hook_list(net, reg);
+ if (!hook_list)
+ return;
+
mutex_lock(&nf_hook_mutex);
- list_del_rcu(®->list);
- mutex_unlock(&nf_hook_mutex);
- switch (reg->pf) {
- case NFPROTO_NETDEV:
-#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->hooknum == NF_NETDEV_INGRESS) {
- net_dec_ingress_queue();
+ list_for_each_entry(elem, hook_list, list) {
+ entry = container_of(elem, struct nf_hook_entry, ops);
+ if (entry->orig_ops == reg) {
+ list_del_rcu(&entry->ops.list);
break;
}
- break;
-#endif
- default:
- break;
}
+ mutex_unlock(&nf_hook_mutex);
+ if (&elem->list == hook_list) {
+ WARN(1, "nf_unregister_net_hook: hook not found!\n");
+ return;
+ }
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
+#endif
#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
- nf_queue_nf_hook_drop(reg);
+ nf_queue_nf_hook_drop(net, &entry->ops);
+ kfree(entry);
+}
+EXPORT_SYMBOL(nf_unregister_net_hook);
+
+int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = nf_register_net_hook(net, ®[i]);
+ if (err)
+ goto err;
+ }
+ return err;
+
+err:
+ if (i > 0)
+ nf_unregister_net_hooks(net, reg, i);
+ return err;
+}
+EXPORT_SYMBOL(nf_register_net_hooks);
+
+void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n)
+{
+ while (n-- > 0)
+ nf_unregister_net_hook(net, ®[n]);
+}
+EXPORT_SYMBOL(nf_unregister_net_hooks);
+
+static LIST_HEAD(nf_hook_list);
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+ struct net *net, *last;
+ int ret;
+
+ rtnl_lock();
+ for_each_net(net) {
+ ret = nf_register_net_hook(net, reg);
+ if (ret && ret != -ENOENT)
+ goto rollback;
+ }
+ list_add_tail(®->list, &nf_hook_list);
+ rtnl_unlock();
+
+ return 0;
+rollback:
+ last = net;
+ for_each_net(net) {
+ if (net == last)
+ break;
+ nf_unregister_net_hook(net, reg);
+ }
+ rtnl_unlock();
+ return ret;
+}
+EXPORT_SYMBOL(nf_register_hook);
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_del(®->list);
+ for_each_net(net)
+ nf_unregister_net_hook(net, reg);
+ rtnl_unlock();
}
EXPORT_SYMBOL(nf_unregister_hook);
@@ -295,8 +398,46 @@
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
+static int nf_register_hook_list(struct net *net)
+{
+ struct nf_hook_ops *elem;
+ int ret;
+
+ rtnl_lock();
+ list_for_each_entry(elem, &nf_hook_list, list) {
+ ret = nf_register_net_hook(net, elem);
+ if (ret && ret != -ENOENT)
+ goto out_undo;
+ }
+ rtnl_unlock();
+ return 0;
+
+out_undo:
+ list_for_each_entry_continue_reverse(elem, &nf_hook_list, list)
+ nf_unregister_net_hook(net, elem);
+ rtnl_unlock();
+ return ret;
+}
+
+static void nf_unregister_hook_list(struct net *net)
+{
+ struct nf_hook_ops *elem;
+
+ rtnl_lock();
+ list_for_each_entry(elem, &nf_hook_list, list)
+ nf_unregister_net_hook(net, elem);
+ rtnl_unlock();
+}
+
static int __net_init netfilter_net_init(struct net *net)
{
+ int i, h, ret;
+
+ for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
+ for (h = 0; h < NF_MAX_HOOKS; h++)
+ INIT_LIST_HEAD(&net->nf.hooks[i][h]);
+ }
+
#ifdef CONFIG_PROC_FS
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
net->proc_net);
@@ -307,11 +448,16 @@
return -ENOMEM;
}
#endif
- return 0;
+ ret = nf_register_hook_list(net);
+ if (ret)
+ remove_proc_entry("netfilter", net->proc_net);
+
+ return ret;
}
static void __net_exit netfilter_net_exit(struct net *net)
{
+ nf_unregister_hook_list(net);
remove_proc_entry("netfilter", net->proc_net);
}
@@ -322,12 +468,7 @@
int __init netfilter_init(void)
{
- int i, h, ret;
-
- for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
- for (h = 0; h < NF_MAX_HOOKS; h++)
- INIT_LIST_HEAD(&nf_hooks[i][h]);
- }
+ int ret;
ret = register_pernet_subsys(&netfilter_net_ops);
if (ret < 0)
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 7e81416..a2ff7d7 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -137,7 +137,7 @@
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
{
- if (scheduler && scheduler->module)
+ if (scheduler)
module_put(scheduler->module);
}
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b45da90..6719773 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -42,6 +42,8 @@
"SHUTDOWN_SENT",
"SHUTDOWN_RECD",
"SHUTDOWN_ACK_SENT",
+ "HEARTBEAT_SENT",
+ "HEARTBEAT_ACKED",
};
#define SECS * HZ
@@ -57,6 +59,8 @@
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
+ [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
};
#define sNO SCTP_CONNTRACK_NONE
@@ -67,6 +71,8 @@
#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
+#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
#define sIV SCTP_CONNTRACK_MAX
/*
@@ -88,6 +94,10 @@
to that of the SHUTDOWN chunk.
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed.
+HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
+HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to
+ that of the HEARTBEAT chunk. Secondary connection is
+ established.
*/
/* TODO
@@ -97,36 +107,40 @@
- Check the error type in the reply dir before transitioning from
cookie echoed to closed.
- Sec 5.2.4 of RFC 2960
- - Multi Homing support.
+ - Full Multi Homing support.
*/
/* SCTP conntrack state transitions */
-static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
+/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA},
+/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
+/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}
},
{
/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA},
+/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
+/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA}
}
};
@@ -278,9 +292,16 @@
pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
i = 8;
break;
+ case SCTP_CID_HEARTBEAT:
+ pr_debug("SCTP_CID_HEARTBEAT");
+ i = 9;
+ break;
+ case SCTP_CID_HEARTBEAT_ACK:
+ pr_debug("SCTP_CID_HEARTBEAT_ACK");
+ i = 10;
+ break;
default:
- /* Other chunks like DATA, SACK, HEARTBEAT and
- its ACK do not cause a change in state */
+ /* Other chunks like DATA or SACK do not change the state */
pr_debug("Unknown chunk type, Will stay in %s\n",
sctp_conntrack_names[cur_state]);
return cur_state;
@@ -329,6 +350,8 @@
!test_bit(SCTP_CID_COOKIE_ECHO, map) &&
!test_bit(SCTP_CID_ABORT, map) &&
!test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
sh->vtag != ct->proto.sctp.vtag[dir]) {
pr_debug("Verification tag check failed\n");
goto out;
@@ -357,6 +380,16 @@
/* Sec 8.5.1 (D) */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
+ } else if (sch->type == SCTP_CID_HEARTBEAT ||
+ sch->type == SCTP_CID_HEARTBEAT_ACK) {
+ if (ct->proto.sctp.vtag[dir] == 0) {
+ pr_debug("Setting vtag %x for dir %d\n",
+ sh->vtag, dir);
+ ct->proto.sctp.vtag[dir] = sh->vtag;
+ } else if (sh->vtag != ct->proto.sctp.vtag[dir]) {
+ pr_debug("Verification tag check failed\n");
+ goto out_unlock;
+ }
}
old_state = ct->proto.sctp.state;
@@ -466,6 +499,10 @@
/* Sec 8.5.1 (A) */
return false;
}
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
+ pr_debug("Setting vtag %x for secondary conntrack\n",
+ sh->vtag);
+ ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
}
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
@@ -610,6 +647,8 @@
[CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
@@ -658,6 +697,18 @@
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "nf_conntrack_sctp_timeout_heartbeat_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
{ }
};
@@ -730,6 +781,8 @@
pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
+ pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT];
+ pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED];
#endif
return 0;
}
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 3992106..0655225 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -19,7 +19,7 @@
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
struct nf_hook_state *state, unsigned int queuenum);
-void nf_queue_nf_hook_drop(struct nf_hook_ops *ops);
+void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 8a8b2ab..96777f9 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -105,21 +105,15 @@
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
-void nf_queue_nf_hook_drop(struct nf_hook_ops *ops)
+void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
{
const struct nf_queue_handler *qh;
- struct net *net;
- rtnl_lock();
rcu_read_lock();
qh = rcu_dereference(queue_handler);
- if (qh) {
- for_each_net(net) {
- qh->nf_hook_drop(net, ops);
- }
- }
+ if (qh)
+ qh->nf_hook_drop(net, ops);
rcu_read_unlock();
- rtnl_unlock();
}
/*
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index cfe6368..4a41eb9 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -130,20 +130,24 @@
int nft_register_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops)
{
+ struct net *net = read_pnet(&basechain->pnet);
+
if (basechain->flags & NFT_BASECHAIN_DISABLED)
return 0;
- return nf_register_hooks(basechain->ops, hook_nops);
+ return nf_register_net_hooks(net, basechain->ops, hook_nops);
}
EXPORT_SYMBOL_GPL(nft_register_basechain);
void nft_unregister_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops)
{
+ struct net *net = read_pnet(&basechain->pnet);
+
if (basechain->flags & NFT_BASECHAIN_DISABLED)
return;
- nf_unregister_hooks(basechain->ops, hook_nops);
+ nf_unregister_net_hooks(net, basechain->ops, hook_nops);
}
EXPORT_SYMBOL_GPL(nft_unregister_basechain);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index f77bad4..05d0b03 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -114,7 +114,6 @@
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
const struct nft_chain *chain = ops->priv, *basechain = chain;
- const struct net *chain_net = read_pnet(&nft_base_chain(basechain)->pnet);
const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
@@ -125,10 +124,6 @@
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
- /* Ignore chains that are not for the current network namespace */
- if (!net_eq(net, chain_net))
- return NF_ACCEPT;
-
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d324fe7..9b42b5e 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -67,9 +67,6 @@
[NFPROTO_IPV6] = "ip6",
};
-/* Allow this many total (re)entries. */
-static const unsigned int xt_jumpstack_multiplier = 2;
-
/* Registration hooks for targets. */
int xt_register_target(struct xt_target *target)
{
@@ -688,8 +685,6 @@
kvfree(info->jumpstack);
}
- free_percpu(info->stackptr);
-
kvfree(info);
}
EXPORT_SYMBOL(xt_free_table_info);
@@ -732,15 +727,14 @@
DEFINE_PER_CPU(seqcount_t, xt_recseq);
EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
+struct static_key xt_tee_enabled __read_mostly;
+EXPORT_SYMBOL_GPL(xt_tee_enabled);
+
static int xt_jumpstack_alloc(struct xt_table_info *i)
{
unsigned int size;
int cpu;
- i->stackptr = alloc_percpu(unsigned int);
- if (i->stackptr == NULL)
- return -ENOMEM;
-
size = sizeof(void **) * nr_cpu_ids;
if (size > PAGE_SIZE)
i->jumpstack = vzalloc(size);
@@ -749,8 +743,21 @@
if (i->jumpstack == NULL)
return -ENOMEM;
- i->stacksize *= xt_jumpstack_multiplier;
- size = sizeof(void *) * i->stacksize;
+ /* ruleset without jumps -- no stack needed */
+ if (i->stacksize == 0)
+ return 0;
+
+ /* Jumpstack needs to be able to record two full callchains, one
+ * from the first rule set traversal, plus one table reentrancy
+ * via -j TEE without clobbering the callchain that brought us to
+ * TEE target.
+ *
+ * This is done by allocating two jumpstacks per cpu, on reentry
+ * the upper half of the stack is used.
+ *
+ * see the jumpstack setup in ipt_do_table() for more details.
+ */
+ size = sizeof(void *) * i->stacksize * 2u;
for_each_possible_cpu(cpu) {
if (size > PAGE_SIZE)
i->jumpstack[cpu] = vmalloc_node(size,
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index a747eb4..c5d6556 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -37,7 +37,6 @@
};
static const union nf_inet_addr tee_zero_address;
-static DEFINE_PER_CPU(bool, tee_active);
static struct net *pick_net(struct sk_buff *skb)
{
@@ -88,7 +87,7 @@
const struct xt_tee_tginfo *info = par->targinfo;
struct iphdr *iph;
- if (__this_cpu_read(tee_active))
+ if (__this_cpu_read(nf_skb_duplicated))
return XT_CONTINUE;
/*
* Copy the skb, and route the copy. Will later return %XT_CONTINUE for
@@ -125,9 +124,9 @@
ip_send_check(iph);
if (tee_tg_route4(skb, info)) {
- __this_cpu_write(tee_active, true);
+ __this_cpu_write(nf_skb_duplicated, true);
ip_local_out(skb);
- __this_cpu_write(tee_active, false);
+ __this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
}
@@ -170,7 +169,7 @@
{
const struct xt_tee_tginfo *info = par->targinfo;
- if (__this_cpu_read(tee_active))
+ if (__this_cpu_read(nf_skb_duplicated))
return XT_CONTINUE;
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
@@ -188,9 +187,9 @@
--iph->hop_limit;
}
if (tee_tg_route6(skb, info)) {
- __this_cpu_write(tee_active, true);
+ __this_cpu_write(nf_skb_duplicated, true);
ip6_local_out(skb);
- __this_cpu_write(tee_active, false);
+ __this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
}
@@ -252,6 +251,7 @@
} else
info->priv = NULL;
+ static_key_slow_inc(&xt_tee_enabled);
return 0;
}
@@ -263,6 +263,7 @@
unregister_netdevice_notifier(&info->priv->notifier);
kfree(info->priv);
}
+ static_key_slow_dec(&xt_tee_enabled);
}
static struct xt_target tee_tg_reg[] __read_mostly = {