Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next
Jeff Kirsher says:
====================
Intel Wired LAN Driver Updates 2015-03-09
This series contains updates to i40e and i40evf.
Greg cleans up some "hello world" strings which were left around from
early development.
Shannon modifies the drive to make sure the sizeof() calls are taking
the size of the actual struct that we care about. Also updates the
NVMUpdate read/write so that it is less noisy when logging. This was
because the NVMUpdate tool does not necessarily know the ReadOnly map of
the current NVM image, and must try reading and writing words that may be
protected. This generates an error out of the Firmware request that the
driver logs. Unfortunately, this ended up spitting out hundreds of
bogus read/write error messages. If a user wants the noisy logging,
the change can be overridden by enabling the NVM update debugging.
Mitch fixes a possible deadlock issue where if a reset occurred when the
netdev is closed, the reset task will hang in napi_disable. Added
ethtool RSS support as suggested by Ben Hutchings.
Jesse fixes a bug introduced in the force writeback code, where the
interrupt rate was set to 0 (maximum) by accident. The driver must
correctly set the NOITR fields to avoid IT update as a side effect
of triggering the software interrupt.
I provided a simple cleanup to make the use of PF/VF consistent, which
was reported by Joe Perches.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/wireless/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/iwlwifi/mvm/debugfs-vif.c
index 7faad90..5f37eab 100644
--- a/drivers/net/wireless/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/iwlwifi/mvm/debugfs-vif.c
@@ -627,7 +627,6 @@
return;
mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir);
- mvmvif->mvm = mvm;
if (!mvmvif->dbgfs_dir) {
IWL_ERR(mvm, "Failed to create debugfs directory under %s\n",
diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index 5a5d5c8..2042554 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -1346,6 +1346,8 @@
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
int ret;
+ mvmvif->mvm = mvm;
+
/*
* make sure D0i3 exit is completed, otherwise a target access
* during tx queue configuration could be done when still in
diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h
index f4ecd1b..e10172d 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h
@@ -356,6 +356,7 @@
* average signal of beacons retrieved from the firmware
*/
struct iwl_mvm_vif {
+ struct iwl_mvm *mvm;
u16 id;
u16 color;
u8 ap_sta_id;
@@ -418,7 +419,6 @@
#endif
#ifdef CONFIG_IWLWIFI_DEBUGFS
- struct iwl_mvm *mvm;
struct dentry *dbgfs_dir;
struct dentry *dbgfs_slink;
struct iwl_dbgfs_pm dbgfs_pm;
diff --git a/drivers/net/wireless/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/phy-ctxt.c
index 1bd10ed..192b74b 100644
--- a/drivers/net/wireless/iwlwifi/mvm/phy-ctxt.c
+++ b/drivers/net/wireless/iwlwifi/mvm/phy-ctxt.c
@@ -175,8 +175,10 @@
cmd->rxchain_info |= cpu_to_le32(idle_cnt << PHY_RX_CHAIN_CNT_POS);
cmd->rxchain_info |= cpu_to_le32(active_cnt <<
PHY_RX_CHAIN_MIMO_CNT_POS);
+#ifdef CONFIG_IWLWIFI_DEBUGFS
if (unlikely(mvm->dbgfs_rx_phyinfo))
cmd->rxchain_info = cpu_to_le32(mvm->dbgfs_rx_phyinfo);
+#endif
cmd->txchain_info = cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm));
}
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index c755e49..bb39113 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -36,44 +36,6 @@
}
}
-static inline void nf_bridge_update_protocol(struct sk_buff *skb)
-{
- if (skb->nf_bridge->mask & BRNF_8021Q)
- skb->protocol = htons(ETH_P_8021Q);
- else if (skb->nf_bridge->mask & BRNF_PPPoE)
- skb->protocol = htons(ETH_P_PPP_SES);
-}
-
-/* Fill in the header for fragmented IP packets handled by
- * the IPv4 connection tracking code.
- *
- * Only used in br_forward.c
- */
-static inline int nf_bridge_copy_header(struct sk_buff *skb)
-{
- int err;
- unsigned int header_size;
-
- nf_bridge_update_protocol(skb);
- header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
- err = skb_cow_head(skb, header_size);
- if (err)
- return err;
-
- skb_copy_to_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
- __skb_push(skb, nf_bridge_encap_header_len(skb));
- return 0;
-}
-
-static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
-{
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
- return nf_bridge_copy_header(skb);
- return 0;
-}
-
static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
@@ -82,18 +44,6 @@
}
int br_handle_frame_finish(struct sk_buff *skb);
-/* Only used in br_device.c */
-static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
-{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
- skb_pull(skb, ETH_HLEN);
- nf_bridge->mask ^= BRNF_BRIDGED_DNAT;
- skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
- skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
- skb->dev = nf_bridge->physindev;
- return br_handle_frame_finish(skb);
-}
/* This is called by the IP fragmenting code and it ensures there is
* enough room for the encapsulating header (if there is one). */
@@ -119,7 +69,6 @@
}
#else
-#define nf_bridge_maybe_copy_header(skb) (0)
#define nf_bridge_pad(skb) (0)
#define br_drop_fake_rtable(skb) do { } while (0)
#endif /* CONFIG_BRIDGE_NETFILTER */
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index 03e928a..8641275 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -5,11 +5,7 @@
#include <net/ip.h>
#include <net/icmp.h>
-static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
-{
- icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
-}
-
+void nf_send_unreach(struct sk_buff *skb_in, int code, int hook);
void nf_send_reset(struct sk_buff *oldskb, int hook);
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index 23216d4..0ae445d 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -3,15 +3,8 @@
#include <linux/icmpv6.h>
-static inline void
-nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
- unsigned int hooknum)
-{
- if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
- skb_in->dev = net->loopback_dev;
-
- icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
-}
+void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
+ unsigned int hooknum);
void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 9eaaa78..a143aca 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -393,74 +393,6 @@
__attribute__((aligned(__alignof__(struct nft_expr))));
};
-/**
- * struct nft_trans - nf_tables object update in transaction
- *
- * @list: used internally
- * @msg_type: message type
- * @ctx: transaction context
- * @data: internal information related to the transaction
- */
-struct nft_trans {
- struct list_head list;
- int msg_type;
- struct nft_ctx ctx;
- char data[0];
-};
-
-struct nft_trans_rule {
- struct nft_rule *rule;
-};
-
-#define nft_trans_rule(trans) \
- (((struct nft_trans_rule *)trans->data)->rule)
-
-struct nft_trans_set {
- struct nft_set *set;
- u32 set_id;
-};
-
-#define nft_trans_set(trans) \
- (((struct nft_trans_set *)trans->data)->set)
-#define nft_trans_set_id(trans) \
- (((struct nft_trans_set *)trans->data)->set_id)
-
-struct nft_trans_chain {
- bool update;
- char name[NFT_CHAIN_MAXNAMELEN];
- struct nft_stats __percpu *stats;
- u8 policy;
-};
-
-#define nft_trans_chain_update(trans) \
- (((struct nft_trans_chain *)trans->data)->update)
-#define nft_trans_chain_name(trans) \
- (((struct nft_trans_chain *)trans->data)->name)
-#define nft_trans_chain_stats(trans) \
- (((struct nft_trans_chain *)trans->data)->stats)
-#define nft_trans_chain_policy(trans) \
- (((struct nft_trans_chain *)trans->data)->policy)
-
-struct nft_trans_table {
- bool update;
- bool enable;
-};
-
-#define nft_trans_table_update(trans) \
- (((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_enable(trans) \
- (((struct nft_trans_table *)trans->data)->enable)
-
-struct nft_trans_elem {
- struct nft_set *set;
- struct nft_set_elem elem;
-};
-
-#define nft_trans_elem_set(trans) \
- (((struct nft_trans_elem *)trans->data)->set)
-#define nft_trans_elem(trans) \
- (((struct nft_trans_elem *)trans->data)->elem)
-
static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
{
return (struct nft_expr *)&rule->data[0];
@@ -528,6 +460,25 @@
NFT_CHAIN_T_MAX
};
+/**
+ * struct nf_chain_type - nf_tables chain type info
+ *
+ * @name: name of the type
+ * @type: numeric identifier
+ * @family: address family
+ * @owner: module owner
+ * @hook_mask: mask of valid hooks
+ * @hooks: hookfn overrides
+ */
+struct nf_chain_type {
+ const char *name;
+ enum nft_chain_type type;
+ int family;
+ struct module *owner;
+ unsigned int hook_mask;
+ nf_hookfn *hooks[NF_MAX_HOOKS];
+};
+
int nft_chain_validate_dependency(const struct nft_chain *chain,
enum nft_chain_type type);
int nft_chain_validate_hooks(const struct nft_chain *chain,
@@ -584,7 +535,7 @@
u64 hgenerator;
u32 use;
u16 flags;
- char name[];
+ char name[NFT_TABLE_MAXNAMELEN];
};
/**
@@ -614,25 +565,6 @@
int nft_register_afinfo(struct net *, struct nft_af_info *);
void nft_unregister_afinfo(struct nft_af_info *);
-/**
- * struct nf_chain_type - nf_tables chain type info
- *
- * @name: name of the type
- * @type: numeric identifier
- * @family: address family
- * @owner: module owner
- * @hook_mask: mask of valid hooks
- * @hooks: hookfn overrides
- */
-struct nf_chain_type {
- const char *name;
- enum nft_chain_type type;
- int family;
- struct module *owner;
- unsigned int hook_mask;
- nf_hookfn *hooks[NF_MAX_HOOKS];
-};
-
int nft_register_chain_type(const struct nf_chain_type *);
void nft_unregister_chain_type(const struct nf_chain_type *);
@@ -657,4 +589,72 @@
#define MODULE_ALIAS_NFT_SET() \
MODULE_ALIAS("nft-set")
+/**
+ * struct nft_trans - nf_tables object update in transaction
+ *
+ * @list: used internally
+ * @msg_type: message type
+ * @ctx: transaction context
+ * @data: internal information related to the transaction
+ */
+struct nft_trans {
+ struct list_head list;
+ int msg_type;
+ struct nft_ctx ctx;
+ char data[0];
+};
+
+struct nft_trans_rule {
+ struct nft_rule *rule;
+};
+
+#define nft_trans_rule(trans) \
+ (((struct nft_trans_rule *)trans->data)->rule)
+
+struct nft_trans_set {
+ struct nft_set *set;
+ u32 set_id;
+};
+
+#define nft_trans_set(trans) \
+ (((struct nft_trans_set *)trans->data)->set)
+#define nft_trans_set_id(trans) \
+ (((struct nft_trans_set *)trans->data)->set_id)
+
+struct nft_trans_chain {
+ bool update;
+ char name[NFT_CHAIN_MAXNAMELEN];
+ struct nft_stats __percpu *stats;
+ u8 policy;
+};
+
+#define nft_trans_chain_update(trans) \
+ (((struct nft_trans_chain *)trans->data)->update)
+#define nft_trans_chain_name(trans) \
+ (((struct nft_trans_chain *)trans->data)->name)
+#define nft_trans_chain_stats(trans) \
+ (((struct nft_trans_chain *)trans->data)->stats)
+#define nft_trans_chain_policy(trans) \
+ (((struct nft_trans_chain *)trans->data)->policy)
+
+struct nft_trans_table {
+ bool update;
+ bool enable;
+};
+
+#define nft_trans_table_update(trans) \
+ (((struct nft_trans_table *)trans->data)->update)
+#define nft_trans_table_enable(trans) \
+ (((struct nft_trans_table *)trans->data)->enable)
+
+struct nft_trans_elem {
+ struct nft_set *set;
+ struct nft_set_elem elem;
+};
+
+#define nft_trans_elem_set(trans) \
+ (((struct nft_trans_elem *)trans->data)->set)
+#define nft_trans_elem(trans) \
+ (((struct nft_trans_elem *)trans->data)->elem)
+
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index c24060e..4d6597a 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -9,6 +9,7 @@
struct netns_xt {
struct list_head tables[NFPROTO_NUMPROTO];
bool notrack_deprecated_warning;
+ bool clusterip_deprecated_warning;
#if defined(CONFIG_BRIDGE_NF_EBTABLES) || \
defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE)
struct ebt_table *broute_table;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c605d30..6d778ef 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -213,7 +213,7 @@
const struct tcf_proto *,
struct tcf_result *);
int (*init)(struct tcf_proto*);
- void (*destroy)(struct tcf_proto*);
+ bool (*destroy)(struct tcf_proto*, bool);
unsigned long (*get)(struct tcf_proto*, u32 handle);
int (*change)(struct net *net, struct sk_buff *,
@@ -399,7 +399,7 @@
const struct Qdisc_ops *ops, u32 parentid);
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
-void tcf_destroy(struct tcf_proto *tp);
+bool tcf_destroy(struct tcf_proto *tp, bool force);
void tcf_destroy_chain(struct tcf_proto __rcu **fl);
/* Reset all TX qdiscs greater then index of a device. */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 832bc46..b978393 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,6 +1,7 @@
#ifndef _LINUX_NF_TABLES_H
#define _LINUX_NF_TABLES_H
+#define NFT_TABLE_MAXNAMELEN 32
#define NFT_CHAIN_MAXNAMELEN 32
#define NFT_USERDATA_MAXLEN 256
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ffd379d..294cbcc 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,13 +36,10 @@
u16 vid = 0;
rcu_read_lock();
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
- br_nf_pre_routing_finish_bridge_slow(skb);
+ if (br_nf_prerouting_finish_bridge(skb)) {
rcu_read_unlock();
return NETDEV_TX_OK;
}
-#endif
u64_stats_update_begin(&brstats->syncp);
brstats->tx_packets++;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 1238fab..3304a54 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -37,9 +37,7 @@
int br_dev_queue_push_xmit(struct sk_buff *skb)
{
- /* ip_fragment doesn't copy the MAC header */
- if (nf_bridge_maybe_copy_header(skb) ||
- !is_skb_forwardable(skb->dev, skb)) {
+ if (!is_skb_forwardable(skb->dev, skb)) {
kfree_skb(skb);
} else {
skb_push(skb, ETH_HLEN);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 0ee453f..a8361c7 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -239,6 +239,14 @@
return -1;
}
+static void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+ if (skb->nf_bridge->mask & BRNF_8021Q)
+ skb->protocol = htons(ETH_P_8021Q);
+ else if (skb->nf_bridge->mask & BRNF_PPPoE)
+ skb->protocol = htons(ETH_P_PPP_SES);
+}
+
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
* bridge PRE_ROUTING hook. */
@@ -764,23 +772,53 @@
}
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+static bool nf_bridge_copy_header(struct sk_buff *skb)
+{
+ int err;
+ unsigned int header_size;
+
+ nf_bridge_update_protocol(skb);
+ header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+ err = skb_cow_head(skb, header_size);
+ if (err)
+ return false;
+
+ skb_copy_to_linear_data_offset(skb, -header_size,
+ skb->nf_bridge->data, header_size);
+ __skb_push(skb, nf_bridge_encap_header_len(skb));
+ return true;
+}
+
+static int br_nf_push_frag_xmit(struct sk_buff *skb)
+{
+ if (!nf_bridge_copy_header(skb)) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ return br_dev_queue_push_xmit(skb);
+}
+
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
int ret;
int frag_max_size;
+ unsigned int mtu_reserved;
+ if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
+ return br_dev_queue_push_xmit(skb);
+
+ mtu_reserved = nf_bridge_mtu_reduction(skb);
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
- if (skb->protocol == htons(ETH_P_IP) &&
- skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
- !skb_is_gso(skb)) {
+ if (skb->len + mtu_reserved > skb->dev->mtu) {
frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
if (br_parse_ip_options(skb))
/* Drop invalid packet */
return NF_DROP;
IPCB(skb)->frag_max_size = frag_max_size;
- ret = ip_fragment(skb, br_dev_queue_push_xmit);
+ ret = ip_fragment(skb, br_nf_push_frag_xmit);
} else
ret = br_dev_queue_push_xmit(skb);
@@ -854,6 +892,38 @@
return NF_ACCEPT;
}
+/* This is called when br_netfilter has called into iptables/netfilter,
+ * and DNAT has taken place on a bridge-forwarded packet.
+ *
+ * neigh->output has created a new MAC header, with local br0 MAC
+ * as saddr.
+ *
+ * This restores the original MAC saddr of the bridged packet
+ * before invoking bridge forward logic to transmit the packet.
+ */
+static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
+{
+ struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+
+ skb_pull(skb, ETH_HLEN);
+ nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
+
+ skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
+ skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+ skb->dev = nf_bridge->physindev;
+ br_handle_frame_finish(skb);
+}
+
+int br_nf_prerouting_finish_bridge(struct sk_buff *skb)
+{
+ if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+ br_nf_pre_routing_finish_bridge_slow(skb);
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_nf_prerouting_finish_bridge);
+
void br_netfilter_enable(void)
{
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c32e279..f0a0438 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -765,10 +765,15 @@
/* br_netfilter.c */
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+int br_nf_prerouting_finish_bridge(struct sk_buff *skb);
int br_nf_core_init(void);
void br_nf_core_fini(void);
void br_netfilter_rtable_init(struct net_bridge *);
#else
+static inline int br_nf_prerouting_finish_bridge(struct sk_buff *skb)
+{
+ return 0;
+}
static inline int br_nf_core_init(void) { return 0; }
static inline void br_nf_core_fini(void) {}
#define br_netfilter_rtable_init(x)
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 3244aea..5c6c965 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -21,6 +21,7 @@
#include <net/ip.h>
#include <net/ip6_checksum.h>
#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv6.h>
#include "../br_private.h"
static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
@@ -36,7 +37,12 @@
skb_pull(nskb, ETH_HLEN);
}
-static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook)
+/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT)
+ * or the bridge port (NF_BRIDGE PREROUTING).
+ */
+static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook)
{
struct sk_buff *nskb;
struct iphdr *niph;
@@ -65,11 +71,12 @@
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+ br_deliver(br_port_get_rcu(dev), nskb);
}
-static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook,
- u8 code)
+static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook, u8 code)
{
struct sk_buff *nskb;
struct iphdr *niph;
@@ -77,8 +84,9 @@
unsigned int len;
void *payload;
__wsum csum;
+ u8 proto;
- if (!nft_bridge_iphdr_validate(oldskb))
+ if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
return;
/* IP header checks: fragment. */
@@ -91,7 +99,17 @@
if (!pskb_may_pull(oldskb, len))
return;
- if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0))
+ if (pskb_trim_rcsum(oldskb, htons(ip_hdr(oldskb)->tot_len)))
+ return;
+
+ if (ip_hdr(oldskb)->protocol == IPPROTO_TCP ||
+ ip_hdr(oldskb)->protocol == IPPROTO_UDP)
+ proto = ip_hdr(oldskb)->protocol;
+ else
+ proto = 0;
+
+ if (!skb_csum_unnecessary(oldskb) &&
+ nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
return;
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) +
@@ -120,11 +138,13 @@
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+ br_deliver(br_port_get_rcu(dev), nskb);
}
static void nft_reject_br_send_v6_tcp_reset(struct net *net,
- struct sk_buff *oldskb, int hook)
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook)
{
struct sk_buff *nskb;
const struct tcphdr *oth;
@@ -152,12 +172,37 @@
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+ br_deliver(br_port_get_rcu(dev), nskb);
+}
+
+static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int thoff;
+ __be16 fo;
+ u8 proto = ip6h->nexthdr;
+
+ if (skb->csum_bad)
+ return false;
+
+ if (skb_csum_unnecessary(skb))
+ return true;
+
+ if (ip6h->payload_len &&
+ pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h)))
+ return false;
+
+ thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+ if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
+ return false;
+
+ return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
}
static void nft_reject_br_send_v6_unreach(struct net *net,
- struct sk_buff *oldskb, int hook,
- u8 code)
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook, u8 code)
{
struct sk_buff *nskb;
struct ipv6hdr *nip6h;
@@ -176,6 +221,9 @@
if (!pskb_may_pull(oldskb, len))
return;
+ if (!reject6_br_csum_ok(oldskb, hook))
+ return;
+
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) +
LL_MAX_HEADER + len, GFP_ATOMIC);
if (!nskb)
@@ -205,7 +253,7 @@
nft_reject_br_push_etherhdr(oldskb, nskb);
- br_deliver(br_port_get_rcu(oldskb->dev), nskb);
+ br_deliver(br_port_get_rcu(dev), nskb);
}
static void nft_reject_bridge_eval(const struct nft_expr *expr,
@@ -224,16 +272,16 @@
case htons(ETH_P_IP):
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nft_reject_br_send_v4_unreach(pkt->skb,
+ nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
pkt->ops->hooknum,
priv->icmp_code);
break;
case NFT_REJECT_TCP_RST:
- nft_reject_br_send_v4_tcp_reset(pkt->skb,
+ nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in,
pkt->ops->hooknum);
break;
case NFT_REJECT_ICMPX_UNREACH:
- nft_reject_br_send_v4_unreach(pkt->skb,
+ nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
pkt->ops->hooknum,
nft_reject_icmp_code(priv->icmp_code));
break;
@@ -242,16 +290,16 @@
case htons(ETH_P_IPV6):
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nft_reject_br_send_v6_unreach(net, pkt->skb,
+ nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in,
pkt->ops->hooknum,
priv->icmp_code);
break;
case NFT_REJECT_TCP_RST:
- nft_reject_br_send_v6_tcp_reset(net, pkt->skb,
+ nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in,
pkt->ops->hooknum);
break;
case NFT_REJECT_ICMPX_UNREACH:
- nft_reject_br_send_v6_unreach(net, pkt->skb,
+ nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in,
pkt->ops->hooknum,
nft_reject_icmpv6_code(priv->icmp_code));
break;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 59f883d..fb20f36 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,6 +36,37 @@
If unsure, say Y.
+if NF_TABLES
+
+config NF_TABLES_IPV4
+ tristate "IPv4 nf_tables support"
+ help
+ This option enables the IPv4 support for nf_tables.
+
+if NF_TABLES_IPV4
+
+config NFT_CHAIN_ROUTE_IPV4
+ tristate "IPv4 nf_tables route chain support"
+ help
+ This option enables the "route" chain for IPv4 in nf_tables. This
+ chain type is used to force packet re-routing after mangling header
+ fields such as the source, destination, type of service and
+ the packet mark.
+
+config NFT_REJECT_IPV4
+ select NF_REJECT_IPV4
+ default NFT_REJECT
+ tristate
+
+endif # NF_TABLES_IPV4
+
+config NF_TABLES_ARP
+ tristate "ARP nf_tables support"
+ help
+ This option enables the ARP support for nf_tables.
+
+endif # NF_TABLES
+
config NF_LOG_ARP
tristate "ARP packet logging"
default m if NETFILTER_ADVANCED=n
@@ -46,37 +77,10 @@
default m if NETFILTER_ADVANCED=n
select NF_LOG_COMMON
-config NF_TABLES_IPV4
- depends on NF_TABLES
- tristate "IPv4 nf_tables support"
- help
- This option enables the IPv4 support for nf_tables.
-
-config NFT_CHAIN_ROUTE_IPV4
- depends on NF_TABLES_IPV4
- tristate "IPv4 nf_tables route chain support"
- help
- This option enables the "route" chain for IPv4 in nf_tables. This
- chain type is used to force packet re-routing after mangling header
- fields such as the source, destination, type of service and
- the packet mark.
-
config NF_REJECT_IPV4
tristate "IPv4 packet rejection"
default m if NETFILTER_ADVANCED=n
-config NFT_REJECT_IPV4
- depends on NF_TABLES_IPV4
- select NF_REJECT_IPV4
- default NFT_REJECT
- tristate
-
-config NF_TABLES_ARP
- depends on NF_TABLES
- tristate "ARP nf_tables support"
- help
- This option enables the ARP support for nf_tables.
-
config NF_NAT_IPV4
tristate "IPv4 NAT"
depends on NF_CONNTRACK_IPV4
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index e90f83a..f75e9df 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -418,6 +418,13 @@
if (ret < 0)
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
+
+ if (!par->net->xt.clusterip_deprecated_warning) {
+ pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
+ "use xt_cluster instead\n");
+ par->net->xt.clusterip_deprecated_warning = true;
+ }
+
return ret;
}
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 8f48f55..87907d4 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -34,31 +34,32 @@
reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ipt_reject_info *reject = par->targinfo;
+ int hook = par->hooknum;
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
- nf_send_unreach(skb, ICMP_NET_UNREACH);
+ nf_send_unreach(skb, ICMP_NET_UNREACH, hook);
break;
case IPT_ICMP_HOST_UNREACHABLE:
- nf_send_unreach(skb, ICMP_HOST_UNREACH);
+ nf_send_unreach(skb, ICMP_HOST_UNREACH, hook);
break;
case IPT_ICMP_PROT_UNREACHABLE:
- nf_send_unreach(skb, ICMP_PROT_UNREACH);
+ nf_send_unreach(skb, ICMP_PROT_UNREACH, hook);
break;
case IPT_ICMP_PORT_UNREACHABLE:
- nf_send_unreach(skb, ICMP_PORT_UNREACH);
+ nf_send_unreach(skb, ICMP_PORT_UNREACH, hook);
break;
case IPT_ICMP_NET_PROHIBITED:
- nf_send_unreach(skb, ICMP_NET_ANO);
+ nf_send_unreach(skb, ICMP_NET_ANO, hook);
break;
case IPT_ICMP_HOST_PROHIBITED:
- nf_send_unreach(skb, ICMP_HOST_ANO);
+ nf_send_unreach(skb, ICMP_HOST_ANO, hook);
break;
case IPT_ICMP_ADMIN_PROHIBITED:
- nf_send_unreach(skb, ICMP_PKT_FILTERED);
+ nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
break;
case IPT_TCP_RESET:
- nf_send_reset(skb, par->hooknum);
+ nf_send_reset(skb, hook);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 536da7b..b7405eb 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -164,4 +164,27 @@
}
EXPORT_SYMBOL_GPL(nf_send_reset);
+void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
+{
+ struct iphdr *iph = ip_hdr(skb_in);
+ u8 proto;
+
+ if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
+ return;
+
+ if (skb_csum_unnecessary(skb_in)) {
+ icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+ return;
+ }
+
+ if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
+ proto = iph->protocol;
+ else
+ proto = 0;
+
+ if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
+ icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+}
+EXPORT_SYMBOL_GPL(nf_send_unreach);
+
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index d729542..16a5d4d 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,7 +27,8 @@
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach(pkt->skb, priv->icmp_code);
+ nf_send_unreach(pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
break;
case NFT_REJECT_TCP_RST:
nf_send_reset(pkt->skb, pkt->ops->hooknum);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a069822..ca69983 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,14 +25,16 @@
To compile it as a module, choose M here. If unsure, say N.
+if NF_TABLES
+
config NF_TABLES_IPV6
- depends on NF_TABLES
tristate "IPv6 nf_tables support"
help
This option enables the IPv6 support for nf_tables.
+if NF_TABLES_IPV6
+
config NFT_CHAIN_ROUTE_IPV6
- depends on NF_TABLES_IPV6
tristate "IPv6 nf_tables route chain support"
help
This option enables the "route" chain for IPv6 in nf_tables. This
@@ -40,16 +42,18 @@
fields such as the source, destination, flowlabel, hop-limit and
the packet mark.
-config NF_REJECT_IPV6
- tristate "IPv6 packet rejection"
- default m if NETFILTER_ADVANCED=n
-
config NFT_REJECT_IPV6
- depends on NF_TABLES_IPV6
select NF_REJECT_IPV6
default NFT_REJECT
tristate
+endif # NF_TABLES_IPV6
+endif # NF_TABLES
+
+config NF_REJECT_IPV6
+ tristate "IPv6 packet rejection"
+ default m if NETFILTER_ADVANCED=n
+
config NF_LOG_IPV6
tristate "IPv6 packet logging"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index d05b364..68e0bb4 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -208,4 +208,39 @@
}
EXPORT_SYMBOL_GPL(nf_send_reset6);
+static bool reject6_csum_ok(struct sk_buff *skb, int hook)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int thoff;
+ __be16 fo;
+ u8 proto;
+
+ if (skb->csum_bad)
+ return false;
+
+ if (skb_csum_unnecessary(skb))
+ return true;
+
+ proto = ip6h->nexthdr;
+ thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+
+ if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
+ return false;
+
+ return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
+}
+
+void nf_send_unreach6(struct net *net, struct sk_buff *skb_in,
+ unsigned char code, unsigned int hooknum)
+{
+ if (!reject6_csum_ok(skb_in, hooknum))
+ return;
+
+ if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
+ skb_in->dev = net->loopback_dev;
+
+ icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
+}
+EXPORT_SYMBOL_GPL(nf_send_unreach6);
+
MODULE_LICENSE("GPL");
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index f4286ee..dfca485 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -7,9 +7,9 @@
default n
---help---
MultiProtocol Label Switching routes packets through logical
- circuits. Originally conceved as a way of routing packets at
+ circuits. Originally conceived as a way of routing packets at
hardware speeds (before hardware was capable of routing ipv4 packets),
- MPLS remains as simple way of making tunnels.
+ MPLS remains a simple way of making tunnels.
If you have not heard of MPLS you probably want to say N here.
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c68c3b4..971cd75 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -438,8 +438,10 @@
To compile it as a module, choose M here.
+if NF_TABLES
+
config NF_TABLES_INET
- depends on NF_TABLES && IPV6
+ depends on IPV6
select NF_TABLES_IPV4
select NF_TABLES_IPV6
tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support"
@@ -447,21 +449,18 @@
This option enables support for a mixed IPv4/IPv6 "inet" table.
config NFT_EXTHDR
- depends on NF_TABLES
tristate "Netfilter nf_tables IPv6 exthdr module"
help
This option adds the "exthdr" expression that you can use to match
IPv6 extension headers.
config NFT_META
- depends on NF_TABLES
tristate "Netfilter nf_tables meta module"
help
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
config NFT_CT
- depends on NF_TABLES
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
help
@@ -469,42 +468,36 @@
connection tracking information such as the flow state.
config NFT_RBTREE
- depends on NF_TABLES
tristate "Netfilter nf_tables rbtree set module"
help
This option adds the "rbtree" set type (Red Black tree) that is used
to build interval-based sets.
config NFT_HASH
- depends on NF_TABLES
tristate "Netfilter nf_tables hash set module"
help
This option adds the "hash" set type that is used to build one-way
mappings between matchings and actions.
config NFT_COUNTER
- depends on NF_TABLES
tristate "Netfilter nf_tables counter module"
help
This option adds the "counter" expression that you can use to
include packet and byte counters in a rule.
config NFT_LOG
- depends on NF_TABLES
tristate "Netfilter nf_tables log module"
help
This option adds the "log" expression that you can use to log
packets matching some criteria.
config NFT_LIMIT
- depends on NF_TABLES
tristate "Netfilter nf_tables limit module"
help
This option adds the "limit" expression that you can use to
ratelimit rule matchings.
config NFT_MASQ
- depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables masquerade support"
@@ -513,7 +506,6 @@
to perform NAT in the masquerade flavour.
config NFT_REDIR
- depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables redirect support"
@@ -522,7 +514,6 @@
to perform NAT in the redirect flavour.
config NFT_NAT
- depends on NF_TABLES
depends on NF_CONNTRACK
select NF_NAT
tristate "Netfilter nf_tables nat module"
@@ -531,7 +522,6 @@
typical Network Address Translation (NAT) packet transformations.
config NFT_QUEUE
- depends on NF_TABLES
depends on NETFILTER_XTABLES
depends on NETFILTER_NETLINK_QUEUE
tristate "Netfilter nf_tables queue module"
@@ -540,7 +530,6 @@
infrastructure (also known as NFQUEUE) from nftables.
config NFT_REJECT
- depends on NF_TABLES
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
help
@@ -554,7 +543,6 @@
tristate
config NFT_COMPAT
- depends on NF_TABLES
depends on NETFILTER_XTABLES
tristate "Netfilter x_tables over nf_tables module"
help
@@ -562,6 +550,8 @@
x_tables match/target extensions over the nf_tables
framework.
+endif # NF_TABLES
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 199fd0f..284b20c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -401,7 +401,8 @@
}
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
- [NFTA_TABLE_NAME] = { .type = NLA_STRING },
+ [NFTA_TABLE_NAME] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
};
@@ -686,13 +687,13 @@
if (!try_module_get(afi->owner))
return -EAFNOSUPPORT;
- table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
if (table == NULL) {
module_put(afi->owner);
return -ENOMEM;
}
- nla_strlcpy(table->name, name, nla_len(name));
+ nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 3b90eb2..77165bf 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -21,6 +21,48 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
+enum nft_trace {
+ NFT_TRACE_RULE,
+ NFT_TRACE_RETURN,
+ NFT_TRACE_POLICY,
+};
+
+static const char *const comments[] = {
+ [NFT_TRACE_RULE] = "rule",
+ [NFT_TRACE_RETURN] = "return",
+ [NFT_TRACE_POLICY] = "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 4,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+static void __nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+ nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+ chain->table->name, chain->name, comments[type],
+ rulenum);
+}
+
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ if (unlikely(pkt->skb->nf_trace))
+ __nft_trace_packet(pkt, chain, rulenum, type);
+}
+
static void nft_cmp_fast_eval(const struct nft_expr *expr,
struct nft_data data[NFT_REG_MAX + 1])
{
@@ -66,40 +108,6 @@
int rulenum;
};
-enum nft_trace {
- NFT_TRACE_RULE,
- NFT_TRACE_RETURN,
- NFT_TRACE_POLICY,
-};
-
-static const char *const comments[] = {
- [NFT_TRACE_RULE] = "rule",
- [NFT_TRACE_RETURN] = "return",
- [NFT_TRACE_POLICY] = "policy",
-};
-
-static struct nf_loginfo trace_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = 4,
- .logflags = NF_LOG_MASK,
- },
- },
-};
-
-static void nft_trace_packet(const struct nft_pktinfo *pkt,
- const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
-{
- struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
-
- nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
- pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
- chain->table->name, chain->name, comments[type],
- rulenum);
-}
-
unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
@@ -146,8 +154,7 @@
data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
continue;
case NFT_CONTINUE:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
continue;
}
break;
@@ -157,37 +164,28 @@
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
return data[NFT_REG_VERDICT].verdict;
}
switch (data[NFT_REG_VERDICT].verdict) {
case NFT_JUMP:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
jumpstack[stackptr].chain = chain;
jumpstack[stackptr].rule = rule;
jumpstack[stackptr].rulenum = rulenum;
stackptr++;
- chain = data[NFT_REG_VERDICT].chain;
- goto do_chain;
+ /* fall through */
case NFT_GOTO:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
chain = data[NFT_REG_VERDICT].chain;
goto do_chain;
- case NFT_RETURN:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
- break;
case NFT_CONTINUE:
- if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN)))
- nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
+ rulenum++;
+ /* fall through */
+ case NFT_RETURN:
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
break;
default:
WARN_ON(1);
@@ -201,8 +199,7 @@
goto next_rule;
}
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+ nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
rcu_read_lock_bh();
stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 7b5f9d5..9287711 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -28,14 +28,16 @@
case NFPROTO_IPV4:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach(pkt->skb, priv->icmp_code);
+ nf_send_unreach(pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
break;
case NFT_REJECT_TCP_RST:
nf_send_reset(pkt->skb, pkt->ops->hooknum);
break;
case NFT_REJECT_ICMPX_UNREACH:
nf_send_unreach(pkt->skb,
- nft_reject_icmp_code(priv->icmp_code));
+ nft_reject_icmp_code(priv->icmp_code),
+ pkt->ops->hooknum);
break;
}
break;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index baef987..8b0470e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -286,7 +286,7 @@
RCU_INIT_POINTER(*back, next);
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
err = 0;
goto errout;
}
@@ -301,14 +301,20 @@
err = -EEXIST;
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
goto errout;
}
break;
case RTM_DELTFILTER:
err = tp->ops->delete(tp, fh);
- if (err == 0)
+ if (err == 0) {
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+ if (tcf_destroy(tp, false)) {
+ struct tcf_proto *next = rtnl_dereference(tp->next);
+
+ RCU_INIT_POINTER(*back, next);
+ }
+ }
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
@@ -329,7 +335,7 @@
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
} else {
if (tp_created)
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
}
errout:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index fc399db..0b8c3ac 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -96,11 +96,14 @@
kfree(f);
}
-static void basic_destroy(struct tcf_proto *tp)
+static bool basic_destroy(struct tcf_proto *tp, bool force)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
+ if (!force && !list_empty(&head->flist))
+ return false;
+
list_for_each_entry_safe(f, n, &head->flist, link) {
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
@@ -108,6 +111,7 @@
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static int basic_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6f7ed8f..243c9f2 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -137,11 +137,14 @@
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp)
+static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
+ if (!force && !list_empty(&head->plist))
+ return false;
+
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
@@ -150,6 +153,7 @@
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 221697a..ea611b21 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -143,14 +143,18 @@
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp)
+static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
+ if (!force)
+ return false;
+
if (head) {
RCU_INIT_POINTER(tp->root, NULL);
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
}
+ return true;
}
static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 4614103..a620c4e 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -557,17 +557,21 @@
return 0;
}
-static void flow_destroy(struct tcf_proto *tp)
+static bool flow_destroy(struct tcf_proto *tp, bool force)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
+ if (!force && !list_empty(&head->filters))
+ return false;
+
list_for_each_entry_safe(f, next, &head->filters, list) {
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 9d9aa3e..715e01e 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -133,14 +133,20 @@
kfree(f);
}
-static void fw_destroy(struct tcf_proto *tp)
+static bool fw_destroy(struct tcf_proto *tp, bool force)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
int h;
if (head == NULL)
- return;
+ return true;
+
+ if (!force) {
+ for (h = 0; h < HTSIZE; h++)
+ if (rcu_access_pointer(head->ht[h]))
+ return false;
+ }
for (h = 0; h < HTSIZE; h++) {
while ((f = rtnl_dereference(head->ht[h])) != NULL) {
@@ -152,6 +158,7 @@
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static int fw_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index bb8a602..08a3b0a 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -277,13 +277,20 @@
kfree(f);
}
-static void route4_destroy(struct tcf_proto *tp)
+static bool route4_destroy(struct tcf_proto *tp, bool force)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
if (head == NULL)
- return;
+ return true;
+
+ if (!force) {
+ for (h1 = 0; h1 <= 256; h1++) {
+ if (rcu_access_pointer(head->table[h1]))
+ return false;
+ }
+ }
for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b;
@@ -308,6 +315,7 @@
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static int route4_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index edd8ade..02fa827 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -291,13 +291,20 @@
kfree_rcu(f, rcu);
}
-static void rsvp_destroy(struct tcf_proto *tp)
+static bool rsvp_destroy(struct tcf_proto *tp, bool force)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
if (data == NULL)
- return;
+ return true;
+
+ if (!force) {
+ for (h1 = 0; h1 < 256; h1++) {
+ if (rcu_access_pointer(data->ht[h1]))
+ return false;
+ }
+ }
RCU_INIT_POINTER(tp->root, NULL);
@@ -319,6 +326,7 @@
}
}
kfree_rcu(data, rcu);
+ return true;
}
static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index bd49bf5..a557dba 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -468,11 +468,14 @@
}
}
-static void tcindex_destroy(struct tcf_proto *tp)
+static bool tcindex_destroy(struct tcf_proto *tp, bool force)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
+ if (!force)
+ return false;
+
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
walker.count = 0;
walker.skip = 0;
@@ -481,6 +484,7 @@
RCU_INIT_POINTER(tp->root, NULL);
call_rcu(&p->rcu, __tcindex_destroy);
+ return true;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 09487af..375e51b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -460,13 +460,35 @@
return -ENOENT;
}
-static void u32_destroy(struct tcf_proto *tp)
+static bool ht_empty(struct tc_u_hnode *ht)
+{
+ unsigned int h;
+
+ for (h = 0; h <= ht->divisor; h++)
+ if (rcu_access_pointer(ht->ht[h]))
+ return false;
+
+ return true;
+}
+
+static bool u32_destroy(struct tcf_proto *tp, bool force)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
WARN_ON(root_ht == NULL);
+ if (!force) {
+ if (root_ht) {
+ if (root_ht->refcnt > 1)
+ return false;
+ if (root_ht->refcnt == 1) {
+ if (!ht_empty(root_ht))
+ return false;
+ }
+ }
+ }
+
if (root_ht && --root_ht->refcnt == 0)
u32_destroy_hnode(tp, root_ht);
@@ -491,6 +513,7 @@
}
tp->data = NULL;
+ return true;
}
static int u32_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 243b7d1..ad9eed7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1858,11 +1858,15 @@
}
EXPORT_SYMBOL(tc_classify);
-void tcf_destroy(struct tcf_proto *tp)
+bool tcf_destroy(struct tcf_proto *tp, bool force)
{
- tp->ops->destroy(tp);
- module_put(tp->ops->owner);
- kfree_rcu(tp, rcu);
+ if (tp->ops->destroy(tp, force)) {
+ module_put(tp->ops->owner);
+ kfree_rcu(tp, rcu);
+ return true;
+ }
+
+ return false;
}
void tcf_destroy_chain(struct tcf_proto __rcu **fl)
@@ -1871,7 +1875,7 @@
while ((tp = rtnl_dereference(*fl)) != NULL) {
RCU_INIT_POINTER(*fl, tp->next);
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
}
}
EXPORT_SYMBOL(tcf_destroy_chain);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 19e4e72..aba6aa2 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -33,7 +33,7 @@
return -EOPNOTSUPP;
return ops->ndo_switch_parent_id_get(dev, psid);
}
-EXPORT_SYMBOL(netdev_switch_parent_id_get);
+EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get);
/**
* netdev_switch_port_stp_update - Notify switch device port of STP
@@ -52,7 +52,7 @@
WARN_ON(!ops->ndo_switch_parent_id_get);
return ops->ndo_switch_port_stp_update(dev, state);
}
-EXPORT_SYMBOL(netdev_switch_port_stp_update);
+EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update);
static DEFINE_MUTEX(netdev_switch_mutex);
static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain);
@@ -74,7 +74,7 @@
mutex_unlock(&netdev_switch_mutex);
return err;
}
-EXPORT_SYMBOL(register_netdev_switch_notifier);
+EXPORT_SYMBOL_GPL(register_netdev_switch_notifier);
/**
* unregister_netdev_switch_notifier - Unregister nofifier
@@ -92,7 +92,7 @@
mutex_unlock(&netdev_switch_mutex);
return err;
}
-EXPORT_SYMBOL(unregister_netdev_switch_notifier);
+EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier);
/**
* call_netdev_switch_notifiers - Call nofifiers
@@ -115,7 +115,7 @@
mutex_unlock(&netdev_switch_mutex);
return err;
}
-EXPORT_SYMBOL(call_netdev_switch_notifiers);
+EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers);
/**
* netdev_switch_port_bridge_setlink - Notify switch device port of bridge
@@ -140,7 +140,7 @@
return ops->ndo_bridge_setlink(dev, nlh, flags);
}
-EXPORT_SYMBOL(netdev_switch_port_bridge_setlink);
+EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink);
/**
* netdev_switch_port_bridge_dellink - Notify switch device port of bridge
@@ -165,7 +165,7 @@
return ops->ndo_bridge_dellink(dev, nlh, flags);
}
-EXPORT_SYMBOL(netdev_switch_port_bridge_dellink);
+EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink);
/**
* ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink
@@ -195,7 +195,7 @@
return ret;
}
-EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink);
+EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink);
/**
* ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink
@@ -225,7 +225,7 @@
return ret;
}
-EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink);
+EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink);
static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev)
{
@@ -331,7 +331,7 @@
return err;
}
-EXPORT_SYMBOL(netdev_switch_fib_ipv4_add);
+EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add);
/**
* netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch
@@ -369,7 +369,7 @@
return err;
}
-EXPORT_SYMBOL(netdev_switch_fib_ipv4_del);
+EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_del);
/**
* netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation
@@ -389,4 +389,4 @@
fib_flush_external(fi->fib_net);
fi->fib_net->ipv4.fib_offload_disabled = true;
}
-EXPORT_SYMBOL(netdev_switch_fib_ipv4_abort);
+EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_abort);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 95c514a..934947f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1318,12 +1318,12 @@
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
break;
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- break;
err = -EAGAIN;
if (!timeo)
break;
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ break;
}
finish_wait(sk_sleep(sk), &wait);
*timeop = timeo;
@@ -2026,12 +2026,12 @@
err = -EINVAL;
if (sock->state != SS_LISTENING)
break;
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- break;
err = -EAGAIN;
if (!timeo)
break;
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ break;
}
finish_wait(sk_sleep(sk), &wait);
return err;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 0d10001..fc2fb11 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -162,7 +162,7 @@
err = -ENODEV;
goto tx_error;
}
- if (htons(dst->proto) == ETH_P_IP) {
+ if (dst->proto == htons(ETH_P_IP)) {
struct flowi4 fl = {
.daddr = dst->ipv4.s_addr,
.saddr = src->ipv4.s_addr,
@@ -334,7 +334,7 @@
struct udp_media_addr *remote;
struct udp_media_addr local = {0};
struct udp_port_cfg udp_conf = {0};
- struct udp_tunnel_sock_cfg tuncfg = {0};
+ struct udp_tunnel_sock_cfg tuncfg = {NULL};
ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
if (!ub)
@@ -351,7 +351,7 @@
rcu_assign_pointer(b->media_ptr, ub);
rcu_assign_pointer(ub->bearer, b);
tipc_udp_media_addr_set(&b->addr, &local);
- if (htons(local.proto) == ETH_P_IP) {
+ if (local.proto == htons(ETH_P_IP)) {
struct net_device *dev;
dev = __ip_dev_find(net, local.ipv4.s_addr, false);
@@ -366,7 +366,7 @@
b->mtu = dev->mtu - sizeof(struct iphdr)
- sizeof(struct udphdr);
#if IS_ENABLED(CONFIG_IPV6)
- } else if (htons(local.proto) == ETH_P_IPV6) {
+ } else if (local.proto == htons(ETH_P_IPV6)) {
udp_conf.family = AF_INET6;
udp_conf.use_udp6_tx_checksums = true;
udp_conf.use_udp6_rx_checksums = true;