Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next
Jeff Kirsher says:
====================
Intel Wired LAN Driver Updates 2014-11-11
This series contains updates to i40e, i40evf and ixgbe.
Kamil updated the i40e and i40evf driver to poll the firmware slower
since we were polling faster than the firmware could respond.
Shannon updates i40e to add a check to keep the service_task from
running the periodic tasks more than once per second, while still
allowing quick action to service the events.
Jesse cleans up the throttle rate code by fixing the minimum interrupt
throttle rate and removing some unused defines.
Mitch makes the early init admin queue message receive code more robust
by handling messages in a loop and ignoring those that we are not
interested in. This also gets rid of some scary log messages that
really do not indicate a problem.
Don provides several ixgbe patches. The first fixes an issue with the x540
completion timeout, where topologies that include a few levels of PCIe
switching can run into an unexpected completion error. He also cleans up
the functionality in ixgbe_ndo_set_vf_vlan() in preparation for future
work, and adds support for X550 MACs to the driver.
v2:
- Remove code comment in patch 01 of the series, based on feedback from
David Laight
- Updated the "goto" to "break" statements in patch 06 of the series,
based on feedback from Sergei Shtylyov
- Initialized the variable err due to the possibility of use before
being assigned a value in patch 07 of the series
- Added patch "ixgbe: add helper function for setting RSS key in
preparation of X550" since it is needed for the addition of X550 MAC
support
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 04892b8..e26c607 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -120,10 +120,14 @@
warnings
--------
-This controls console messages from the networking stack that can occur because
-of problems on the network like duplicate address or bad checksums. Normally,
-this should be enabled, but if the problem persists the messages can be
-disabled.
+This sysctl is now unused.
+
+This was used to control console messages from the networking stack that
+occur because of problems on the network like duplicate address or bad
+checksums.
+
+These messages are now emitted at KERN_DEBUG and can generally be enabled
+and controlled by the dynamic_debug facility.
netdev_budget
-------------
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 3de1394..e2fe070 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -87,4 +87,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 6e6cd15..92121b0 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -80,4 +80,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index ed94e5e..60f60f5 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -82,6 +82,8 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index ca2c6e6..2c68902 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -80,5 +80,7 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index a1b49ba..09a93fb 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -89,4 +89,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 6c9a24b..e858981 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -80,4 +80,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index a14baa2..2e9ee8c 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -98,4 +98,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index 6aa3ce1..f3492e8c9 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -80,4 +80,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index fe35cea..7984a1c 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -79,4 +79,6 @@
#define SO_BPF_EXTENSIONS 0x4029
+#define SO_INCOMING_CPU 0x402A
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index a9c3e2e..3474e4e 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -87,4 +87,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d110e28..d3fa80d 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -361,6 +361,11 @@
protocol));
break;
case BPF_ANC | SKF_AD_IFINDEX:
+ case BPF_ANC | SKF_AD_HATYPE:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+ ifindex) != 4);
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+ type) != 2);
PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
dev));
PPC_CMPDI(r_scratch1, 0);
@@ -368,14 +373,18 @@
PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
} else {
/* Exit, returning 0; first pass hits here. */
- PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
+ PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12);
PPC_LI(r_ret, 0);
PPC_JMP(exit_addr);
}
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
- ifindex) != 4);
- PPC_LWZ_OFFS(r_A, r_scratch1,
+ if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
+ PPC_LWZ_OFFS(r_A, r_scratch1,
offsetof(struct net_device, ifindex));
+ } else {
+ PPC_LHZ_OFFS(r_A, r_scratch1,
+ offsetof(struct net_device, type));
+ }
+
break;
case BPF_ANC | SKF_AD_MARK:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index e031332..8457636 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -86,4 +86,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 54d9608..4a8003a 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -76,6 +76,8 @@
#define SO_BPF_EXTENSIONS 0x0032
+#define SO_INCOMING_CPU 0x0033
+
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 39acec0c..c46f6a6 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -91,4 +91,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* _XTENSA_SOCKET_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 8ea4d5b..6c64323 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -115,7 +115,7 @@
"tso_packets",
"xmit_more",
"queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed",
- "rx_csum_good", "rx_csum_none", "tx_chksum_offload",
+ "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload",
/* packet statistics */
"broadcast", "rx_prio_0", "rx_prio_1", "rx_prio_2", "rx_prio_3",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0efbae9..d1eb25d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1893,6 +1893,7 @@
priv->rx_ring[i]->packets = 0;
priv->rx_ring[i]->csum_ok = 0;
priv->rx_ring[i]->csum_none = 0;
+ priv->rx_ring[i]->csum_complete = 0;
}
}
@@ -2503,6 +2504,10 @@
/* Query for default mac and max mtu */
priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port];
+ if (mdev->dev->caps.rx_checksum_flags_port[priv->port] &
+ MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP)
+ priv->flags |= MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP;
+
/* Set default MAC */
dev->addr_len = ETH_ALEN;
mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 134b12e..6cb8007 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -155,11 +155,13 @@
stats->rx_bytes = 0;
priv->port_stats.rx_chksum_good = 0;
priv->port_stats.rx_chksum_none = 0;
+ priv->port_stats.rx_chksum_complete = 0;
for (i = 0; i < priv->rx_ring_num; i++) {
stats->rx_packets += priv->rx_ring[i]->packets;
stats->rx_bytes += priv->rx_ring[i]->bytes;
priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok;
priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none;
+ priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete;
}
stats->tx_packets = 0;
stats->tx_bytes = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5a193f4..ccd9517 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -42,6 +42,10 @@
#include <linux/vmalloc.h>
#include <linux/irq.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_checksum.h>
+#endif
+
#include "mlx4_en.h"
static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
@@ -643,6 +647,86 @@
}
}
+/* When hardware doesn't strip the vlan, we need to calculate the checksum
+ * over it and add it to the hardware's checksum calculation
+ */
+static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum,
+ struct vlan_hdr *vlanh)
+{
+ return csum_add(hw_checksum, *(__wsum *)vlanh);
+}
+
+/* Although the stack expects checksum which doesn't include the pseudo
+ * header, the HW adds it. To address that, we are subtracting the pseudo
+ * header checksum from the checksum value provided by the HW.
+ */
+static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
+ struct iphdr *iph)
+{
+ __u16 length_for_csum = 0;
+ __wsum csum_pseudo_header = 0;
+
+ length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2));
+ csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ length_for_csum, iph->protocol, 0);
+ skb->csum = csum_sub(hw_checksum, csum_pseudo_header);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/* In IPv6 packets, besides subtracting the pseudo header checksum,
+ * we also compute/add the IP header checksum which
+ * is not added by the HW.
+ */
+static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
+ struct ipv6hdr *ipv6h)
+{
+ __wsum csum_pseudo_hdr = 0;
+
+ if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS)
+ return -1;
+ hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8));
+
+ csum_pseudo_hdr = csum_partial(&ipv6h->saddr,
+ sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);
+ csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len);
+ csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr));
+
+ skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr);
+ skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0));
+ return 0;
+}
+#endif
+static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
+ int hwtstamp_rx_filter)
+{
+ __wsum hw_checksum = 0;
+
+ void *hdr = (u8 *)va + sizeof(struct ethhdr);
+
+ hw_checksum = csum_unfold((__force __sum16)cqe->checksum);
+
+ if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) &&
+ hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) {
+ /* next protocol non IPv4 or IPv6 */
+ if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto
+ != htons(ETH_P_IP) &&
+ ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto
+ != htons(ETH_P_IPV6))
+ return -1;
+ hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr);
+ hdr += sizeof(struct vlan_hdr);
+ }
+
+ if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4))
+ get_fixed_ipv4_csum(hw_checksum, skb, hdr);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
+ if (get_fixed_ipv6_csum(hw_checksum, skb, hdr))
+ return -1;
+#endif
+ return 0;
+}
+
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -744,73 +828,95 @@
(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
if (likely(dev->features & NETIF_F_RXCSUM)) {
- if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
- (cqe->checksum == cpu_to_be16(0xffff))) {
- ring->csum_ok++;
- /* This packet is eligible for GRO if it is:
- * - DIX Ethernet (type interpretation)
- * - TCP/IP (v4)
- * - without IP options
- * - not an IP fragment
- * - no LLS polling in progress
- */
- if (!mlx4_en_cq_busy_polling(cq) &&
- (dev->features & NETIF_F_GRO)) {
- struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
- if (!gro_skb)
- goto next;
-
- nr = mlx4_en_complete_rx_desc(priv,
- rx_desc, frags, gro_skb,
- length);
- if (!nr)
- goto next;
-
- skb_shinfo(gro_skb)->nr_frags = nr;
- gro_skb->len = length;
- gro_skb->data_len = length;
- gro_skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- if (l2_tunnel)
- gro_skb->csum_level = 1;
- if ((cqe->vlan_my_qpn &
- cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
- u16 vid = be16_to_cpu(cqe->sl_vid);
-
- __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid);
- }
-
- if (dev->features & NETIF_F_RXHASH)
- skb_set_hash(gro_skb,
- be32_to_cpu(cqe->immed_rss_invalid),
- PKT_HASH_TYPE_L3);
-
- skb_record_rx_queue(gro_skb, cq->ring);
- skb_mark_napi_id(gro_skb, &cq->napi);
-
- if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
- timestamp = mlx4_en_get_cqe_ts(cqe);
- mlx4_en_fill_hwtstamps(mdev,
- skb_hwtstamps(gro_skb),
- timestamp);
- }
-
- napi_gro_frags(&cq->napi);
- goto next;
+ if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
+ MLX4_CQE_STATUS_UDP)) {
+ if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
+ cqe->checksum == cpu_to_be16(0xffff)) {
+ ip_summed = CHECKSUM_UNNECESSARY;
+ ring->csum_ok++;
+ } else {
+ ip_summed = CHECKSUM_NONE;
+ ring->csum_none++;
}
-
- /* GRO not possible, complete processing here */
- ip_summed = CHECKSUM_UNNECESSARY;
} else {
- ip_summed = CHECKSUM_NONE;
- ring->csum_none++;
+ if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
+ (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
+ MLX4_CQE_STATUS_IPV6))) {
+ ip_summed = CHECKSUM_COMPLETE;
+ ring->csum_complete++;
+ } else {
+ ip_summed = CHECKSUM_NONE;
+ ring->csum_none++;
+ }
}
} else {
ip_summed = CHECKSUM_NONE;
ring->csum_none++;
}
+ /* This packet is eligible for GRO if it is:
+ * - DIX Ethernet (type interpretation)
+ * - TCP/IP (v4)
+ * - without IP options
+ * - not an IP fragment
+ * - no LLS polling in progress
+ */
+ if (!mlx4_en_cq_busy_polling(cq) &&
+ (dev->features & NETIF_F_GRO)) {
+ struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
+ if (!gro_skb)
+ goto next;
+
+ nr = mlx4_en_complete_rx_desc(priv,
+ rx_desc, frags, gro_skb,
+ length);
+ if (!nr)
+ goto next;
+
+ if (ip_summed == CHECKSUM_COMPLETE) {
+ void *va = skb_frag_address(skb_shinfo(gro_skb)->frags);
+ if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) {
+ ip_summed = CHECKSUM_NONE;
+ ring->csum_none++;
+ ring->csum_complete--;
+ }
+ }
+
+ skb_shinfo(gro_skb)->nr_frags = nr;
+ gro_skb->len = length;
+ gro_skb->data_len = length;
+ gro_skb->ip_summed = ip_summed;
+
+ if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
+ gro_skb->encapsulation = 1;
+ if ((cqe->vlan_my_qpn &
+ cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) &&
+ (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
+ u16 vid = be16_to_cpu(cqe->sl_vid);
+
+ __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid);
+ }
+
+ if (dev->features & NETIF_F_RXHASH)
+ skb_set_hash(gro_skb,
+ be32_to_cpu(cqe->immed_rss_invalid),
+ PKT_HASH_TYPE_L3);
+
+ skb_record_rx_queue(gro_skb, cq->ring);
+ skb_mark_napi_id(gro_skb, &cq->napi);
+
+ if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
+ timestamp = mlx4_en_get_cqe_ts(cqe);
+ mlx4_en_fill_hwtstamps(mdev,
+ skb_hwtstamps(gro_skb),
+ timestamp);
+ }
+
+ napi_gro_frags(&cq->napi);
+ goto next;
+ }
+
+ /* GRO not possible, complete processing here */
skb = mlx4_en_rx_skb(priv, rx_desc, frags, length);
if (!skb) {
priv->stats.rx_dropped++;
@@ -822,6 +928,14 @@
goto next;
}
+ if (ip_summed == CHECKSUM_COMPLETE) {
+ if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) {
+ ip_summed = CHECKSUM_NONE;
+ ring->csum_complete--;
+ ring->csum_none++;
+ }
+ }
+
skb->ip_summed = ip_summed;
skb->protocol = eth_type_trans(skb, dev);
skb_record_rx_queue(skb, cq->ring);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 9f82196..2f6ba42 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1629,6 +1629,7 @@
struct mlx4_init_hca_param init_hca;
u64 icm_size;
int err;
+ struct mlx4_config_dev_params params;
if (!mlx4_is_slave(dev)) {
err = mlx4_QUERY_FW(dev);
@@ -1762,6 +1763,14 @@
goto unmap_bf;
}
+ /* Query CONFIG_DEV parameters */
+ err = mlx4_config_dev_retrieval(dev, &params);
+ if (err && err != -ENOTSUPP) {
+ mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
+ } else if (!err) {
+ dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
+ dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
+ }
priv->eq_table.inta_pin = adapter.inta_pin;
memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index ef83d12..de45674 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -326,6 +326,7 @@
#endif
unsigned long csum_ok;
unsigned long csum_none;
+ unsigned long csum_complete;
int hwtstamp_rx_filter;
cpumask_var_t affinity_mask;
};
@@ -449,6 +450,7 @@
unsigned long rx_alloc_failed;
unsigned long rx_chksum_good;
unsigned long rx_chksum_none;
+ unsigned long rx_chksum_complete;
unsigned long tx_chksum_offload;
#define NUM_PORT_STATS 9
};
@@ -507,7 +509,8 @@
MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2),
/* whether we need to drop packets that hardware loopback-ed */
MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3),
- MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4)
+ MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4),
+ MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5),
};
#define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE)
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 33b85ba..7d3af19 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -14,7 +14,7 @@
if STMMAC_ETH
config STMMAC_PLATFORM
- bool "STMMAC Platform bus support"
+ tristate "STMMAC Platform bus support"
depends on STMMAC_ETH
default y
---help---
@@ -27,7 +27,7 @@
If unsure, say N.
config STMMAC_PCI
- bool "STMMAC PCI bus support"
+ tristate "STMMAC PCI bus support"
depends on STMMAC_ETH && PCI
---help---
This is to select the Synopsys DWMAC available on PCI devices,
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 034da70..ac4d562 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -1,9 +1,12 @@
obj-$(CONFIG_STMMAC_ETH) += stmmac.o
-stmmac-$(CONFIG_STMMAC_PCI) += stmmac_pci.o
-stmmac-$(CONFIG_STMMAC_PLATFORM) += stmmac_platform.o dwmac-meson.o \
- dwmac-sunxi.o dwmac-sti.o \
- dwmac-socfpga.o
stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \
- chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \
- dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \
+ chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \
+ dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \
mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o $(stmmac-y)
+
+obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o
+stmmac-platform-objs:= stmmac_platform.o dwmac-meson.o dwmac-sunxi.o \
+ dwmac-sti.o dwmac-socfpga.o
+
+obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o
+stmmac-pci-objs:= stmmac_pci.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index bd75ee8..c0a3919 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -134,65 +134,4 @@
void stmmac_disable_eee_mode(struct stmmac_priv *priv);
bool stmmac_eee_init(struct stmmac_priv *priv);
-#ifdef CONFIG_STMMAC_PLATFORM
-extern struct platform_driver stmmac_pltfr_driver;
-
-static inline int stmmac_register_platform(void)
-{
- int err;
-
- err = platform_driver_register(&stmmac_pltfr_driver);
- if (err)
- pr_err("stmmac: failed to register the platform driver\n");
-
- return err;
-}
-
-static inline void stmmac_unregister_platform(void)
-{
- platform_driver_unregister(&stmmac_pltfr_driver);
-}
-#else
-static inline int stmmac_register_platform(void)
-{
- pr_debug("stmmac: do not register the platf driver\n");
-
- return 0;
-}
-
-static inline void stmmac_unregister_platform(void)
-{
-}
-#endif /* CONFIG_STMMAC_PLATFORM */
-
-#ifdef CONFIG_STMMAC_PCI
-extern struct pci_driver stmmac_pci_driver;
-static inline int stmmac_register_pci(void)
-{
- int err;
-
- err = pci_register_driver(&stmmac_pci_driver);
- if (err)
- pr_err("stmmac: failed to register the PCI driver\n");
-
- return err;
-}
-
-static inline void stmmac_unregister_pci(void)
-{
- pci_unregister_driver(&stmmac_pci_driver);
-}
-#else
-static inline int stmmac_register_pci(void)
-{
- pr_debug("stmmac: do not register the PCI driver\n");
-
- return 0;
-}
-
-static inline void stmmac_unregister_pci(void)
-{
-}
-#endif /* CONFIG_STMMAC_PCI */
-
#endif /* __STMMAC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 53db11b..0f1c146 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2885,6 +2885,7 @@
return ERR_PTR(ret);
}
+EXPORT_SYMBOL_GPL(stmmac_dvr_probe);
/**
* stmmac_dvr_remove
@@ -2914,8 +2915,8 @@
return 0;
}
+EXPORT_SYMBOL_GPL(stmmac_dvr_remove);
-#ifdef CONFIG_PM
int stmmac_suspend(struct net_device *ndev)
{
struct stmmac_priv *priv = netdev_priv(ndev);
@@ -2957,6 +2958,7 @@
priv->oldduplex = -1;
return 0;
}
+EXPORT_SYMBOL_GPL(stmmac_suspend);
int stmmac_resume(struct net_device *ndev)
{
@@ -3003,37 +3005,7 @@
return 0;
}
-#endif /* CONFIG_PM */
-
-/* Driver can be configured w/ and w/ both PCI and Platf drivers
- * depending on the configuration selected.
- */
-static int __init stmmac_init(void)
-{
- int ret;
-
- ret = stmmac_register_platform();
- if (ret)
- goto err;
- ret = stmmac_register_pci();
- if (ret)
- goto err_pci;
- return 0;
-err_pci:
- stmmac_unregister_platform();
-err:
- pr_err("stmmac: driver registration failed\n");
- return ret;
-}
-
-static void __exit stmmac_exit(void)
-{
- stmmac_unregister_platform();
- stmmac_unregister_pci();
-}
-
-module_init(stmmac_init);
-module_exit(stmmac_exit);
+EXPORT_SYMBOL_GPL(stmmac_resume);
#ifndef MODULE
static int __init stmmac_cmdline_opt(char *str)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 5084699..77a6d68 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -158,7 +158,7 @@
MODULE_DEVICE_TABLE(pci, stmmac_id_table);
-struct pci_driver stmmac_pci_driver = {
+static struct pci_driver stmmac_pci_driver = {
.name = STMMAC_RESOURCE_NAME,
.id_table = stmmac_id_table,
.probe = stmmac_pci_probe,
@@ -168,6 +168,8 @@
},
};
+module_pci_driver(stmmac_pci_driver);
+
MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet PCI driver");
MODULE_AUTHOR("Rayagond Kokatanur <rayagond.kokatanur@vayavyalabs.com>");
MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 9f18401..e22a960 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -362,7 +362,7 @@
return ret;
}
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
static int stmmac_pltfr_suspend(struct device *dev)
{
int ret;
@@ -388,13 +388,12 @@
return stmmac_resume(ndev);
}
-
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
static SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops,
- stmmac_pltfr_suspend, stmmac_pltfr_resume);
+ stmmac_pltfr_suspend, stmmac_pltfr_resume);
-struct platform_driver stmmac_pltfr_driver = {
+static struct platform_driver stmmac_pltfr_driver = {
.probe = stmmac_pltfr_probe,
.remove = stmmac_pltfr_remove,
.driver = {
@@ -402,9 +401,11 @@
.owner = THIS_MODULE,
.pm = &stmmac_pltfr_pm_ops,
.of_match_table = of_match_ptr(stmmac_dt_ids),
- },
+ },
};
+module_platform_driver(stmmac_pltfr_driver);
+
MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet PLATFORM driver");
MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index d07bf4c..26423ad 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1670,12 +1670,14 @@
static int smsc95xx_resume(struct usb_interface *intf)
{
struct usbnet *dev = usb_get_intfdata(intf);
- struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
- u8 suspend_flags = pdata->suspend_flags;
+ struct smsc95xx_priv *pdata;
+ u8 suspend_flags;
int ret;
u32 val;
BUG_ON(!dev);
+ pdata = (struct smsc95xx_priv *)(dev->data[0]);
+ suspend_flags = pdata->suspend_flags;
netdev_dbg(dev->net, "resume suspend_flags=0x%02x\n", suspend_flags);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 5cc5eac..3d9bff0 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -497,6 +497,7 @@
u16 hca_core_clock;
u64 phys_port_id[MLX4_MAX_PORTS + 1];
int tunnel_offload_mode;
+ u8 rx_checksum_flags_port[MLX4_MAX_PORTS + 1];
};
struct mlx4_buf_list {
diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h
index a059465..42aa054 100644
--- a/include/net/irda/irda.h
+++ b/include/net/irda/irda.h
@@ -77,9 +77,9 @@
#define IRDA_ASSERT_LABEL(label)
#endif /* CONFIG_IRDA_DEBUG */
-#define IRDA_WARNING(args...) do { if (net_ratelimit()) printk(KERN_WARNING args); } while (0)
-#define IRDA_MESSAGE(args...) do { if (net_ratelimit()) printk(KERN_INFO args); } while (0)
-#define IRDA_ERROR(args...) do { if (net_ratelimit()) printk(KERN_ERR args); } while (0)
+#define IRDA_ERROR net_err_ratelimited
+#define IRDA_WARNING net_warn_ratelimited
+#define IRDA_MESSAGE net_info_ratelimited
/*
* Magic numbers used by Linux-IrDA. Random numbers which must be unique to
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index dedfb18..eb070b3 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -220,6 +220,13 @@
struct pneigh_entry **phash_buckets;
};
+enum {
+ NEIGH_ARP_TABLE = 0,
+ NEIGH_ND_TABLE = 1,
+ NEIGH_DN_TABLE = 2,
+ NEIGH_NR_TABLES,
+};
+
static inline int neigh_parms_family(struct neigh_parms *p)
{
return p->tbl->family;
@@ -240,8 +247,8 @@
#define NEIGH_UPDATE_F_ISROUTER 0x40000000
#define NEIGH_UPDATE_F_ADMIN 0x80000000
-void neigh_table_init(struct neigh_table *tbl);
-int neigh_table_clear(struct neigh_table *tbl);
+void neigh_table_init(int index, struct neigh_table *tbl);
+int neigh_table_clear(int index, struct neigh_table *tbl);
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct net_device *dev);
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
diff --git a/include/net/sock.h b/include/net/sock.h
index 6767d75..83a669f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -273,6 +273,7 @@
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_rxhash: flow hash received from netif layer
+ * @sk_incoming_cpu: record cpu processing incoming packets
* @sk_txhash: computed flow hash for use on transmit
* @sk_filter: socket filtering instructions
* @sk_protinfo: private area, net family specific, when not using slab
@@ -350,6 +351,12 @@
#ifdef CONFIG_RPS
__u32 sk_rxhash;
#endif
+ u16 sk_incoming_cpu;
+ /* 16bit hole
+ * Warned : sk_incoming_cpu can be set from softirq,
+ * Do not use this hole without fully understanding possible issues.
+ */
+
__u32 sk_txhash;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_napi_id;
@@ -833,6 +840,11 @@
return sk->sk_backlog_rcv(sk, skb);
}
+static inline void sk_incoming_cpu_update(struct sock *sk)
+{
+ sk->sk_incoming_cpu = raw_smp_processor_id();
+}
+
static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
@@ -2276,13 +2288,6 @@
bool sk_capable(const struct sock *sk, int cap);
bool sk_net_capable(const struct sock *sk, int cap);
-/*
- * Enable debug/info messages
- */
-extern int net_msg_warn;
-#define LIMIT_NETDEBUG(fmt, args...) \
- do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0)
-
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 2caadab..9a28a51 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -40,7 +40,7 @@
* checksum. UDP-Lite (like IPv6) mandates checksums, hence packets
* with a zero checksum field are illegal. */
if (uh->check == 0) {
- LIMIT_NETDEBUG(KERN_DEBUG "UDPLite: zeroed checksum field\n");
+ net_dbg_ratelimited("UDPLite: zeroed checksum field\n");
return 1;
}
@@ -52,8 +52,8 @@
/*
* Coverage length violates RFC 3828: log and discard silently.
*/
- LIMIT_NETDEBUG(KERN_DEBUG "UDPLite: bad csum coverage %d/%d\n",
- cscov, skb->len);
+ net_dbg_ratelimited("UDPLite: bad csum coverage %d/%d\n",
+ cscov, skb->len);
return 1;
} else if (cscov < skb->len) {
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index ea0796b..f541cce 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -82,4 +82,6 @@
#define SO_BPF_EXTENSIONS 48
+#define SO_INCOMING_CPU 49
+
#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 26c36c4..3a6dcaa 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -157,6 +157,11 @@
* notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
* %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
* specified there.
+ * @OVS_PACKET_ATTR_EGRESS_TUN_KEY: Present for an %OVS_PACKET_CMD_ACTION
+ * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
+ * %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the
+ * output port is actually a tunnel port. Contains the output tunnel key
+ * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes.
*
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_PACKET_* commands.
@@ -167,6 +172,8 @@
OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */
OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */
+ OVS_PACKET_ATTR_EGRESS_TUN_KEY, /* Nested OVS_TUNNEL_KEY_ATTR_*
+ attributes. */
__OVS_PACKET_ATTR_MAX
};
@@ -315,6 +322,8 @@
OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */
OVS_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */
OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */
+ OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */
+ OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */
__OVS_TUNNEL_KEY_ATTR_MAX
};
@@ -448,6 +457,8 @@
OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */
OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */
OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */
+ OVS_FLOW_ATTR_PROBE, /* Flow operation is a feature probe, error
+ * logging should be suppressed. */
__OVS_FLOW_ATTR_MAX
};
@@ -480,11 +491,15 @@
* message should be sent. Required.
* @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
* copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
+ * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get
+ * tunnel info.
*/
enum ovs_userspace_attr {
OVS_USERSPACE_ATTR_UNSPEC,
OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */
OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */
+ OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port
+ * to get tunnel info. */
__OVS_USERSPACE_ATTR_MAX
};
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index edd0411..8e38f17 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -56,7 +56,6 @@
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
-static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif
@@ -87,13 +86,8 @@
the most complicated procedure, which we allow is dev->hard_header.
It is supposed, that dev->hard_header is simplistic and does
not make callbacks to neighbour tables.
-
- The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
- list of neighbour tables. This list is used only in process context,
*/
-static DEFINE_RWLOCK(neigh_tbl_lock);
-
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
kfree_skb(skb);
@@ -1520,7 +1514,9 @@
static struct lock_class_key neigh_table_proxy_queue_class;
-static void neigh_table_init_no_netlink(struct neigh_table *tbl)
+static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
+
+void neigh_table_init(int index, struct neigh_table *tbl)
{
unsigned long now = jiffies;
unsigned long phsize;
@@ -1566,34 +1562,14 @@
tbl->last_flush = now;
tbl->last_rand = now + tbl->parms.reachable_time * 20;
-}
-void neigh_table_init(struct neigh_table *tbl)
-{
- struct neigh_table *tmp;
-
- neigh_table_init_no_netlink(tbl);
- write_lock(&neigh_tbl_lock);
- for (tmp = neigh_tables; tmp; tmp = tmp->next) {
- if (tmp->family == tbl->family)
- break;
- }
- tbl->next = neigh_tables;
- neigh_tables = tbl;
- write_unlock(&neigh_tbl_lock);
-
- if (unlikely(tmp)) {
- pr_err("Registering multiple tables for family %d\n",
- tbl->family);
- dump_stack();
- }
+ neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
-int neigh_table_clear(struct neigh_table *tbl)
+int neigh_table_clear(int index, struct neigh_table *tbl)
{
- struct neigh_table **tp;
-
+ neigh_tables[index] = NULL;
/* It is not clean... Fix it to unload IPv6 module safely */
cancel_delayed_work_sync(&tbl->gc_work);
del_timer_sync(&tbl->proxy_timer);
@@ -1601,14 +1577,6 @@
neigh_ifdown(tbl, NULL);
if (atomic_read(&tbl->entries))
pr_crit("neighbour leakage\n");
- write_lock(&neigh_tbl_lock);
- for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
- if (*tp == tbl) {
- *tp = tbl->next;
- break;
- }
- }
- write_unlock(&neigh_tbl_lock);
call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
neigh_hash_free_rcu);
@@ -1626,12 +1594,32 @@
}
EXPORT_SYMBOL(neigh_table_clear);
+static struct neigh_table *neigh_find_table(int family) /* address family -> table stored in neigh_tables[], or NULL */
+{
+ struct neigh_table *tbl = NULL; /* stays NULL when family matches no case below */
+
+ switch (family) { /* each handled family reads one fixed neigh_tables[] slot */
+ case AF_INET:
+ tbl = neigh_tables[NEIGH_ARP_TABLE];
+ break;
+ case AF_INET6:
+ tbl = neigh_tables[NEIGH_ND_TABLE];
+ break;
+ case AF_DECnet:
+ tbl = neigh_tables[NEIGH_DN_TABLE];
+ break;
+ }
+
+ return tbl; /* may also be NULL if the slot is empty (neigh_table_clear() stores NULL there) */
+}
+
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *dst_attr;
struct neigh_table *tbl;
+ struct neighbour *neigh;
struct net_device *dev = NULL;
int err = -EINVAL;
@@ -1652,39 +1640,31 @@
}
}
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables; tbl; tbl = tbl->next) {
- struct neighbour *neigh;
+ tbl = neigh_find_table(ndm->ndm_family);
+ if (tbl == NULL)
+ return -EAFNOSUPPORT;
- if (tbl->family != ndm->ndm_family)
- continue;
- read_unlock(&neigh_tbl_lock);
+ if (nla_len(dst_attr) < tbl->key_len)
+ goto out;
- if (nla_len(dst_attr) < tbl->key_len)
- goto out;
-
- if (ndm->ndm_flags & NTF_PROXY) {
- err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
- goto out;
- }
-
- if (dev == NULL)
- goto out;
-
- neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
- if (neigh == NULL) {
- err = -ENOENT;
- goto out;
- }
-
- err = neigh_update(neigh, NULL, NUD_FAILED,
- NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
- neigh_release(neigh);
+ if (ndm->ndm_flags & NTF_PROXY) {
+ err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
goto out;
}
- read_unlock(&neigh_tbl_lock);
- err = -EAFNOSUPPORT;
+
+ if (dev == NULL)
+ goto out;
+
+ neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+ if (neigh == NULL) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ err = neigh_update(neigh, NULL, NUD_FAILED,
+ NEIGH_UPDATE_F_OVERRIDE |
+ NEIGH_UPDATE_F_ADMIN);
+ neigh_release(neigh);
out:
return err;
@@ -1692,11 +1672,14 @@
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
+ int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct neigh_table *tbl;
struct net_device *dev = NULL;
+ struct neighbour *neigh;
+ void *dst, *lladdr;
int err;
ASSERT_RTNL();
@@ -1720,70 +1703,60 @@
goto out;
}
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables; tbl; tbl = tbl->next) {
- int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
- struct neighbour *neigh;
- void *dst, *lladdr;
+ tbl = neigh_find_table(ndm->ndm_family);
+ if (tbl == NULL)
+ return -EAFNOSUPPORT;
- if (tbl->family != ndm->ndm_family)
- continue;
- read_unlock(&neigh_tbl_lock);
+ if (nla_len(tb[NDA_DST]) < tbl->key_len)
+ goto out;
+ dst = nla_data(tb[NDA_DST]);
+ lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
- if (nla_len(tb[NDA_DST]) < tbl->key_len)
- goto out;
- dst = nla_data(tb[NDA_DST]);
- lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
+ if (ndm->ndm_flags & NTF_PROXY) {
+ struct pneigh_entry *pn;
- if (ndm->ndm_flags & NTF_PROXY) {
- struct pneigh_entry *pn;
-
- err = -ENOBUFS;
- pn = pneigh_lookup(tbl, net, dst, dev, 1);
- if (pn) {
- pn->flags = ndm->ndm_flags;
- err = 0;
- }
- goto out;
- }
-
- if (dev == NULL)
- goto out;
-
- neigh = neigh_lookup(tbl, dst, dev);
- if (neigh == NULL) {
- if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
- err = -ENOENT;
- goto out;
- }
-
- neigh = __neigh_lookup_errno(tbl, dst, dev);
- if (IS_ERR(neigh)) {
- err = PTR_ERR(neigh);
- goto out;
- }
- } else {
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
- err = -EEXIST;
- neigh_release(neigh);
- goto out;
- }
-
- if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
- flags &= ~NEIGH_UPDATE_F_OVERRIDE;
- }
-
- if (ndm->ndm_flags & NTF_USE) {
- neigh_event_send(neigh, NULL);
+ err = -ENOBUFS;
+ pn = pneigh_lookup(tbl, net, dst, dev, 1);
+ if (pn) {
+ pn->flags = ndm->ndm_flags;
err = 0;
- } else
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
- neigh_release(neigh);
+ }
goto out;
}
- read_unlock(&neigh_tbl_lock);
- err = -EAFNOSUPPORT;
+ if (dev == NULL)
+ goto out;
+
+ neigh = neigh_lookup(tbl, dst, dev);
+ if (neigh == NULL) {
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ neigh = __neigh_lookup_errno(tbl, dst, dev);
+ if (IS_ERR(neigh)) {
+ err = PTR_ERR(neigh);
+ goto out;
+ }
+ } else {
+ if (nlh->nlmsg_flags & NLM_F_EXCL) {
+ err = -EEXIST;
+ neigh_release(neigh);
+ goto out;
+ }
+
+ if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
+ flags &= ~NEIGH_UPDATE_F_OVERRIDE;
+ }
+
+ if (ndm->ndm_flags & NTF_USE) {
+ neigh_event_send(neigh, NULL);
+ err = 0;
+ } else
+ err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+ neigh_release(neigh);
+
out:
return err;
}
@@ -1982,7 +1955,8 @@
struct neigh_table *tbl;
struct ndtmsg *ndtmsg;
struct nlattr *tb[NDTA_MAX+1];
- int err;
+ bool found = false;
+ int err, tidx;
err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
nl_neightbl_policy);
@@ -1995,19 +1969,21 @@
}
ndtmsg = nlmsg_data(nlh);
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+
+ for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
+ tbl = neigh_tables[tidx];
+ if (!tbl)
+ continue;
if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
continue;
-
- if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
+ if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
+ found = true;
break;
+ }
}
- if (tbl == NULL) {
- err = -ENOENT;
- goto errout_locked;
- }
+ if (!found)
+ return -ENOENT;
/*
* We acquire tbl->lock to be nice to the periodic timers and
@@ -2118,8 +2094,6 @@
errout_tbl_lock:
write_unlock_bh(&tbl->lock);
-errout_locked:
- read_unlock(&neigh_tbl_lock);
errout:
return err;
}
@@ -2134,10 +2108,13 @@
family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
+ for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
struct neigh_parms *p;
+ tbl = neigh_tables[tidx];
+ if (!tbl)
+ continue;
+
if (tidx < tbl_skip || (family && tbl->family != family))
continue;
@@ -2168,7 +2145,6 @@
neigh_skip = 0;
}
out:
- read_unlock(&neigh_tbl_lock);
cb->args[0] = tidx;
cb->args[1] = nidx;
@@ -2351,7 +2327,6 @@
int proxy = 0;
int err;
- read_lock(&neigh_tbl_lock);
family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
/* check for full ndmsg structure presence, family member is
@@ -2363,8 +2338,11 @@
s_t = cb->args[0];
- for (tbl = neigh_tables, t = 0; tbl;
- tbl = tbl->next, t++) {
+ for (t = 0; t < NEIGH_NR_TABLES; t++) {
+ tbl = neigh_tables[t];
+
+ if (!tbl)
+ continue;
if (t < s_t || (family && tbl->family != family))
continue;
if (t > s_t)
@@ -2377,7 +2355,6 @@
if (err < 0)
break;
}
- read_unlock(&neigh_tbl_lock);
cb->args[0] = t;
return skb->len;
diff --git a/net/core/sock.c b/net/core/sock.c
index ac56dd0..0725cf0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1213,6 +1213,10 @@
v.val = sk->sk_max_pacing_rate;
break;
+ case SO_INCOMING_CPU:
+ v.val = sk->sk_incoming_cpu;
+ break;
+
default:
return -ENOPROTOOPT;
}
@@ -1517,6 +1521,7 @@
newsk->sk_err = 0;
newsk->sk_priority = 0;
+ newsk->sk_incoming_cpu = raw_smp_processor_id();
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cf9cd13..f93f092 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -26,6 +26,8 @@
static int one = 1;
static int ushort_max = USHRT_MAX;
+static int net_msg_warn; /* Unused, but still a sysctl */
+
#ifdef CONFIG_RPS
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
diff --git a/net/core/utils.c b/net/core/utils.c
index efc76dd..7b80388 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -33,9 +33,6 @@
#include <asm/byteorder.h>
#include <asm/uaccess.h>
-int net_msg_warn __read_mostly = 1;
-EXPORT_SYMBOL(net_msg_warn);
-
DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
/*
* All net warning printk()s should be guarded by this function.
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index c8121ce..7ca7c31 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -591,7 +591,7 @@
void __init dn_neigh_init(void)
{
- neigh_table_init(&dn_neigh_table);
+ neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
proc_create("decnet_neigh", S_IRUGO, init_net.proc_net,
&dn_neigh_seq_fops);
}
@@ -599,5 +599,5 @@
void __exit dn_neigh_cleanup(void)
{
remove_proc_entry("decnet_neigh", init_net.proc_net);
- neigh_table_clear(&dn_neigh_table);
+ neigh_table_clear(NEIGH_DN_TABLE, &dn_neigh_table);
}
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index dd646a8..4648f12 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -192,12 +192,12 @@
*/
drv = dsa_switch_probe(host_dev, pd->sw_addr, &name);
if (drv == NULL) {
- printk(KERN_ERR "%s[%d]: could not detect attached switch\n",
- dst->master_netdev->name, index);
+ netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
+ index);
return ERR_PTR(-EINVAL);
}
- printk(KERN_INFO "%s[%d]: detected a %s switch\n",
- dst->master_netdev->name, index, name);
+ netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n",
+ index, name);
/*
@@ -225,7 +225,8 @@
if (!strcmp(name, "cpu")) {
if (dst->cpu_switch != -1) {
- printk(KERN_ERR "multiple cpu ports?!\n");
+ netdev_err(dst->master_netdev,
+ "multiple cpu ports?!\n");
ret = -EINVAL;
goto out;
}
@@ -320,10 +321,8 @@
slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]);
if (slave_dev == NULL) {
- printk(KERN_ERR "%s[%d]: can't create dsa "
- "slave device for port %d(%s)\n",
- dst->master_netdev->name,
- index, i, pd->port_names[i]);
+ netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s)\n",
+ index, i, pd->port_names[i]);
continue;
}
@@ -701,15 +700,13 @@
static int dsa_probe(struct platform_device *pdev)
{
- static int dsa_version_printed;
struct dsa_platform_data *pd = pdev->dev.platform_data;
struct net_device *dev;
struct dsa_switch_tree *dst;
int i, ret;
- if (!dsa_version_printed++)
- printk(KERN_NOTICE "Distributed Switch Architecture "
- "driver version %s\n", dsa_driver_version);
+ pr_notice_once("Distributed Switch Architecture driver version %s\n",
+ dsa_driver_version);
if (pdev->dev.of_node) {
ret = dsa_of_probe(pdev);
@@ -753,9 +750,8 @@
ds = dsa_switch_setup(dst, i, &pdev->dev, pd->chip[i].host_dev);
if (IS_ERR(ds)) {
- printk(KERN_ERR "%s[%d]: couldn't create dsa switch "
- "instance (error %ld)\n", dev->name, i,
- PTR_ERR(ds));
+ netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
+ i, PTR_ERR(ds));
continue;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 0ea466d..528380a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -532,7 +532,7 @@
*/
ret = of_phy_register_fixed_link(port_dn);
if (ret) {
- pr_err("failed to register fixed PHY\n");
+ netdev_err(slave_dev, "failed to register fixed PHY\n");
return;
}
phy_is_fixed = true;
@@ -558,8 +558,8 @@
phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
p->phy_interface);
} else {
- pr_info("attached PHY at address %d [%s]\n",
- p->phy->addr, p->phy->drv->name);
+ netdev_info(slave_dev, "attached PHY at address %d [%s]\n",
+ p->phy->addr, p->phy->drv->name);
}
}
@@ -657,8 +657,8 @@
ret = register_netdev(slave_dev);
if (ret) {
- printk(KERN_ERR "%s: error %d registering interface %s\n",
- master->name, ret, slave_dev->name);
+ netdev_err(master, "error %d registering interface %s\n",
+ ret, slave_dev->name);
free_netdev(slave_dev);
return NULL;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 16acb59..205e147 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1292,7 +1292,7 @@
void __init arp_init(void)
{
- neigh_table_init(&arp_tbl);
+ neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);
dev_add_pack(&arp_packet_type);
arp_proc_init();
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5882f58..36b7bfa 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -784,8 +784,8 @@
*/
switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
default:
- LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
- &iph->daddr);
+ net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
+ &iph->daddr);
break;
case 2:
goto out;
@@ -798,8 +798,8 @@
}
break;
case ICMP_SR_FAILED:
- LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: Source Route Failed\n"),
- &iph->daddr);
+ net_dbg_ratelimited("%pI4: Source Route Failed\n",
+ &iph->daddr);
break;
default:
break;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 19419b6..e792035 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -458,6 +458,6 @@
". Dropping fragment.\n";
if (PTR_ERR(q) == -ENOBUFS)
- LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg);
+ net_dbg_ratelimited("%s%s", prefix, msg);
}
EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4d964da..e5b6d0d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -618,8 +618,7 @@
return 0;
out_nomem:
- LIMIT_NETDEBUG(KERN_ERR pr_fmt("queue_glue: no memory for gluing queue %p\n"),
- qp);
+ net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp);
err = -ENOMEM;
goto out_fail;
out_oversize:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5f979c7..d91436b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5854,12 +5854,12 @@
struct inet_request_sock *ireq = inet_rsk(req);
if (family == AF_INET)
- LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
- &ireq->ir_rmt_addr, port);
+ net_dbg_ratelimited("drop open request from %pI4/%u\n",
+ &ireq->ir_rmt_addr, port);
#if IS_ENABLED(CONFIG_IPV6)
else if (family == AF_INET6)
- LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"),
- &ireq->ir_v6_rmt_addr, port);
+ net_dbg_ratelimited("drop open request from %pI6/%u\n",
+ &ireq->ir_v6_rmt_addr, port);
#endif
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9c7d762..2c6a955 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1429,6 +1429,7 @@
struct dst_entry *dst = sk->sk_rx_dst;
sock_rps_save_rxhash(sk, skb);
+ sk_mark_napi_id(sk, skb);
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
dst->ops->check(dst, 0) == NULL) {
@@ -1450,6 +1451,7 @@
if (nsk != sk) {
sock_rps_save_rxhash(nsk, skb);
+ sk_mark_napi_id(nsk, skb); /* mark nsk, the socket whose rxhash is saved above and that tcp_child_process() runs next */
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
@@ -1661,7 +1663,7 @@
if (sk_filter(sk, skb))
goto discard_and_relse;
- sk_mark_napi_id(sk, skb);
+ sk_incoming_cpu_update(sk);
skb->dev = NULL;
bh_lock_sock_nested(sk);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 9b21ae8b..1829c7f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -374,17 +374,19 @@
*/
struct inet_sock *inet = inet_sk(sk);
if (sk->sk_family == AF_INET) {
- LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
- &inet->inet_daddr,
- ntohs(inet->inet_dport), inet->inet_num,
- tp->snd_una, tp->snd_nxt);
+ net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
+ &inet->inet_daddr,
+ ntohs(inet->inet_dport),
+ inet->inet_num,
+ tp->snd_una, tp->snd_nxt);
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
- &sk->sk_v6_daddr,
- ntohs(inet->inet_dport), inet->inet_num,
- tp->snd_una, tp->snd_nxt);
+ net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
+ &sk->sk_v6_daddr,
+ ntohs(inet->inet_dport),
+ inet->inet_num,
+ tp->snd_una, tp->snd_nxt);
}
#endif
if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5d0fdca..1b6e9d5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1051,7 +1051,7 @@
/* ... which is an evident application bug. --ANK */
release_sock(sk);
- LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("cork app bug 2\n"));
+ net_dbg_ratelimited("cork app bug 2\n");
err = -EINVAL;
goto out;
}
@@ -1133,7 +1133,7 @@
if (unlikely(!up->pending)) {
release_sock(sk);
- LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("udp cork app bug 3\n"));
+ net_dbg_ratelimited("udp cork app bug 3\n");
return -EINVAL;
}
@@ -1445,6 +1445,7 @@
if (inet_sk(sk)->inet_daddr) {
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
+ sk_incoming_cpu_update(sk);
}
rc = sock_queue_rcv_skb(sk, skb);
@@ -1546,8 +1547,8 @@
* provided by the application."
*/
if (up->pcrlen == 0) { /* full coverage was set */
- LIMIT_NETDEBUG(KERN_WARNING "UDPLite: partial coverage %d while full coverage %d requested\n",
- UDP_SKB_CB(skb)->cscov, skb->len);
+ net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
+ UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
}
/* The next case involves violating the min. coverage requested
@@ -1557,8 +1558,8 @@
* Therefore the above ...()->partial_cov statement is essential.
*/
if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
- LIMIT_NETDEBUG(KERN_WARNING "UDPLite: coverage %d too small, need min %d\n",
- UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
+ UDP_SKB_CB(skb)->cscov, up->pcrlen);
goto drop;
}
}
@@ -1827,11 +1828,11 @@
return 0;
short_packet:
- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
- proto == IPPROTO_UDPLITE ? "Lite" : "",
- &saddr, ntohs(uh->source),
- ulen, skb->len,
- &daddr, ntohs(uh->dest));
+ net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
+ proto == IPPROTO_UDPLITE ? "Lite" : "",
+ &saddr, ntohs(uh->source),
+ ulen, skb->len,
+ &daddr, ntohs(uh->dest));
goto drop;
csum_error:
@@ -1839,10 +1840,10 @@
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
- proto == IPPROTO_UDPLITE ? "Lite" : "",
- &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
- ulen);
+ net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
+ proto == IPPROTO_UDPLITE ? "Lite" : "",
+ &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
+ ulen);
UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
drop:
UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 06e8978..251fcb4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1411,10 +1411,8 @@
if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
score->addr_type & IPV6_ADDR_MULTICAST)) {
- LIMIT_NETDEBUG(KERN_DEBUG
- "ADDRCONF: unspecified / multicast address "
- "assigned as unicast address on %s",
- dev->name);
+ net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
+ dev->name);
continue;
}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 6d16eb0..8ab1989 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -272,10 +272,9 @@
ipv6_rearrange_destopt(iph, exthdr.opth);
case NEXTHDR_HOP:
if (!zero_out_mutable_opts(exthdr.opth)) {
- LIMIT_NETDEBUG(
- KERN_WARNING "overrun %sopts\n",
- nexthdr == NEXTHDR_HOP ?
- "hop" : "dest");
+ net_dbg_ratelimited("overrun %sopts\n",
+ nexthdr == NEXTHDR_HOP ?
+ "hop" : "dest");
return -EINVAL;
}
break;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 5c6996e..cc11396 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -893,8 +893,8 @@
break;
}
default:
- LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
- cmsg->cmsg_type);
+ net_dbg_ratelimited("invalid cmsg type: %d\n",
+ cmsg->cmsg_type);
err = -EINVAL;
goto exit_f;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index d21d7b2..d2c2d74 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -286,8 +286,8 @@
err = -EINVAL;
padlen = nexthdr[0];
if (padlen + 2 + alen >= elen) {
- LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage "
- "padlen=%d, elen=%d\n", padlen + 2, elen - alen);
+ net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
+ padlen + 2, elen - alen);
goto out;
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 601d896..a7bbbe4 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -184,7 +184,7 @@
int ret;
if (opt->dsthao) {
- LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
+ net_dbg_ratelimited("hao duplicated\n");
goto discard;
}
opt->dsthao = opt->dst1;
@@ -193,14 +193,14 @@
hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
if (hao->length != 16) {
- LIMIT_NETDEBUG(
- KERN_DEBUG "hao invalid option length = %d\n", hao->length);
+ net_dbg_ratelimited("hao invalid option length = %d\n",
+ hao->length);
goto discard;
}
if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
- LIMIT_NETDEBUG(
- KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr);
+ net_dbg_ratelimited("hao is not an unicast addr: %pI6\n",
+ &hao->addr);
goto discard;
}
@@ -551,8 +551,8 @@
memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra));
return true;
}
- LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
- nh[optoff + 1]);
+ net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n",
+ nh[optoff + 1]);
kfree_skb(skb);
return false;
}
@@ -566,8 +566,8 @@
u32 pkt_len;
if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
- LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
- nh[optoff+1]);
+ net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
+ nh[optoff+1]);
IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
IPSTATS_MIB_INHDRERRORS);
goto drop;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 62c1037..0929340 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -338,7 +338,7 @@
* anycast.
*/
if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
+ net_dbg_ratelimited("icmp6_send: acast source\n");
dst_release(dst);
return ERR_PTR(-EINVAL);
}
@@ -452,7 +452,7 @@
* and anycast addresses will be checked later.
*/
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
+ net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n");
return;
}
@@ -460,7 +460,7 @@
* Never answer to a ICMP packet.
*/
if (is_ineligible(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
+ net_dbg_ratelimited("icmp6_send: no reply to icmp error\n");
return;
}
@@ -509,7 +509,7 @@
len = skb->len - msg.offset;
len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
if (len < 0) {
- LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
+ net_dbg_ratelimited("icmp: len problem\n");
goto out_dst_release;
}
@@ -706,9 +706,8 @@
daddr = &ipv6_hdr(skb)->daddr;
if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
- LIMIT_NETDEBUG(KERN_DEBUG
- "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
- saddr, daddr);
+ net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
+ saddr, daddr);
goto csum_error;
}
@@ -781,7 +780,7 @@
if (type & ICMPV6_INFOMSG_MASK)
break;
- LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
+ net_dbg_ratelimited("icmpv6: msg of unknown type\n");
/*
* error of unknown type.
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index f61429d..b9779d4 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -97,16 +97,17 @@
return -1;
if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
- LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
- mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
+ net_dbg_ratelimited("mip6: MH message too short: %d vs >=%d\n",
+ mh->ip6mh_hdrlen,
+ mip6_mh_len(mh->ip6mh_type));
mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) +
skb_network_header_len(skb));
return -1;
}
if (mh->ip6mh_proto != IPPROTO_NONE) {
- LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
- mh->ip6mh_proto);
+ net_dbg_ratelimited("mip6: MH invalid payload proto = %d\n",
+ mh->ip6mh_proto);
mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) +
skb_network_header_len(skb));
return -1;
@@ -288,7 +289,7 @@
* XXX: packet if HAO exists.
*/
if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
- LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n");
+ net_dbg_ratelimited("mip6: hao exists already, override\n");
return offset;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 4cb45c1..2c9f6bf 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1763,7 +1763,7 @@
/*
* Initialize the neighbour table
*/
- neigh_table_init(&nd_tbl);
+ neigh_table_init(NEIGH_ND_TABLE, &nd_tbl);
#ifdef CONFIG_SYSCTL
err = neigh_sysctl_register(NULL, &nd_tbl.parms,
@@ -1796,6 +1796,6 @@
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(&nd_tbl.parms);
#endif
- neigh_table_clear(&nd_tbl);
+ neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl);
unregister_pernet_subsys(&ndisc_net_ops);
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d38e6a8..398377a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -36,7 +36,7 @@
err = dst->error;
if (err) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
- LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
+ net_dbg_ratelimited("ip6_route_me_harder: No more route\n");
dst_release(dst);
return err;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ace29b6..1985b49 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1293,6 +1293,7 @@
struct dst_entry *dst = sk->sk_rx_dst;
sock_rps_save_rxhash(sk, skb);
+ sk_mark_napi_id(sk, skb);
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
@@ -1322,6 +1323,7 @@
*/
if (nsk != sk) {
sock_rps_save_rxhash(nsk, skb);
+ sk_mark_napi_id(nsk, skb); /* mark nsk, the socket whose rxhash is saved above and that tcp_child_process() runs next */
if (tcp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb)
@@ -1454,7 +1456,7 @@
if (sk_filter(sk, skb))
goto discard_and_relse;
- sk_mark_napi_id(sk, skb);
+ sk_incoming_cpu_update(sk);
skb->dev = NULL;
bh_lock_sock_nested(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b756355..0ba3de4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -577,6 +577,7 @@
if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
+ sk_incoming_cpu_update(sk);
}
rc = sock_queue_rcv_skb(sk, skb);
@@ -659,15 +660,13 @@
if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
if (up->pcrlen == 0) { /* full coverage was set */
- LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
- " %d while full coverage %d requested\n",
- UDP_SKB_CB(skb)->cscov, skb->len);
+ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
+ UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
}
if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
- LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
- "too small, need min %d\n",
- UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
+ UDP_SKB_CB(skb)->cscov, up->pcrlen);
goto drop;
}
}
@@ -760,9 +759,9 @@
/* RFC 2460 section 8.1 says that we SHOULD log
* this error. Well, it is reasonable.
*/
- LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
- &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
- &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
+ net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
+ &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
+ &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
}
/*
@@ -930,14 +929,11 @@
return 0;
short_packet:
- LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
- proto == IPPROTO_UDPLITE ? "-Lite" : "",
- saddr,
- ntohs(uh->source),
- ulen,
- skb->len,
- daddr,
- ntohs(uh->dest));
+ net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
+ proto == IPPROTO_UDPLITE ? "-Lite" : "",
+ saddr, ntohs(uh->source),
+ ulen, skb->len,
+ daddr, ntohs(uh->dest));
goto discard;
csum_error:
UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
@@ -1289,7 +1285,7 @@
/* ... which is an evident application bug. --ANK */
release_sock(sk);
- LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
+ net_dbg_ratelimited("udp cork app bug 2\n");
err = -EINVAL;
goto out;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index f7e5891..394efa6 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -69,7 +69,7 @@
fifo->tail = 0;
}
-static bool action_fifo_is_empty(struct action_fifo *fifo)
+static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
return (fifo->head == fifo->tail);
}
@@ -92,7 +92,7 @@
/* Return true if fifo is not full */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
- struct sw_flow_key *key,
+ const struct sw_flow_key *key,
const struct nlattr *attr)
{
struct action_fifo *fifo;
@@ -109,6 +109,16 @@
return da;
}
+static void invalidate_flow_key(struct sw_flow_key *key)
+{
+ key->eth.type = htons(0);
+}
+
+static bool is_flow_key_valid(const struct sw_flow_key *key)
+{
+ return !!key->eth.type;
+}
+
static int make_writable(struct sk_buff *skb, int write_len)
{
if (!pskb_may_pull(skb, write_len))
@@ -120,7 +130,7 @@
return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}
-static int push_mpls(struct sk_buff *skb,
+static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_mpls *mpls)
{
__be32 *new_mpls_lse;
@@ -151,10 +161,12 @@
skb_set_inner_protocol(skb, skb->protocol);
skb->protocol = mpls->mpls_ethertype;
+ invalidate_flow_key(key);
return 0;
}
-static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
+static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
+ const __be16 ethertype)
{
struct ethhdr *hdr;
int err;
@@ -181,10 +193,13 @@
hdr->h_proto = ethertype;
if (eth_p_mpls(skb->protocol))
skb->protocol = ethertype;
+
+ invalidate_flow_key(key);
return 0;
}
-static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
+static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
+ const __be32 *mpls_lse)
{
__be32 *stack;
int err;
@@ -196,13 +211,12 @@
stack = (__be32 *)skb_mpls_header(skb);
if (skb->ip_summed == CHECKSUM_COMPLETE) {
__be32 diff[] = { ~(*stack), *mpls_lse };
-
skb->csum = ~csum_partial((char *)diff, sizeof(diff),
~skb->csum);
}
*stack = *mpls_lse;
-
+ key->mpls.top_lse = *mpls_lse;
return 0;
}
@@ -237,7 +251,7 @@
return 0;
}
-static int pop_vlan(struct sk_buff *skb)
+static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
__be16 tci;
int err;
@@ -255,9 +269,12 @@
}
/* move next vlan tag to hw accel tag */
if (likely(skb->protocol != htons(ETH_P_8021Q) ||
- skb->len < VLAN_ETH_HLEN))
+ skb->len < VLAN_ETH_HLEN)) {
+ key->eth.tci = 0;
return 0;
+ }
+ invalidate_flow_key(key);
err = __pop_vlan_tci(skb, &tci);
if (unlikely(err))
return err;
@@ -266,7 +283,8 @@
return 0;
}
-static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_action_push_vlan *vlan)
{
if (unlikely(vlan_tx_tag_present(skb))) {
u16 current_tag;
@@ -283,12 +301,15 @@
skb->csum = csum_add(skb->csum, csum_partial(skb->data
+ (2 * ETH_ALEN), VLAN_HLEN, 0));
+ invalidate_flow_key(key);
+ } else {
+ key->eth.tci = vlan->vlan_tci;
}
__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
return 0;
}
-static int set_eth_addr(struct sk_buff *skb,
+static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_key_ethernet *eth_key)
{
int err;
@@ -303,11 +324,13 @@
ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+ ether_addr_copy(key->eth.src, eth_key->eth_src);
+ ether_addr_copy(key->eth.dst, eth_key->eth_dst);
return 0;
}
static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
- __be32 *addr, __be32 new_addr)
+ __be32 *addr, __be32 new_addr)
{
int transport_len = skb->len - skb_transport_offset(skb);
@@ -386,7 +409,8 @@
nh->ttl = new_ttl;
}
-static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_ipv4 *ipv4_key)
{
struct iphdr *nh;
int err;
@@ -398,22 +422,31 @@
nh = ip_hdr(skb);
- if (ipv4_key->ipv4_src != nh->saddr)
+ if (ipv4_key->ipv4_src != nh->saddr) {
set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
+ key->ipv4.addr.src = ipv4_key->ipv4_src;
+ }
- if (ipv4_key->ipv4_dst != nh->daddr)
+ if (ipv4_key->ipv4_dst != nh->daddr) {
set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
+ key->ipv4.addr.dst = ipv4_key->ipv4_dst;
+ }
- if (ipv4_key->ipv4_tos != nh->tos)
+ if (ipv4_key->ipv4_tos != nh->tos) {
ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+ key->ip.tos = nh->tos;
+ }
- if (ipv4_key->ipv4_ttl != nh->ttl)
+ if (ipv4_key->ipv4_ttl != nh->ttl) {
set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
+ key->ip.ttl = ipv4_key->ipv4_ttl;
+ }
return 0;
}
-static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_ipv6 *ipv6_key)
{
struct ipv6hdr *nh;
int err;
@@ -429,9 +462,12 @@
saddr = (__be32 *)&nh->saddr;
daddr = (__be32 *)&nh->daddr;
- if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
+ if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
ipv6_key->ipv6_src, true);
+ memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src,
+ sizeof(ipv6_key->ipv6_src));
+ }
if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
unsigned int offset = 0;
@@ -445,12 +481,18 @@
set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
ipv6_key->ipv6_dst, recalc_csum);
+ memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst,
+ sizeof(ipv6_key->ipv6_dst));
}
set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
- set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
- nh->hop_limit = ipv6_key->ipv6_hlimit;
+ key->ip.tos = ipv6_get_dsfield(nh);
+ set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
+ key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+
+ nh->hop_limit = ipv6_key->ipv6_hlimit;
+ key->ip.ttl = ipv6_key->ipv6_hlimit;
return 0;
}
@@ -478,7 +520,8 @@
}
}
-static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
+static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_udp *udp_port_key)
{
struct udphdr *uh;
int err;
@@ -489,16 +532,21 @@
return err;
uh = udp_hdr(skb);
- if (udp_port_key->udp_src != uh->source)
+ if (udp_port_key->udp_src != uh->source) {
set_udp_port(skb, &uh->source, udp_port_key->udp_src);
+ key->tp.src = udp_port_key->udp_src;
+ }
- if (udp_port_key->udp_dst != uh->dest)
+ if (udp_port_key->udp_dst != uh->dest) {
set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
+ key->tp.dst = udp_port_key->udp_dst;
+ }
return 0;
}
-static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_tcp *tcp_port_key)
{
struct tcphdr *th;
int err;
@@ -509,17 +557,21 @@
return err;
th = tcp_hdr(skb);
- if (tcp_port_key->tcp_src != th->source)
+ if (tcp_port_key->tcp_src != th->source) {
set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
+ key->tp.src = tcp_port_key->tcp_src;
+ }
- if (tcp_port_key->tcp_dst != th->dest)
+ if (tcp_port_key->tcp_dst != th->dest) {
set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
+ key->tp.dst = tcp_port_key->tcp_dst;
+ }
return 0;
}
-static int set_sctp(struct sk_buff *skb,
- const struct ovs_key_sctp *sctp_port_key)
+static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_sctp *sctp_port_key)
{
struct sctphdr *sh;
int err;
@@ -546,6 +598,8 @@
sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
skb_clear_hash(skb);
+ key->tp.src = sctp_port_key->sctp_src;
+ key->tp.dst = sctp_port_key->sctp_dst;
}
return 0;
@@ -564,14 +618,15 @@
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr)
{
+ struct ovs_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
upcall.cmd = OVS_PACKET_CMD_ACTION;
- upcall.key = key;
upcall.userdata = NULL;
upcall.portid = 0;
+ upcall.egress_tun_info = NULL;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
@@ -583,10 +638,27 @@
case OVS_USERSPACE_ATTR_PID:
upcall.portid = nla_get_u32(a);
break;
+
+ case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
+ /* Get egress tunnel info. */
+ struct vport *vport;
+
+ vport = ovs_vport_rcu(dp, nla_get_u32(a));
+ if (vport) {
+ int err;
+
+ err = ovs_vport_get_egress_tun_info(vport, skb,
+ &info);
+ if (!err)
+ upcall.egress_tun_info = &info;
+ }
+ break;
}
+
+ } /* End of switch. */
}
- return ovs_dp_upcall(dp, skb, &upcall);
+ return ovs_dp_upcall(dp, skb, key, &upcall);
}
static int sample(struct datapath *dp, struct sk_buff *skb,
@@ -656,18 +728,20 @@
key->ovs_flow_hash = hash;
}
-static int execute_set_action(struct sk_buff *skb,
- const struct nlattr *nested_attr)
+static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct nlattr *nested_attr)
{
int err = 0;
switch (nla_type(nested_attr)) {
case OVS_KEY_ATTR_PRIORITY:
skb->priority = nla_get_u32(nested_attr);
+ key->phy.priority = skb->priority;
break;
case OVS_KEY_ATTR_SKB_MARK:
skb->mark = nla_get_u32(nested_attr);
+ key->phy.skb_mark = skb->mark;
break;
case OVS_KEY_ATTR_TUNNEL_INFO:
@@ -675,31 +749,31 @@
break;
case OVS_KEY_ATTR_ETHERNET:
- err = set_eth_addr(skb, nla_data(nested_attr));
+ err = set_eth_addr(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_IPV4:
- err = set_ipv4(skb, nla_data(nested_attr));
+ err = set_ipv4(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_IPV6:
- err = set_ipv6(skb, nla_data(nested_attr));
+ err = set_ipv6(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_TCP:
- err = set_tcp(skb, nla_data(nested_attr));
+ err = set_tcp(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_UDP:
- err = set_udp(skb, nla_data(nested_attr));
+ err = set_udp(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_SCTP:
- err = set_sctp(skb, nla_data(nested_attr));
+ err = set_sctp(skb, key, nla_data(nested_attr));
break;
case OVS_KEY_ATTR_MPLS:
- err = set_mpls(skb, nla_data(nested_attr));
+ err = set_mpls(skb, key, nla_data(nested_attr));
break;
}
@@ -711,11 +785,15 @@
const struct nlattr *a, int rem)
{
struct deferred_action *da;
- int err;
- err = ovs_flow_key_update(skb, key);
- if (err)
- return err;
+ if (!is_flow_key_valid(key)) {
+ int err;
+
+ err = ovs_flow_key_update(skb, key);
+ if (err)
+ return err;
+ }
+ BUG_ON(!is_flow_key_valid(key));
if (!nla_is_last(a, rem)) {
/* Recirc action is the not the last action
@@ -752,7 +830,8 @@
/* Every output action needs a separate clone of 'skb', but the common
* case is just a single output action, so that doing a clone and
* then freeing the original skbuff is wasteful. So the following code
- * is slightly obscure just to avoid that. */
+ * is slightly obscure just to avoid that.
+ */
int prev_port = -1;
const struct nlattr *a;
int rem;
@@ -784,21 +863,21 @@
break;
case OVS_ACTION_ATTR_PUSH_MPLS:
- err = push_mpls(skb, nla_data(a));
+ err = push_mpls(skb, key, nla_data(a));
break;
case OVS_ACTION_ATTR_POP_MPLS:
- err = pop_mpls(skb, nla_get_be16(a));
+ err = pop_mpls(skb, key, nla_get_be16(a));
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
- err = push_vlan(skb, nla_data(a));
+ err = push_vlan(skb, key, nla_data(a));
if (unlikely(err)) /* skb already freed. */
return err;
break;
case OVS_ACTION_ATTR_POP_VLAN:
- err = pop_vlan(skb);
+ err = pop_vlan(skb, key);
break;
case OVS_ACTION_ATTR_RECIRC:
@@ -813,7 +892,7 @@
break;
case OVS_ACTION_ATTR_SET:
- err = execute_set_action(skb, nla_data(a));
+ err = execute_set_action(skb, key, nla_data(a));
break;
case OVS_ACTION_ATTR_SAMPLE:
@@ -865,7 +944,8 @@
/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_actions *acts, struct sw_flow_key *key)
+ const struct sw_flow_actions *acts,
+ struct sw_flow_key *key)
{
int level = this_cpu_read(exec_actions_level);
int err;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 014485e..ab141d4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -59,7 +59,7 @@
#include "vport-netdev.h"
int ovs_net_id __read_mostly;
-EXPORT_SYMBOL(ovs_net_id);
+EXPORT_SYMBOL_GPL(ovs_net_id);
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
@@ -131,13 +131,15 @@
else
return 1;
}
-EXPORT_SYMBOL(lockdep_ovsl_is_held);
+EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
+ const struct sw_flow_key *,
const struct dp_upcall_info *);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
+ const struct sw_flow_key *,
const struct dp_upcall_info *);
/* Must be called with rcu_read_lock. */
@@ -176,7 +178,7 @@
return vport->ops->get_name(vport);
}
-static int get_dpifindex(struct datapath *dp)
+static int get_dpifindex(const struct datapath *dp)
{
struct vport *local;
int ifindex;
@@ -271,10 +273,10 @@
int error;
upcall.cmd = OVS_PACKET_CMD_MISS;
- upcall.key = key;
upcall.userdata = NULL;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
- error = ovs_dp_upcall(dp, skb, &upcall);
+ upcall.egress_tun_info = NULL;
+ error = ovs_dp_upcall(dp, skb, key, &upcall);
if (unlikely(error))
kfree_skb(skb);
else
@@ -298,6 +300,7 @@
}
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
+ const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
{
struct dp_stats_percpu *stats;
@@ -309,9 +312,9 @@
}
if (!skb_is_gso(skb))
- err = queue_userspace_packet(dp, skb, upcall_info);
+ err = queue_userspace_packet(dp, skb, key, upcall_info);
else
- err = queue_gso_packets(dp, skb, upcall_info);
+ err = queue_gso_packets(dp, skb, key, upcall_info);
if (err)
goto err;
@@ -328,39 +331,43 @@
}
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
+ const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
{
unsigned short gso_type = skb_shinfo(skb)->gso_type;
- struct dp_upcall_info later_info;
struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
+ struct ovs_skb_cb ovs_cb;
int err;
+ ovs_cb = *OVS_CB(skb);
segs = __skb_gso_segment(skb, NETIF_F_SG, false);
+ *OVS_CB(skb) = ovs_cb;
if (IS_ERR(segs))
return PTR_ERR(segs);
if (segs == NULL)
return -EINVAL;
+ if (gso_type & SKB_GSO_UDP) {
+ /* The initial flow key extracted by ovs_flow_key_extract()
+ * in this case is for a first fragment, so we need to
+ * properly mark later fragments.
+ */
+ later_key = *key;
+ later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+ }
+
/* Queue all of the segments. */
skb = segs;
do {
- err = queue_userspace_packet(dp, skb, upcall_info);
+ *OVS_CB(skb) = ovs_cb;
+ if (gso_type & SKB_GSO_UDP && skb != segs)
+ key = &later_key;
+
+ err = queue_userspace_packet(dp, skb, key, upcall_info);
if (err)
break;
- if (skb == segs && gso_type & SKB_GSO_UDP) {
- /* The initial flow key extracted by ovs_flow_extract()
- * in this case is for a first fragment, so we need to
- * properly mark later fragments.
- */
- later_key = *upcall_info->key;
- later_key.ip.frag = OVS_FRAG_TYPE_LATER;
-
- later_info = *upcall_info;
- later_info.key = &later_key;
- upcall_info = &later_info;
- }
} while ((skb = skb->next));
/* Free all of the segments. */
@@ -375,7 +382,7 @@
return err;
}
-static size_t upcall_msg_size(const struct nlattr *userdata,
+static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
unsigned int hdrlen)
{
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
@@ -383,13 +390,18 @@
+ nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
/* OVS_PACKET_ATTR_USERDATA */
- if (userdata)
- size += NLA_ALIGN(userdata->nla_len);
+ if (upcall_info->userdata)
+ size += NLA_ALIGN(upcall_info->userdata->nla_len);
+
+ /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
+ if (upcall_info->egress_tun_info)
+ size += nla_total_size(ovs_tun_key_attr_size());
return size;
}
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
+ const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
{
struct ovs_header *upcall;
@@ -440,7 +452,7 @@
else
hlen = skb->len;
- len = upcall_msg_size(upcall_info->userdata, hlen);
+ len = upcall_msg_size(upcall_info, hlen);
user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
@@ -452,7 +464,7 @@
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
+ err = ovs_nla_put_flow(key, key, user_skb);
BUG_ON(err);
nla_nest_end(user_skb, nla);
@@ -461,6 +473,14 @@
nla_len(upcall_info->userdata),
nla_data(upcall_info->userdata));
+ if (upcall_info->egress_tun_info) {
+ nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+ err = ovs_nla_put_egress_tunnel_key(user_skb,
+ upcall_info->egress_tun_info);
+ BUG_ON(err);
+ nla_nest_end(user_skb, nla);
+ }
+
/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy() */
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
@@ -506,6 +526,7 @@
struct vport *input_vport;
int len;
int err;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
err = -EINVAL;
if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -539,12 +560,12 @@
goto err_kfree_skb;
err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
- &flow->key);
+ &flow->key, log);
if (err)
goto err_flow_free;
err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
- &flow->key, &acts);
+ &flow->key, &acts, log);
if (err)
goto err_flow_free;
@@ -613,7 +634,7 @@
.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
};
-static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
+static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
struct ovs_dp_megaflow_stats *mega_stats)
{
int i;
@@ -835,15 +856,16 @@
struct sw_flow_actions *acts;
struct sw_flow_match match;
int error;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
/* Must have key and actions. */
error = -EINVAL;
if (!a[OVS_FLOW_ATTR_KEY]) {
- OVS_NLERR("Flow key attribute not present in new flow.\n");
+ OVS_NLERR(log, "Flow key attr not present in new flow.");
goto error;
}
if (!a[OVS_FLOW_ATTR_ACTIONS]) {
- OVS_NLERR("Flow actions attribute not present in new flow.\n");
+ OVS_NLERR(log, "Flow actions attr not present in new flow.");
goto error;
}
@@ -858,8 +880,8 @@
/* Extract key. */
ovs_match_init(&match, &new_flow->unmasked_key, &mask);
- error = ovs_nla_get_match(&match,
- a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+ error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto err_kfree_flow;
@@ -867,9 +889,9 @@
/* Validate actions. */
error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
- &acts);
+ &acts, log);
if (error) {
- OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+ OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
goto err_kfree_flow;
}
@@ -922,6 +944,7 @@
}
/* The unmasked key has to be the same for flow updates. */
if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
+ /* Look for any overlapping flow. */
flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
if (!flow) {
error = -ENOENT;
@@ -964,16 +987,18 @@
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
const struct sw_flow_key *key,
- const struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask,
+ bool log)
{
struct sw_flow_actions *acts;
struct sw_flow_key masked_key;
int error;
ovs_flow_mask_key(&masked_key, key, mask);
- error = ovs_nla_copy_actions(a, &masked_key, &acts);
+ error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
if (error) {
- OVS_NLERR("Actions may not be safe on all matching packets.\n");
+ OVS_NLERR(log,
+ "Actions may not be safe on all matching packets");
return ERR_PTR(error);
}
@@ -992,23 +1017,25 @@
struct sw_flow_actions *old_acts = NULL, *acts = NULL;
struct sw_flow_match match;
int error;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
/* Extract key. */
error = -EINVAL;
if (!a[OVS_FLOW_ATTR_KEY]) {
- OVS_NLERR("Flow key attribute not present in set flow.\n");
+ OVS_NLERR(log, "Flow key attribute not present in set flow.");
goto error;
}
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match,
- a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+ error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto error;
/* Validate actions. */
if (a[OVS_FLOW_ATTR_ACTIONS]) {
- acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask);
+ acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
+ log);
if (IS_ERR(acts)) {
error = PTR_ERR(acts);
goto error;
@@ -1089,14 +1116,16 @@
struct datapath *dp;
struct sw_flow_match match;
int err;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
if (!a[OVS_FLOW_ATTR_KEY]) {
- OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
+ OVS_NLERR(log,
+ "Flow get message rejected, Key attribute missing.");
return -EINVAL;
}
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log);
if (err)
return err;
@@ -1137,10 +1166,12 @@
struct datapath *dp;
struct sw_flow_match match;
int err;
+ bool log = !a[OVS_FLOW_ATTR_PROBE];
if (likely(a[OVS_FLOW_ATTR_KEY])) {
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
+ log);
if (unlikely(err))
return err;
}
@@ -1230,8 +1261,10 @@
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+ [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+ [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
};
static const struct genl_ops dp_flow_genl_ops[] = {
@@ -1332,7 +1365,7 @@
/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
- struct ovs_header *ovs_header,
+ const struct ovs_header *ovs_header,
struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
struct datapath *dp;
@@ -1360,7 +1393,7 @@
dp->user_features = 0;
}
-static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
+static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
if (a[OVS_DP_ATTR_USER_FEATURES])
dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
@@ -1724,7 +1757,7 @@
/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
- struct ovs_header *ovs_header,
+ const struct ovs_header *ovs_header,
struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
struct datapath *dp;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 1c56a80..3ece945 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -108,18 +108,18 @@
/**
* struct dp_upcall - metadata to include with a packet to send to userspace
* @cmd: One of %OVS_PACKET_CMD_*.
- * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
* @userdata: If nonnull, its variable-length value is passed to userspace as
* %OVS_PACKET_ATTR_USERDATA.
- * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no
- * packet is sent and the packet is accounted in the datapath's @n_lost
+ * @portid: Netlink portid to which packet should be sent. If @portid is 0
+ * then no packet is sent and the packet is accounted in the datapath's @n_lost
* counter.
+ * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
*/
struct dp_upcall_info {
- u8 cmd;
- const struct sw_flow_key *key;
+ const struct ovs_tunnel_info *egress_tun_info;
const struct nlattr *userdata;
u32 portid;
+ u8 cmd;
};
/**
@@ -149,7 +149,7 @@
#define rcu_dereference_ovsl(p) \
rcu_dereference_check(p, lockdep_ovsl_is_held())
-static inline struct net *ovs_dp_get_net(struct datapath *dp)
+static inline struct net *ovs_dp_get_net(const struct datapath *dp)
{
return read_pnet(&dp->net);
}
@@ -185,23 +185,23 @@
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
- const struct dp_upcall_info *);
+ const struct sw_flow_key *, const struct dp_upcall_info *);
const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_actions *acts, struct sw_flow_key *);
+ const struct sw_flow_actions *, struct sw_flow_key *);
void ovs_dp_notify_wq(struct work_struct *work);
int action_fifos_init(void);
void action_fifos_exit(void);
-#define OVS_NLERR(fmt, ...) \
+#define OVS_NLERR(logging_allowed, fmt, ...) \
do { \
- if (net_ratelimit()) \
- pr_info("netlink: " fmt, ##__VA_ARGS__); \
+ if ((logging_allowed) && net_ratelimit()) \
+ pr_info("netlink: " fmt "\n", ##__VA_ARGS__); \
} while (0)
#endif /* datapath.h */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 90a2101..70bef2a 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -66,7 +66,7 @@
#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct flow_stats *stats;
int node = numa_node_id();
@@ -679,7 +679,7 @@
return key_extract(skb, key);
}
-int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
+int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
/* Extract metadata from packet. */
@@ -712,12 +712,12 @@
int ovs_flow_key_extract_userspace(const struct nlattr *attr,
struct sk_buff *skb,
- struct sw_flow_key *key)
+ struct sw_flow_key *key, bool log)
{
int err;
/* Extract metadata from netlink attributes. */
- err = ovs_nla_get_flow_metadata(attr, key);
+ err = ovs_nla_get_flow_metadata(attr, key, log);
if (err)
return err;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 4962bee..a8b30f3 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -37,8 +37,8 @@
/* Used to memset ovs_key_ipv4_tunnel padding. */
#define OVS_TUNNEL_KEY_SIZE \
- (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \
- FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
+ (offsetof(struct ovs_key_ipv4_tunnel, tp_dst) + \
+ FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, tp_dst))
struct ovs_key_ipv4_tunnel {
__be64 tun_id;
@@ -47,11 +47,13 @@
__be16 tun_flags;
u8 ipv4_tos;
u8 ipv4_ttl;
+ __be16 tp_src;
+ __be16 tp_dst;
} __packed __aligned(4); /* Minimize padding. */
struct ovs_tunnel_info {
struct ovs_key_ipv4_tunnel tunnel;
- struct geneve_opt *options;
+ const struct geneve_opt *options;
u8 options_len;
};
@@ -64,27 +66,59 @@
FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
opt_len))
-static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
- const struct iphdr *iph,
- __be64 tun_id, __be16 tun_flags,
- struct geneve_opt *opts,
- u8 opts_len)
+static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
+ __be32 saddr, __be32 daddr,
+ u8 tos, u8 ttl,
+ __be16 tp_src,
+ __be16 tp_dst,
+ __be64 tun_id,
+ __be16 tun_flags,
+ const struct geneve_opt *opts,
+ u8 opts_len)
{
tun_info->tunnel.tun_id = tun_id;
- tun_info->tunnel.ipv4_src = iph->saddr;
- tun_info->tunnel.ipv4_dst = iph->daddr;
- tun_info->tunnel.ipv4_tos = iph->tos;
- tun_info->tunnel.ipv4_ttl = iph->ttl;
+ tun_info->tunnel.ipv4_src = saddr;
+ tun_info->tunnel.ipv4_dst = daddr;
+ tun_info->tunnel.ipv4_tos = tos;
+ tun_info->tunnel.ipv4_ttl = ttl;
tun_info->tunnel.tun_flags = tun_flags;
- /* clear struct padding. */
- memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
- sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
+ /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
+ * the upper tunnel are used.
+ * E.g., for GRE over IPsec, the tp_src and tp_dst are zero.
+ */
+ tun_info->tunnel.tp_src = tp_src;
+ tun_info->tunnel.tp_dst = tp_dst;
+
+ /* Clear struct padding. */
+ if (sizeof(tun_info->tunnel) != OVS_TUNNEL_KEY_SIZE)
+ memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE,
+ 0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
tun_info->options = opts;
tun_info->options_len = opts_len;
}
+static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
+ const struct iphdr *iph,
+ __be16 tp_src,
+ __be16 tp_dst,
+ __be64 tun_id,
+ __be16 tun_flags,
+ const struct geneve_opt *opts,
+ u8 opts_len)
+{
+ __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
+ iph->tos, iph->ttl,
+ tp_src, tp_dst,
+ tun_id, tun_flags,
+ opts, opts_len);
+}
+
+#define OVS_SW_FLOW_KEY_METADATA_SIZE \
+ (offsetof(struct sw_flow_key, recirc_id) + \
+ FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+
struct sw_flow_key {
u8 tun_opts[255];
u8 tun_opts_len;
@@ -210,18 +244,19 @@
} __packed;
void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
- struct sk_buff *);
+ const struct sk_buff *);
void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
unsigned long *used, __be16 *tcp_flags);
void ovs_flow_stats_clear(struct sw_flow *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
-int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, struct sk_buff *skb,
+int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
+ struct sk_buff *skb,
struct sw_flow_key *key);
/* Extract key from packet coming from userspace. */
int ovs_flow_key_extract_userspace(const struct nlattr *attr,
struct sk_buff *skb,
- struct sw_flow_key *key);
+ struct sw_flow_key *key, bool log);
#endif /* flow.h */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ed31097..c0d066d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -112,7 +112,7 @@
} while (0)
static bool match_validate(const struct sw_flow_match *match,
- u64 key_attrs, u64 mask_attrs)
+ u64 key_attrs, u64 mask_attrs, bool log)
{
u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
u64 mask_allowed = key_attrs; /* At most allow all key attributes */
@@ -230,21 +230,41 @@
if ((key_attrs & key_expected) != key_expected) {
/* Key attributes check failed. */
- OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
- (unsigned long long)key_attrs, (unsigned long long)key_expected);
+ OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
+ (unsigned long long)key_attrs,
+ (unsigned long long)key_expected);
return false;
}
if ((mask_attrs & mask_allowed) != mask_attrs) {
/* Mask attributes check failed. */
- OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
- (unsigned long long)mask_attrs, (unsigned long long)mask_allowed);
+ OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
+ (unsigned long long)mask_attrs,
+ (unsigned long long)mask_allowed);
return false;
}
return true;
}
+size_t ovs_tun_key_attr_size(void)
+{
+ /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
+ * updating this function.
+ */
+ return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
+ + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
+ + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
+ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
+ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+}
+
size_t ovs_key_attr_size(void)
{
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
@@ -254,15 +274,7 @@
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
- + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
- + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
- + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
- + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ + ovs_tun_key_attr_size()
+ nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
@@ -318,7 +330,7 @@
static int __parse_flow_nlattrs(const struct nlattr *attr,
const struct nlattr *a[],
- u64 *attrsp, bool nz)
+ u64 *attrsp, bool log, bool nz)
{
const struct nlattr *nla;
u64 attrs;
@@ -330,21 +342,20 @@
int expected_len;
if (type > OVS_KEY_ATTR_MAX) {
- OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
+ OVS_NLERR(log, "Key type %d is out of range max %d",
type, OVS_KEY_ATTR_MAX);
return -EINVAL;
}
if (attrs & (1 << type)) {
- OVS_NLERR("Duplicate key attribute (type %d).\n", type);
+ OVS_NLERR(log, "Duplicate key (type %d).", type);
return -EINVAL;
}
expected_len = ovs_key_lens[type];
if (nla_len(nla) != expected_len && expected_len != -1) {
- OVS_NLERR("Key attribute has unexpected length (type=%d"
- ", length=%d, expected=%d).\n", type,
- nla_len(nla), expected_len);
+ OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
+ type, nla_len(nla), expected_len);
return -EINVAL;
}
@@ -354,7 +365,7 @@
}
}
if (rem) {
- OVS_NLERR("Message has %d unknown bytes.\n", rem);
+ OVS_NLERR(log, "Message has %d unknown bytes.", rem);
return -EINVAL;
}
@@ -363,28 +374,84 @@
}
static int parse_flow_mask_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[], u64 *attrsp)
+ const struct nlattr *a[], u64 *attrsp,
+ bool log)
{
- return __parse_flow_nlattrs(attr, a, attrsp, true);
+ return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}
static int parse_flow_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[], u64 *attrsp)
+ const struct nlattr *a[], u64 *attrsp,
+ bool log)
{
- return __parse_flow_nlattrs(attr, a, attrsp, false);
+ return __parse_flow_nlattrs(attr, a, attrsp, log, false);
+}
+
+static int genev_tun_opt_from_nlattr(const struct nlattr *a,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
+{
+ unsigned long opt_key_offset;
+
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
+ nla_len(a), sizeof(match->key->tun_opts));
+ return -EINVAL;
+ }
+
+ if (nla_len(a) % 4 != 0) {
+ OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
+ nla_len(a));
+ return -EINVAL;
+ }
+
+ /* We need to record the length of the options passed
+ * down, otherwise packets with the same format but
+ * additional options will be silently matched.
+ */
+ if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
+ false);
+ } else {
+ /* This is somewhat unusual because it looks at
+ * both the key and mask while parsing the
+ * attributes (and by extension assumes the key
+ * is parsed first). Normally, we would verify
+ * that each is the correct length and that the
+ * attributes line up in the validate function.
+ * However, that is difficult because this is
+ * variable length and we won't have the
+ * information later.
+ */
+ if (match->key->tun_opts_len != nla_len(a)) {
+ OVS_NLERR(log, "Geneve option len %d != mask len %d",
+ match->key->tun_opts_len, nla_len(a));
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+ }
+
+ opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
+ nla_len(a));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
+ nla_len(a), is_mask);
+ return 0;
}
static int ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask)
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
{
struct nlattr *a;
int rem;
bool ttl = false;
__be16 tun_flags = 0;
- unsigned long opt_key_offset;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
+ int err;
+
static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
@@ -393,20 +460,21 @@
[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+ [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
+ [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
};
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
- OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
- type, OVS_TUNNEL_KEY_ATTR_MAX);
+ OVS_NLERR(log, "Tunnel attr %d out of range max %d",
+ type, OVS_TUNNEL_KEY_ATTR_MAX);
return -EINVAL;
}
if (ovs_tunnel_key_lens[type] != nla_len(a) &&
ovs_tunnel_key_lens[type] != -1) {
- OVS_NLERR("IPv4 tunnel attribute type has unexpected "
- " length (type=%d, length=%d, expected=%d).\n",
+ OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
type, nla_len(a), ovs_tunnel_key_lens[type]);
return -EINVAL;
}
@@ -440,62 +508,26 @@
case OVS_TUNNEL_KEY_ATTR_CSUM:
tun_flags |= TUNNEL_CSUM;
break;
+ case OVS_TUNNEL_KEY_ATTR_TP_SRC:
+ SW_FLOW_KEY_PUT(match, tun_key.tp_src,
+ nla_get_be16(a), is_mask);
+ break;
+ case OVS_TUNNEL_KEY_ATTR_TP_DST:
+ SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
+ nla_get_be16(a), is_mask);
+ break;
case OVS_TUNNEL_KEY_ATTR_OAM:
tun_flags |= TUNNEL_OAM;
break;
case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+ err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
+ if (err)
+ return err;
+
tun_flags |= TUNNEL_OPTIONS_PRESENT;
- if (nla_len(a) > sizeof(match->key->tun_opts)) {
- OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n",
- nla_len(a),
- sizeof(match->key->tun_opts));
- return -EINVAL;
- }
-
- if (nla_len(a) % 4 != 0) {
- OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n",
- nla_len(a));
- return -EINVAL;
- }
-
- /* We need to record the length of the options passed
- * down, otherwise packets with the same format but
- * additional options will be silently matched.
- */
- if (!is_mask) {
- SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
- false);
- } else {
- /* This is somewhat unusual because it looks at
- * both the key and mask while parsing the
- * attributes (and by extension assumes the key
- * is parsed first). Normally, we would verify
- * that each is the correct length and that the
- * attributes line up in the validate function.
- * However, that is difficult because this is
- * variable length and we won't have the
- * information later.
- */
- if (match->key->tun_opts_len != nla_len(a)) {
- OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).",
- match->key->tun_opts_len,
- nla_len(a));
- return -EINVAL;
- }
-
- SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
- true);
- }
-
- opt_key_offset = (unsigned long)GENEVE_OPTS(
- (struct sw_flow_key *)0,
- nla_len(a));
- SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset,
- nla_data(a), nla_len(a),
- is_mask);
break;
default:
- OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n",
+ OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
type);
return -EINVAL;
}
@@ -504,18 +536,19 @@
SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
if (rem > 0) {
- OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
+ OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
+ rem);
return -EINVAL;
}
if (!is_mask) {
if (!match->key->tun_key.ipv4_dst) {
- OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+ OVS_NLERR(log, "IPv4 tunnel dst address is zero");
return -EINVAL;
}
if (!ttl) {
- OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+ OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
return -EINVAL;
}
}
@@ -548,6 +581,12 @@
if ((output->tun_flags & TUNNEL_CSUM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
+ if (output->tp_src &&
+ nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
+ return -EMSGSIZE;
+ if (output->tp_dst &&
+ nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
+ return -EMSGSIZE;
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
@@ -559,7 +598,6 @@
return 0;
}
-
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
const struct ovs_key_ipv4_tunnel *output,
const struct geneve_opt *tun_opts,
@@ -580,8 +618,17 @@
return 0;
}
+int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
+ const struct ovs_tunnel_info *egress_tun_info)
+{
+ return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
+ egress_tun_info->options,
+ egress_tun_info->options_len);
+}
+
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
- const struct nlattr **a, bool is_mask)
+ const struct nlattr **a, bool is_mask,
+ bool log)
{
if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
@@ -609,7 +656,7 @@
if (is_mask) {
in_port = 0xffffffff; /* Always exact match in_port. */
} else if (in_port >= DP_MAX_PORTS) {
- OVS_NLERR("Port (%d) exceeds maximum allowable (%d).\n",
+ OVS_NLERR(log, "Port %d exceeds max allowable %d",
in_port, DP_MAX_PORTS);
return -EINVAL;
}
@@ -628,7 +675,7 @@
}
if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
- is_mask))
+ is_mask, log))
return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
@@ -636,11 +683,12 @@
}
static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
- const struct nlattr **a, bool is_mask)
+ const struct nlattr **a, bool is_mask,
+ bool log)
{
int err;
- err = metadata_from_nlattrs(match, &attrs, a, is_mask);
+ err = metadata_from_nlattrs(match, &attrs, a, is_mask, log);
if (err)
return err;
@@ -661,9 +709,9 @@
tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
if (!(tci & htons(VLAN_TAG_PRESENT))) {
if (is_mask)
- OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
+ OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.");
else
- OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
+ OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set.");
return -EINVAL;
}
@@ -680,8 +728,8 @@
/* Always exact match EtherType. */
eth_type = htons(0xffff);
} else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
- OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
- ntohs(eth_type), ETH_P_802_3_MIN);
+ OVS_NLERR(log, "EtherType %x is less than min %x",
+ ntohs(eth_type), ETH_P_802_3_MIN);
return -EINVAL;
}
@@ -696,8 +744,8 @@
ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
- OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
- ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
+ OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
+ ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
return -EINVAL;
}
SW_FLOW_KEY_PUT(match, ip.proto,
@@ -720,8 +768,8 @@
ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
- OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
- ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
+ OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
+ ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
return -EINVAL;
}
SW_FLOW_KEY_PUT(match, ipv6.label,
@@ -751,7 +799,7 @@
arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
- OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
+ OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
arp_key->arp_op);
return -EINVAL;
}
@@ -852,7 +900,7 @@
}
if (attrs != 0) {
- OVS_NLERR("Unknown key attributes (%llx).\n",
+ OVS_NLERR(log, "Unknown key attributes %llx",
(unsigned long long)attrs);
return -EINVAL;
}
@@ -893,10 +941,14 @@
* of this flow.
* @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
* attribute specifies the mask field of the wildcarded flow.
+ * @log: Boolean to allow kernel error logging. Normally true, but when
+ * probing for feature compatibility this should be passed in as false to
+ * suppress unnecessary error logging.
*/
int ovs_nla_get_match(struct sw_flow_match *match,
const struct nlattr *nla_key,
- const struct nlattr *nla_mask)
+ const struct nlattr *nla_mask,
+ bool log)
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
const struct nlattr *encap;
@@ -906,7 +958,7 @@
bool encap_valid = false;
int err;
- err = parse_flow_nlattrs(nla_key, a, &key_attrs);
+ err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
if (err)
return err;
@@ -917,7 +969,7 @@
if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
(key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
- OVS_NLERR("Invalid Vlan frame.\n");
+ OVS_NLERR(log, "Invalid Vlan frame.");
return -EINVAL;
}
@@ -928,22 +980,22 @@
encap_valid = true;
if (tci & htons(VLAN_TAG_PRESENT)) {
- err = parse_flow_nlattrs(encap, a, &key_attrs);
+ err = parse_flow_nlattrs(encap, a, &key_attrs, log);
if (err)
return err;
} else if (!tci) {
/* Corner case for truncated 802.1Q header. */
if (nla_len(encap)) {
- OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
+ OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
return -EINVAL;
}
} else {
- OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
+ OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
return -EINVAL;
}
}
- err = ovs_key_from_nlattrs(match, key_attrs, a, false);
+ err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
if (err)
return err;
@@ -977,7 +1029,7 @@
nla_mask = newmask;
}
- err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs);
+ err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
if (err)
goto free_newmask;
@@ -989,7 +1041,7 @@
__be16 tci = 0;
if (!encap_valid) {
- OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
+ OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
err = -EINVAL;
goto free_newmask;
}
@@ -1001,12 +1053,13 @@
if (eth_type == htons(0xffff)) {
mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
encap = a[OVS_KEY_ATTR_ENCAP];
- err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
+ err = parse_flow_mask_nlattrs(encap, a,
+ &mask_attrs, log);
if (err)
goto free_newmask;
} else {
- OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
- ntohs(eth_type));
+ OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
+ ntohs(eth_type));
err = -EINVAL;
goto free_newmask;
}
@@ -1015,18 +1068,19 @@
tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
if (!(tci & htons(VLAN_TAG_PRESENT))) {
- OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
+ OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
+ ntohs(tci));
err = -EINVAL;
goto free_newmask;
}
}
- err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
+ err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
if (err)
goto free_newmask;
}
- if (!match_validate(match, key_attrs, mask_attrs))
+ if (!match_validate(match, key_attrs, mask_attrs, log))
err = -EINVAL;
free_newmask:
@@ -1039,6 +1093,9 @@
* @key: Receives extracted in_port, priority, tun_key and skb_mark.
* @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence.
+ * @log: Boolean to allow kernel error logging. Normally true, but when
+ * probing for feature compatibility this should be passed in as false to
+ * suppress unnecessary error logging.
*
* This parses a series of Netlink attributes that form a flow key, which must
* take the same form accepted by flow_from_nlattrs(), but only enough of it to
@@ -1047,14 +1104,15 @@
*/
int ovs_nla_get_flow_metadata(const struct nlattr *attr,
- struct sw_flow_key *key)
+ struct sw_flow_key *key,
+ bool log)
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
struct sw_flow_match match;
u64 attrs = 0;
int err;
- err = parse_flow_nlattrs(attr, a, &attrs);
+ err = parse_flow_nlattrs(attr, a, &attrs, log);
if (err)
return -EINVAL;
@@ -1063,7 +1121,7 @@
key->phy.in_port = DP_MAX_PORTS;
- return metadata_from_nlattrs(&match, &attrs, a, false);
+ return metadata_from_nlattrs(&match, &attrs, a, false, log);
}
int ovs_nla_put_flow(const struct sw_flow_key *swkey,
@@ -1283,12 +1341,12 @@
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-static struct sw_flow_actions *nla_alloc_flow_actions(int size)
+static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
{
struct sw_flow_actions *sfa;
if (size > MAX_ACTIONS_BUFSIZE) {
- OVS_NLERR("Flow action size (%u bytes) exceeds maximum", size);
+ OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
return ERR_PTR(-EINVAL);
}
@@ -1308,7 +1366,7 @@
}
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
- int attr_len)
+ int attr_len, bool log)
{
struct sw_flow_actions *acts;
@@ -1328,7 +1386,7 @@
new_acts_size = MAX_ACTIONS_BUFSIZE;
}
- acts = nla_alloc_flow_actions(new_acts_size);
+ acts = nla_alloc_flow_actions(new_acts_size, log);
if (IS_ERR(acts))
return (void *)acts;
@@ -1343,11 +1401,11 @@
}
static struct nlattr *__add_action(struct sw_flow_actions **sfa,
- int attrtype, void *data, int len)
+ int attrtype, void *data, int len, bool log)
{
struct nlattr *a;
- a = reserve_sfa_size(sfa, nla_attr_size(len));
+ a = reserve_sfa_size(sfa, nla_attr_size(len), log);
if (IS_ERR(a))
return a;
@@ -1362,11 +1420,11 @@
}
static int add_action(struct sw_flow_actions **sfa, int attrtype,
- void *data, int len)
+ void *data, int len, bool log)
{
struct nlattr *a;
- a = __add_action(sfa, attrtype, data, len);
+ a = __add_action(sfa, attrtype, data, len, log);
if (IS_ERR(a))
return PTR_ERR(a);
@@ -1374,12 +1432,12 @@
}
static inline int add_nested_action_start(struct sw_flow_actions **sfa,
- int attrtype)
+ int attrtype, bool log)
{
int used = (*sfa)->actions_len;
int err;
- err = add_action(sfa, attrtype, NULL, 0);
+ err = add_action(sfa, attrtype, NULL, 0, log);
if (err)
return err;
@@ -1398,12 +1456,12 @@
static int __ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci);
+ __be16 eth_type, __be16 vlan_tci, bool log);
static int validate_and_copy_sample(const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci)
+ __be16 eth_type, __be16 vlan_tci, bool log)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -1429,19 +1487,19 @@
return -EINVAL;
/* validation done, copy sample action. */
- start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
+ start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
- nla_data(probability), sizeof(u32));
+ nla_data(probability), sizeof(u32), log);
if (err)
return err;
- st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
+ st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
if (st_acts < 0)
return st_acts;
err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
- eth_type, vlan_tci);
+ eth_type, vlan_tci, log);
if (err)
return err;
@@ -1478,7 +1536,7 @@
}
static int validate_and_copy_set_tun(const struct nlattr *attr,
- struct sw_flow_actions **sfa)
+ struct sw_flow_actions **sfa, bool log)
{
struct sw_flow_match match;
struct sw_flow_key key;
@@ -1487,7 +1545,7 @@
int err, start;
ovs_match_init(&match, &key, NULL);
- err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
+ err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
if (err)
return err;
@@ -1516,12 +1574,12 @@
key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
};
- start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
+ start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
if (start < 0)
return start;
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
- sizeof(*tun_info) + key.tun_opts_len);
+ sizeof(*tun_info) + key.tun_opts_len, log);
if (IS_ERR(a))
return PTR_ERR(a);
@@ -1549,7 +1607,7 @@
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
- bool *set_tun, __be16 eth_type)
+ bool *set_tun, __be16 eth_type, bool log)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
@@ -1578,7 +1636,7 @@
return -EINVAL;
*set_tun = true;
- err = validate_and_copy_set_tun(a, sfa);
+ err = validate_and_copy_set_tun(a, sfa, log);
if (err)
return err;
break;
@@ -1653,6 +1711,7 @@
static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
+ [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
};
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
@@ -1670,12 +1729,12 @@
}
static int copy_action(const struct nlattr *from,
- struct sw_flow_actions **sfa)
+ struct sw_flow_actions **sfa, bool log)
{
int totlen = NLA_ALIGN(from->nla_len);
struct nlattr *to;
- to = reserve_sfa_size(sfa, from->nla_len);
+ to = reserve_sfa_size(sfa, from->nla_len, log);
if (IS_ERR(to))
return PTR_ERR(to);
@@ -1686,7 +1745,7 @@
static int __ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci)
+ __be16 eth_type, __be16 vlan_tci, bool log)
{
const struct nlattr *a;
bool out_tnl_port = false;
@@ -1809,7 +1868,7 @@
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa,
- &out_tnl_port, eth_type);
+ &out_tnl_port, eth_type, log);
if (err)
return err;
@@ -1818,18 +1877,18 @@
case OVS_ACTION_ATTR_SAMPLE:
err = validate_and_copy_sample(a, key, depth, sfa,
- eth_type, vlan_tci);
+ eth_type, vlan_tci, log);
if (err)
return err;
skip_copy = true;
break;
default:
- OVS_NLERR("Unknown tunnel attribute (%d).\n", type);
+ OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
}
if (!skip_copy) {
- err = copy_action(a, sfa);
+ err = copy_action(a, sfa, log);
if (err)
return err;
}
@@ -1843,16 +1902,16 @@
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
- struct sw_flow_actions **sfa)
+ struct sw_flow_actions **sfa, bool log)
{
int err;
- *sfa = nla_alloc_flow_actions(nla_len(attr));
+ *sfa = nla_alloc_flow_actions(nla_len(attr), log);
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
- key->eth.tci);
+ key->eth.tci, log);
if (err)
kfree(*sfa);
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index eb0b177..577f12b 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -37,6 +37,7 @@
#include "flow.h"
+size_t ovs_tun_key_attr_size(void);
size_t ovs_key_attr_size(void);
void ovs_match_init(struct sw_flow_match *match,
@@ -44,15 +45,17 @@
int ovs_nla_put_flow(const struct sw_flow_key *,
const struct sw_flow_key *, struct sk_buff *);
-int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *);
+int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *,
+ bool log);
-int ovs_nla_get_match(struct sw_flow_match *match,
- const struct nlattr *,
- const struct nlattr *);
+int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
+ const struct nlattr *mask, bool log);
+int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
+ const struct ovs_tunnel_info *);
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
- struct sw_flow_actions **sfa);
+ struct sw_flow_actions **sfa, bool log);
int ovs_nla_put_actions(const struct nlattr *attr,
int len, struct sk_buff *skb);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 90f8b40..e0a7fef 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -107,7 +107,7 @@
return ERR_PTR(-ENOMEM);
}
-int ovs_flow_tbl_count(struct flow_table *table)
+int ovs_flow_tbl_count(const struct flow_table *table)
{
return table->count;
}
@@ -401,7 +401,7 @@
}
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- struct sw_flow_match *match)
+ const struct sw_flow_match *match)
{
struct sw_flow_key *key = match->key;
int key_start = flow_key_start(key);
@@ -412,7 +412,7 @@
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
const struct sw_flow_key *unmasked,
- struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask)
{
struct sw_flow *flow;
struct hlist_head *head;
@@ -460,7 +460,7 @@
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
- struct sw_flow_match *match)
+ const struct sw_flow_match *match)
{
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct sw_flow_mask *mask;
@@ -563,7 +563,7 @@
/* Add 'mask' into the mask list, if it is not already there. */
static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
- struct sw_flow_mask *new)
+ const struct sw_flow_mask *new)
{
struct sw_flow_mask *mask;
mask = flow_mask_find(tbl, new);
@@ -586,7 +586,7 @@
/* Must be called with OVS mutex held. */
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask)
{
struct table_instance *new_ti = NULL;
struct table_instance *ti;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index f682c8c..309fa64 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -61,12 +61,12 @@
void ovs_flow_free(struct sw_flow *, bool deferred);
int ovs_flow_tbl_init(struct flow_table *);
-int ovs_flow_tbl_count(struct flow_table *table);
+int ovs_flow_tbl_count(const struct flow_table *table);
void ovs_flow_tbl_destroy(struct flow_table *table);
int ovs_flow_tbl_flush(struct flow_table *flow_table);
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- struct sw_flow_mask *mask);
+ const struct sw_flow_mask *mask);
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
int ovs_flow_tbl_num_masks(const struct flow_table *table);
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
@@ -77,9 +77,9 @@
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
- struct sw_flow_match *match);
+ const struct sw_flow_match *match);
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
- struct sw_flow_match *match);
+ const struct sw_flow_match *match);
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 70c9765..347fa23 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -68,7 +68,7 @@
}
/* Convert 24 bit VNI to 64 bit tunnel ID. */
-static __be64 vni_to_tunnel_id(__u8 *vni)
+static __be64 vni_to_tunnel_id(const __u8 *vni)
{
#ifdef __BIG_ENDIAN
return (vni[0] << 16) | (vni[1] << 8) | vni[2];
@@ -97,7 +97,9 @@
key = vni_to_tunnel_id(geneveh->vni);
- ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags,
+ ovs_flow_tun_info_init(&tun_info, ip_hdr(skb),
+ udp_hdr(skb)->source, udp_hdr(skb)->dest,
+ key, flags,
geneveh->options, opts_len);
ovs_vport_receive(vport, skb, &tun_info);
@@ -228,6 +230,22 @@
return geneve_port->name;
}
+static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ovs_tunnel_info *egress_tun_info)
+{
+ struct geneve_port *geneve_port = geneve_vport(vport);
+ struct net *net = ovs_dp_get_net(vport->dp);
+ __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
+ __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
+
+ /* Get tp_src and tp_dst, refer to geneve_build_header().
+ */
+ return ovs_tunnel_get_egress_info(egress_tun_info,
+ ovs_dp_get_net(vport->dp),
+ OVS_CB(skb)->egress_tun_info,
+ IPPROTO_UDP, skb->mark, sport, dport);
+}
+
static struct vport_ops ovs_geneve_vport_ops = {
.type = OVS_VPORT_TYPE_GENEVE,
.create = geneve_tnl_create,
@@ -236,6 +254,7 @@
.get_options = geneve_get_options,
.send = geneve_tnl_send,
.owner = THIS_MODULE,
+ .get_egress_tun_info = geneve_get_egress_tun_info,
};
static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 00270b6..8e61a5c 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -108,7 +108,7 @@
return PACKET_REJECT;
key = key_to_tunnel_id(tpi->key, tpi->seq);
- ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
+ ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key,
filter_tnl_flags(tpi->flags), NULL, 0);
ovs_vport_receive(vport, skb, &tun_info);
@@ -284,12 +284,22 @@
gre_exit();
}
+static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ovs_tunnel_info *egress_tun_info)
+{
+ return ovs_tunnel_get_egress_info(egress_tun_info,
+ ovs_dp_get_net(vport->dp),
+ OVS_CB(skb)->egress_tun_info,
+ IPPROTO_GRE, skb->mark, 0, 0);
+}
+
static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
.destroy = gre_tnl_destroy,
.get_name = gre_get_name,
.send = gre_tnl_send,
+ .get_egress_tun_info = gre_get_egress_tun_info,
.owner = THIS_MODULE,
};
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 877ee74..4776282 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -77,7 +77,7 @@
return RX_HANDLER_CONSUMED;
}
-static struct net_device *get_dpdev(struct datapath *dp)
+static struct net_device *get_dpdev(const struct datapath *dp)
{
struct vport *local;
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 965e750..38f95a5 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -69,7 +69,9 @@
/* Save outer tunnel values */
iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(vx_vni) >> 8);
- ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
+ ovs_flow_tun_info_init(&tun_info, iph,
+ udp_hdr(skb)->source, udp_hdr(skb)->dest,
+ key, TUNNEL_KEY, NULL, 0);
ovs_vport_receive(vport, skb, &tun_info);
}
@@ -189,6 +191,25 @@
return err;
}
+/* Report the egress tunnel info for @skb when sent through a VXLAN vport.
+ *
+ * The destination port is read from the local port of the vport's VXLAN
+ * socket; the source port is derived from the packet's flow by
+ * udp_flow_src_port().  Returns whatever ovs_tunnel_get_egress_info()
+ * returns.
+ */
+static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+				     struct ovs_tunnel_info *egress_tun_info)
+{
+	struct net *net = ovs_dp_get_net(vport->dp);
+	struct vxlan_port *vxlan_port = vxlan_vport(vport);
+	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+	__be16 src_port;
+
+	/* Passing min == max == 0 makes udp_flow_src_port() fall back to the
+	 * namespace's local port range on its own, so no separate
+	 * inet_get_local_port_range() call is needed here.
+	 */
+	src_port = udp_flow_src_port(net, skb, 0, 0, true);
+
+	return ovs_tunnel_get_egress_info(egress_tun_info, net,
+					  OVS_CB(skb)->egress_tun_info,
+					  IPPROTO_UDP, skb->mark,
+					  src_port, dst_port);
+}
+
static const char *vxlan_get_name(const struct vport *vport)
{
struct vxlan_port *vxlan_port = vxlan_vport(vport);
@@ -202,6 +223,7 @@
.get_name = vxlan_get_name,
.get_options = vxlan_get_options,
.send = vxlan_tnl_send,
+ .get_egress_tun_info = vxlan_get_egress_tun_info,
.owner = THIS_MODULE,
};
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 8168ef0..e771a46 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -68,7 +68,7 @@
kfree(dev_table);
}
-static struct hlist_head *hash_bucket(struct net *net, const char *name)
+static struct hlist_head *hash_bucket(const struct net *net, const char *name)
{
unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
@@ -90,7 +90,7 @@
ovs_unlock();
return err;
}
-EXPORT_SYMBOL(ovs_vport_ops_register);
+EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
void ovs_vport_ops_unregister(struct vport_ops *ops)
{
@@ -98,7 +98,7 @@
list_del(&ops->list);
ovs_unlock();
}
-EXPORT_SYMBOL(ovs_vport_ops_unregister);
+EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
/**
* ovs_vport_locate - find a port that has already been created
@@ -107,7 +107,7 @@
*
* Must be called with ovs or RCU read lock.
*/
-struct vport *ovs_vport_locate(struct net *net, const char *name)
+struct vport *ovs_vport_locate(const struct net *net, const char *name)
{
struct hlist_head *bucket = hash_bucket(net, name);
struct vport *vport;
@@ -165,7 +165,7 @@
return vport;
}
-EXPORT_SYMBOL(ovs_vport_alloc);
+EXPORT_SYMBOL_GPL(ovs_vport_alloc);
/**
* ovs_vport_free - uninitialize and free vport
@@ -186,7 +186,7 @@
free_percpu(vport->percpu_stats);
kfree(vport);
}
-EXPORT_SYMBOL(ovs_vport_free);
+EXPORT_SYMBOL_GPL(ovs_vport_free);
static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
{
@@ -380,7 +380,7 @@
*
* Must be called with ovs_mutex.
*/
-int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids)
+int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
{
struct vport_portids *old, *vport_portids;
@@ -471,7 +471,7 @@
* skb->data should point to the Ethernet header.
*/
void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
- struct ovs_tunnel_info *tun_info)
+ const struct ovs_tunnel_info *tun_info)
{
struct pcpu_sw_netstats *stats;
struct sw_flow_key key;
@@ -493,7 +493,7 @@
}
ovs_dp_process_packet(skb, &key);
}
-EXPORT_SYMBOL(ovs_vport_receive);
+EXPORT_SYMBOL_GPL(ovs_vport_receive);
/**
* ovs_vport_send - send a packet on a device
@@ -572,4 +572,65 @@
call_rcu(&vport->rcu, free_vport_rcu);
}
-EXPORT_SYMBOL(ovs_vport_deferred_free);
+EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
+
+/**
+ * ovs_tunnel_get_egress_info - build the egress tunnel info for a packet
+ * @egress_tun_info: output; filled in on success
+ * @net: network namespace in which the route lookup is done
+ * @tun_info: tunnel info the packet is to be sent with; may be NULL
+ * @ipproto: IP protocol of the encapsulation, used for the route lookup
+ * @skb_mark: packet mark, used for the route lookup
+ * @tp_src: transport source port to record in @egress_tun_info
+ * @tp_dst: transport destination port to record in @egress_tun_info
+ *
+ * Returns 0 on success, -EINVAL if @tun_info is NULL, or the error
+ * returned by the route lookup.
+ */
+int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
+			       struct net *net,
+			       const struct ovs_tunnel_info *tun_info,
+			       u8 ipproto,
+			       u32 skb_mark,
+			       __be16 tp_src,
+			       __be16 tp_dst)
+{
+	const struct ovs_key_ipv4_tunnel *tun_key;
+	struct rtable *rt;
+	struct flowi4 fl;
+
+	if (unlikely(!tun_info))
+		return -EINVAL;
+
+	tun_key = &tun_info->tunnel;
+
+	/* Route lookup to get the source IP address.
+	 * This has to stay in step with the lookup done on the send path of
+	 * the vport ops; if that changes, this may need to change as well.
+	 */
+	memset(&fl, 0, sizeof(fl));
+	fl.daddr = tun_key->ipv4_dst;
+	fl.saddr = tun_key->ipv4_src;
+	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
+	fl.flowi4_mark = skb_mark;
+	/* Use the caller's encapsulation protocol rather than assuming GRE,
+	 * so UDP-based tunnels are looked up with the right protocol.
+	 */
+	fl.flowi4_proto = ipproto;
+
+	rt = ip_route_output_key(net, &fl);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+
+	/* Only the source address left in fl is used below; the route itself
+	 * is released right away.
+	 */
+	ip_rt_put(rt);
+
+	/* Generate egress_tun_info based on tun_info,
+	 * saddr, tp_src and tp_dst
+	 */
+	__ovs_flow_tun_info_init(egress_tun_info,
+				 fl.saddr, tun_key->ipv4_dst,
+				 tun_key->ipv4_tos,
+				 tun_key->ipv4_ttl,
+				 tp_src, tp_dst,
+				 tun_key->tun_id,
+				 tun_key->tun_flags,
+				 tun_info->options,
+				 tun_info->options_len);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
+
+/* Ask @vport's type-specific ops for the egress tunnel info of @skb.
+ *
+ * Returns -EINVAL when the vport type provides no get_egress_tun_info()
+ * hook, otherwise the hook's return value.
+ */
+int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+				  struct ovs_tunnel_info *info)
+{
+	const struct vport_ops *ops = vport->ops;
+
+	/* get_egress_tun_info() is only implemented on tunnel ports. */
+	if (likely(ops->get_egress_tun_info))
+		return ops->get_egress_tun_info(vport, skb, info);
+
+	return -EINVAL;
+}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index e41c3fa..99c8e71 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -45,19 +45,29 @@
struct vport *ovs_vport_add(const struct vport_parms *);
void ovs_vport_del(struct vport *);
-struct vport *ovs_vport_locate(struct net *net, const char *name);
+struct vport *ovs_vport_locate(const struct net *net, const char *name);
void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
int ovs_vport_set_options(struct vport *, struct nlattr *options);
int ovs_vport_get_options(const struct vport *, struct sk_buff *);
-int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids);
+int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
int ovs_vport_send(struct vport *, struct sk_buff *);
+int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
+ struct net *net,
+ const struct ovs_tunnel_info *tun_info,
+ u8 ipproto,
+ u32 skb_mark,
+ __be16 tp_src,
+ __be16 tp_dst);
+int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ovs_tunnel_info *info);
+
/* The following definitions are for implementers of vport devices: */
struct vport_err_stats {
@@ -146,6 +156,8 @@
* @get_name: Get the device's name.
* @send: Send a packet on the device. Returns the length of the packet sent,
* zero for dropped packets or negative for error.
+ * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
+ * a packet.
*/
struct vport_ops {
enum ovs_vport_type type;
@@ -161,6 +173,8 @@
const char *(*get_name)(const struct vport *);
int (*send)(struct vport *, struct sk_buff *);
+ int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
+ struct ovs_tunnel_info *);
struct module *owner;
struct list_head list;
@@ -210,7 +224,7 @@
}
void ovs_vport_receive(struct vport *, struct sk_buff *,
- struct ovs_tunnel_info *);
+ const struct ovs_tunnel_info *);
static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
const void *start, unsigned int len)
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 5a940db..32ab87d 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -426,16 +426,17 @@
out_dev = phonet_route_output(net, pn_sockaddr_get_addr(&sa));
if (!out_dev) {
- LIMIT_NETDEBUG(KERN_WARNING"No Phonet route to %02X\n",
- pn_sockaddr_get_addr(&sa));
+ net_dbg_ratelimited("No Phonet route to %02X\n",
+ pn_sockaddr_get_addr(&sa));
goto out;
}
__skb_push(skb, sizeof(struct phonethdr));
skb->dev = out_dev;
if (out_dev == dev) {
- LIMIT_NETDEBUG(KERN_ERR"Phonet loop to %02X on %s\n",
- pn_sockaddr_get_addr(&sa), dev->name);
+ net_dbg_ratelimited("Phonet loop to %02X on %s\n",
+ pn_sockaddr_get_addr(&sa),
+ dev->name);
goto out_dev;
}
/* Some drivers (e.g. TUN) do not allocate HW header space */
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index e9a83a63..fa8237f 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -203,8 +203,7 @@
len = skb->len;
err = pep_write(sk, skb);
if (err) {
- LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n",
- dev->name, err);
+ net_dbg_ratelimited("%s: TX error (%d)\n", dev->name, err);
dev->stats.tx_aborted_errors++;
dev->stats.tx_errors++;
} else {
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 44b2123..9cd069d 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -272,8 +272,8 @@
hdr = pnp_hdr(skb);
if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
- LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n",
- (unsigned int)hdr->data[0]);
+ net_dbg_ratelimited("Phonet unknown PEP type: %u\n",
+ (unsigned int)hdr->data[0]);
return -EOPNOTSUPP;
}
@@ -304,8 +304,8 @@
break;
default:
- LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP indication: %u\n",
- (unsigned int)hdr->data[1]);
+ net_dbg_ratelimited("Phonet unknown PEP indication: %u\n",
+ (unsigned int)hdr->data[1]);
return -EOPNOTSUPP;
}
if (wake)
@@ -451,8 +451,8 @@
break;
default:
- LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP message: %u\n",
- hdr->message_id);
+ net_dbg_ratelimited("Phonet unknown PEP message: %u\n",
+ hdr->message_id);
err = -EINVAL;
}
out:
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index d49dc2e..ce469d6 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -205,9 +205,10 @@
if (sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN))
goto out_free;
- if (!sctp_ulpevent_is_notification(event))
+ if (!sctp_ulpevent_is_notification(event)) {
sk_mark_napi_id(sk, skb);
-
+ sk_incoming_cpu_update(sk);
+ }
/* Check if the user wishes to receive this event. */
if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe))
goto out_free;