Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net
Jeff Kirsher says:
====================
Intel Wired LAN Driver Updates 2015-01-06
This series contains fixes to i40e only.
Jesse provides a fix for when the driver was polling with interrupts
disabled the hardware would occasionally not write back descriptors.
His fix causes the driver to detect this situation and force an interrupt
to fire which will flush the stuck descriptor.
Anjali provides a couple of fixes, the first corrects an issue where
the receive port checksum error counter was incrementing incorrectly with
UDP encapsulated tunneled traffic. The second fix resolves an issue where
the driver was examining the outer protocol layer to set the inner protocol
layer checksum offload. In the case of TCP over IPv6 over an IPv4 based
VXLAN, the inner checksum offloads would be set to look for IPv4/UDP
instead of IPv6/TCP, so fixed the issue so that the driver will look at
the proper layer for encapsulation offload settings.
v2: fixed a bug in patch 01 of the series, where the interrupt rate impacted
4 port workloads by reducing throughput.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 04b4414..cecb340 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -658,6 +658,8 @@
return le32_to_cpu(*(volatile __le32 *)head);
}
+#define WB_STRIDE 0x3
+
/**
* i40e_clean_tx_irq - Reclaim resources after transmit completes
* @tx_ring: tx ring to clean
@@ -759,6 +761,18 @@
tx_ring->q_vector->tx.total_bytes += total_bytes;
tx_ring->q_vector->tx.total_packets += total_packets;
+ /* check to see if there are any non-cache aligned descriptors
+ * waiting to be written back, and kick the hardware to force
+ * them to be written back in case of napi polling
+ */
+ if (budget &&
+ !((i & WB_STRIDE) == WB_STRIDE) &&
+ !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
+ (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+ tx_ring->arm_wb = true;
+ else
+ tx_ring->arm_wb = false;
+
if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
/* schedule immediate reset if we believe we hung */
dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
@@ -777,13 +791,16 @@
netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
dev_info(tx_ring->dev,
- "tx hang detected on queue %d, resetting adapter\n",
+ "tx hang detected on queue %d, reset requested\n",
tx_ring->queue_index);
- tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
+ /* do not fire the reset immediately, wait for the stack to
+ * decide we are truly stuck, also prevents every queue from
+ * simultaneously requesting a reset
+ */
- /* the adapter is about to reset, no point in enabling stuff */
- return true;
+ /* the adapter is about to reset, no point in enabling polling */
+ budget = 1;
}
netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
@@ -806,7 +823,25 @@
}
}
- return budget > 0;
+ return !!budget;
+}
+
+/**
+ * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to force writeback
+ *
+ **/
+static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+{
+ u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+ I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK
+ /* allow 00 to be written to the index */;
+
+ wr32(&vsi->back->hw,
+ I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
+ val);
}
/**
@@ -1290,9 +1325,7 @@
* so the total length of IPv4 header is IHL*4 bytes
* The UDP_0 bit *may* bet set if the *inner* header is UDP
*/
- if (ipv4_tunnel &&
- (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) &&
- !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
+ if (ipv4_tunnel) {
skb->transport_header = skb->mac_header +
sizeof(struct ethhdr) +
(ip_hdr(skb)->ihl * 4);
@@ -1302,15 +1335,19 @@
skb->protocol == htons(ETH_P_8021AD))
? VLAN_HLEN : 0;
- rx_udp_csum = udp_csum(skb);
- iph = ip_hdr(skb);
- csum = csum_tcpudp_magic(
- iph->saddr, iph->daddr,
- (skb->len - skb_transport_offset(skb)),
- IPPROTO_UDP, rx_udp_csum);
+ if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
+ (udp_hdr(skb)->check != 0)) {
+ rx_udp_csum = udp_csum(skb);
+ iph = ip_hdr(skb);
+ csum = csum_tcpudp_magic(
+ iph->saddr, iph->daddr,
+ (skb->len - skb_transport_offset(skb)),
+ IPPROTO_UDP, rx_udp_csum);
- if (udp_hdr(skb)->check != csum)
- goto checksum_fail;
+ if (udp_hdr(skb)->check != csum)
+ goto checksum_fail;
+
+ } /* else its GRE and so no outer UDP header */
}
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1581,6 +1618,7 @@
struct i40e_vsi *vsi = q_vector->vsi;
struct i40e_ring *ring;
bool clean_complete = true;
+ bool arm_wb = false;
int budget_per_ring;
if (test_bit(__I40E_DOWN, &vsi->state)) {
@@ -1591,8 +1629,10 @@
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
- i40e_for_each_ring(ring, q_vector->tx)
+ i40e_for_each_ring(ring, q_vector->tx) {
clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
+ arm_wb |= ring->arm_wb;
+ }
/* We attempt to distribute budget to each Rx queue fairly, but don't
* allow the budget to go below 1 because that would exit polling early.
@@ -1603,8 +1643,11 @@
clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
/* If work not completed, return budget and polling will return */
- if (!clean_complete)
+ if (!clean_complete) {
+ if (arm_wb)
+ i40e_force_wb(vsi, q_vector);
return budget;
+ }
/* Work is done so exit the polling mode and re-enable the interrupt */
napi_complete(napi);
@@ -1840,17 +1883,16 @@
if (err < 0)
return err;
- if (protocol == htons(ETH_P_IP)) {
- iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
+ iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
+ ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
+
+ if (iph->version == 4) {
tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
iph->tot_len = 0;
iph->check = 0;
tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
0, IPPROTO_TCP, 0);
- } else if (skb_is_gso_v6(skb)) {
-
- ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
- : ipv6_hdr(skb);
+ } else if (ipv6h->version == 6) {
tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
ipv6h->payload_len = 0;
tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
@@ -1946,13 +1988,9 @@
I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
}
} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
- if (tx_flags & I40E_TX_FLAGS_TSO) {
- *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+ *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+ if (tx_flags & I40E_TX_FLAGS_TSO)
ip_hdr(skb)->check = 0;
- } else {
- *cd_tunneling |=
- I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
- }
}
/* Now set the ctx descriptor fields */
@@ -1962,7 +2000,10 @@
((skb_inner_network_offset(skb) -
skb_transport_offset(skb)) >> 1) <<
I40E_TXD_CTX_QW0_NATLEN_SHIFT;
-
+ if (this_ip_hdr->version == 6) {
+ tx_flags &= ~I40E_TX_FLAGS_IPV4;
+ tx_flags |= I40E_TX_FLAGS_IPV6;
+ }
} else {
network_hdr_len = skb_network_header_len(skb);
this_ip_hdr = ip_hdr(skb);
@@ -2198,7 +2239,6 @@
/* Place RS bit on last descriptor of any packet that spans across the
* 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
*/
-#define WB_STRIDE 0x3
if (((i & WB_STRIDE) != WB_STRIDE) &&
(first <= &tx_ring->tx_bi[i]) &&
(first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index e60d3ac..18b0023 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -241,6 +241,7 @@
unsigned long last_rx_timestamp;
bool ring_active; /* is ring online or not */
+ bool arm_wb; /* do something to arm write back */
/* stats structs */
struct i40e_queue_stats stats;