Merge branch 'thunderx-fixes'

Aleksey Makarov says:

====================
net: thunderx: Misc fixes

Miscellaneous fixes for the ThunderX VNIC driver

All the patches can be applied individually.
It's OK to drop some if the maintainer feels uncomfortable
applying them for 4.2.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index dda8a02..8aee250 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -125,6 +125,15 @@
  */
 #define NICPF_CLK_PER_INT_TICK		2
 
+/* Time to wait before we decide that an SQ is stuck.
+ *
+ * Since both packet Rx and Tx notifications are done with the same CQ,
+ * when packets are being received at a very high rate (e.g. L2 forwarding)
+ * freeing of transmitted skbs will be delayed and the watchdog
+ * will kick in, resetting the interface. Hence this value is kept high.
+ */
+#define	NICVF_TX_TIMEOUT		(50 * HZ)
+
 struct nicvf_cq_poll {
 	u8	cq_idx;		/* Completion queue index */
 	struct	napi_struct napi;
@@ -216,8 +225,9 @@
 	/* Tx */
 	u64 tx_frames_ok;
 	u64 tx_drops;
-	u64 tx_busy;
 	u64 tx_tso;
+	u64 txq_stop;
+	u64 txq_wake;
 };
 
 struct nicvf {
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index 16bd2d7..a4228e6 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -66,9 +66,10 @@
 	NICVF_DRV_STAT(rx_frames_jumbo),
 	NICVF_DRV_STAT(rx_drops),
 	NICVF_DRV_STAT(tx_frames_ok),
-	NICVF_DRV_STAT(tx_busy),
 	NICVF_DRV_STAT(tx_tso),
 	NICVF_DRV_STAT(tx_drops),
+	NICVF_DRV_STAT(txq_stop),
+	NICVF_DRV_STAT(txq_wake),
 };
 
 static const struct nicvf_stat nicvf_queue_stats[] = {
@@ -126,6 +127,7 @@
 
 static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
 {
+	struct nicvf *nic = netdev_priv(netdev);
 	int stats, qidx;
 
 	if (sset != ETH_SS_STATS)
@@ -141,7 +143,7 @@
 		data += ETH_GSTRING_LEN;
 	}
 
-	for (qidx = 0; qidx < MAX_RCV_QUEUES_PER_QS; qidx++) {
+	for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
 		for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
 			sprintf(data, "rxq%d: %s", qidx,
 				nicvf_queue_stats[stats].name);
@@ -149,7 +151,7 @@
 		}
 	}
 
-	for (qidx = 0; qidx < MAX_SND_QUEUES_PER_QS; qidx++) {
+	for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
 		for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
 			sprintf(data, "txq%d: %s", qidx,
 				nicvf_queue_stats[stats].name);
@@ -170,12 +172,14 @@
 
 static int nicvf_get_sset_count(struct net_device *netdev, int sset)
 {
+	struct nicvf *nic = netdev_priv(netdev);
+
 	if (sset != ETH_SS_STATS)
 		return -EINVAL;
 
 	return nicvf_n_hw_stats + nicvf_n_drv_stats +
 		(nicvf_n_queue_stats *
-		 (MAX_RCV_QUEUES_PER_QS + MAX_SND_QUEUES_PER_QS)) +
+		 (nic->qs->rq_cnt + nic->qs->sq_cnt)) +
 		BGX_RX_STATS_COUNT + BGX_TX_STATS_COUNT;
 }
 
@@ -197,13 +201,13 @@
 		*(data++) = ((u64 *)&nic->drv_stats)
 				[nicvf_drv_stats[stat].index];
 
-	for (qidx = 0; qidx < MAX_RCV_QUEUES_PER_QS; qidx++) {
+	for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
 		for (stat = 0; stat < nicvf_n_queue_stats; stat++)
 			*(data++) = ((u64 *)&nic->qs->rq[qidx].stats)
 					[nicvf_queue_stats[stat].index];
 	}
 
-	for (qidx = 0; qidx < MAX_SND_QUEUES_PER_QS; qidx++) {
+	for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
 		for (stat = 0; stat < nicvf_n_queue_stats; stat++)
 			*(data++) = ((u64 *)&nic->qs->sq[qidx].stats)
 					[nicvf_queue_stats[stat].index];
@@ -543,6 +547,7 @@
 {
 	struct nicvf *nic = netdev_priv(dev);
 	int err = 0;
+	bool if_up = netif_running(dev);
 
 	if (!channel->rx_count || !channel->tx_count)
 		return -EINVAL;
@@ -551,6 +556,9 @@
 	if (channel->tx_count > MAX_SND_QUEUES_PER_QS)
 		return -EINVAL;
 
+	if (if_up)
+		nicvf_stop(dev);
+
 	nic->qs->rq_cnt = channel->rx_count;
 	nic->qs->sq_cnt = channel->tx_count;
 	nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt);
@@ -559,11 +567,9 @@
 	if (err)
 		return err;
 
-	if (!netif_running(dev))
-		return err;
+	if (if_up)
+		nicvf_open(dev);
 
-	nicvf_stop(dev);
-	nicvf_open(dev);
 	netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n",
 		    nic->qs->sq_cnt, nic->qs->rq_cnt);
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 8b119a0..3b90afb 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -234,7 +234,7 @@
 				    nic->duplex == DUPLEX_FULL ?
 				"Full duplex" : "Half duplex");
 			netif_carrier_on(nic->netdev);
-			netif_tx_wake_all_queues(nic->netdev);
+			netif_tx_start_all_queues(nic->netdev);
 		} else {
 			netdev_info(nic->netdev, "%s: Link is Down\n",
 				    nic->netdev->name);
@@ -425,6 +425,7 @@
 	if (skb) {
 		prefetch(skb);
 		dev_consume_skb_any(skb);
+		sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
 	}
 }
 
@@ -476,12 +477,13 @@
 static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 				 struct napi_struct *napi, int budget)
 {
-	int processed_cqe, work_done = 0;
+	int processed_cqe, work_done = 0, tx_done = 0;
 	int cqe_count, cqe_head;
 	struct nicvf *nic = netdev_priv(netdev);
 	struct queue_set *qs = nic->qs;
 	struct cmp_queue *cq = &qs->cq[cq_idx];
 	struct cqe_rx_t *cq_desc;
+	struct netdev_queue *txq;
 
 	spin_lock_bh(&cq->lock);
 loop:
@@ -496,8 +498,8 @@
 	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
 	cqe_head &= 0xFFFF;
 
-	netdev_dbg(nic->netdev, "%s cqe_count %d cqe_head %d\n",
-		   __func__, cqe_count, cqe_head);
+	netdev_dbg(nic->netdev, "%s CQ%d cqe_count %d cqe_head %d\n",
+		   __func__, cq_idx, cqe_count, cqe_head);
 	while (processed_cqe < cqe_count) {
 		/* Get the CQ descriptor */
 		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
@@ -511,8 +513,8 @@
 			break;
 		}
 
-		netdev_dbg(nic->netdev, "cq_desc->cqe_type %d\n",
-			   cq_desc->cqe_type);
+		netdev_dbg(nic->netdev, "CQ%d cq_desc->cqe_type %d\n",
+			   cq_idx, cq_desc->cqe_type);
 		switch (cq_desc->cqe_type) {
 		case CQE_TYPE_RX:
 			nicvf_rcv_pkt_handler(netdev, napi, cq,
@@ -522,6 +524,7 @@
 		case CQE_TYPE_SEND:
 			nicvf_snd_pkt_handler(netdev, cq,
 					      (void *)cq_desc, CQE_TYPE_SEND);
+			tx_done++;
 		break;
 		case CQE_TYPE_INVALID:
 		case CQE_TYPE_RX_SPLIT:
@@ -532,8 +535,9 @@
 		}
 		processed_cqe++;
 	}
-	netdev_dbg(nic->netdev, "%s processed_cqe %d work_done %d budget %d\n",
-		   __func__, processed_cqe, work_done, budget);
+	netdev_dbg(nic->netdev,
+		   "%s CQ%d processed_cqe %d work_done %d budget %d\n",
+		   __func__, cq_idx, processed_cqe, work_done, budget);
 
 	/* Ring doorbell to inform H/W to reuse processed CQEs */
 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR,
@@ -543,6 +547,19 @@
 		goto loop;
 
 done:
+	/* Wake up the TXQ if it was stopped earlier due to SQ full */
+	if (tx_done) {
+		txq = netdev_get_tx_queue(netdev, cq_idx);
+		if (netif_tx_queue_stopped(txq)) {
+			netif_tx_start_queue(txq);
+			nic->drv_stats.txq_wake++;
+			if (netif_msg_tx_err(nic))
+				netdev_warn(netdev,
+					    "%s: Transmit queue wakeup SQ%d\n",
+					    netdev->name, cq_idx);
+		}
+	}
+
 	spin_unlock_bh(&cq->lock);
 	return work_done;
 }
@@ -554,15 +571,10 @@
 	struct net_device *netdev = napi->dev;
 	struct nicvf *nic = netdev_priv(netdev);
 	struct nicvf_cq_poll *cq;
-	struct netdev_queue *txq;
 
 	cq = container_of(napi, struct nicvf_cq_poll, napi);
 	work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget);
 
-	txq = netdev_get_tx_queue(netdev, cq->cq_idx);
-	if (netif_tx_queue_stopped(txq))
-		netif_tx_wake_queue(txq);
-
 	if (work_done < budget) {
 		/* Slow packet rate, exit polling */
 		napi_complete(napi);
@@ -833,9 +845,9 @@
 		return NETDEV_TX_OK;
 	}
 
-	if (!nicvf_sq_append_skb(nic, skb) && !netif_tx_queue_stopped(txq)) {
+	if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) {
 		netif_tx_stop_queue(txq);
-		nic->drv_stats.tx_busy++;
+		nic->drv_stats.txq_stop++;
 		if (netif_msg_tx_err(nic))
 			netdev_warn(netdev,
 				    "%s: Transmit ring full, stopping SQ%d\n",
@@ -859,7 +871,6 @@
 	nicvf_send_msg_to_pf(nic, &mbx);
 
 	netif_carrier_off(netdev);
-	netif_tx_disable(netdev);
 
 	/* Disable RBDR & QS error interrupts */
 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
@@ -894,6 +905,8 @@
 		kfree(cq_poll);
 	}
 
+	netif_tx_disable(netdev);
+
 	/* Free resources */
 	nicvf_config_data_transfer(nic, false);
 
@@ -988,6 +1001,9 @@
 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
 		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
 
+	nic->drv_stats.txq_stop = 0;
+	nic->drv_stats.txq_wake = 0;
+
 	netif_carrier_on(netdev);
 	netif_tx_start_all_queues(netdev);
 
@@ -1278,6 +1294,7 @@
 	netdev->hw_features = netdev->features;
 
 	netdev->netdev_ops = &nicvf_netdev_ops;
+	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
 
 	INIT_WORK(&nic->reset_task, nicvf_reset_task);
 
@@ -1318,11 +1335,17 @@
 	pci_disable_device(pdev);
 }
 
+static void nicvf_shutdown(struct pci_dev *pdev)
+{
+	nicvf_remove(pdev);
+}
+
 static struct pci_driver nicvf_driver = {
 	.name = DRV_NAME,
 	.id_table = nicvf_id_table,
 	.probe = nicvf_probe,
 	.remove = nicvf_remove,
+	.shutdown = nicvf_shutdown,
 };
 
 static int __init nicvf_init_module(void)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index d69d228d..ca4240a 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -103,9 +103,11 @@
 
 	/* Allocate a new page */
 	if (!nic->rb_page) {
-		nic->rb_page = alloc_pages(gfp | __GFP_COMP, order);
+		nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
+					   order);
 		if (!nic->rb_page) {
-			netdev_err(nic->netdev, "Failed to allocate new rcv buffer\n");
+			netdev_err(nic->netdev,
+				   "Failed to allocate new rcv buffer\n");
 			return -ENOMEM;
 		}
 		nic->rb_page_offset = 0;
@@ -382,7 +384,8 @@
 		return;
 
 	if (sq->tso_hdrs)
-		dma_free_coherent(&nic->pdev->dev, sq->dmem.q_len,
+		dma_free_coherent(&nic->pdev->dev,
+				  sq->dmem.q_len * TSO_HEADER_SIZE,
 				  sq->tso_hdrs, sq->tso_hdrs_phys);
 
 	kfree(sq->skbuff);
@@ -863,10 +866,11 @@
 			continue;
 		}
 		skb = (struct sk_buff *)sq->skbuff[sq->head];
+		if (skb)
+			dev_kfree_skb_any(skb);
 		atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets);
 		atomic64_add(hdr->tot_len,
 			     (atomic64_t *)&netdev->stats.tx_bytes);
-		dev_kfree_skb_any(skb);
 		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
 	}
 }
@@ -992,7 +996,7 @@
 
 	memset(gather, 0, SND_QUEUE_DESC_SIZE);
 	gather->subdesc_type = SQ_DESC_TYPE_GATHER;
-	gather->ld_type = NIC_SEND_LD_TYPE_E_LDWB;
+	gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
 	gather->size = size;
 	gather->addr = data;
 }
@@ -1048,7 +1052,7 @@
 		}
 		nicvf_sq_add_hdr_subdesc(sq, hdr_qentry,
 					 seg_subdescs - 1, skb, seg_len);
-		sq->skbuff[hdr_qentry] = 0;
+		sq->skbuff[hdr_qentry] = (u64)NULL;
 		qentry = nicvf_get_nxt_sqentry(sq, qentry);
 
 		desc_cnt += seg_subdescs;
@@ -1062,6 +1066,7 @@
 	/* Inform HW to xmit all TSO segments */
 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
 			      skb_get_queue_mapping(skb), desc_cnt);
+	nic->drv_stats.tx_tso++;
 	return 1;
 }
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 8341bdf..f0937b7 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -62,7 +62,7 @@
 #define SND_QUEUE_CNT		8
 #define CMP_QUEUE_CNT		8 /* Max of RCV and SND qcount */
 
-#define SND_QSIZE		SND_QUEUE_SIZE4
+#define SND_QSIZE		SND_QUEUE_SIZE2
 #define SND_QUEUE_LEN		(1ULL << (SND_QSIZE + 10))
 #define MAX_SND_QUEUE_LEN	(1ULL << (SND_QUEUE_SIZE6 + 10))
 #define SND_QUEUE_THRESH	2ULL
@@ -70,7 +70,10 @@
 /* Since timestamp not enabled, otherwise 2 */
 #define MAX_CQE_PER_PKT_XMIT		1
 
-#define CMP_QSIZE		CMP_QUEUE_SIZE4
+/* Keep the CQ and SQ sizes the same; if timestamping
+ * is enabled this equation will change.
+ */
+#define CMP_QSIZE		CMP_QUEUE_SIZE2
 #define CMP_QUEUE_LEN		(1ULL << (CMP_QSIZE + 10))
 #define CMP_QUEUE_CQE_THRESH	0
 #define CMP_QUEUE_TIMER_THRESH	220 /* 10usec */
@@ -87,7 +90,12 @@
 
 #define MAX_CQES_FOR_TX		((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
 				 MAX_CQE_PER_PKT_XMIT)
-#define RQ_CQ_DROP		((CMP_QUEUE_LEN - MAX_CQES_FOR_TX) / 256)
+/* Calculate the number of CQEs to reserve for all SQEs.
+ * It is expressed as a level in units of 1/256th of the CQ size.
+ * '+ 1' to account for pipelining.
+ */
+#define RQ_CQ_DROP		((256 / (CMP_QUEUE_LEN / \
+				 (CMP_QUEUE_LEN - MAX_CQES_FOR_TX))) + 1)
 
 /* Descriptor size in bytes */
 #define SND_QUEUE_DESC_SIZE	16
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 633ec05..b961a89 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -673,7 +673,10 @@
 	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg);
 	bgx_flush_dmac_addrs(bgx, lmacid);
 
-	if (lmac->phydev)
+	if ((bgx->lmac_type != BGX_MODE_XFI) &&
+	    (bgx->lmac_type != BGX_MODE_XLAUI) &&
+	    (bgx->lmac_type != BGX_MODE_40G_KR) &&
+	    (bgx->lmac_type != BGX_MODE_10G_KR) && lmac->phydev)
 		phy_disconnect(lmac->phydev);
 
 	lmac->phydev = NULL;