Merge branch 'cxgb4-more-debug-info'

Hariprasad Shenai says:

====================
Add some more debug info

This patch series adds the following.
Add more info for sge_qinfo dump
Differentiate tid and stids between different regions, and add a debugfs
entry to dump all the tid info

This patch series has been created against net-next tree and includes
patches on cxgb4 driver.

We have included all the maintainers of respective drivers. Kindly review
the change and let us know in case of any review comments.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index f65ab64..ce075d1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -1943,13 +1943,13 @@
 {
 	struct adapter *adap = seq->private;
 	int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
-	int toe_entries = DIV_ROUND_UP(adap->sge.ofldqsets, 4);
+	int iscsi_entries = DIV_ROUND_UP(adap->sge.ofldqsets, 4);
 	int rdma_entries = DIV_ROUND_UP(adap->sge.rdmaqs, 4);
 	int ciq_entries = DIV_ROUND_UP(adap->sge.rdmaciqs, 4);
 	int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
 	int i, r = (uintptr_t)v - 1;
-	int toe_idx = r - eth_entries;
-	int rdma_idx = toe_idx - toe_entries;
+	int iscsi_idx = r - eth_entries;
+	int rdma_idx = iscsi_idx - iscsi_entries;
 	int ciq_idx = rdma_idx - rdma_entries;
 	int ctrl_idx =  ciq_idx - ciq_entries;
 	int fq_idx =  ctrl_idx - ctrl_entries;
@@ -1965,8 +1965,12 @@
 		seq_putc(seq, '\n'); \
 } while (0)
 #define S(s, v) S3("s", s, v)
+#define T3(fmt_spec, s, v) S3(fmt_spec, s, tx[i].v)
 #define T(s, v) S3("u", s, tx[i].v)
+#define TL(s, v) T3("lu", s, v)
+#define R3(fmt_spec, s, v) S3(fmt_spec, s, rx[i].v)
 #define R(s, v) S3("u", s, rx[i].v)
+#define RL(s, v) R3("lu", s, v)
 
 	if (r < eth_entries) {
 		int base_qset = r * 4;
@@ -2005,12 +2009,30 @@
 		R("FL avail:", fl.avail);
 		R("FL PIDX:", fl.pidx);
 		R("FL CIDX:", fl.cidx);
-	} else if (toe_idx < toe_entries) {
-		const struct sge_ofld_rxq *rx = &adap->sge.ofldrxq[toe_idx * 4];
-		const struct sge_ofld_txq *tx = &adap->sge.ofldtxq[toe_idx * 4];
-		int n = min(4, adap->sge.ofldqsets - 4 * toe_idx);
+		RL("RxPackets:", stats.pkts);
+		RL("RxCSO:", stats.rx_cso);
+		RL("VLANxtract:", stats.vlan_ex);
+		RL("LROmerged:", stats.lro_merged);
+		RL("LROpackets:", stats.lro_pkts);
+		RL("RxDrops:", stats.rx_drops);
+		TL("TSO:", tso);
+		TL("TxCSO:", tx_cso);
+		TL("VLANins:", vlan_ins);
+		TL("TxQFull:", q.stops);
+		TL("TxQRestarts:", q.restarts);
+		TL("TxMapErr:", mapping_err);
+		RL("FLAllocErr:", fl.alloc_failed);
+		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLStarving:", fl.starving);
 
-		S("QType:", "TOE");
+	} else if (iscsi_idx < iscsi_entries) {
+		const struct sge_ofld_rxq *rx =
+			&adap->sge.ofldrxq[iscsi_idx * 4];
+		const struct sge_ofld_txq *tx =
+			&adap->sge.ofldtxq[iscsi_idx * 4];
+		int n = min(4, adap->sge.ofldqsets - 4 * iscsi_idx);
+
+		S("QType:", "iSCSI");
 		T("TxQ ID:", q.cntxt_id);
 		T("TxQ size:", q.size);
 		T("TxQ inuse:", q.in_use);
@@ -2030,6 +2052,13 @@
 		R("FL avail:", fl.avail);
 		R("FL PIDX:", fl.pidx);
 		R("FL CIDX:", fl.cidx);
+		RL("RxPackets:", stats.pkts);
+		RL("RxImmPkts:", stats.imm);
+		RL("RxNoMem:", stats.nomem);
+		RL("FLAllocErr:", fl.alloc_failed);
+		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLStarving:", fl.starving);
+
 	} else if (rdma_idx < rdma_entries) {
 		const struct sge_ofld_rxq *rx =
 				&adap->sge.rdmarxq[rdma_idx * 4];
@@ -2052,6 +2081,13 @@
 		R("FL avail:", fl.avail);
 		R("FL PIDX:", fl.pidx);
 		R("FL CIDX:", fl.cidx);
+		RL("RxPackets:", stats.pkts);
+		RL("RxImmPkts:", stats.imm);
+		RL("RxNoMem:", stats.nomem);
+		RL("FLAllocErr:", fl.alloc_failed);
+		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLStarving:", fl.starving);
+
 	} else if (ciq_idx < ciq_entries) {
 		const struct sge_ofld_rxq *rx = &adap->sge.rdmaciq[ciq_idx * 4];
 		int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx);
@@ -2067,6 +2103,9 @@
 		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
 		S3("u", "Intr pktcnt:",
 		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
+		RL("RxAN:", stats.an);
+		RL("RxNoMem:", stats.nomem);
+
 	} else if (ctrl_idx < ctrl_entries) {
 		const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4];
 		int n = min(4, adap->params.nports - 4 * ctrl_idx);
@@ -2077,6 +2116,8 @@
 		T("TxQ inuse:", q.in_use);
 		T("TxQ CIDX:", q.cidx);
 		T("TxQ PIDX:", q.pidx);
+		TL("TxQFull:", q.stops);
+		TL("TxQRestarts:", q.restarts);
 	} else if (fq_idx == 0) {
 		const struct sge_rspq *evtq = &adap->sge.fw_evtq;
 
@@ -2092,8 +2133,12 @@
 			   adap->sge.counter_val[evtq->pktcnt_idx]);
 	}
 #undef R
+#undef RL
 #undef T
+#undef TL
 #undef S
+#undef R3
+#undef T3
 #undef S3
 	return 0;
 }
@@ -2212,6 +2257,73 @@
 	.llseek  = default_llseek,
 };
 
+static int tid_info_show(struct seq_file *seq, void *v)
+{
+	struct adapter *adap = seq->private;
+	const struct tid_info *t = &adap->tids;
+	enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
+
+	if (t4_read_reg(adap, LE_DB_CONFIG_A) & HASHEN_F) {
+		unsigned int sb;
+
+		if (chip <= CHELSIO_T5)
+			sb = t4_read_reg(adap, LE_DB_SERVER_INDEX_A) / 4;
+		else
+			sb = t4_read_reg(adap, LE_DB_SRVR_START_INDEX_A);
+
+		if (sb) {
+			seq_printf(seq, "TID range: 0..%u/%u..%u", sb - 1,
+				   adap->tids.hash_base,
+				   t->ntids - 1);
+			seq_printf(seq, ", in use: %u/%u\n",
+				   atomic_read(&t->tids_in_use),
+				   atomic_read(&t->hash_tids_in_use));
+		} else if (adap->flags & FW_OFLD_CONN) {
+			seq_printf(seq, "TID range: %u..%u/%u..%u",
+				   t->aftid_base,
+				   t->aftid_end,
+				   adap->tids.hash_base,
+				   t->ntids - 1);
+			seq_printf(seq, ", in use: %u/%u\n",
+				   atomic_read(&t->tids_in_use),
+				   atomic_read(&t->hash_tids_in_use));
+		} else {
+			seq_printf(seq, "TID range: %u..%u",
+				   adap->tids.hash_base,
+				   t->ntids - 1);
+			seq_printf(seq, ", in use: %u\n",
+				   atomic_read(&t->hash_tids_in_use));
+		}
+	} else if (t->ntids) {
+		seq_printf(seq, "TID range: 0..%u", t->ntids - 1);
+		seq_printf(seq, ", in use: %u\n",
+			   atomic_read(&t->tids_in_use));
+	}
+
+	if (t->nstids)
+		seq_printf(seq, "STID range: %u..%u, in use: %u\n",
+			   (!t->stid_base &&
+			   (chip <= CHELSIO_T5)) ?
+			   t->stid_base + 1 : t->stid_base,
+			   t->stid_base + t->nstids - 1, t->stids_in_use);
+	if (t->natids)
+		seq_printf(seq, "ATID range: 0..%u, in use: %u\n",
+			   t->natids - 1, t->atids_in_use);
+	seq_printf(seq, "FTID range: %u..%u\n", t->ftid_base,
+		   t->ftid_base + t->nftids - 1);
+	if (t->nsftids)
+		seq_printf(seq, "SFTID range: %u..%u in use: %u\n",
+			   t->sftid_base, t->sftid_base + t->nsftids - 2,
+			   t->sftids_in_use);
+	if (t->ntids)
+		seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n",
+			   t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A),
+			   t4_read_reg(adap, LE_DB_ACT_CNT_IPV6_A));
+	return 0;
+}
+
+DEFINE_SIMPLE_DEBUGFS_FILE(tid_info);
+
 static void add_debugfs_mem(struct adapter *adap, const char *name,
 			    unsigned int idx, unsigned int size_mb)
 {
@@ -2625,6 +2737,7 @@
 #if IS_ENABLED(CONFIG_IPV6)
 		{ "clip_tbl", &clip_tbl_debugfs_fops, S_IRUSR, 0 },
 #endif
+		{ "tids", &tid_info_debugfs_fops, S_IRUSR, 0},
 		{ "blocked_fl", &blocked_fl_fops, S_IRUSR | S_IWUSR, 0 },
 		{ "meminfo", &meminfo_fops, S_IRUSR, 0 },
 	};
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 27e87b6..f35dd22 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1548,7 +1548,7 @@
 		t->stid_tab[stid].data = data;
 		stid -= t->nstids;
 		stid += t->sftid_base;
-		t->stids_in_use++;
+		t->sftids_in_use++;
 	}
 	spin_unlock_bh(&t->stid_lock);
 	return stid;
@@ -1573,10 +1573,14 @@
 	else
 		bitmap_release_region(t->stid_bmap, stid, 2);
 	t->stid_tab[stid].data = NULL;
-	if (family == PF_INET)
-		t->stids_in_use--;
-	else
-		t->stids_in_use -= 4;
+	if (stid < t->nstids) {
+		if (family == PF_INET)
+			t->stids_in_use--;
+		else
+			t->stids_in_use -= 4;
+	} else {
+		t->sftids_in_use--;
+	}
 	spin_unlock_bh(&t->stid_lock);
 }
 EXPORT_SYMBOL(cxgb4_free_stid);
@@ -1654,20 +1658,25 @@
  */
 void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid)
 {
-	void *old;
 	struct sk_buff *skb;
 	struct adapter *adap = container_of(t, struct adapter, tids);
 
-	old = t->tid_tab[tid];
+	WARN_ON(tid >= t->ntids);
+
+	if (t->tid_tab[tid]) {
+		t->tid_tab[tid] = NULL;
+		if (t->hash_base && (tid >= t->hash_base))
+			atomic_dec(&t->hash_tids_in_use);
+		else
+			atomic_dec(&t->tids_in_use);
+	}
+
 	skb = alloc_skb(sizeof(struct cpl_tid_release), GFP_ATOMIC);
 	if (likely(skb)) {
-		t->tid_tab[tid] = NULL;
 		mk_tid_release(skb, chan, tid);
 		t4_ofld_send(adap, skb);
 	} else
 		cxgb4_queue_tid_release(t, chan, tid);
-	if (old)
-		atomic_dec(&t->tids_in_use);
 }
 EXPORT_SYMBOL(cxgb4_remove_tid);
 
@@ -1702,9 +1711,11 @@
 	spin_lock_init(&t->atid_lock);
 
 	t->stids_in_use = 0;
+	t->sftids_in_use = 0;
 	t->afree = NULL;
 	t->atids_in_use = 0;
 	atomic_set(&t->tids_in_use, 0);
+	atomic_set(&t->hash_tids_in_use, 0);
 
 	/* Setup the free list for atid_tab and clear the stid bitmap. */
 	if (natids) {
@@ -4814,6 +4825,22 @@
 		adapter->params.offload = 0;
 	}
 
+	if (is_offload(adapter)) {
+		if (t4_read_reg(adapter, LE_DB_CONFIG_A) & HASHEN_F) {
+			u32 hash_base, hash_reg;
+
+			if (chip <= CHELSIO_T5) {
+				hash_reg = LE_DB_TID_HASHBASE_A;
+				hash_base = t4_read_reg(adapter, hash_reg);
+				adapter->tids.hash_base = hash_base / 4;
+			} else {
+				hash_reg = T6_LE_DB_HASH_TID_BASE_A;
+				hash_base = t4_read_reg(adapter, hash_reg);
+				adapter->tids.hash_base = hash_base;
+			}
+		}
+	}
+
 	/* See what interrupts we'll be using */
 	if (msi > 1 && enable_msix(adapter) == 0)
 		adapter->flags |= USING_MSIX;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index b27897d..c3a8be5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -96,6 +96,7 @@
 	unsigned long *stid_bmap;
 	unsigned int nstids;
 	unsigned int stid_base;
+	unsigned int hash_base;
 
 	union aopen_entry *atid_tab;
 	unsigned int natids;
@@ -116,8 +117,12 @@
 
 	spinlock_t stid_lock;
 	unsigned int stids_in_use;
+	unsigned int sftids_in_use;
 
+	/* TIDs in the TCAM */
 	atomic_t tids_in_use;
+	/* TIDs in the HASH */
+	atomic_t hash_tids_in_use;
 };
 
 static inline void *lookup_tid(const struct tid_info *t, unsigned int tid)
@@ -147,7 +152,10 @@
 				    unsigned int tid)
 {
 	t->tid_tab[tid] = data;
-	atomic_inc(&t->tids_in_use);
+	if (t->hash_base && (tid >= t->hash_base))
+		atomic_inc(&t->hash_tids_in_use);
+	else
+		atomic_inc(&t->tids_in_use);
 }
 
 int cxgb4_alloc_atid(struct tid_info *t, void *data);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index d4248d7..78f446c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1424,18 +1424,17 @@
 		struct fw_wr_hdr *wr;
 		unsigned int ndesc = skb->priority;     /* previously saved */
 
-		/*
-		 * Write descriptors and free skbs outside the lock to limit
+		written += ndesc;
+		/* Write descriptors and free skbs outside the lock to limit
 		 * wait times.  q->full is still set so new skbs will be queued.
 		 */
+		wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
+		txq_advance(&q->q, ndesc);
 		spin_unlock(&q->sendq.lock);
 
-		wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
 		inline_tx_skb(skb, &q->q, wr);
 		kfree_skb(skb);
 
-		written += ndesc;
-		txq_advance(&q->q, ndesc);
 		if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
 			unsigned long old = q->q.stops;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index e444dc4..4d2c929 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -2732,10 +2732,15 @@
 #define T6_LIPMISS_F    T6_LIPMISS_V(1U)
 
 #define LE_DB_CONFIG_A 0x19c04
+#define LE_DB_SERVER_INDEX_A 0x19c18
+#define LE_DB_SRVR_START_INDEX_A 0x19c18
+#define LE_DB_ACT_CNT_IPV4_A 0x19c20
+#define LE_DB_ACT_CNT_IPV6_A 0x19c24
 #define LE_DB_HASH_TID_BASE_A 0x19c30
 #define LE_DB_HASH_TBL_BASE_ADDR_A 0x19c30
 #define LE_DB_INT_CAUSE_A 0x19c3c
 #define LE_DB_TID_HASHBASE_A 0x19df8
+#define T6_LE_DB_HASH_TID_BASE_A 0x19df8
 
 #define HASHEN_S    20
 #define HASHEN_V(x) ((x) << HASHEN_S)