amd-xgbe: Adjust register settings to improve performance

Add support to change some general performance settings and to provide
some performance settings based on the device that is probed.

This includes:

- Setting the maximum read/write outstanding request limit
- Reducing the AXI interface burst length size
- Selectively setting the Tx and Rx descriptor pre-fetch threshold
- Selectively setting additional cache coherency controls

Tested and verified on all versions of the hardware.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 6b5c72d..9795419 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -123,8 +123,11 @@
 #define DMA_ISR				0x3008
 #define DMA_AXIARCR			0x3010
 #define DMA_AXIAWCR			0x3018
+#define DMA_AXIAWARCR			0x301c
 #define DMA_DSR0			0x3020
 #define DMA_DSR1			0x3024
+#define DMA_TXEDMACR			0x3040
+#define DMA_RXEDMACR			0x3044
 
 /* DMA register entry bit positions and sizes */
 #define DMA_ISR_MACIS_INDEX		17
@@ -135,12 +138,22 @@
 #define DMA_MR_INTM_WIDTH		2
 #define DMA_MR_SWR_INDEX		0
 #define DMA_MR_SWR_WIDTH		1
+#define DMA_RXEDMACR_RDPS_INDEX		0
+#define DMA_RXEDMACR_RDPS_WIDTH		3
+#define DMA_SBMR_AAL_INDEX		12
+#define DMA_SBMR_AAL_WIDTH		1
 #define DMA_SBMR_EAME_INDEX		11
 #define DMA_SBMR_EAME_WIDTH		1
 #define DMA_SBMR_BLEN_INDEX		1
 #define DMA_SBMR_BLEN_WIDTH		7
+#define DMA_SBMR_RD_OSR_LMT_INDEX	16
+#define DMA_SBMR_RD_OSR_LMT_WIDTH	6
 #define DMA_SBMR_UNDEF_INDEX		0
 #define DMA_SBMR_UNDEF_WIDTH		1
+#define DMA_SBMR_WR_OSR_LMT_INDEX	24
+#define DMA_SBMR_WR_OSR_LMT_WIDTH	6
+#define DMA_TXEDMACR_TDPS_INDEX		0
+#define DMA_TXEDMACR_TDPS_WIDTH		3
 
 /* DMA register values */
 #define DMA_SBMR_BLEN_256		256
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index a51ece52..06f953e 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -2114,18 +2114,38 @@
 
 static void xgbe_config_dma_bus(struct xgbe_prv_data *pdata)
 {
+	unsigned int sbmr;
+
+	sbmr = XGMAC_IOREAD(pdata, DMA_SBMR);
+
 	/* Set enhanced addressing mode */
-	XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, EAME, 1);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, EAME, 1);
 
 	/* Set the System Bus mode */
-	XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, UNDEF, 1);
-	XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, BLEN, pdata->blen >> 2);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, UNDEF, 1);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, BLEN, pdata->blen >> 2);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, AAL, pdata->aal);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, RD_OSR_LMT, pdata->rd_osr_limit - 1);
+	XGMAC_SET_BITS(sbmr, DMA_SBMR, WR_OSR_LMT, pdata->wr_osr_limit - 1);
+
+	XGMAC_IOWRITE(pdata, DMA_SBMR, sbmr);
+
+	/* Set descriptor fetching threshold */
+	if (pdata->vdata->tx_desc_prefetch)
+		XGMAC_IOWRITE_BITS(pdata, DMA_TXEDMACR, TDPS,
+				   pdata->vdata->tx_desc_prefetch);
+
+	if (pdata->vdata->rx_desc_prefetch)
+		XGMAC_IOWRITE_BITS(pdata, DMA_RXEDMACR, RDPS,
+				   pdata->vdata->rx_desc_prefetch);
 }
 
 static void xgbe_config_dma_cache(struct xgbe_prv_data *pdata)
 {
 	XGMAC_IOWRITE(pdata, DMA_AXIARCR, pdata->arcr);
 	XGMAC_IOWRITE(pdata, DMA_AXIAWCR, pdata->awcr);
+	if (pdata->awarcr)
+		XGMAC_IOWRITE(pdata, DMA_AXIAWARCR, pdata->awarcr);
 }
 
 static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 8eec9f5..500147d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -140,8 +140,11 @@
 {
 	DBGPR("-->xgbe_default_config\n");
 
-	pdata->blen = DMA_SBMR_BLEN_256;
+	pdata->blen = DMA_SBMR_BLEN_64;
 	pdata->pbl = DMA_PBL_128;
+	pdata->aal = 1;
+	pdata->rd_osr_limit = 8;
+	pdata->wr_osr_limit = 8;
 	pdata->tx_sf_mode = MTL_TSF_ENABLE;
 	pdata->tx_threshold = MTL_TX_THRESHOLD_64;
 	pdata->tx_osp_mode = DMA_OSP_ENABLE;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index 1e73768..1e56ad7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -327,8 +327,9 @@
 
 	/* Set the DMA coherency values */
 	pdata->coherent = 1;
-	pdata->arcr = XGBE_DMA_OS_ARCR;
-	pdata->awcr = XGBE_DMA_OS_AWCR;
+	pdata->arcr = XGBE_DMA_PCI_ARCR;
+	pdata->awcr = XGBE_DMA_PCI_AWCR;
+	pdata->awarcr = XGBE_DMA_PCI_AWARCR;
 
 	/* Set the maximum channels and queues */
 	reg = XP_IOREAD(pdata, XP_PROP_1);
@@ -447,6 +448,8 @@
 	.ecc_support			= 1,
 	.i2c_support			= 1,
 	.irq_reissue_support		= 1,
+	.tx_desc_prefetch		= 5,
+	.rx_desc_prefetch		= 5,
 };
 
 static const struct xgbe_version_data xgbe_v2b = {
@@ -459,6 +462,8 @@
 	.ecc_support			= 1,
 	.i2c_support			= 1,
 	.irq_reissue_support		= 1,
+	.tx_desc_prefetch		= 5,
+	.rx_desc_prefetch		= 5,
 };
 
 static const struct pci_device_id xgbe_pci_table[] = {
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 4bf82eb..0938294 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -171,6 +171,11 @@
 #define XGBE_DMA_SYS_ARCR	0x00303030
 #define XGBE_DMA_SYS_AWCR	0x30303030
 
+/* DMA cache settings - PCI device */
+#define XGBE_DMA_PCI_ARCR	0x00000003
+#define XGBE_DMA_PCI_AWCR	0x13131313
+#define XGBE_DMA_PCI_AWARCR	0x00000313
+
 /* DMA channel interrupt modes */
 #define XGBE_IRQ_MODE_EDGE	0
 #define XGBE_IRQ_MODE_LEVEL	1
@@ -921,6 +926,8 @@
 	unsigned int ecc_support;
 	unsigned int i2c_support;
 	unsigned int irq_reissue_support;
+	unsigned int tx_desc_prefetch;
+	unsigned int rx_desc_prefetch;
 };
 
 struct xgbe_prv_data {
@@ -1000,6 +1007,7 @@
 	unsigned int coherent;
 	unsigned int arcr;
 	unsigned int awcr;
+	unsigned int awarcr;
 
 	/* Service routine support */
 	struct workqueue_struct *dev_workqueue;
@@ -1024,6 +1032,9 @@
 	/* Tx/Rx common settings */
 	unsigned int blen;
 	unsigned int pbl;
+	unsigned int aal;
+	unsigned int rd_osr_limit;
+	unsigned int wr_osr_limit;
 
 	/* Tx settings */
 	unsigned int tx_sf_mode;