Merge branch 'bgmac'

bgmac: add initial support for core rev 4 on ARM BCM47xx

====================
This adds support for core rev 4 and ARM BCM47XX.
With an other fix to the platform code I am now getting over 200 MBit/s
with this Ethernet driver, the DMA problems are solved are unrelated
to bgmac.

v3:
   - moved flags calculation for bcma_core_enable() into if block
   - remove hard coding of phy address to BGMAC_PHY_NOREGS

v2: add changed suggested by RafaƂ
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 0215f9a..09b632a 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -33,8 +33,6 @@
 int bcma_bus_suspend(struct bcma_bus *bus);
 int bcma_bus_resume(struct bcma_bus *bus);
 #endif
-struct bcma_device *bcma_find_core_unit(struct bcma_bus *bus, u16 coreid,
-					u8 unit);
 
 /* scan.c */
 int bcma_bus_scan(struct bcma_bus *bus);
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 5a9f6bd..34ea4c5 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -78,18 +78,6 @@
 	return BCMA_CORE_CHIPCOMMON;
 }
 
-struct bcma_device *bcma_find_core(struct bcma_bus *bus, u16 coreid)
-{
-	struct bcma_device *core;
-
-	list_for_each_entry(core, &bus->cores, list) {
-		if (core->id.id == coreid)
-			return core;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(bcma_find_core);
-
 struct bcma_device *bcma_find_core_unit(struct bcma_bus *bus, u16 coreid,
 					u8 unit)
 {
@@ -101,6 +89,7 @@
 	}
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(bcma_find_core_unit);
 
 bool bcma_wait_value(struct bcma_device *core, u16 reg, u32 mask, u32 value,
 		     int timeout)
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 39efb86..5d41f41 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -96,6 +96,19 @@
 	u32 ctl;
 
 	ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL);
+	if (bgmac->core->id.rev >= 4) {
+		ctl &= ~BGMAC_DMA_TX_BL_MASK;
+		ctl |= BGMAC_DMA_TX_BL_128 << BGMAC_DMA_TX_BL_SHIFT;
+
+		ctl &= ~BGMAC_DMA_TX_MR_MASK;
+		ctl |= BGMAC_DMA_TX_MR_2 << BGMAC_DMA_TX_MR_SHIFT;
+
+		ctl &= ~BGMAC_DMA_TX_PC_MASK;
+		ctl |= BGMAC_DMA_TX_PC_16 << BGMAC_DMA_TX_PC_SHIFT;
+
+		ctl &= ~BGMAC_DMA_TX_PT_MASK;
+		ctl |= BGMAC_DMA_TX_PT_8 << BGMAC_DMA_TX_PT_SHIFT;
+	}
 	ctl |= BGMAC_DMA_TX_ENABLE;
 	ctl |= BGMAC_DMA_TX_PARITY_DISABLE;
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
@@ -240,6 +253,16 @@
 	u32 ctl;
 
 	ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
+	if (bgmac->core->id.rev >= 4) {
+		ctl &= ~BGMAC_DMA_RX_BL_MASK;
+		ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
+
+		ctl &= ~BGMAC_DMA_RX_PC_MASK;
+		ctl |= BGMAC_DMA_RX_PC_8 << BGMAC_DMA_RX_PC_SHIFT;
+
+		ctl &= ~BGMAC_DMA_RX_PT_MASK;
+		ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
+	}
 	ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
 	ctl |= BGMAC_DMA_RX_ENABLE;
 	ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
@@ -745,13 +768,13 @@
 	u32 cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
 	u32 new_val = (cmdcfg & mask) | set;
 
-	bgmac_set(bgmac, BGMAC_CMDCFG, BGMAC_CMDCFG_SR);
+	bgmac_set(bgmac, BGMAC_CMDCFG, BGMAC_CMDCFG_SR(bgmac->core->id.rev));
 	udelay(2);
 
 	if (new_val != cmdcfg || force)
 		bgmac_write(bgmac, BGMAC_CMDCFG, new_val);
 
-	bgmac_mask(bgmac, BGMAC_CMDCFG, ~BGMAC_CMDCFG_SR);
+	bgmac_mask(bgmac, BGMAC_CMDCFG, ~BGMAC_CMDCFG_SR(bgmac->core->id.rev));
 	udelay(2);
 }
 
@@ -825,6 +848,9 @@
 	case SPEED_1000:
 		set |= BGMAC_CMDCFG_ES_1000;
 		break;
+	case SPEED_2500:
+		set |= BGMAC_CMDCFG_ES_2500;
+		break;
 	default:
 		bgmac_err(bgmac, "Unsupported speed: %d\n", bgmac->mac_speed);
 	}
@@ -837,12 +863,26 @@
 
 static void bgmac_miiconfig(struct bgmac *bgmac)
 {
-	u8 imode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
-			BGMAC_DS_MM_SHIFT;
-	if (imode == 0 || imode == 1) {
-		bgmac->mac_speed = SPEED_100;
+	struct bcma_device *core = bgmac->core;
+	struct bcma_chipinfo *ci = &core->bus->chipinfo;
+	u8 imode;
+
+	if (ci->id == BCMA_CHIP_ID_BCM4707 ||
+	    ci->id == BCMA_CHIP_ID_BCM53018) {
+		bcma_awrite32(core, BCMA_IOCTL,
+			      bcma_aread32(core, BCMA_IOCTL) | 0x40 |
+			      BGMAC_BCMA_IOCTL_SW_CLKEN);
+		bgmac->mac_speed = SPEED_2500;
 		bgmac->mac_duplex = DUPLEX_FULL;
 		bgmac_mac_speed(bgmac);
+	} else {
+		imode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) &
+			BGMAC_DS_MM_MASK) >> BGMAC_DS_MM_SHIFT;
+		if (imode == 0 || imode == 1) {
+			bgmac->mac_speed = SPEED_100;
+			bgmac->mac_duplex = DUPLEX_FULL;
+			bgmac_mac_speed(bgmac);
+		}
 	}
 }
 
@@ -852,7 +892,7 @@
 	struct bcma_device *core = bgmac->core;
 	struct bcma_bus *bus = core->bus;
 	struct bcma_chipinfo *ci = &bus->chipinfo;
-	u32 flags = 0;
+	u32 flags;
 	u32 iost;
 	int i;
 
@@ -880,15 +920,21 @@
 	    (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188))
 		iost &= ~BGMAC_BCMA_IOST_ATTACHED;
 
-	if (iost & BGMAC_BCMA_IOST_ATTACHED) {
-		flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
-		if (!bgmac->has_robosw)
-			flags |= BGMAC_BCMA_IOCTL_SW_RESET;
+	/* 3GMAC: for BCM4707, only do core reset at bgmac_probe() */
+	if (ci->id != BCMA_CHIP_ID_BCM4707) {
+		flags = 0;
+		if (iost & BGMAC_BCMA_IOST_ATTACHED) {
+			flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
+			if (!bgmac->has_robosw)
+				flags |= BGMAC_BCMA_IOCTL_SW_RESET;
+		}
+		bcma_core_enable(core, flags);
 	}
 
-	bcma_core_enable(core, flags);
-
-	if (core->id.rev > 2) {
+	/* Request Misc PLL for corerev > 2 */
+	if (core->id.rev > 2 &&
+	    ci->id != BCMA_CHIP_ID_BCM4707 &&
+	    ci->id != BCMA_CHIP_ID_BCM53018) {
 		bgmac_set(bgmac, BCMA_CLKCTLST,
 			  BGMAC_BCMA_CLKCTLST_MISC_PLL_REQ);
 		bgmac_wait_value(bgmac->core, BCMA_CLKCTLST,
@@ -954,7 +1000,7 @@
 			     BGMAC_CMDCFG_PROM |
 			     BGMAC_CMDCFG_NLC |
 			     BGMAC_CMDCFG_CFE |
-			     BGMAC_CMDCFG_SR,
+			     BGMAC_CMDCFG_SR(core->id.rev),
 			     false);
 	bgmac->mac_speed = SPEED_UNKNOWN;
 	bgmac->mac_duplex = DUPLEX_UNKNOWN;
@@ -997,7 +1043,7 @@
 
 	cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
 	bgmac_cmdcfg_maskset(bgmac, ~(BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE),
-			     BGMAC_CMDCFG_SR, true);
+			     BGMAC_CMDCFG_SR(bgmac->core->id.rev), true);
 	udelay(2);
 	cmdcfg |= BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE;
 	bgmac_write(bgmac, BGMAC_CMDCFG, cmdcfg);
@@ -1026,12 +1072,16 @@
 		break;
 	}
 
-	rxq_ctl = bgmac_read(bgmac, BGMAC_RXQ_CTL);
-	rxq_ctl &= ~BGMAC_RXQ_CTL_MDP_MASK;
-	bp_clk = bcma_pmu_get_bus_clock(&bgmac->core->bus->drv_cc) / 1000000;
-	mdp = (bp_clk * 128 / 1000) - 3;
-	rxq_ctl |= (mdp << BGMAC_RXQ_CTL_MDP_SHIFT);
-	bgmac_write(bgmac, BGMAC_RXQ_CTL, rxq_ctl);
+	if (ci->id != BCMA_CHIP_ID_BCM4707 &&
+	    ci->id != BCMA_CHIP_ID_BCM53018) {
+		rxq_ctl = bgmac_read(bgmac, BGMAC_RXQ_CTL);
+		rxq_ctl &= ~BGMAC_RXQ_CTL_MDP_MASK;
+		bp_clk = bcma_pmu_get_bus_clock(&bgmac->core->bus->drv_cc) /
+				1000000;
+		mdp = (bp_clk * 128 / 1000) - 3;
+		rxq_ctl |= (mdp << BGMAC_RXQ_CTL_MDP_SHIFT);
+		bgmac_write(bgmac, BGMAC_RXQ_CTL, rxq_ctl);
+	}
 }
 
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipinit */
@@ -1423,6 +1473,27 @@
 
 	bgmac_chip_reset(bgmac);
 
+	/* For Northstar, we have to take all GMAC core out of reset */
+	if (core->id.id == BCMA_CHIP_ID_BCM4707 ||
+	    core->id.id == BCMA_CHIP_ID_BCM53018) {
+		struct bcma_device *ns_core;
+		int ns_gmac;
+
+		/* Northstar has 4 GMAC cores */
+		for (ns_gmac = 0; ns_gmac < 4; ns_gmac++) {
+			/* As northstar requirement, we have to reset all GAMCs
+			 * before accessing one. bgmac_chip_reset() call
+			 * bcma_core_enable() for this core. Then the other
+			 * three GAMCs didn't reset.  We do it here.
+			 */
+			ns_core = bcma_find_core_unit(core->bus,
+						      BCMA_CORE_MAC_GBIT,
+						      ns_gmac);
+			if (ns_core && !bcma_core_is_enabled(ns_core))
+				bcma_core_enable(ns_core, 0);
+		}
+	}
+
 	err = bgmac_dma_alloc(bgmac);
 	if (err) {
 		bgmac_err(bgmac, "Unable to alloc memory for DMA\n");
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 130b16b..89fa5bc 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -189,6 +189,7 @@
 #define   BGMAC_CMDCFG_ES_10			0x00000000
 #define   BGMAC_CMDCFG_ES_100			0x00000004
 #define   BGMAC_CMDCFG_ES_1000			0x00000008
+#define   BGMAC_CMDCFG_ES_2500			0x0000000C
 #define  BGMAC_CMDCFG_PROM			0x00000010	/* Set to activate promiscuous mode */
 #define  BGMAC_CMDCFG_PAD_EN			0x00000020
 #define  BGMAC_CMDCFG_CF			0x00000040
@@ -197,7 +198,9 @@
 #define  BGMAC_CMDCFG_TAI			0x00000200
 #define  BGMAC_CMDCFG_HD			0x00000400	/* Set if in half duplex mode */
 #define  BGMAC_CMDCFG_HD_SHIFT			10
-#define  BGMAC_CMDCFG_SR			0x00000800	/* Set to reset mode */
+#define  BGMAC_CMDCFG_SR_REV0			0x00000800	/* Set to reset mode, for other revs */
+#define  BGMAC_CMDCFG_SR_REV4			0x00002000	/* Set to reset mode, only for core rev 4 */
+#define  BGMAC_CMDCFG_SR(rev)  ((rev == 4) ? BGMAC_CMDCFG_SR_REV4 : BGMAC_CMDCFG_SR_REV0)
 #define  BGMAC_CMDCFG_ML			0x00008000	/* Set to activate mac loopback mode */
 #define  BGMAC_CMDCFG_AE			0x00400000
 #define  BGMAC_CMDCFG_CFE			0x00800000
@@ -237,9 +240,34 @@
 #define  BGMAC_DMA_TX_SUSPEND			0x00000002
 #define  BGMAC_DMA_TX_LOOPBACK			0x00000004
 #define  BGMAC_DMA_TX_FLUSH			0x00000010
+#define  BGMAC_DMA_TX_MR_MASK			0x000000C0	/* Multiple outstanding reads */
+#define  BGMAC_DMA_TX_MR_SHIFT			6
+#define   BGMAC_DMA_TX_MR_1			0
+#define   BGMAC_DMA_TX_MR_2			1
 #define  BGMAC_DMA_TX_PARITY_DISABLE		0x00000800
 #define  BGMAC_DMA_TX_ADDREXT_MASK		0x00030000
 #define  BGMAC_DMA_TX_ADDREXT_SHIFT		16
+#define  BGMAC_DMA_TX_BL_MASK			0x001C0000	/* BurstLen bits */
+#define  BGMAC_DMA_TX_BL_SHIFT			18
+#define   BGMAC_DMA_TX_BL_16			0
+#define   BGMAC_DMA_TX_BL_32			1
+#define   BGMAC_DMA_TX_BL_64			2
+#define   BGMAC_DMA_TX_BL_128			3
+#define   BGMAC_DMA_TX_BL_256			4
+#define   BGMAC_DMA_TX_BL_512			5
+#define   BGMAC_DMA_TX_BL_1024			6
+#define  BGMAC_DMA_TX_PC_MASK			0x00E00000	/* Prefetch control */
+#define  BGMAC_DMA_TX_PC_SHIFT			21
+#define   BGMAC_DMA_TX_PC_0			0
+#define   BGMAC_DMA_TX_PC_4			1
+#define   BGMAC_DMA_TX_PC_8			2
+#define   BGMAC_DMA_TX_PC_16			3
+#define  BGMAC_DMA_TX_PT_MASK			0x03000000	/* Prefetch threshold */
+#define  BGMAC_DMA_TX_PT_SHIFT			24
+#define   BGMAC_DMA_TX_PT_1			0
+#define   BGMAC_DMA_TX_PT_2			1
+#define   BGMAC_DMA_TX_PT_4			2
+#define   BGMAC_DMA_TX_PT_8			3
 #define BGMAC_DMA_TX_INDEX			0x04
 #define BGMAC_DMA_TX_RINGLO			0x08
 #define BGMAC_DMA_TX_RINGHI			0x0C
@@ -267,8 +295,33 @@
 #define  BGMAC_DMA_RX_DIRECT_FIFO		0x00000100
 #define  BGMAC_DMA_RX_OVERFLOW_CONT		0x00000400
 #define  BGMAC_DMA_RX_PARITY_DISABLE		0x00000800
+#define  BGMAC_DMA_RX_MR_MASK			0x000000C0	/* Multiple outstanding reads */
+#define  BGMAC_DMA_RX_MR_SHIFT			6
+#define   BGMAC_DMA_TX_MR_1			0
+#define   BGMAC_DMA_TX_MR_2			1
 #define  BGMAC_DMA_RX_ADDREXT_MASK		0x00030000
 #define  BGMAC_DMA_RX_ADDREXT_SHIFT		16
+#define  BGMAC_DMA_RX_BL_MASK			0x001C0000	/* BurstLen bits */
+#define  BGMAC_DMA_RX_BL_SHIFT			18
+#define   BGMAC_DMA_RX_BL_16			0
+#define   BGMAC_DMA_RX_BL_32			1
+#define   BGMAC_DMA_RX_BL_64			2
+#define   BGMAC_DMA_RX_BL_128			3
+#define   BGMAC_DMA_RX_BL_256			4
+#define   BGMAC_DMA_RX_BL_512			5
+#define   BGMAC_DMA_RX_BL_1024			6
+#define  BGMAC_DMA_RX_PC_MASK			0x00E00000	/* Prefetch control */
+#define  BGMAC_DMA_RX_PC_SHIFT			21
+#define   BGMAC_DMA_RX_PC_0			0
+#define   BGMAC_DMA_RX_PC_4			1
+#define   BGMAC_DMA_RX_PC_8			2
+#define   BGMAC_DMA_RX_PC_16			3
+#define  BGMAC_DMA_RX_PT_MASK			0x03000000	/* Prefetch threshold */
+#define  BGMAC_DMA_RX_PT_SHIFT			24
+#define   BGMAC_DMA_RX_PT_1			0
+#define   BGMAC_DMA_RX_PT_2			1
+#define   BGMAC_DMA_RX_PT_4			2
+#define   BGMAC_DMA_RX_PT_8			3
 #define BGMAC_DMA_RX_INDEX			0x24
 #define BGMAC_DMA_RX_RINGLO			0x28
 #define BGMAC_DMA_RX_RINGHI			0x2C
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 4d043c3..0b3bb16 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -418,7 +418,14 @@
 	bcma_write16(cc, offset, (bcma_read16(cc, offset) & mask) | set);
 }
 
-extern struct bcma_device *bcma_find_core(struct bcma_bus *bus, u16 coreid);
+extern struct bcma_device *bcma_find_core_unit(struct bcma_bus *bus, u16 coreid,
+					       u8 unit);
+static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus,
+						 u16 coreid)
+{
+	return bcma_find_core_unit(bus, coreid, 0);
+}
+
 extern bool bcma_core_is_enabled(struct bcma_device *core);
 extern void bcma_core_disable(struct bcma_device *core, u32 flags);
 extern int bcma_core_enable(struct bcma_device *core, u32 flags);