drm/radeon/kms: cut down indirects in register accesses.

We really don't want to be doing all these indirects, updating
the GPU gart table is something we do often so the less overhead the
better.

Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 3461127..87170a5 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -667,14 +667,11 @@
 	resource_size_t			rmmio_base;
 	resource_size_t			rmmio_size;
 	void				*rmmio;
-	radeon_rreg_t			mm_rreg;
-	radeon_wreg_t			mm_wreg;
 	radeon_rreg_t			mc_rreg;
 	radeon_wreg_t			mc_wreg;
 	radeon_rreg_t			pll_rreg;
 	radeon_wreg_t			pll_wreg;
-	radeon_rreg_t			pcie_rreg;
-	radeon_wreg_t			pcie_wreg;
+	uint32_t                        pcie_reg_mask;
 	radeon_rreg_t			pciep_rreg;
 	radeon_wreg_t			pciep_wreg;
 	struct radeon_clock             clock;
@@ -706,22 +703,42 @@
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
+static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	if (reg < 0x10000)
+		return readl(((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	if (reg < 0x10000)
+		writel(v, ((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
 
 /*
  * Registers read & write functions.
  */
 #define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg))
 #define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg))
-#define RREG32(reg) rdev->mm_rreg(rdev, (reg))
-#define WREG32(reg, v) rdev->mm_wreg(rdev, (reg), (v))
+#define RREG32(reg) r100_mm_rreg(rdev, (reg))
+#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
 #define WREG32_PLL(reg, v) rdev->pll_wreg(rdev, (reg), (v))
 #define RREG32_MC(reg) rdev->mc_rreg(rdev, (reg))
 #define WREG32_MC(reg, v) rdev->mc_wreg(rdev, (reg), (v))
-#define RREG32_PCIE(reg) rdev->pcie_rreg(rdev, (reg))
-#define WREG32_PCIE(reg, v) rdev->pcie_wreg(rdev, (reg), (v))
+#define RREG32_PCIE(reg) rv370_pcie_rreg(rdev, (reg))
+#define WREG32_PCIE(reg, v) rv370_pcie_wreg(rdev, (reg), (v))
 #define WREG32_P(reg, val, mask)				\
 	do {							\
 		uint32_t tmp_ = RREG32(reg);			\
@@ -737,6 +754,24 @@
 		WREG32_PLL(reg, tmp_);				\
 	} while (0)
 
+/*
+ * Indirect registers accessor
+ */
+static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	uint32_t r;
+
+	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
+	r = RREG32(RADEON_PCIE_DATA);
+	return r;
+}
+
+static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
+	WREG32(RADEON_PCIE_DATA, (v));
+}
+
 void r100_pll_errata_after_index(struct radeon_device *rdev);