drm/radeon: fixup tiling group size and backendmap on r6xx-r9xx (v4)

Tiling group size is always 256bits on r6xx/r7xx/r8xx/9xx. Also fix and
simplify render backend map. This now properly sets up the backend map
on r6xx-9xx which should improve 3D performance.

Vadim benchmarked also:
Some benchmarks on juniper (5750), fullscreen 1920x1080,
first result - kernel 3.4.0+ (fb21affa), second - with these patches:

Lightsmark:   91 fps => 123 fps    +35%
Doom3:        74 fps => 101 fps    +36%

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 2aa7046..a0b9806 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -41,6 +41,9 @@
 #define CAYMAN_MAX_TCC               16
 #define CAYMAN_MAX_TCC_MASK          0xFF
 
+#define CAYMAN_GB_ADDR_CONFIG_GOLDEN       0x02011003
+#define ARUBA_GB_ADDR_CONFIG_GOLDEN        0x12010001
+
 #define DMIF_ADDR_CONFIG  				0xBD4
 #define	SRBM_GFX_CNTL				        0x0E44
 #define		RINGID(x)					(((x) & 0x3) << 0)
@@ -148,6 +151,8 @@
 #define	CGTS_SYS_TCC_DISABLE				0x3F90
 #define	CGTS_USER_SYS_TCC_DISABLE			0x3F94
 
+#define RLC_GFX_INDEX           			0x3FC4
+
 #define	CONFIG_MEMSIZE					0x5428
 
 #define HDP_MEM_COHERENCY_FLUSH_CNTL			0x5480
@@ -212,6 +217,12 @@
 #define		SOFT_RESET_VGT					(1 << 14)
 #define		SOFT_RESET_IA					(1 << 15)
 
+#define GRBM_GFX_INDEX          			0x802C
+#define		INSTANCE_INDEX(x)			((x) << 0)
+#define		SE_INDEX(x)     			((x) << 16)
+#define		INSTANCE_BROADCAST_WRITES      		(1 << 30)
+#define		SE_BROADCAST_WRITES      		(1 << 31)
+
 #define	SCRATCH_REG0					0x8500
 #define	SCRATCH_REG1					0x8504
 #define	SCRATCH_REG2					0x8508