drm/radeon/kms: add r600 KMS support

This adds the r600 KMS + CS support to the Linux kernel.

The r600 TTM support is quite basic and still needs more
work, especially around using interrupts, but the polled
fencing should work okay for now.

Also, TTM currently uses memcpy to do VRAM moves; the
code to use a 3D blit for this is included, but it isn't
fully debugged yet.

Authors:
Alex Deucher <alexdeucher@gmail.com>
Dave Airlie <airlied@redhat.com>
Jerome Glisse <jglisse@redhat.com>

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index c5db0c4..14c3fe6 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -46,8 +46,9 @@
 	radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \
 	radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \
 	radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
-	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \
-	radeon_test.o r200.o radeon_legacy_tv.o
+	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
+	r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
+	r600_blit_kms.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 8e31e99..a7edd0f 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -389,6 +389,7 @@
 					pll_flags |= RADEON_PLL_USE_REF_DIV;
 			}
 			radeon_encoder = to_radeon_encoder(encoder);
+			break;
 		}
 	}
 
diff --git a/drivers/gpu/drm/radeon/avivod.h b/drivers/gpu/drm/radeon/avivod.h
new file mode 100644
index 0000000..d4e6e6e
--- /dev/null
+++ b/drivers/gpu/drm/radeon/avivod.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef AVIVOD_H
+#define AVIVOD_H
+
+
+#define	D1CRTC_CONTROL					0x6080
+#define		CRTC_EN						(1 << 0)
+#define	D1CRTC_UPDATE_LOCK				0x60E8
+#define	D1GRPH_PRIMARY_SURFACE_ADDRESS			0x6110
+#define	D1GRPH_SECONDARY_SURFACE_ADDRESS		0x6118
+
+#define	D2CRTC_CONTROL					0x6880
+#define	D2CRTC_UPDATE_LOCK				0x68E8
+#define	D2GRPH_PRIMARY_SURFACE_ADDRESS			0x6910
+#define	D2GRPH_SECONDARY_SURFACE_ADDRESS		0x6918
+
+#define	D1VGA_CONTROL					0x0330
+#define		DVGA_CONTROL_MODE_ENABLE			(1 << 0)
+#define		DVGA_CONTROL_TIMING_SELECT			(1 << 8)
+#define		DVGA_CONTROL_SYNC_POLARITY_SELECT		(1 << 9)
+#define		DVGA_CONTROL_OVERSCAN_TIMING_SELECT		(1 << 10)
+#define		DVGA_CONTROL_OVERSCAN_COLOR_EN			(1 << 16)
+#define		DVGA_CONTROL_ROTATE				(1 << 24)
+#define D2VGA_CONTROL					0x0338
+
+#define	VGA_HDP_CONTROL					0x328
+#define		VGA_MEM_PAGE_SELECT_EN				(1 << 0)
+#define		VGA_MEMORY_DISABLE				(1 << 4)
+#define		VGA_RBBM_LOCK_DISABLE				(1 << 8)
+#define		VGA_SOFT_RESET					(1 << 16)
+#define	VGA_MEMORY_BASE_ADDRESS				0x0310
+#define	VGA_RENDER_CONTROL				0x0300
+#define		VGA_VSTATUS_CNTL_MASK				0x00030000
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index ee3ab62..5708c07 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -31,6 +31,8 @@
 #include "radeon_drm.h"
 #include "radeon_reg.h"
 #include "radeon.h"
+#include "r100d.h"
+
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
 
@@ -391,9 +393,9 @@
 			return r;
 		}
 	}
-	WREG32(0x774, rdev->wb.gpu_addr);
-	WREG32(0x70C, rdev->wb.gpu_addr + 1024);
-	WREG32(0x770, 0xff);
+	WREG32(RADEON_SCRATCH_ADDR, rdev->wb.gpu_addr);
+	WREG32(RADEON_CP_RB_RPTR_ADDR, rdev->wb.gpu_addr + 1024);
+	WREG32(RADEON_SCRATCH_UMSK, 0xff);
 	return 0;
 }
 
@@ -559,18 +561,18 @@
 		fw_name = FIRMWARE_R520;
 	}
 
-	err = request_firmware(&rdev->fw, fw_name, &pdev->dev);
+	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
 	platform_device_unregister(pdev);
 	if (err) {
 		printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
 		       fw_name);
-	} else if (rdev->fw->size % 8) {
+	} else if (rdev->me_fw->size % 8) {
 		printk(KERN_ERR
 		       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
-		       rdev->fw->size, fw_name);
+		       rdev->me_fw->size, fw_name);
 		err = -EINVAL;
-		release_firmware(rdev->fw);
-		rdev->fw = NULL;
+		release_firmware(rdev->me_fw);
+		rdev->me_fw = NULL;
 	}
 	return err;
 }
@@ -584,9 +586,9 @@
 		       "programming pipes. Bad things might happen.\n");
 	}
 
-	if (rdev->fw) {
-		size = rdev->fw->size / 4;
-		fw_data = (const __be32 *)&rdev->fw->data[0];
+	if (rdev->me_fw) {
+		size = rdev->me_fw->size / 4;
+		fw_data = (const __be32 *)&rdev->me_fw->data[0];
 		WREG32(RADEON_CP_ME_RAM_ADDR, 0);
 		for (i = 0; i < size; i += 2) {
 			WREG32(RADEON_CP_ME_RAM_DATAH,
@@ -632,7 +634,7 @@
 		DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
 	}
 
-	if (!rdev->fw) {
+	if (!rdev->me_fw) {
 		r = r100_cp_init_microcode(rdev);
 		if (r) {
 			DRM_ERROR("Failed to load firmware!\n");
@@ -765,6 +767,12 @@
 	return -1;
 }
 
+void r100_cp_commit(struct radeon_device *rdev)
+{
+	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);	/* hand the driver's ring write pointer to the CP */
+	(void)RREG32(RADEON_CP_RB_WPTR);	/* read back, value unused -- presumably posts the write; TODO confirm */
+}
+
 
 /*
  * CS functions
@@ -2954,3 +2962,106 @@
 			}
 	}
 }
+
+/* Ring smoke test: the CP must write 0xDEADBEEF to a scratch reg; 0 or -errno. */
+int r100_ring_test(struct radeon_device *rdev)
+{
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);	/* sentinel the CP is expected to overwrite */
+	r = radeon_ring_lock(rdev, 2);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		radeon_scratch_free(rdev, scratch);
+		return r;
+	}
+	radeon_ring_write(rdev, PACKET0(scratch, 0));
+	radeon_ring_write(rdev, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test succeeded in %u usecs\n", i);	/* i is unsigned */
+	} else {
+		DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	return r;
+}
+
+void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));	/* type-0 header; the two dwords below are its payload */
+	radeon_ring_write(rdev, ib->gpu_addr);	/* GPU address of the indirect buffer */
+	radeon_ring_write(rdev, ib->length_dw);	/* its length in dwords */
+}
+
+/*
+ * Check indirect-buffer execution: schedule an 8-dword IB whose first packet
+ * writes 0xDEADBEEF to a scratch register, wait on its fence, then poll the
+ * register.  Every exit after radeon_scratch_get() succeeds releases the
+ * scratch register, and the IB too once it has been obtained.
+ */
+int r100_ib_test(struct radeon_device *rdev)
+{
+	struct radeon_ib *ib;
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ib_get(rdev, &ib);
+	if (r)
+		goto out_scratch;
+	ib->ptr[0] = PACKET0(scratch, 0);
+	ib->ptr[1] = 0xDEADBEEF;
+	/* fill the remaining six dwords with type-2 (pad) packets */
+	for (i = 2; i < 8; i++)
+		ib->ptr[i] = PACKET2(0);
+	ib->length_dw = 8;
+	r = radeon_ib_schedule(rdev, ib);
+	if (r)
+		goto out_ib;
+	r = radeon_fence_wait(ib->fence, false);
+	if (r)
+		goto out_ib;
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF) {
+			break;
+		}
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test succeeded in %u usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+out_ib:
+	radeon_ib_free(rdev, &ib);
+out_scratch:
+	radeon_scratch_free(rdev, scratch);
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/r100d.h b/drivers/gpu/drm/radeon/r100d.h
new file mode 100644
index 0000000..6da7d92
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r100d.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __R100D_H__
+#define __R100D_H__
+
+#define CP_PACKET0			0x00000000
+#define		PACKET0_BASE_INDEX_SHIFT	0
+#define		PACKET0_BASE_INDEX_MASK		(0x1fff << 0)	/* bits 12:0, matching CP_PACKET0_GET_REG; must not reach ONE_REG_WR (bit 15) or COUNT */
+#define		PACKET0_COUNT_SHIFT		16
+#define		PACKET0_COUNT_MASK		(0x3fff << 16)
+#define CP_PACKET1			0x40000000
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+#define CP_PACKET3			0xC0000000
+#define		PACKET3_IT_OPCODE_SHIFT		8
+#define		PACKET3_IT_OPCODE_MASK		(0xff << 8)
+#define		PACKET3_COUNT_SHIFT		16
+#define		PACKET3_COUNT_MASK		(0x3fff << 16)
+/* PACKET3 op code */
+#define		PACKET3_NOP			0x10
+#define		PACKET3_3D_DRAW_VBUF		0x28
+#define		PACKET3_3D_DRAW_IMMD		0x29
+#define		PACKET3_3D_DRAW_INDX		0x2A
+#define		PACKET3_3D_LOAD_VBPNTR		0x2F
+#define		PACKET3_INDX_BUFFER		0x33
+#define		PACKET3_3D_DRAW_VBUF_2		0x34
+#define		PACKET3_3D_DRAW_IMMD_2		0x35
+#define		PACKET3_3D_DRAW_INDX_2		0x36
+#define		PACKET3_BITBLT_MULTI		0x9B
+
+#define PACKET0(reg, n)	(CP_PACKET0 |					\
+			 REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |	\
+			 REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n)	(CP_PACKET3 |					\
+			 REG_SET(PACKET3_IT_OPCODE, (op)) |		\
+			 REG_SET(PACKET3_COUNT, (n)))
+
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 33a2c55..a5f82f7 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -33,6 +33,7 @@
 #include "radeon_drm.h"
 #include "radeon_share.h"
 #include "r100_track.h"
+#include "r300d.h"
 
 #include "r300_reg_safe.h"
 
@@ -127,7 +128,7 @@
 	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
 	rv370_pcie_gart_tlb_flush(rdev);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
-		 rdev->mc.gtt_size >> 20, table_addr);
+		 (unsigned)(rdev->mc.gtt_size >> 20), table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r300.h b/drivers/gpu/drm/radeon/r300.h
deleted file mode 100644
index 8486b4d..0000000
--- a/drivers/gpu/drm/radeon/r300.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2008 Advanced Micro Devices, Inc.
- * Copyright 2008 Red Hat Inc.
- * Copyright 2009 Jerome Glisse.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Dave Airlie
- *          Alex Deucher
- *          Jerome Glisse
- */
-#ifndef R300_H
-#define R300_H
-
-struct r300_asic {
-	const unsigned	*reg_safe_bm;
-	unsigned	reg_safe_bm_size;
-};
-
-#endif
diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h
new file mode 100644
index 0000000..63ec076
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r300d.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __R300D_H__
+#define __R300D_H__
+
+#define CP_PACKET0			0x00000000
+#define		PACKET0_BASE_INDEX_SHIFT	0
+#define		PACKET0_BASE_INDEX_MASK		(0x1fff << 0)	/* bits 12:0, matching CP_PACKET0_GET_REG; must not reach ONE_REG_WR (bit 15) or COUNT */
+#define		PACKET0_COUNT_SHIFT		16
+#define		PACKET0_COUNT_MASK		(0x3fff << 16)
+#define CP_PACKET1			0x40000000
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+#define CP_PACKET3			0xC0000000
+#define		PACKET3_IT_OPCODE_SHIFT		8
+#define		PACKET3_IT_OPCODE_MASK		(0xff << 8)
+#define		PACKET3_COUNT_SHIFT		16
+#define		PACKET3_COUNT_MASK		(0x3fff << 16)
+/* PACKET3 op code */
+#define		PACKET3_NOP			0x10
+#define		PACKET3_3D_DRAW_VBUF		0x28
+#define		PACKET3_3D_DRAW_IMMD		0x29
+#define		PACKET3_3D_DRAW_INDX		0x2A
+#define		PACKET3_3D_LOAD_VBPNTR		0x2F
+#define		PACKET3_INDX_BUFFER		0x33
+#define		PACKET3_3D_DRAW_VBUF_2		0x34
+#define		PACKET3_3D_DRAW_IMMD_2		0x35
+#define		PACKET3_3D_DRAW_INDX_2		0x36
+#define		PACKET3_BITBLT_MULTI		0x9B
+
+#define PACKET0(reg, n)	(CP_PACKET0 |					\
+			 REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |	\
+			 REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n)	(CP_PACKET3 |					\
+			 REG_SET(PACKET3_IT_OPCODE, (op)) |		\
+			 REG_SET(PACKET3_COUNT, (n)))
+
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 538cd90..d8fcef4 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -25,12 +25,46 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/seq_file.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
 #include "drmP.h"
-#include "radeon_reg.h"
+#include "radeon_drm.h"
 #include "radeon.h"
+#include "radeon_mode.h"
+#include "radeon_share.h"
+#include "r600d.h"
+#include "avivod.h"
+#include "atom.h"
 
-/* r600,rv610,rv630,rv620,rv635,rv670 depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
+#define PFP_UCODE_SIZE 576
+#define PM4_UCODE_SIZE 1792
+#define R700_PFP_UCODE_SIZE 848
+#define R700_PM4_UCODE_SIZE 1360
+
+/* Firmware Names */
+MODULE_FIRMWARE("radeon/R600_pfp.bin");
+MODULE_FIRMWARE("radeon/R600_me.bin");
+MODULE_FIRMWARE("radeon/RV610_pfp.bin");
+MODULE_FIRMWARE("radeon/RV610_me.bin");
+MODULE_FIRMWARE("radeon/RV630_pfp.bin");
+MODULE_FIRMWARE("radeon/RV630_me.bin");
+MODULE_FIRMWARE("radeon/RV620_pfp.bin");
+MODULE_FIRMWARE("radeon/RV620_me.bin");
+MODULE_FIRMWARE("radeon/RV635_pfp.bin");
+MODULE_FIRMWARE("radeon/RV635_me.bin");
+MODULE_FIRMWARE("radeon/RV670_pfp.bin");
+MODULE_FIRMWARE("radeon/RV670_me.bin");
+MODULE_FIRMWARE("radeon/RS780_pfp.bin");
+MODULE_FIRMWARE("radeon/RS780_me.bin");
+MODULE_FIRMWARE("radeon/RV770_pfp.bin");
+MODULE_FIRMWARE("radeon/RV770_me.bin");
+MODULE_FIRMWARE("radeon/RV730_pfp.bin");
+MODULE_FIRMWARE("radeon/RV730_me.bin");
+MODULE_FIRMWARE("radeon/RV710_pfp.bin");
+MODULE_FIRMWARE("radeon/RV710_me.bin");
+
+int r600_debugfs_mc_info_init(struct radeon_device *rdev);
 
 /* This files gather functions specifics to:
  * r600,rv610,rv630,rv620,rv635,rv670
@@ -39,87 +73,270 @@
  */
 int r600_mc_wait_for_idle(struct radeon_device *rdev);
 void r600_gpu_init(struct radeon_device *rdev);
+void r600_fini(struct radeon_device *rdev);
 
 
 /*
- * MC
+ * R600 PCIE GART
  */
-int r600_mc_init(struct radeon_device *rdev)
+int r600_gart_clear_page(struct radeon_device *rdev, int i)
 {
-	uint32_t tmp;
+	void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
+	u64 pte = 0;
 
-	r600_gpu_init(rdev);
-
-	/* setup the gart before changing location so we can ask to
-	 * discard unmapped mc request
-	 */
-	/* FIXME: disable out of gart access */
-	tmp = rdev->mc.gtt_location / 4096;
-	tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
-	WREG32(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp);
-	tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096;
-	tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
-	WREG32(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp);
-
-	rs600_mc_disable_clients(rdev);
-	if (r600_mc_wait_for_idle(rdev)) {
-		printk(KERN_WARNING "Failed to wait MC idle while "
-		       "programming pipes. Bad things might happen.\n");
-	}
-
-	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
-	tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24);
-	tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24);
-	WREG32(R600_MC_VM_FB_LOCATION, tmp);
-	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
-	tmp = REG_SET(R600_MC_AGP_TOP, tmp >> 22);
-	WREG32(R600_MC_VM_AGP_TOP, tmp);
-	tmp = REG_SET(R600_MC_AGP_BOT, rdev->mc.gtt_location >> 22);
-	WREG32(R600_MC_VM_AGP_BOT, tmp);
+	/* 8-byte PTE slots run 0 .. num_gpu_pages - 1; i == num_gpu_pages is past the table */
+	if (i < 0 || i >= rdev->gart.num_gpu_pages)
+		return -EINVAL;
+	writeq(pte, ((void __iomem *)ptr) + (i * 8));
 	return 0;
 }
 
-void r600_mc_fini(struct radeon_device *rdev)
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev)
 {
-	/* FIXME: implement */
+	unsigned i;
+	u32 tmp;
+
+	WREG32(VM_CONTEXT0_INVALIDATION_LOW_ADDR, rdev->mc.gtt_start >> 12);
+	WREG32(VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+	WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1));
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* poll the response-type field of the request/response register */
+		tmp = RREG32(VM_CONTEXT0_REQUEST_RESPONSE);
+		tmp = (tmp & RESPONSE_TYPE_MASK) >> RESPONSE_TYPE_SHIFT;
+		if (tmp == 2) {	/* response type 2 is reported as a failed flush */
+			printk(KERN_WARNING "[drm] r600 flush TLB failed\n");
+			return;
+		}
+		if (tmp) {	/* any other non-zero response ends the wait */
+			return;
+		}
+		udelay(1);
+	}	/* NOTE(review): running out of usec_timeout polls returns silently */
 }
 
-
-/*
- * Global GPU functions
- */
-void r600_errata(struct radeon_device *rdev)
+int r600_pcie_gart_enable(struct radeon_device *rdev)
 {
-	rdev->pll_errata = 0;
+	u32 tmp;
+	int r, i;
+
+	/* Initialize common gart structure */
+	r = radeon_gart_init(rdev);
+	if (r) {
+		return r;
+	}
+	rdev->gart.table_size = rdev->gart.num_gpu_pages * 8;	/* one 8-byte PTE per GPU page */
+	r = radeon_gart_table_vram_alloc(rdev);
+	if (r) {
+		return r;
+	}
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)	/* start with every PTE zeroed */
+		r600_gart_clear_page(rdev, i);
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
+				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
+	/* Setup TLB control: one base value, with per-client extra flags below */
+	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
+		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
+		ENABLE_WAIT_L2_QUERY;
+	WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp | ENABLE_L1_STRICT_ORDERING);
+	WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
+	WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+			(u32)(rdev->dummy_page.addr >> 12));
+	for (i = 1; i < 7; i++)	/* zero the six control registers after VM_CONTEXT0_CNTL */
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
+
+	r600_pcie_gart_tlb_flush(rdev);
+	rdev->gart.ready = true;
+	return 0;
+}
+
+void r600_pcie_gart_disable(struct radeon_device *rdev)
+{
+	u32 tmp;
+	int i;
+
+	/* Clear every PTE, then flush so the cleared entries take effect */
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		r600_gart_clear_page(rdev, i);
+	r600_pcie_gart_tlb_flush(rdev);
+	/* Disable all tables */
+	for (i = 0; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
+
+	/* Disable L2 cache: same write as in enable minus ENABLE_L2_CACHE and the LRU flag */
+	WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
+	/* Setup L1 TLB control: sizes and L2-query wait only, ENABLE_L1_TLB left clear */
+	tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
+		ENABLE_WAIT_L2_QUERY;
+	WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
 }
 
 int r600_mc_wait_for_idle(struct radeon_device *rdev)
 {
-	/* FIXME: implement */
-	return 0;
+	unsigned i;
+	u32 tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* bits 0x3F00 of SRBM_STATUS -- presumably the MC busy flags; TODO confirm */
+		tmp = RREG32(R_000E50_SRBM_STATUS) & 0x3F00;
+		if (!tmp)
+			return 0;
+		udelay(1);
+	}
+	return -1;	/* masked bits still set after usec_timeout polls */
 }
 
-void r600_gpu_init(struct radeon_device *rdev)
+static void r600_mc_resume(struct radeon_device *rdev)
 {
-	/* FIXME: implement */
+	u32 d1vga_control, d2vga_control;
+	u32 vga_render_control, vga_hdp_control;
+	u32 d1crtc_control, d2crtc_control;
+	u32 new_d1grph_primary, new_d1grph_secondary;
+	u32 new_d2grph_primary, new_d2grph_secondary;
+	u64 old_vram_start;
+	u32 tmp;
+	int i, j;
+
+	/* Initialize HDP: zero five registers in each of 32 blocks spaced 0x18 apart */
+	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+	}
+	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
+
+	d1vga_control = RREG32(D1VGA_CONTROL);
+	d2vga_control = RREG32(D2VGA_CONTROL);
+	vga_render_control = RREG32(VGA_RENDER_CONTROL);
+	vga_hdp_control = RREG32(VGA_HDP_CONTROL);
+	d1crtc_control = RREG32(D1CRTC_CONTROL);
+	d2crtc_control = RREG32(D2CRTC_CONTROL);
+	old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;	/* current FB base: low 16 bits, scaled by 1 << 24 */
+	new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS);
+	new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS);
+	new_d1grph_primary += rdev->mc.vram_start - old_vram_start;	/* shift surface addresses by the VRAM base change */
+	new_d1grph_secondary += rdev->mc.vram_start - old_vram_start;
+	new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS);
+	new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS);
+	new_d2grph_primary += rdev->mc.vram_start - old_vram_start;
+	new_d2grph_secondary += rdev->mc.vram_start - old_vram_start;
+
+	/* Stop all video */
+	WREG32(D1VGA_CONTROL, 0);
+	WREG32(D2VGA_CONTROL, 0);
+	WREG32(VGA_RENDER_CONTROL, 0);
+	WREG32(D1CRTC_UPDATE_LOCK, 1);
+	WREG32(D2CRTC_UPDATE_LOCK, 1);
+	WREG32(D1CRTC_CONTROL, 0);
+	WREG32(D2CRTC_CONTROL, 0);
+	WREG32(D1CRTC_UPDATE_LOCK, 0);
+	WREG32(D2CRTC_UPDATE_LOCK, 0);
+
+	mdelay(1);
+	if (r600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "[drm] MC not idle !\n");
+	}
+
+	/* Lock out access through the VGA aperture */
+	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+
+	/* Update configuration */
+	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
+	tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16;	/* last VRAM address >> 24 goes in bits 31:16 */
+	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);	/* VRAM start >> 24 goes in bits 15:0 */
+	WREG32(MC_VM_FB_LOCATION, tmp);
+	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+	WREG32(HDP_NONSURFACE_INFO, (2 << 7));
+	WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF);
+	if (rdev->flags & RADEON_IS_AGP) {
+		WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16);
+		WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
+		WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
+	} else {
+		WREG32(MC_VM_AGP_BASE, 0);
+		WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+		WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+	}
+	WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary);
+	WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary);
+	WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary);
+	WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary);
+	WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
+
+	/* Unlock host access: put back the VGA_HDP_CONTROL value saved above */
+	WREG32(VGA_HDP_CONTROL, vga_hdp_control);
+
+	mdelay(1);
+	if (r600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "[drm] MC not idle !\n");
+	}
+
+	/* Restore video state saved before the reconfiguration */
+	WREG32(D1CRTC_UPDATE_LOCK, 1);
+	WREG32(D2CRTC_UPDATE_LOCK, 1);
+	WREG32(D1CRTC_CONTROL, d1crtc_control);
+	WREG32(D2CRTC_CONTROL, d2crtc_control);
+	WREG32(D1CRTC_UPDATE_LOCK, 0);
+	WREG32(D2CRTC_UPDATE_LOCK, 0);
+	WREG32(D1VGA_CONTROL, d1vga_control);
+	WREG32(D2VGA_CONTROL, d2vga_control);
+	WREG32(VGA_RENDER_CONTROL, vga_render_control);
 }
 
-
-/*
- * VRAM info
- */
-void r600_vram_get_type(struct radeon_device *rdev)
+int r600_mc_init(struct radeon_device *rdev)
 {
-	uint32_t tmp;
+	fixed20_12 a;
+	u32 tmp;
 	int chansize;
+	int r;
 
+	/* Get VRAM informations */
 	rdev->mc.vram_width = 128;
 	rdev->mc.vram_is_ddr = true;
-
-	tmp = RREG32(R600_RAMCFG);
-	if (tmp & R600_CHANSIZE_OVERRIDE) {
+	tmp = RREG32(RAMCFG);
+	if (tmp & CHANSIZE_OVERRIDE) {
 		chansize = 16;
-	} else if (tmp & R600_CHANSIZE) {
+	} else if (tmp & CHANSIZE_MASK) {
 		chansize = 64;
 	} else {
 		chansize = 32;
@@ -135,36 +352,1391 @@
 			(rdev->family == CHIP_RV635)) {
 		rdev->mc.vram_width = 2 * chansize;
 	}
-}
-
-void r600_vram_info(struct radeon_device *rdev)
-{
-	r600_vram_get_type(rdev);
-	rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE);
-	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
-
 	/* Could aper size report 0 ? */
 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	/* Setup GPU memory space */
+	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
+	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r)
+			return r;
+		/* gtt_size is setup by radeon_agp_init */
+		rdev->mc.gtt_location = rdev->mc.agp_base;
+		tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;
+		/* Try to put vram before or after AGP because we
+		 * we want SYSTEM_APERTURE to cover both VRAM and
+		 * AGP so that GPU can catch out of VRAM/AGP access
+		 */
+		if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
+			/* Enought place before */
+			rdev->mc.vram_location = rdev->mc.gtt_location -
+							rdev->mc.mc_vram_size;
+		} else if (tmp > rdev->mc.mc_vram_size) {
+			/* Enought place after */
+			rdev->mc.vram_location = rdev->mc.gtt_location +
+							rdev->mc.gtt_size;
+		} else {
+			/* Try to setup VRAM then AGP might not
+			 * not work on some card
+			 */
+			rdev->mc.vram_location = 0x00000000UL;
+			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+		}
+	} else {
+		if (rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) {
+			rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) &
+								0xFFFF) << 24;
+			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+			tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+			if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
+				/* Enough place after vram */
+				rdev->mc.gtt_location = tmp;
+			} else if (rdev->mc.vram_location >= rdev->mc.gtt_size) {
+				/* Enough place before vram */
+				rdev->mc.gtt_location = 0;
+			} else {
+				/* Not enough place after or before shrink
+				 * gart size
+				 */
+				if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) {
+					rdev->mc.gtt_location = 0;
+					rdev->mc.gtt_size = rdev->mc.vram_location;
+				} else {
+					rdev->mc.gtt_location = tmp;
+					rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp;
+				}
+			}
+			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+		} else {
+			rdev->mc.vram_location = 0x00000000UL;
+			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+		}
+	}
+	rdev->mc.vram_start = rdev->mc.vram_location;
+	rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+	rdev->mc.gtt_start = rdev->mc.gtt_location;
+	rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size;
+	/* FIXME: we should enforce default clock in case GPU is not in
+	 * default setup
+	 */
+	a.full = rfixed_const(100);
+	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
+	return 0;
 }
 
+/* Soft-reset the GPU. No check is made that a reset is really needed:
+ * that is up to the caller (a helper for that check may be added later).
+ * Halts the CP, resets busy GRBM/SRBM blocks and the CP; returns 0.
+ */
+int r600_gpu_soft_reset(struct radeon_device *rdev)
+{
+	u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) |
+				S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) |
+				S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) |
+				S_008010_SH_BUSY(1) | S_008010_SPI03_BUSY(1) |
+				S_008010_SMX_BUSY(1) | S_008010_SC_BUSY(1) |
+				S_008010_PA_BUSY(1) | S_008010_DB03_BUSY(1) |
+				S_008010_CR_BUSY(1) | S_008010_CB03_BUSY(1) |
+				S_008010_GUI_ACTIVE(1);
+	u32 grbm2_busy_mask = S_008014_SPI0_BUSY(1) | S_008014_SPI1_BUSY(1) |
+			S_008014_SPI2_BUSY(1) | S_008014_SPI3_BUSY(1) |
+			S_008014_TA0_BUSY(1) | S_008014_TA1_BUSY(1) |
+			S_008014_TA2_BUSY(1) | S_008014_TA3_BUSY(1) |
+			S_008014_DB0_BUSY(1) | S_008014_DB1_BUSY(1) |
+			S_008014_DB2_BUSY(1) | S_008014_DB3_BUSY(1) |
+			S_008014_CB0_BUSY(1) | S_008014_CB1_BUSY(1) |
+			S_008014_CB2_BUSY(1) | S_008014_CB3_BUSY(1);
+	u32 srbm_reset = 0; /* SRBM soft-reset bits, accumulated from SRBM_STATUS below */
+
+	/* Disable CP parsing/prefetching */
+	WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(0xff));
+	/* If any rendering block reports busy, reset all of them together */
+	if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) ||
+	    (RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) {
+		WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CR(1) |
+			S_008020_SOFT_RESET_DB(1) |
+			S_008020_SOFT_RESET_CB(1) |
+			S_008020_SOFT_RESET_PA(1) |
+			S_008020_SOFT_RESET_SC(1) |
+			S_008020_SOFT_RESET_SMX(1) |
+			S_008020_SOFT_RESET_SPI(1) |
+			S_008020_SOFT_RESET_SX(1) |
+			S_008020_SOFT_RESET_SH(1) |
+			S_008020_SOFT_RESET_TC(1) |
+			S_008020_SOFT_RESET_TA(1) |
+			S_008020_SOFT_RESET_VC(1) |
+			S_008020_SOFT_RESET_VGT(1));
+		(void)RREG32(R_008020_GRBM_SOFT_RESET); /* read the register back before the delay */
+		udelay(50);
+		WREG32(R_008020_GRBM_SOFT_RESET, 0); /* release the reset bits again */
+		(void)RREG32(R_008020_GRBM_SOFT_RESET);
+	}
+	/* Reset CP (we always reset CP) */
+	WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CP(1));
+	(void)RREG32(R_008020_GRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(R_008020_GRBM_SOFT_RESET, 0);
+	(void)RREG32(R_008020_GRBM_SOFT_RESET);
+	/* Reset other GPU blocks if necessary; SRBM_STATUS is re-read for every test */
+	if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_RLC(1);
+	if (G_000E50_GRBM_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_GRBM(1);
+	if (G_000E50_HI_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_IH(1);
+	if (G_000E50_VMC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_VMC(1);
+	if (G_000E50_MCB_BUSY(RREG32(R_000E50_SRBM_STATUS))) /* any busy MC sub-block requests the same MC reset bit */
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_MCDZ_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_MCDY_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_MCDX_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_MCDW_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+	if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS))) /* NOTE(review): RLC_BUSY was already tested above; this repeats it */
+		srbm_reset |= S_000E60_SOFT_RESET_RLC(1);
+	if (G_000E50_SEM_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+		srbm_reset |= S_000E60_SOFT_RESET_SEM(1);
+	WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset); /* written even when no bit was collected */
+	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(R_000E60_SRBM_SOFT_RESET, 0);
+	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
+	/* Wait a little for things to settle down */
+	udelay(50);
+	return 0; /* always 0: nothing above reports failure */
+}
+
+int r600_gpu_reset(struct radeon_device *rdev)
+{
+	return r600_gpu_soft_reset(rdev); /* only a soft reset is performed; its result is passed through */
+}
+
+static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, /* returns one 2-bit backend id per tile pipe, packed */
+					     u32 num_backends,
+					     u32 backend_disable_mask) /* bit i set: backend i must not be used */
+{
+	u32 backend_map = 0;
+	u32 enabled_backends_mask;
+	u32 enabled_backends_count;
+	u32 cur_pipe;
+	u32 swizzle_pipe[R6XX_MAX_PIPES]; /* per pipe: index of its 2-bit field in backend_map */
+	u32 cur_backend;
+	u32 i;
+
+	if (num_tile_pipes > R6XX_MAX_PIPES) /* clamp both counts to [1, max] */
+		num_tile_pipes = R6XX_MAX_PIPES;
+	if (num_tile_pipes < 1)
+		num_tile_pipes = 1;
+	if (num_backends > R6XX_MAX_BACKENDS)
+		num_backends = R6XX_MAX_BACKENDS;
+	if (num_backends < 1)
+		num_backends = 1;
+
+	enabled_backends_mask = 0;
+	enabled_backends_count = 0;
+	for (i = 0; i < R6XX_MAX_BACKENDS; ++i) { /* pick the lowest num_backends backends that are not disabled */
+		if (((backend_disable_mask >> i) & 1) == 0) {
+			enabled_backends_mask |= (1 << i);
+			++enabled_backends_count;
+		}
+		if (enabled_backends_count == num_backends)
+			break;
+	}
+
+	if (enabled_backends_count == 0) { /* everything masked off: fall back to backend 0 */
+		enabled_backends_mask = 1;
+		enabled_backends_count = 1;
+	}
+
+	if (enabled_backends_count != num_backends) /* num_backends is not read again after this point */
+		num_backends = enabled_backends_count;
+
+	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES); /* pipe counts without a case below keep an all-zero swizzle */
+	switch (num_tile_pipes) { /* 1-5 pipes: identity order; 6-8 pipes: reordered as listed */
+	case 1:
+		swizzle_pipe[0] = 0;
+		break;
+	case 2:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		break;
+	case 3:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		swizzle_pipe[2] = 2;
+		break;
+	case 4:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		swizzle_pipe[2] = 2;
+		swizzle_pipe[3] = 3;
+		break;
+	case 5:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		swizzle_pipe[2] = 2;
+		swizzle_pipe[3] = 3;
+		swizzle_pipe[4] = 4;
+		break;
+	case 6:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 5;
+		swizzle_pipe[4] = 1;
+		swizzle_pipe[5] = 3;
+		break;
+	case 7:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 6;
+		swizzle_pipe[4] = 1;
+		swizzle_pipe[5] = 3;
+		swizzle_pipe[6] = 5;
+		break;
+	case 8:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 6;
+		swizzle_pipe[4] = 1;
+		swizzle_pipe[5] = 3;
+		swizzle_pipe[6] = 5;
+		swizzle_pipe[7] = 7;
+		break;
+	}
+
+	cur_backend = 0;
+	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { /* hand enabled backends to the pipes in rotation */
+		while (((1 << cur_backend) & enabled_backends_mask) == 0) /* skip backends that are not enabled */
+			cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
+
+		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); /* only the low 2 bits of the id are stored */
+
+		cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS;
+	}
+
+	return backend_map;
+}
+
+int r600_count_pipe_bits(uint32_t val)
+{
+	int set_bits = 0;	/* number of 1 bits found so far */
+
+	/* Consume val from the low bit upwards; once it reaches zero the
+	 * remaining positions cannot add to the count. */
+	for (; val != 0; val >>= 1)
+		set_bits += val & 1;
+	return set_bits;
+}
+
+/* Program the default 3D engine state: per-family limits, tiling and
+ * backend map, shader pipe setup and SQ/VGT/PA/CB/TC default registers.
+ */
+void r600_gpu_init(struct radeon_device *rdev)
+{
+	u32 tiling_config;
+	u32 ramcfg;
+	u32 tmp;
+	int i, j;
+	u32 sq_config;
+	u32 sq_gpr_resource_mgmt_1 = 0;
+	u32 sq_gpr_resource_mgmt_2 = 0;
+	u32 sq_thread_resource_mgmt = 0;
+	u32 sq_stack_resource_mgmt_1 = 0;
+	u32 sq_stack_resource_mgmt_2 = 0;
+
+	/* Per-family limits. FIXME: families without a case are left unconfigured */
+	switch (rdev->family) {
+	case CHIP_R600:
+		rdev->config.r600.max_pipes = 4;
+		rdev->config.r600.max_tile_pipes = 8;
+		rdev->config.r600.max_simds = 4;
+		rdev->config.r600.max_backends = 4;
+		rdev->config.r600.max_gprs = 256;
+		rdev->config.r600.max_threads = 192;
+		rdev->config.r600.max_stack_entries = 256;
+		rdev->config.r600.max_hw_contexts = 8;
+		rdev->config.r600.max_gs_threads = 16;
+		rdev->config.r600.sx_max_export_size = 128;
+		rdev->config.r600.sx_max_export_pos_size = 16;
+		rdev->config.r600.sx_max_export_smx_size = 128;
+		rdev->config.r600.sq_num_cf_insts = 2;
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+		rdev->config.r600.max_pipes = 2;
+		rdev->config.r600.max_tile_pipes = 2;
+		rdev->config.r600.max_simds = 3;
+		rdev->config.r600.max_backends = 1;
+		rdev->config.r600.max_gprs = 128;
+		rdev->config.r600.max_threads = 192;
+		rdev->config.r600.max_stack_entries = 128;
+		rdev->config.r600.max_hw_contexts = 8;
+		rdev->config.r600.max_gs_threads = 4;
+		rdev->config.r600.sx_max_export_size = 128;
+		rdev->config.r600.sx_max_export_pos_size = 16;
+		rdev->config.r600.sx_max_export_smx_size = 128;
+		rdev->config.r600.sq_num_cf_insts = 2;
+		break;
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+		rdev->config.r600.max_pipes = 1;
+		rdev->config.r600.max_tile_pipes = 1;
+		rdev->config.r600.max_simds = 2;
+		rdev->config.r600.max_backends = 1;
+		rdev->config.r600.max_gprs = 128;
+		rdev->config.r600.max_threads = 192;
+		rdev->config.r600.max_stack_entries = 128;
+		rdev->config.r600.max_hw_contexts = 4;
+		rdev->config.r600.max_gs_threads = 4;
+		rdev->config.r600.sx_max_export_size = 128;
+		rdev->config.r600.sx_max_export_pos_size = 16;
+		rdev->config.r600.sx_max_export_smx_size = 128;
+		rdev->config.r600.sq_num_cf_insts = 1;
+		break;
+	case CHIP_RV670:
+		rdev->config.r600.max_pipes = 4;
+		rdev->config.r600.max_tile_pipes = 4;
+		rdev->config.r600.max_simds = 4;
+		rdev->config.r600.max_backends = 4;
+		rdev->config.r600.max_gprs = 192;
+		rdev->config.r600.max_threads = 192;
+		rdev->config.r600.max_stack_entries = 256;
+		rdev->config.r600.max_hw_contexts = 8;
+		rdev->config.r600.max_gs_threads = 16;
+		rdev->config.r600.sx_max_export_size = 128;
+		rdev->config.r600.sx_max_export_pos_size = 16;
+		rdev->config.r600.sx_max_export_smx_size = 128;
+		rdev->config.r600.sq_num_cf_insts = 2;
+		break;
+	default:
+		break;
+	}
+
+	/* Initialize HDP: zero five registers in each of 32 blocks spaced 0x18 apart */
+	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+	}
+
+	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
+
+	/* Setup tiling */
+	tiling_config = 0;
+	ramcfg = RREG32(RAMCFG);
+	switch (rdev->config.r600.max_tile_pipes) { /* PIPE_TILING takes log2 of the tile pipe count */
+	case 1:
+		tiling_config |= PIPE_TILING(0);
+		break;
+	case 2:
+		tiling_config |= PIPE_TILING(1);
+		break;
+	case 4:
+		tiling_config |= PIPE_TILING(2);
+		break;
+	case 8:
+		tiling_config |= PIPE_TILING(3);
+		break;
+	default:
+		break;
+	}
+	tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
+	tiling_config |= GROUP_SIZE(0);
+	tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT;
+	if (tmp > 3) { /* row and sample-split fields are capped at 3 */
+		tiling_config |= ROW_TILING(3);
+		tiling_config |= SAMPLE_SPLIT(3);
+	} else {
+		tiling_config |= ROW_TILING(tmp);
+		tiling_config |= SAMPLE_SPLIT(tmp);
+	}
+	tiling_config |= BANK_SWAPS(1);
+	tmp = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes,
+						rdev->config.r600.max_backends,
+						(0xff << rdev->config.r600.max_backends) & 0xff);
+	tiling_config |= BACKEND_MAP(tmp);
+	WREG32(GB_TILING_CONFIG, tiling_config);
+	WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); /* DCP and HDP only receive the low 16 bits */
+	WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
+
+	tmp = BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK);
+	WREG32(CC_RB_BACKEND_DISABLE, tmp);
+
+	/* Setup pipes: mark every pipe/SIMD above the family limit inactive */
+	tmp = INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK);
+	tmp |= INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK);
+	WREG32(CC_GC_SHADER_PIPE_CONFIG, tmp);
+	WREG32(GC_USER_SHADER_PIPE_CONFIG, tmp);
+
+	tmp = R6XX_MAX_BACKENDS - r600_count_pipe_bits(tmp & INACTIVE_QD_PIPES_MASK); /* NOTE(review): inactive pipes are subtracted from the backend maximum - confirm */
+	WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
+	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK);
+
+	/* Setup some CP states */
+	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b)));
+	WREG32(CP_MEQ_THRESHOLDS, (MEQ_END(0x40) | ROQ_END(0x40)));
+
+	WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | SYNC_GRADIENT |
+			     SYNC_WALKER | SYNC_ALIGNER));
+	/* Setup various GPU states */
+	if (rdev->family == CHIP_RV670)
+		WREG32(ARB_GDEC_RD_CNTL, 0x00000021);
+
+	tmp = RREG32(SX_DEBUG_1);
+	tmp |= SMX_EVENT_RELEASE;
+	if ((rdev->family > CHIP_R600))
+		tmp |= ENABLE_NEW_SMX_ADDRESS;
+	WREG32(SX_DEBUG_1, tmp);
+
+	if (rdev->family == CHIP_R600 || rdev->family == CHIP_RV630 ||
+	    rdev->family == CHIP_RV610 || rdev->family == CHIP_RV620 ||
+	    rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) {
+		WREG32(DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE);
+	} else {
+		WREG32(DB_DEBUG, 0);
+	}
+	WREG32(DB_WATERMARKS, (DEPTH_FREE(4) | DEPTH_CACHELINE_FREE(16) |
+			       DEPTH_FLUSH(16) | DEPTH_PENDING_FREE(4)));
+
+	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+	WREG32(VGT_NUM_INSTANCES, 0);
+
+	WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
+	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(0));
+
+	tmp = RREG32(SQ_MS_FIFO_SIZES);
+	if (rdev->family == CHIP_RV610 || rdev->family == CHIP_RV620 ||
+	    rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) {
+		tmp = (CACHE_FIFO_SIZE(0xa) |
+		       FETCH_FIFO_HIWATER(0xa) |
+		       DONE_FIFO_HIWATER(0xe0) |
+		       ALU_UPDATE_FIFO_HIWATER(0x8));
+	} else if (((rdev->family) == CHIP_R600) ||
+		   ((rdev->family) == CHIP_RV630)) {
+		tmp &= ~DONE_FIFO_HIWATER(0xff);
+		tmp |= DONE_FIFO_HIWATER(0x4);
+	}
+	WREG32(SQ_MS_FIFO_SIZES, tmp);
+
+	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
+	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
+	 */
+	sq_config = RREG32(SQ_CONFIG);
+	sq_config &= ~(PS_PRIO(3) |
+		       VS_PRIO(3) |
+		       GS_PRIO(3) |
+		       ES_PRIO(3));
+	sq_config |= (DX9_CONSTS |
+		      VC_ENABLE |
+		      PS_PRIO(0) |
+		      VS_PRIO(1) |
+		      GS_PRIO(2) |
+		      ES_PRIO(3));
+
+	if ((rdev->family) == CHIP_R600) {
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(124) |
+					  NUM_VS_GPRS(124) |
+					  NUM_CLAUSE_TEMP_GPRS(4));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(0) |
+					  NUM_ES_GPRS(0));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(136) |
+					   NUM_VS_THREADS(48) |
+					   NUM_GS_THREADS(4) |
+					   NUM_ES_THREADS(4));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(128) |
+					    NUM_VS_STACK_ENTRIES(128));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(0) |
+					    NUM_ES_STACK_ENTRIES(0));
+	} else if (rdev->family == CHIP_RV610 || rdev->family == CHIP_RV620 ||
+		   rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) {
+		/* no vertex cache */
+		sq_config &= ~VC_ENABLE;
+
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+					  NUM_VS_GPRS(44) |
+					  NUM_CLAUSE_TEMP_GPRS(2));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
+					  NUM_ES_GPRS(17));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+					   NUM_VS_THREADS(78) |
+					   NUM_GS_THREADS(4) |
+					   NUM_ES_THREADS(31));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
+					    NUM_VS_STACK_ENTRIES(40));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
+					    NUM_ES_STACK_ENTRIES(16));
+	} else if (((rdev->family) == CHIP_RV630) ||
+		   ((rdev->family) == CHIP_RV635)) {
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+					  NUM_VS_GPRS(44) |
+					  NUM_CLAUSE_TEMP_GPRS(2));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(18) |
+					  NUM_ES_GPRS(18));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+					   NUM_VS_THREADS(78) |
+					   NUM_GS_THREADS(4) |
+					   NUM_ES_THREADS(31));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
+					    NUM_VS_STACK_ENTRIES(40));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
+					    NUM_ES_STACK_ENTRIES(16));
+	} else if ((rdev->family) == CHIP_RV670) {
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+					  NUM_VS_GPRS(44) |
+					  NUM_CLAUSE_TEMP_GPRS(2));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
+					  NUM_ES_GPRS(17));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+					   NUM_VS_THREADS(78) |
+					   NUM_GS_THREADS(4) |
+					   NUM_ES_THREADS(31));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(64) |
+					    NUM_VS_STACK_ENTRIES(64));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(64) |
+					    NUM_ES_STACK_ENTRIES(64));
+	}
+
+	WREG32(SQ_CONFIG, sq_config);
+	WREG32(SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
+	WREG32(SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
+	WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
+	WREG32(SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
+	WREG32(SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
+
+	if (rdev->family == CHIP_RV610 || rdev->family == CHIP_RV620 ||
+	    rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) {
+		WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(TC_ONLY));
+	} else {
+		WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC));
+	}
+
+	/* More default values. 2D/3D driver should adjust as needed */
+	WREG32(PA_SC_AA_SAMPLE_LOCS_2S, (S0_X(0xc) | S0_Y(0x4) |
+					 S1_X(0x4) | S1_Y(0xc)));
+	WREG32(PA_SC_AA_SAMPLE_LOCS_4S, (S0_X(0xe) | S0_Y(0xe) |
+					 S1_X(0x2) | S1_Y(0x2) |
+					 S2_X(0xa) | S2_Y(0x6) |
+					 S3_X(0x6) | S3_Y(0xa)));
+	WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD0, (S0_X(0xe) | S0_Y(0xb) |
+					     S1_X(0x4) | S1_Y(0xc) |
+					     S2_X(0x1) | S2_Y(0x6) |
+					     S3_X(0xa) | S3_Y(0xe)));
+	WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD1, (S4_X(0x6) | S4_Y(0x1) |
+					     S5_X(0x0) | S5_Y(0x0) |
+					     S6_X(0xb) | S6_Y(0x4) |
+					     S7_X(0x7) | S7_Y(0x8)));
+
+	WREG32(VGT_STRMOUT_EN, 0);
+	tmp = rdev->config.r600.max_pipes * 16;
+	switch (rdev->family) {
+	case CHIP_RV610:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	case CHIP_RV620:
+		tmp += 32;
+		break;
+	case CHIP_RV670:
+		tmp += 128;
+		break;
+	default:
+		break;
+	}
+	if (tmp > 256) {
+		tmp = 256;
+	}
+	WREG32(VGT_ES_PER_GS, 128);
+	WREG32(VGT_GS_PER_ES, tmp);
+	WREG32(VGT_GS_PER_VS, 2);
+	WREG32(VGT_GS_VERTEX_REUSE, 16);
+
+	/* more default values. 2D/3D driver should adjust as needed */
+	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
+	WREG32(VGT_STRMOUT_EN, 0);
+	WREG32(SX_MISC, 0);
+	WREG32(PA_SC_MODE_CNTL, 0);
+	WREG32(PA_SC_AA_CONFIG, 0);
+	WREG32(PA_SC_LINE_STIPPLE, 0);
+	WREG32(SPI_INPUT_Z, 0);
+	WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
+	WREG32(CB_COLOR7_FRAG, 0);
+
+	/* Clear render buffer base addresses */
+	WREG32(CB_COLOR0_BASE, 0);
+	WREG32(CB_COLOR1_BASE, 0);
+	WREG32(CB_COLOR2_BASE, 0);
+	WREG32(CB_COLOR3_BASE, 0);
+	WREG32(CB_COLOR4_BASE, 0);
+	WREG32(CB_COLOR5_BASE, 0);
+	WREG32(CB_COLOR6_BASE, 0);
+	WREG32(CB_COLOR7_BASE, 0);
+	WREG32(CB_COLOR7_FRAG, 0);
+
+	switch (rdev->family) {
+	case CHIP_RV610:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	case CHIP_RV620:
+		tmp = TC_L2_SIZE(8);
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+		tmp = TC_L2_SIZE(4);
+		break;
+	case CHIP_R600:
+		tmp = TC_L2_SIZE(0) | L2_DISABLE_LATE_HIT;
+		break;
+	default:
+		tmp = TC_L2_SIZE(0);
+		break;
+	}
+	WREG32(TC_CNTL, tmp);
+
+	tmp = RREG32(HDP_HOST_PATH_CNTL); /* value is written back unmodified */
+	WREG32(HDP_HOST_PATH_CNTL, tmp);
+
+	tmp = RREG32(ARB_POP);
+	tmp |= ENABLE_TC128;
+	WREG32(ARB_POP, tmp);
+
+	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+	WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
+			       NUM_CLIP_SEQ(3)));
+	WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095));
+}
+
+
 /*
  * Indirect registers accessor
  */
-uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg)
+u32 r600_pciep_rreg(struct radeon_device *rdev, u32 reg) /* read PCIE port register reg through the index/data pair */
 {
-	uint32_t r;
+	u32 r;
 
-	WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
-	(void)RREG32(R600_PCIE_PORT_INDEX);
-	r = RREG32(R600_PCIE_PORT_DATA);
+	WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); /* only the low 8 bits of reg select the register */
+	(void)RREG32(PCIE_PORT_INDEX); /* read the index back before using the data register */
+	r = RREG32(PCIE_PORT_DATA);
 	return r;
 }
 
-void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) /* write v to PCIE port register reg through the index/data pair */
 {
-	WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
-	(void)RREG32(R600_PCIE_PORT_INDEX);
-	WREG32(R600_PCIE_PORT_DATA, (v));
-	(void)RREG32(R600_PCIE_PORT_DATA);
+	WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); /* only the low 8 bits of reg select the register */
+	(void)RREG32(PCIE_PORT_INDEX); /* read the index back before writing the data */
+	WREG32(PCIE_PORT_DATA, (v));
+	(void)RREG32(PCIE_PORT_DATA); /* read the data register back after the write */
+}
+
+
+/*
+ * CP & Ring
+ */
+void r600_cp_stop(struct radeon_device *rdev)
+{
+	WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); /* stop the CP by setting the halt field of CP_ME_CNTL */
+}
+
+int r600_cp_init_microcode(struct radeon_device *rdev) /* load PFP and ME firmware into rdev->pfp_fw / rdev->me_fw; 0 or negative error */
+{
+	struct platform_device *pdev;
+	const char *chip_name;
+	size_t pfp_req_size, me_req_size;
+	char fw_name[30];
+	int err;
+
+	DRM_DEBUG("\n");
+
+	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0); /* temporary device, only passed to request_firmware() */
+	err = IS_ERR(pdev);
+	if (err) {
+		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
+		return -EINVAL;
+	}
+
+	switch (rdev->family) { /* pick the firmware file name prefix */
+	case CHIP_R600: chip_name = "R600"; break;
+	case CHIP_RV610: chip_name = "RV610"; break;
+	case CHIP_RV630: chip_name = "RV630"; break;
+	case CHIP_RV620: chip_name = "RV620"; break;
+	case CHIP_RV635: chip_name = "RV635"; break;
+	case CHIP_RV670: chip_name = "RV670"; break;
+	case CHIP_RS780:
+	case CHIP_RS880: chip_name = "RS780"; break; /* RS880 uses the RS780 images */
+	case CHIP_RV770: chip_name = "RV770"; break;
+	case CHIP_RV730:
+	case CHIP_RV740: chip_name = "RV730"; break; /* RV740 uses the RV730 images */
+	case CHIP_RV710: chip_name = "RV710"; break;
+	default: BUG();
+	}
+
+	if (rdev->family >= CHIP_RV770) { /* expected image sizes in bytes */
+		pfp_req_size = R700_PFP_UCODE_SIZE * 4;
+		me_req_size = R700_PM4_UCODE_SIZE * 4;
+	} else {
+		pfp_req_size = PFP_UCODE_SIZE * 4;
+		me_req_size = PM4_UCODE_SIZE * 12;
+	}
+
+	DRM_INFO("Loading %s CP Microcode\n", chip_name);
+
+	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
+	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
+	if (err)
+		goto out;
+	if (rdev->pfp_fw->size != pfp_req_size) { /* reject images of unexpected length */
+		printk(KERN_ERR
+		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
+		       rdev->pfp_fw->size, fw_name);
+		err = -EINVAL;
+		goto out;
+	}
+
+	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
+	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
+	if (err)
+		goto out;
+	if (rdev->me_fw->size != me_req_size) {
+		printk(KERN_ERR
+		       "r600_cp: Bogus length %zu in firmware \"%s\"\n",
+		       rdev->me_fw->size, fw_name);
+		err = -EINVAL;
+	}
+out:
+	platform_device_unregister(pdev); /* done on success and on failure alike */
+
+	if (err) {
+		if (err != -EINVAL) /* -EINVAL size mismatches were already reported above */
+			printk(KERN_ERR
+			       "r600_cp: Failed to load firmware \"%s\"\n",
+			       fw_name);
+		release_firmware(rdev->pfp_fw); /* on any error drop both images and clear the pointers */
+		rdev->pfp_fw = NULL;
+		release_firmware(rdev->me_fw);
+		rdev->me_fw = NULL;
+	}
+	return err;
+}
+
+static int r600_cp_load_microcode(struct radeon_device *rdev) /* upload the ME and PFP images to the CP; -EINVAL if either is missing */
+{
+	const __be32 *fw_data;
+	int i;
+
+	if (!rdev->me_fw || !rdev->pfp_fw)
+		return -EINVAL;
+
+	r600_cp_stop(rdev); /* halt the CP before touching its microcode */
+
+	WREG32(CP_RB_CNTL, RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
+
+	/* Reset cp */
+	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+	RREG32(GRBM_SOFT_RESET);
+	mdelay(15);
+	WREG32(GRBM_SOFT_RESET, 0);
+
+	WREG32(CP_ME_RAM_WADDR, 0);
+
+	fw_data = (const __be32 *)rdev->me_fw->data; /* image words are big-endian */
+	WREG32(CP_ME_RAM_WADDR, 0);
+	for (i = 0; i < PM4_UCODE_SIZE * 3; i++) /* ME image: PM4_UCODE_SIZE * 3 words via the data register */
+		WREG32(CP_ME_RAM_DATA,
+		       be32_to_cpup(fw_data++));
+
+	fw_data = (const __be32 *)rdev->pfp_fw->data;
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+	for (i = 0; i < PFP_UCODE_SIZE; i++) /* PFP image: PFP_UCODE_SIZE words */
+		WREG32(CP_PFP_UCODE_DATA,
+		       be32_to_cpup(fw_data++));
+
+	WREG32(CP_PFP_UCODE_ADDR, 0); /* rewind all address registers to 0 */
+	WREG32(CP_ME_RAM_WADDR, 0);
+	WREG32(CP_ME_RAM_RADDR, 0);
+	return 0;
+}
+
+int r600_cp_start(struct radeon_device *rdev) /* queue the ME_INITIALIZE packet, then write CP_ME_CNTL; 0 or the ring lock error */
+{
+	int r;
+	uint32_t cp_me;
+
+	r = radeon_ring_lock(rdev, 7); /* seven dwords are written below */
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		return r;
+	}
+	radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
+	radeon_ring_write(rdev, 0x1);
+	if (rdev->family < CHIP_RV770) { /* r6xx and rv770+ differ in one dword and in where max_hw_contexts is stored */
+		radeon_ring_write(rdev, 0x3);
+		radeon_ring_write(rdev, rdev->config.r600.max_hw_contexts - 1);
+	} else {
+		radeon_ring_write(rdev, 0x0);
+		radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1);
+	}
+	radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, 0);
+	radeon_ring_unlock_commit(rdev);
+
+	cp_me = 0xff; /* NOTE(review): presumably leaves the halt field clear so the ME runs - confirm bit layout */
+	WREG32(R_0086D8_CP_ME_CNTL, cp_me);
+	return 0;
+}
+
+int r600_cp_resume(struct radeon_device *rdev)
+{
+	u32 tmp;
+	u32 rb_bufsz;
+	int r;
+
+	/* Reset cp */
+	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+	RREG32(GRBM_SOFT_RESET);
+	mdelay(15);
+	WREG32(GRBM_SOFT_RESET, 0);
+
+	/* Set ring buffer size */
+	rb_bufsz = drm_order(rdev->cp.ring_size / 8);
+#ifdef __BIG_ENDIAN
+	WREG32(CP_RB_CNTL, BUF_SWAP_32BIT | RB_NO_UPDATE |
+		(drm_order(4096/8) << 8) | rb_bufsz);
+#else
+	WREG32(CP_RB_CNTL, RB_NO_UPDATE | (drm_order(4096/8) << 8) | rb_bufsz);
+#endif
+	WREG32(CP_SEM_WAIT_TIMER, 0x4);
+
+	/* Set the write pointer delay */
+	WREG32(CP_RB_WPTR_DELAY, 0);
+
+	/* Initialize the ring buffer's read and write pointers */
+	tmp = RREG32(CP_RB_CNTL);
+	WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
+	WREG32(CP_RB_RPTR_WR, 0);
+	WREG32(CP_RB_WPTR, 0);
+	WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF);
+	WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr));
+	mdelay(1);
+	WREG32(CP_RB_CNTL, tmp);
+
+	WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8);
+	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
+
+	rdev->cp.rptr = RREG32(CP_RB_RPTR);
+	rdev->cp.wptr = RREG32(CP_RB_WPTR);
+
+	r600_cp_start(rdev);
+	rdev->cp.ready = true;
+	r = radeon_ring_test(rdev);
+	if (r) {
+		rdev->cp.ready = false;
+		return r;
+	}
+	return 0;
+}
+
+void r600_cp_commit(struct radeon_device *rdev)
+{
+	WREG32(CP_RB_WPTR, rdev->cp.wptr); /* publish the driver's ring write pointer to the CP */
+	(void)RREG32(CP_RB_WPTR); /* read it back; the value is discarded */
+}
+
+void r600_ring_init(struct radeon_device *rdev, unsigned ring_size)
+{
+	/* drm_order() of the request, taken in units of 8 */
+	const u32 order = drm_order(ring_size / 8);
+
+	/* Align ring size: store (1 << (order + 1)) * 4 instead of the raw request */
+	ring_size = (1 << (order + 1)) * 4;
+	rdev->cp.align_mask = 16 - 1;
+	rdev->cp.ring_size = ring_size;
+}
+
+
+/*
+ * GPU scratch register helper functions.
+ */
+void r600_scratch_init(struct radeon_device *rdev)
+{
+	int idx = 0;
+
+	rdev->scratch.num_reg = 7;	/* seven scratch registers are managed */
+	while (idx < rdev->scratch.num_reg) {
+		rdev->scratch.reg[idx] = SCRATCH_REG0 + (idx * 4);	/* 4 bytes apart */
+		rdev->scratch.free[idx++] = true;	/* all start out free */
+	}
+}
+
+int r600_ring_test(struct radeon_device *rdev)
+{
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ring_lock(rdev, 3);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		radeon_scratch_free(rdev, scratch);
+		return r;
+	}
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+	radeon_ring_write(rdev, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test succeeded in %d usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	return r;
+}
+
+/*
+ * Writeback
+ */
+int r600_wb_init(struct radeon_device *rdev)
+{
+	int r;
+
+	if (rdev->wb.wb_obj == NULL) {
+		r = radeon_object_create(rdev, NULL, 4096,
+					 true,
+					 RADEON_GEM_DOMAIN_GTT,
+					 false, &rdev->wb.wb_obj);
+		if (r) {
+			DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
+			return r;
+		}
+		r = radeon_object_pin(rdev->wb.wb_obj,
+				      RADEON_GEM_DOMAIN_GTT,
+				      &rdev->wb.gpu_addr);
+		if (r) {
+			DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
+			return r;
+		}
+		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
+		if (r) {
+			DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
+			return r;
+		}
+	}
+	WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF);
+	WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC);
+	WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF);
+	WREG32(SCRATCH_UMSK, 0xff);
+	return 0;
+}
+
+void r600_wb_fini(struct radeon_device *rdev) /* tear down the writeback buffer; does nothing if it was never created */
+{
+	if (rdev->wb.wb_obj) {
+		radeon_object_kunmap(rdev->wb.wb_obj); /* undo the three r600_wb_init() steps in reverse order */
+		radeon_object_unpin(rdev->wb.wb_obj);
+		radeon_object_unref(&rdev->wb.wb_obj);
+		rdev->wb.wb = NULL;
+		rdev->wb.wb_obj = NULL; /* lets r600_wb_init() allocate a fresh buffer */
+	}
+}
+
+
+/*
+ * CS
+ */
+void r600_fence_ring_emit(struct radeon_device *rdev,
+			  struct radeon_fence *fence)
+{
+	/* Emit the fence sequence as a scratch register write; no IRQ is raised here */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+	radeon_ring_write(rdev, fence->seq); /* value the fence scratch register will hold once the CP gets here */
+}
+
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset,
+		  uint64_t dst_offset,
+		  unsigned num_pages,
+		  struct radeon_fence *fence)
+{
+	/* FIXME: implement; currently a stub that copies nothing and ignores every argument */
+	return 0;
+}
+
+int r600_copy_blit(struct radeon_device *rdev,
+		   uint64_t src_offset, uint64_t dst_offset,
+		   unsigned num_pages, struct radeon_fence *fence)
+{
+	r600_blit_prepare_copy(rdev, num_pages * 4096); /* size is passed in bytes, at 4096 bytes per page */
+	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * 4096);
+	r600_blit_done_copy(rdev, fence);
+	return 0; /* always 0; NOTE(review): results of the helpers above, if any, are not checked */
+}
+
+int r600_irq_process(struct radeon_device *rdev)
+{
+	/* FIXME: implement; currently a stub that handles nothing */
+	return 0;
+}
+
+int r600_irq_set(struct radeon_device *rdev)
+{
+	/* FIXME: implement; currently a stub that programs nothing */
+	return 0;
+}
+
+int r600_set_surface_reg(struct radeon_device *rdev, int reg,
+			 uint32_t tiling_flags, uint32_t pitch,
+			 uint32_t offset, uint32_t obj_size)
+{
+	/* FIXME: implement; currently a stub that ignores every argument */
+	return 0;
+}
+
+/*
+ * Surface register clear hook. Not implemented: does nothing.
+ */
+void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
+{
+	/* FIXME: implement */
+}
+
+
+/*
+ * Report whether the card looks like it has already been posted.
+ *
+ * Returns true when CRTC_EN is set in either D1CRTC_CONTROL or
+ * D2CRTC_CONTROL, or, failing that, when CONFIG_MEMSIZE reads non-zero.
+ * Returns false otherwise.
+ */
+bool r600_card_posted(struct radeon_device *rdev)
+{
+	uint32_t crtc1, crtc2;
+
+	/* both CRTC control registers are always read */
+	crtc1 = RREG32(D1CRTC_CONTROL);
+	crtc2 = RREG32(D2CRTC_CONTROL);
+	if ((crtc1 | crtc2) & CRTC_EN)
+		return true;
+
+	/* CRTCs are off: fall back to the memory size register */
+	return RREG32(CONFIG_MEMSIZE) != 0;
+}
+
+/*
+ * Bring the GPU back up: reset, MC resume, GART enable, GPU init, ring
+ * init, CP microcode load, CP resume and write-back init, in that order.
+ *
+ * Returns 0 on success or the first non-zero error returned by one of the
+ * checked steps; later steps are not run after a failure.
+ */
+int r600_resume(struct radeon_device *rdev)
+{
+	int r;
+
+	r600_gpu_reset(rdev);
+	r600_mc_resume(rdev);
+
+	r = r600_pcie_gart_enable(rdev);
+	if (r)
+		return r;
+
+	r600_gpu_init(rdev);
+
+	r = radeon_ring_init(rdev, rdev->cp.ring_size);
+	if (!r)
+		r = r600_cp_load_microcode(rdev);
+	if (!r)
+		r = r600_cp_resume(rdev);
+	if (!r)
+		r = r600_wb_init(rdev);
+
+	return r;
+}
+
+/*
+ * Suspend hook: calls r600_cp_stop() and returns 0.
+ *
+ * The ring is not drained first (see FIXME below).
+ */
+int r600_suspend(struct radeon_device *rdev)
+{
+	/* FIXME: we should wait for ring to be empty */
+	r600_cp_stop(rdev);
+	return 0;
+}
+
+/*
+ * ASIC-specific initialization entry point for r600.
+ *
+ * The plan is to move initialization into this function and use helper
+ * functions, so that radeon_device_init does little more than call the
+ * asic specific function. This should also allow removing a bunch of
+ * callback functions like vram_info.
+ *
+ * Steps, in order: dummy page, debugfs (failure only logged), GEM, BIOS
+ * fetch, ATOM BIOS init, optional card post, scratch and surface
+ * registers, clocks, fence driver, memory controller, memory manager,
+ * ring sizing, CP microcode, r600_resume(), IB pool, blitter, IB test.
+ *
+ * If r600_mc_init() or r600_resume() fails while RADEON_IS_AGP is set,
+ * r600_fini() is called, the AGP flag is cleared and r600_init() is
+ * re-entered once more without AGP.
+ *
+ * Returns 0 on success or a negative error code. Error returns in this
+ * function do not undo the steps that already succeeded.
+ */
+int r600_init(struct radeon_device *rdev)
+{
+	int r;
+
+	rdev->new_init_path = true;
+	r = radeon_dummy_page_init(rdev);
+	if (r)
+		return r;
+	/* debugfs registration failure is reported but not fatal */
+	if (r600_debugfs_mc_info_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for mc !\n");
+	}
+	/* This don't do much */
+	r = radeon_gem_init(rdev);
+	if (r)
+		return r;
+	/* Read BIOS */
+	if (!radeon_get_bios(rdev)) {
+		if (ASIC_IS_AVIVO(rdev))
+			return -EINVAL;
+	}
+	/* Must be an ATOMBIOS */
+	if (!rdev->is_atom_bios)
+		return -EINVAL;
+	r = radeon_atombios_init(rdev);
+	if (r)
+		return r;
+	/* Post card if necessary */
+	if (!r600_card_posted(rdev) && rdev->bios) {
+		DRM_INFO("GPU not posted. posting now...\n");
+		atom_asic_init(rdev->mode_info.atom_context);
+	}
+	/* Initialize scratch registers */
+	r600_scratch_init(rdev);
+	/* Initialize surface registers */
+	radeon_surface_init(rdev);
+	r = radeon_clocks_init(rdev);
+	if (r)
+		return r;
+	/* Fence driver */
+	r = radeon_fence_driver_init(rdev);
+	if (r)
+		return r;
+	r = r600_mc_init(rdev);
+	if (r) {
+		if (rdev->flags & RADEON_IS_AGP) {
+			/* Retry with disabling AGP */
+			r600_fini(rdev);
+			rdev->flags &= ~RADEON_IS_AGP;
+			return r600_init(rdev);
+		}
+		return r;
+	}
+	/* Memory manager */
+	r = radeon_object_init(rdev);
+	if (r)
+		return r;
+	rdev->cp.ring_obj = NULL;
+	r600_ring_init(rdev, 1024 * 1024);
+
+	/* fetch CP microcode only if either ME or PFP image is still missing */
+	if (!rdev->me_fw || !rdev->pfp_fw) {
+		r = r600_cp_init_microcode(rdev);
+		if (r) {
+			DRM_ERROR("Failed to load firmware!\n");
+			return r;
+		}
+	}
+
+	/* hardware bring-up is shared with the resume path */
+	r = r600_resume(rdev);
+	if (r) {
+		if (rdev->flags & RADEON_IS_AGP) {
+			/* Retry with disabling AGP */
+			r600_fini(rdev);
+			rdev->flags &= ~RADEON_IS_AGP;
+			return r600_init(rdev);
+		}
+		return r;
+	}
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+		return r;
+	}
+	r = r600_blit_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled blitter (%d).\n", r);
+		return r;
+	}
+	r = radeon_ib_test(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+			return r;
+	}
+	return 0;
+}
+
+/*
+ * Tear down the device state set up by r600_init().
+ *
+ * Stops the CP through r600_suspend(), then releases the blitter, ring,
+ * GART, GEM, fence driver, clocks, AGP (only when __OS_HAS_AGP and the
+ * RADEON_IS_AGP flag are set), memory manager, BIOS parser state, the BIOS
+ * image itself and finally the dummy page, in the order written below.
+ */
+void r600_fini(struct radeon_device *rdev)
+{
+	/* Suspend operations */
+	r600_suspend(rdev);
+
+	r600_blit_fini(rdev);
+	radeon_ring_fini(rdev);
+	r600_pcie_gart_disable(rdev);
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+	radeon_gem_fini(rdev);
+	radeon_fence_driver_fini(rdev);
+	radeon_clocks_fini(rdev);
+#if __OS_HAS_AGP
+	if (rdev->flags & RADEON_IS_AGP)
+		radeon_agp_fini(rdev);
+#endif
+	radeon_object_fini(rdev);
+	/* pick the parser teardown matching the BIOS flavour */
+	if (rdev->is_atom_bios)
+		radeon_atombios_fini(rdev);
+	else
+		radeon_combios_fini(rdev);
+	/* clear the pointer so a repeated call does not free it twice */
+	kfree(rdev->bios);
+	rdev->bios = NULL;
+	radeon_dummy_page_fini(rdev);
+}
+
+
+/*
+ * CS stuff
+ */
+/*
+ * Queue an indirect buffer on the ring.
+ *
+ * Emits one INDIRECT_BUFFER packet: the low address word with its two
+ * lowest bits cleared, the low 8 bits of the upper address word, and the
+ * IB length in dwords.
+ */
+void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	uint32_t addr_lo = ib->gpu_addr & 0xFFFFFFFC;
+	uint32_t addr_hi = upper_32_bits(ib->gpu_addr) & 0xFF;
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	radeon_ring_write(rdev, addr_lo);
+	radeon_ring_write(rdev, addr_hi);
+	radeon_ring_write(rdev, ib->length_dw);
+}
+
+/*
+ * Sanity-check indirect buffer execution.
+ *
+ * Seeds a scratch register with 0xCAFEDEAD, schedules a 16-dword IB whose
+ * first packet writes 0xDEADBEEF to that register (the rest is PACKET2
+ * padding), waits on the IB fence and then polls the register for up to
+ * rdev->usec_timeout microseconds.
+ *
+ * Returns 0 when the value shows up, -EINVAL when the poll times out, or
+ * the error from scratch/IB allocation, scheduling or the fence wait.
+ * The scratch register and the IB are released on every exit path once
+ * they have been acquired.
+ */
+int r600_ib_test(struct radeon_device *rdev)
+{
+	struct radeon_ib *ib;
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ib_get(rdev, &ib);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		goto free_scratch;
+	}
+	ib->ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
+	ib->ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	ib->ptr[2] = 0xDEADBEEF;
+	/* pad the IB out to 16 dwords */
+	for (i = 3; i < 16; i++)
+		ib->ptr[i] = PACKET2(0);
+	ib->length_dw = 16;
+	r = radeon_ib_schedule(rdev, ib);
+	if (r) {
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		goto free_ib;
+	}
+	r = radeon_fence_wait(ib->fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		goto free_ib;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test succeeded in %u usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+free_ib:
+	radeon_ib_free(rdev, &ib);
+free_scratch:
+	radeon_scratch_free(rdev, scratch);
+	return r;
+}
+
+
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+/*
+ * debugfs show callback for the CP ring.
+ *
+ * Refreshes the free-size bookkeeping, prints CP_STAT, the write and read
+ * pointers, the free dword count and a dword count derived from the two
+ * pointers, then dumps ring entries starting at the read pointer
+ * (count + 1 entries, indices wrapped with ptr_mask). Always returns 0.
+ */
+static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct radeon_device *rdev = node->minor->dev->dev_private;
+	uint32_t rptr, wptr;
+	unsigned pending, idx, n;
+
+	radeon_ring_free_size(rdev);
+	rptr = RREG32(CP_RB_RPTR);
+	wptr = RREG32(CP_RB_WPTR);
+	pending = (rptr + rdev->cp.ring_size - wptr) & rdev->cp.ptr_mask;
+
+	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT));
+	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wptr);
+	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rptr);
+	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
+	seq_printf(m, "%u dwords in ring\n", pending);
+
+	n = 0;
+	while (n <= pending) {
+		idx = (rptr + n) & rdev->cp.ptr_mask;
+		seq_printf(m, "r[%04d]=0x%08x\n", idx, rdev->cp.ring[idx]);
+		n++;
+	}
+	return 0;
+}
+
+/*
+ * debugfs show callback for memory-controller status.
+ *
+ * Hands R_000E50_SRBM_STATUS and VM_L2_STATUS to DREG32_SYS together with
+ * the seq_file (presumably printing each register's name and value;
+ * confirm against the macro definition). Always returns 0.
+ */
+static int r600_debugfs_mc_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	DREG32_SYS(m, rdev, R_000E50_SRBM_STATUS);
+	DREG32_SYS(m, rdev, VM_L2_STATUS);
+	return 0;
+}
+
+/*
+ * debugfs entries registered by r600_debugfs_mc_info_init(): file name and
+ * show callback for the MC status dump and the CP ring dump.
+ */
+static struct drm_info_list r600_mc_info_list[] = {
+	{"r600_mc_info", r600_debugfs_mc_info, 0, NULL},
+	{"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL},
+};
+#endif
+
+/*
+ * Register the r600 debugfs files listed in r600_mc_info_list.
+ *
+ * Returns the result of radeon_debugfs_add_files() when CONFIG_DEBUG_FS is
+ * defined, and 0 without doing anything otherwise.
+ */
+int r600_debugfs_mc_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, r600_mc_info_list, ARRAY_SIZE(r600_mc_info_list));
+#else
+	return 0;
+#endif
 }
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
new file mode 100644
index 0000000..c51402e
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -0,0 +1,855 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *     Alex Deucher <alexander.deucher@amd.com>
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_drv.h"
+
+#include "r600_blit_shaders.h"
+
+#define DI_PT_RECTLIST        0x11
+#define DI_INDEX_SIZE_16_BIT  0x0
+#define DI_SRC_SEL_AUTO_INDEX 0x2
+
+#define FMT_8                 0x1
+#define FMT_5_6_5             0x8
+#define FMT_8_8_8_8           0x1a
+#define COLOR_8               0x1
+#define COLOR_5_6_5           0x8
+#define COLOR_8_8_8_8         0x1a
+
+/*
+ * Program colour buffer 0 as the blit destination.
+ *
+ * @format:   value placed in bits 2+ of CB_COLOR0_INFO
+ * @w, @h:    surface size; h is rounded up to a multiple of 8 (minimum 8)
+ * @gpu_addr: surface address, written shifted right by 8
+ *
+ * Emits 21 dwords, or 23 on families strictly between CHIP_R600 and
+ * CHIP_RV770, which additionally get a SURFACE_BASE_UPDATE packet right
+ * after the base address write.
+ */
+static inline void
+set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
+{
+	const u32 family = dev_priv->flags & RADEON_FAMILY_MASK;
+	u32 cb_color_info;
+	int pitch, slice;
+	int base_update;
+	int ndw = 21;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	h = (h + 7) & ~7;
+	if (h < 8)
+		h = 8;
+
+	cb_color_info = (format << 2) | (1 << 27);
+	pitch = (w / 8) - 1;
+	slice = ((w * h) / 64) - 1;
+
+	base_update = (family > CHIP_R600) && (family < CHIP_RV770);
+	if (base_update)
+		ndw += 2;
+
+	BEGIN_RING(ndw);
+
+	/* base address, common to all families */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(gpu_addr >> 8);
+	if (base_update) {
+		OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+		OUT_RING(2 << 0);
+	}
+
+	/* size: pitch in bits 0+, slice in bits 10+ */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(pitch | (slice << 10));
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(cb_color_info);
+
+	/* TILE, FRAG and MASK are all cleared */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	ADVANCE_RING();
+}
+
+/*
+ * Emit a 5-dword SURFACE_SYNC packet.
+ *
+ * @sync_type: written as the first payload dword
+ * @size:      byte size; converted to 256-byte units rounded up, except
+ *             that 0xffffffff is passed through unchanged
+ * @mc_addr:   address, written shifted right by 8
+ *
+ * The poll interval dword is fixed at 10.
+ */
+static inline void
+cp_set_surface_sync(drm_radeon_private_t *dev_priv,
+		    u32 sync_type, u32 size, u64 mc_addr)
+{
+	u32 cp_coher_size;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	cp_coher_size = (size == 0xffffffff) ? 0xffffffff : ((size + 255) >> 8);
+
+	BEGIN_RING(5);
+	OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
+	OUT_RING(sync_type);
+	OUT_RING(cp_coher_size);
+	OUT_RING((mc_addr >> 8));
+	OUT_RING(10); /* poll interval */
+	ADVANCE_RING();
+}
+
+/*
+ * Upload the blit shaders into the current blit vertex buffer and point
+ * the hardware at them.
+ *
+ * The vertex shader (r6xx_vs) is copied to the start of the buffer and the
+ * pixel shader (r6xx_ps) to byte offset 256; blit_vb->used is then set to
+ * 512 so later data lands after both. A 21-dword packet stream programs
+ * the VS/PS start, resource and CF-offset registers, followed by a
+ * surface sync (R600_SH_ACTION_ENA) over those 512 bytes.
+ */
+static inline void
+set_shaders(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	char *vb_base;
+	u64 gpu_addr;
+	int n_dwords, i;
+	u32 *vs, *ps;
+	uint32_t sq_pgm_resources;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	/* load shaders */
+	vb_base = (char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset;
+	vs = (u32 *) vb_base;
+	ps = (u32 *) (vb_base + 256);
+
+	n_dwords = r6xx_vs_size;
+	for (i = 0; i < n_dwords; i++)
+		vs[i] = r6xx_vs[i];
+	n_dwords = r6xx_ps_size;
+	for (i = 0; i < n_dwords; i++)
+		ps[i] = r6xx_ps[i];
+
+	dev_priv->blit_vb->used = 512;
+
+	gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
+
+	/* setup shader regs */
+	sq_pgm_resources = (1 << 0);
+
+	BEGIN_RING(9 + 12);
+
+	/* vertex shader: start, resources, CF offset */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(gpu_addr >> 8);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(sq_pgm_resources);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	/* pixel shader: start, resources, exports, CF offset */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING((gpu_addr + 256) >> 8);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(sq_pgm_resources | (1 << 28));
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(2);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+	OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(0);
+
+	ADVANCE_RING();
+
+	cp_set_surface_sync(dev_priv,
+			    R600_SH_ACTION_ENA, 512, gpu_addr);
+}
+
+/*
+ * Bind the 48-byte vertex block at gpu_addr as vertex fetch resource 0x460.
+ *
+ * Emits a 9-dword SET_RESOURCE packet (low address word, size 48 - 1,
+ * bits 32-39 of the address combined with 16 << 8, and the valid-buffer
+ * type), then a surface sync over those 48 bytes. RV610, RV620, RS780,
+ * RS880 and RV710 sync with R600_TC_ACTION_ENA; every other family uses
+ * R600_VC_ACTION_ENA.
+ */
+static inline void
+set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
+{
+	uint32_t sq_vtx_constant_word2;
+	u32 sync_type;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
+
+	BEGIN_RING(9);
+	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+	OUT_RING(0x460);
+	OUT_RING(gpu_addr & 0xffffffff);
+	OUT_RING(48 - 1);
+	OUT_RING(sq_vtx_constant_word2);
+	OUT_RING(1 << 0);
+	OUT_RING(0);
+	OUT_RING(0);
+	OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
+	ADVANCE_RING();
+
+	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	case CHIP_RV710:
+		sync_type = R600_TC_ACTION_ENA;
+		break;
+	default:
+		sync_type = R600_VC_ACTION_ENA;
+		break;
+	}
+
+	cp_set_surface_sync(dev_priv, sync_type, 48, gpu_addr);
+}
+
+/*
+ * Bind the blit source as texture resource 0.
+ *
+ * @format:   placed in bits 26+ of resource word 1
+ * @w, @h:    texture size; h is clamped to at least 1
+ * @pitch:    row pitch, encoded as (pitch >> 3) - 1
+ * @gpu_addr: surface address, written twice shifted right by 8
+ *
+ * Emits one 9-dword SET_RESOURCE packet.
+ */
+static inline void
+set_tex_resource(drm_radeon_private_t *dev_priv,
+		 int format, int w, int h, int pitch, u64 gpu_addr)
+{
+	uint32_t word0, word1, word4;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	if (h < 1)
+		h = 1;
+
+	word0 = (1 << 0) |
+		(((pitch >> 3) - 1) << 8) |
+		((w - 1) << 19);
+
+	word1 = (format << 26) | ((h - 1) << 0);
+
+	word4 = (1 << 14) |
+		(0 << 16) |
+		(1 << 19) |
+		(2 << 22) |
+		(3 << 25);
+
+	BEGIN_RING(9);
+	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+	OUT_RING(0);
+	OUT_RING(word0);
+	OUT_RING(word1);
+	OUT_RING(gpu_addr >> 8);
+	OUT_RING(gpu_addr >> 8);
+	OUT_RING(word4);
+	OUT_RING(0);
+	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
+	ADVANCE_RING();
+}
+
+/*
+ * Program the screen, generic and window scissor rectangles to the same
+ * (x1, y1) - (x2, y2) box.
+ *
+ * Each rectangle is a top-left/bottom-right register pair packed as
+ * x | (y << 16). The generic and window top-left words additionally have
+ * bit 31 set. Emits 12 dwords.
+ */
+static inline void
+set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
+{
+	u32 top_left, bottom_right;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	top_left = (x1 << 0) | (y1 << 16);
+	bottom_right = (x2 << 0) | (y2 << 16);
+
+	BEGIN_RING(12);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(top_left);
+	OUT_RING(bottom_right);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(top_left | (1 << 31));
+	OUT_RING(bottom_right);
+
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+	OUT_RING(top_left | (1 << 31));
+	OUT_RING(bottom_right);
+
+	ADVANCE_RING();
+}
+
+/*
+ * Emit the draw for one blit rectangle and commit the ring.
+ *
+ * Ten dwords: primitive type DI_PT_RECTLIST, 16-bit index type, one
+ * instance, then DRAW_INDEX_AUTO with a count of 3 and auto-generated
+ * indices. The vertices come from whatever vertex resource was bound
+ * beforehand.
+ */
+static inline void
+draw_auto(drm_radeon_private_t *dev_priv)
+{
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	BEGIN_RING(10);
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
+	OUT_RING(DI_PT_RECTLIST);
+
+	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+	OUT_RING(DI_INDEX_SIZE_16_BIT);
+
+	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+	OUT_RING(1);
+
+	/* three vertices per rectangle */
+	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+	OUT_RING(3);
+	OUT_RING(DI_SRC_SEL_AUTO_INDEX);
+
+	ADVANCE_RING();
+	COMMIT_RING();
+}
+
+/*
+ * Emit the default 3D state used by the blit code.
+ *
+ * 1. Pick per-family SQ limits (GPR, thread and stack-entry counts). RV610,
+ *    RV620, RS780, RS880 and any family not listed share one set.
+ * 2. Build SQ_CONFIG: R600_VC_ENABLE is left out on RV610, RV620, RS780,
+ *    RS880 and RV710, and set everywhere else.
+ * 3. Write the canned state table to the ring (r7xx_default_state for
+ *    families >= CHIP_RV770, r6xx_default_state otherwise), followed by a
+ *    cache flush-and-invalidate event and one SET_CONFIG_REG packet that
+ *    loads the six SQ words starting at R600_SQ_CONFIG.
+ */
+static inline void
+set_default_state(drm_radeon_private_t *dev_priv)
+{
+	int default_state_dw, i;
+	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
+	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
+	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
+	RING_LOCALS;
+
+	/* per-family SQ resource split */
+	switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
+	case CHIP_R600:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 144;
+		num_vs_threads = 40;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	/* also the fallback for families not listed in this switch */
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	default:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV670:
+		num_ps_gprs = 144;
+		num_vs_gprs = 40;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV770:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 188;
+		num_vs_threads = 60;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 256;
+		num_vs_stack_entries = 256;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV730:
+	case CHIP_RV740:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 188;
+		num_vs_threads = 60;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV710:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 144;
+		num_vs_threads = 48;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	}
+
+	/* these families get SQ_CONFIG without R600_VC_ENABLE */
+	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
+	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
+		sq_config = 0;
+	else
+		sq_config = R600_VC_ENABLE;
+
+	sq_config |= (R600_DX9_CONSTS |
+		      R600_ALU_INST_PREFER_VECTOR |
+		      R600_PS_PRIO(0) |
+		      R600_VS_PRIO(1) |
+		      R600_GS_PRIO(2) |
+		      R600_ES_PRIO(3));
+
+	/* pack the limits chosen above into the SQ management words */
+	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
+				  R600_NUM_VS_GPRS(num_vs_gprs) |
+				  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
+				  R600_NUM_ES_GPRS(num_es_gprs));
+	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
+				   R600_NUM_VS_THREADS(num_vs_threads) |
+				   R600_NUM_GS_THREADS(num_gs_threads) |
+				   R600_NUM_ES_THREADS(num_es_threads));
+	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+				    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+				    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+
+	/*
+	 * The "+ 10" reserves room for the 2-dword event and the 8-dword SQ
+	 * config packet emitted below.
+	 * NOTE(review): the "* 4" presumably means the *_default_size values
+	 * count 4-dword rows -- confirm against r600_blit_shaders.
+	 */
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
+		default_state_dw = r7xx_default_size * 4;
+		BEGIN_RING(default_state_dw + 10);
+		for (i = 0; i < default_state_dw; i++)
+			OUT_RING(r7xx_default_state[i]);
+	} else {
+		default_state_dw = r6xx_default_size * 4;
+		BEGIN_RING(default_state_dw + 10);
+		for (i = 0; i < default_state_dw; i++)
+			OUT_RING(r6xx_default_state[i]);
+	}
+	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
+	/* SQ config */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
+	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
+	OUT_RING(sq_config);
+	OUT_RING(sq_gpr_resource_mgmt_1);
+	OUT_RING(sq_gpr_resource_mgmt_2);
+	OUT_RING(sq_thread_resource_mgmt);
+	OUT_RING(sq_stack_resource_mgmt_1);
+	OUT_RING(sq_stack_resource_mgmt_2);
+	ADVANCE_RING();
+}
+
+/*
+ * Convert a small unsigned integer to the bit pattern of the equal
+ * IEEE-754 single-precision value, without using floating point.
+ *
+ * Only the low 14 bits of @input are used, so values 0..16383 convert
+ * exactly and anything larger is reduced modulo 16384 first. Returns 0
+ * when those 14 bits are all clear.
+ */
+static inline uint32_t i2f(uint32_t input)
+{
+	/* park the 14 payload bits just below the implicit-one position */
+	uint32_t mantissa = (input & 0x3fff) << 10;
+	/* biased exponent matching a leading one already at bit 23 (2^13) */
+	uint32_t exponent = 140;
+	int shifts;
+
+	if (mantissa == 0)
+		return 0; /* 0 is a special case */
+
+	/* normalise: shift left until bit 23 is set, lowering the exponent */
+	for (shifts = 0; shifts < 14 && !(mantissa & 0x800000); shifts++) {
+		mantissa <<= 1;
+		exponent--;
+	}
+
+	/* drop the implicit leading one */
+	return (exponent << 23) | (mantissa & 0x7fffff);
+}
+
+
+/*
+ * Take a buffer from the freelist and store it in dev_priv->blit_vb for
+ * use as the blit vertex buffer.
+ *
+ * Returns 0 on success. If radeon_freelist_get() returns NULL, blit_vb is
+ * left NULL, an error is logged and -EAGAIN is returned.
+ */
+int r600_nomm_get_vb(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+
+	dev_priv->blit_vb = radeon_freelist_get(dev);
+	if (dev_priv->blit_vb)
+		return 0;
+
+	DRM_ERROR("Unable to allocate vertex buffer for blit\n");
+	return -EAGAIN;
+}
+
+/*
+ * Hand the current blit vertex buffer back through
+ * radeon_cp_discard_buffer(), after resetting its used count to 0.
+ *
+ * dev_priv->blit_vb and its file_priv must both be non-NULL: they are
+ * dereferenced without a check.
+ */
+void r600_nomm_put_vb(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+
+	dev_priv->blit_vb->used = 0;
+	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
+}
+
+/*
+ * Return the CPU address of the first unused byte of the blit vertex
+ * buffer: AGP buffer map base + buffer offset + bytes already used.
+ */
+void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	char *map_base = (char *)dev->agp_buffer_map->handle;
+
+	return map_base + dev_priv->blit_vb->offset + dev_priv->blit_vb->used;
+}
+
+/*
+ * Get ready for a series of blit copies on behalf of @file_priv.
+ *
+ * Grabs a vertex buffer, records @file_priv as its owner, then emits the
+ * default 3D state and the blit shaders.
+ *
+ * Returns 0 on success, or the error from r600_nomm_get_vb() when no
+ * vertex buffer is available. In that case nothing is emitted and
+ * dev_priv->blit_vb is not touched any further.
+ */
+int
+r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	int ret;
+	DRM_DEBUG("\n");
+
+	/* blit_vb is NULL on failure and is dereferenced just below */
+	ret = r600_nomm_get_vb(dev);
+	if (ret)
+		return ret;
+
+	dev_priv->blit_vb->file_priv = file_priv;
+
+	set_default_state(dev_priv);
+	set_shaders(dev);
+
+	return 0;
+}
+
+
+/*
+ * Finish a series of blit copies.
+ *
+ * Emits a cache flush-and-invalidate event and a WAIT_UNTIL for 3D idle
+ * and idle-clean (5 dwords), commits the ring, then returns the blit
+ * vertex buffer with r600_nomm_put_vb().
+ */
+void
+r600_done_blit_copy(struct drm_device *dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	BEGIN_RING(5);
+	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
+	/* wait for 3D idle clean */
+	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
+	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
+
+	ADVANCE_RING();
+	COMMIT_RING();
+
+	r600_nomm_put_vb(dev);
+}
+
+/*
+ * Copy @size_bytes bytes as a sequence of textured rectangles whose texels
+ * are @cpp bytes wide (1 or 4), using @tex_format for the source texture
+ * and @cb_format for the destination colour buffer.
+ *
+ * Each pass rebases both addresses to a 256-byte boundary and carries the
+ * remainder as an x offset. When both remainders are zero a pass may cover
+ * several rows of 8192 texels; otherwise it is a single row clipped so
+ * that neither side extends past 8192 texels.
+ */
+static void
+blit_copy_chunks(struct drm_device *dev,
+		 uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+		 int size_bytes, int cpp, int tex_format, int cb_format)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	int max_bytes = 8192 * cpp;
+	u64 vb_addr;
+	u32 *vb;
+
+	vb = r600_nomm_get_vb_ptr(dev);
+
+	while (size_bytes) {
+		int cur_size = size_bytes;
+		int src_x = src_gpu_addr & 255;
+		int dst_x = dst_gpu_addr & 255;
+		int h = 1;
+		src_gpu_addr = src_gpu_addr & ~255;
+		dst_gpu_addr = dst_gpu_addr & ~255;
+
+		if (!src_x && !dst_x) {
+			/* aligned: as many full rows as fit, else one short row */
+			h = (cur_size / max_bytes);
+			if (h > 8192)
+				h = 8192;
+			if (h == 0)
+				h = 1;
+			else
+				cur_size = max_bytes;
+		} else {
+			/* unaligned: one row, clipped on both sides */
+			if (cur_size > max_bytes)
+				cur_size = max_bytes;
+			if (cur_size > (max_bytes - dst_x))
+				cur_size = (max_bytes - dst_x);
+			if (cur_size > (max_bytes - src_x))
+				cur_size = (max_bytes - src_x);
+		}
+
+		/* 48 bytes of vertex data per pass; swap buffers when full */
+		if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+			r600_nomm_put_vb(dev);
+			r600_nomm_get_vb(dev);
+			if (!dev_priv->blit_vb)
+				return;
+			set_shaders(dev);
+			vb = r600_nomm_get_vb_ptr(dev);
+		}
+
+		/* three vertices of (dst x, dst y, src x, src y) */
+		vb[0] = i2f(dst_x / cpp);
+		vb[1] = 0;
+		vb[2] = i2f(src_x / cpp);
+		vb[3] = 0;
+
+		vb[4] = i2f(dst_x / cpp);
+		vb[5] = i2f(h);
+		vb[6] = i2f(src_x / cpp);
+		vb[7] = i2f(h);
+
+		vb[8] = i2f((dst_x + cur_size) / cpp);
+		vb[9] = i2f(h);
+		vb[10] = i2f((src_x + cur_size) / cpp);
+		vb[11] = i2f(h);
+
+		/* src */
+		set_tex_resource(dev_priv, tex_format,
+				 (src_x + cur_size) / cpp,
+				 h, (src_x + cur_size) / cpp,
+				 src_gpu_addr);
+
+		cp_set_surface_sync(dev_priv,
+				    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+		/*
+		 * dst: the width is given in texels, like the texture and the
+		 * vertices above, so the destination pitch matches the source
+		 * pitch when a pass covers more than one row.
+		 */
+		set_render_target(dev_priv, cb_format,
+				  (dst_x + cur_size) / cpp, h,
+				  dst_gpu_addr);
+
+		/*
+		 * scissors
+		 * NOTE(review): the right edge is dst_x + cur_size / cpp, which
+		 * is never smaller than the vertex edge (dst_x + cur_size) / cpp;
+		 * kept as is -- confirm whether the looser bound is intended.
+		 */
+		set_scissors(dev_priv, (dst_x / cpp), 0, (dst_x + cur_size / cpp), h);
+
+		/* Vertex buffer setup */
+		vb_addr = dev_priv->gart_buffers_offset +
+			dev_priv->blit_vb->offset +
+			dev_priv->blit_vb->used;
+		set_vtx_resource(dev_priv, vb_addr);
+
+		/* draw */
+		draw_auto(dev_priv);
+
+		cp_set_surface_sync(dev_priv,
+				    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+				    cur_size * h, dst_gpu_addr);
+
+		vb += 12;
+		dev_priv->blit_vb->used += 12 * 4;
+
+		/*
+		 * The addresses were rebased to a 256-byte boundary above, so
+		 * the x offsets must be added back or the next pass would
+		 * start too early. Both offsets are 0 whenever h > 1.
+		 */
+		src_gpu_addr += src_x + cur_size * h;
+		dst_gpu_addr += dst_x + cur_size * h;
+		size_bytes -= cur_size * h;
+	}
+}
+
+/*
+ * Copy @size_bytes bytes from @src_gpu_addr to @dst_gpu_addr with the 3D
+ * engine.
+ *
+ * When the size and both addresses are multiples of 4 the data is moved
+ * as 32-bit texels (FMT_8_8_8_8 / COLOR_8_8_8_8); otherwise it is moved
+ * byte by byte (FMT_8 / COLOR_8). A vertex buffer must already be held in
+ * dev_priv->blit_vb. If a replacement vertex buffer cannot be obtained
+ * part-way through, the copy stops early without reporting an error.
+ */
+void
+r600_blit_copy(struct drm_device *dev,
+	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+	       int size_bytes)
+{
+	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3))
+		blit_copy_chunks(dev, src_gpu_addr, dst_gpu_addr, size_bytes,
+				 1, FMT_8, COLOR_8);
+	else
+		blit_copy_chunks(dev, src_gpu_addr, dst_gpu_addr, size_bytes,
+				 4, FMT_8_8_8_8, COLOR_8_8_8_8);
+}
+
+/*
+ * Blit a @w x @h rectangle from (@sx, @sy) in the source surface to
+ * (@dx, @dy) in the destination surface.
+ *
+ * @cpp selects the formats: 4 gives 8_8_8_8, 2 gives 5_6_5 and anything
+ * else gives 8. Pitches are in bytes and are divided by @cpp for the
+ * hardware. Consumes 48 bytes of the blit vertex buffer; when the buffer
+ * is full it is swapped for a fresh one, and the function returns without
+ * drawing if none can be obtained.
+ */
+void
+r600_blit_swap(struct drm_device *dev,
+	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+	       int sx, int sy, int dx, int dy,
+	       int w, int h, int src_pitch, int dst_pitch, int cpp)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	int cb_format, tex_format;
+	int src_bottom, dst_bottom;
+	u64 vb_addr;
+	u32 *vb;
+
+	/* make sure there is room for one more rectangle */
+	if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+		r600_nomm_put_vb(dev);
+		r600_nomm_get_vb(dev);
+		if (!dev_priv->blit_vb)
+			return;
+
+		set_shaders(dev);
+	}
+	vb = r600_nomm_get_vb_ptr(dev);
+
+	switch (cpp) {
+	case 4:
+		cb_format = COLOR_8_8_8_8;
+		tex_format = FMT_8_8_8_8;
+		break;
+	case 2:
+		cb_format = COLOR_5_6_5;
+		tex_format = FMT_5_6_5;
+		break;
+	default:
+		cb_format = COLOR_8;
+		tex_format = FMT_8;
+		break;
+	}
+
+	src_bottom = sy + h;
+	dst_bottom = dy + h;
+
+	/* three vertices of (dst x, dst y, src x, src y) */
+	vb[0] = i2f(dx);
+	vb[1] = i2f(dy);
+	vb[2] = i2f(sx);
+	vb[3] = i2f(sy);
+
+	vb[4] = i2f(dx);
+	vb[5] = i2f(dst_bottom);
+	vb[6] = i2f(sx);
+	vb[7] = i2f(src_bottom);
+
+	vb[8] = i2f(dx + w);
+	vb[9] = i2f(dst_bottom);
+	vb[10] = i2f(sx + w);
+	vb[11] = i2f(src_bottom);
+
+	/* src */
+	set_tex_resource(dev_priv, tex_format,
+			 src_pitch / cpp,
+			 src_bottom, src_pitch / cpp,
+			 src_gpu_addr);
+
+	cp_set_surface_sync(dev_priv,
+			    R600_TC_ACTION_ENA, (src_pitch * src_bottom), src_gpu_addr);
+
+	/* dst */
+	set_render_target(dev_priv, cb_format,
+			  dst_pitch / cpp, dst_bottom,
+			  dst_gpu_addr);
+
+	/* scissors */
+	set_scissors(dev_priv, dx, dy, dx + w, dst_bottom);
+
+	/* Vertex buffer setup */
+	vb_addr = dev_priv->gart_buffers_offset +
+		dev_priv->blit_vb->offset +
+		dev_priv->blit_vb->used;
+	set_vtx_resource(dev_priv, vb_addr);
+
+	/* draw */
+	draw_auto(dev_priv);
+
+	cp_set_surface_sync(dev_priv,
+			    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+			    dst_pitch * dst_bottom, dst_gpu_addr);
+
+	dev_priv->blit_vb->used += 12 * 4;
+}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
new file mode 100644
index 0000000..5755647
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -0,0 +1,777 @@
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+#include "r600d.h"
+#include "r600_blit_shaders.h"
+
+/* Values written by draw_auto(): primitive type, index type, draw initiator */
+#define DI_PT_RECTLIST        0x11
+#define DI_INDEX_SIZE_16_BIT  0x0
+#define DI_SRC_SEL_AUTO_INDEX 0x2
+
+/* FMT_*: texture formats, passed as the format argument of set_tex_resource() */
+#define FMT_8                 0x1
+#define FMT_5_6_5             0x8
+#define FMT_8_8_8_8           0x1a
+/* COLOR_*: colour buffer formats, passed as the format argument of set_render_target() */
+#define COLOR_8               0x1
+#define COLOR_5_6_5           0x8
+#define COLOR_8_8_8_8         0x1a
+
+/*
+ * Program colour buffer 0 as the destination of a blit.
+ *
+ * @rdev:     device whose ring receives the packets
+ * @format:   COLOR_* value, placed at bit 2 of CB_COLOR0_INFO
+ * @w:        surface width; the pitch field is (w / 8) - 1
+ * @h:        surface height; rounded up to a multiple of 8, minimum 8
+ * @gpu_addr: destination address, written to CB_COLOR0_BASE as addr >> 8
+ *
+ * Emits 21 dwords, or 23 on families strictly between CHIP_R600 and
+ * CHIP_RV770, which additionally get a SURFACE_BASE_UPDATE packet.
+ */
+static void
+set_render_target(struct radeon_device *rdev, int format,
+		  int w, int h, u64 gpu_addr)
+{
+	const int rounded_h = (h + 7) & ~7;
+	const int height = (rounded_h < 8) ? 8 : rounded_h;
+	const int pitch_field = (w / 8) - 1;
+	const int slice_field = ((w * height) / 64) - 1;
+	/* registers written, in this order, after the base address */
+	const u32 tail_regs[6][2] = {
+		{ CB_COLOR0_SIZE, (pitch_field << 0) | (slice_field << 10) },
+		{ CB_COLOR0_VIEW, 0 },
+		{ CB_COLOR0_INFO, (format << 2) | (1 << 27) },
+		{ CB_COLOR0_TILE, 0 },
+		{ CB_COLOR0_FRAG, 0 },
+		{ CB_COLOR0_MASK, 0 },
+	};
+	unsigned int n;
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, gpu_addr >> 8);
+
+	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
+		radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
+		radeon_ring_write(rdev, 2 << 0);
+	}
+
+	for (n = 0; n < sizeof(tail_regs) / sizeof(tail_regs[0]); n++) {
+		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+		radeon_ring_write(rdev, (tail_regs[n][0] - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+		radeon_ring_write(rdev, tail_regs[n][1]);
+	}
+}
+
+/*
+ * Emit one SURFACE_SYNC packet (5 dwords).
+ *
+ * @rdev:      device whose ring receives the packet
+ * @sync_type: action/enable bits, written unchanged
+ * @size:      size of the range in bytes; 0xffffffff is passed through
+ *             as-is, anything else is rounded up to 256-byte units
+ * @mc_addr:   start of the range, written as addr >> 8
+ */
+static void
+cp_set_surface_sync(struct radeon_device *rdev,
+		    u32 sync_type, u32 size,
+		    u64 mc_addr)
+{
+	const u32 coher_size = (size == 0xffffffff) ? 0xffffffff
+						    : ((size + 255) >> 8);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	radeon_ring_write(rdev, sync_type);
+	radeon_ring_write(rdev, coher_size);
+	radeon_ring_write(rdev, mc_addr >> 8);
+	/* poll interval */
+	radeon_ring_write(rdev, 10);
+}
+
+/*
+ * Point the vertex and pixel shader stages at the blit shaders stored in
+ * the r600_blit shader object (at vs_offset and ps_offset).
+ *
+ * Seven single-register SET_CONTEXT_REG packets (3 dwords each) are
+ * followed by one surface sync (5 dwords): emits 21dw + 5dw = 26dw.
+ */
+static void
+set_shaders(struct radeon_device *rdev)
+{
+	u64 gpu_addr;
+	u32 sq_pgm_resources;
+
+	/* setup shader regs */
+	sq_pgm_resources = (1 << 0);
+
+	/* VS */
+	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	/* program start is written as address >> 8 */
+	radeon_ring_write(rdev, gpu_addr >> 8);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, sq_pgm_resources);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 0);
+
+	/* PS */
+	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, gpu_addr >> 8);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	/* same resources word as the VS, with bit 28 additionally set */
+	radeon_ring_write(rdev, sq_pgm_resources | (1 << 28));
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 2);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+	radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, 0);
+
+	/* gpu_addr still holds the PS address here; sync 512 bytes from it */
+	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
+}
+
+/*
+ * Describe the 48-byte vertex buffer at @gpu_addr (stride 16) as resource
+ * 0x460 and sync it for the fetch path of the current family.
+ *
+ * Emits 9 dwords for the resource plus 5 for the surface sync = 14.
+ */
+static void
+set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
+{
+	const u32 word2 = (upper_32_bits(gpu_addr) & 0xff) | (16 << 8);
+	u32 sync_type;
+
+	/* these families are synced with TC_ACTION, every other with VC_ACTION */
+	switch (rdev->family) {
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	case CHIP_RV710:
+		sync_type = PACKET3_TC_ACTION_ENA;
+		break;
+	default:
+		sync_type = PACKET3_VC_ACTION_ENA;
+		break;
+	}
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
+	radeon_ring_write(rdev, 0x460);
+	/* low 32 address bits; the next 8 bits travel in word2 */
+	radeon_ring_write(rdev, gpu_addr & 0xffffffff);
+	/* buffer size minus one */
+	radeon_ring_write(rdev, 48 - 1);
+	radeon_ring_write(rdev, word2);
+	radeon_ring_write(rdev, 1 << 0);
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
+
+	cp_set_surface_sync(rdev, sync_type, 48, gpu_addr);
+}
+
+/*
+ * Describe the blit source as texture resource 0.
+ *
+ * @format:   FMT_* value, placed at bit 26 of resource word 1
+ * @w:        width; stored as w - 1 at bit 19 of word 0
+ * @h:        height; values below 1 are treated as 1, stored as h - 1
+ * @pitch:    pitch; stored as (pitch >> 3) - 1 at bit 8 of word 0
+ * @gpu_addr: surface address, written twice as addr >> 8
+ *
+ * Emits 9 dwords.
+ */
+static void
+set_tex_resource(struct radeon_device *rdev,
+		 int format, int w, int h, int pitch,
+		 u64 gpu_addr)
+{
+	const int rows = (h < 1) ? 1 : h;
+	const uint32_t word0 = (1 << 0) |
+			       (((pitch >> 3) - 1) << 8) |
+			       ((w - 1) << 19);
+	const uint32_t word1 = (format << 26) | ((rows - 1) << 0);
+	const uint32_t word4 = (1 << 14) |
+			       (0 << 16) |
+			       (1 << 19) |
+			       (2 << 22) |
+			       (3 << 25);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, word0);
+	radeon_ring_write(rdev, word1);
+	/* the same address goes into both address words */
+	radeon_ring_write(rdev, gpu_addr >> 8);
+	radeon_ring_write(rdev, gpu_addr >> 8);
+	radeon_ring_write(rdev, word4);
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30);
+}
+
+/*
+ * Set the screen, generic and window scissors to the rectangle
+ * (x1, y1) - (x2, y2). Each corner is packed as x | (y << 16); the generic
+ * and window top-left words additionally carry bit 31.
+ *
+ * Emits 3 packets of 4 dwords = 12.
+ */
+static void
+set_scissors(struct radeon_device *rdev, int x1, int y1,
+	     int x2, int y2)
+{
+	const u32 top_left = (x1 << 0) | (y1 << 16);
+	const u32 bottom_right = (x2 << 0) | (y2 << 16);
+
+	/* screen scissor */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, top_left);
+	radeon_ring_write(rdev, bottom_right);
+
+	/* generic scissor */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, top_left | (1 << 31));
+	radeon_ring_write(rdev, bottom_right);
+
+	/* window scissor */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, top_left | (1 << 31));
+	radeon_ring_write(rdev, bottom_right);
+}
+
+/*
+ * Kick off the draw for one blit rectangle: select the RECTLIST primitive
+ * type, 16-bit index type, a single instance, then issue DRAW_INDEX_AUTO
+ * for 3 auto-generated indices.
+ *
+ * Emits 3 + 2 + 2 + 3 = 10 dwords.
+ */
+static void
+draw_auto(struct radeon_device *rdev)
+{
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, DI_PT_RECTLIST);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
+	radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
+	radeon_ring_write(rdev, 1);
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
+	/* three vertices: the callers store 3 x 4 dwords per rectangle */
+	radeon_ring_write(rdev, 3);
+	radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
+
+}
+
+/*
+ * Load the default state used by the blit code and partition the shader
+ * resources (GPRs, threads, stack entries) for the current family.
+ *
+ * The state stream stored at r600_blit.state_offset inside the shader
+ * object is executed through an INDIRECT_BUFFER packet, followed by a
+ * CACHE_FLUSH_AND_INV_EVENT and a single SET_CONFIG_REG packet writing the
+ * six consecutive registers that start at SQ_CONFIG.
+ *
+ * Emits 14 dwords: 4 (indirect buffer) + 2 (event write) + 8 (SQ config).
+ */
+static void
+set_default_state(struct radeon_device *rdev)
+{
+	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
+	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
+	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
+	u64 gpu_addr;
+
+	/* per-family resource split; unknown families use the RV610 values */
+	switch (rdev->family) {
+	case CHIP_R600:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 144;
+		num_vs_threads = 40;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	default:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV670:
+		num_ps_gprs = 144;
+		num_vs_gprs = 40;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 136;
+		num_vs_threads = 48;
+		num_gs_threads = 4;
+		num_es_threads = 4;
+		num_ps_stack_entries = 40;
+		num_vs_stack_entries = 40;
+		num_gs_stack_entries = 32;
+		num_es_stack_entries = 16;
+		break;
+	case CHIP_RV770:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 188;
+		num_vs_threads = 60;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 256;
+		num_vs_stack_entries = 256;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV730:
+	case CHIP_RV740:
+		num_ps_gprs = 84;
+		num_vs_gprs = 36;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 188;
+		num_vs_threads = 60;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	case CHIP_RV710:
+		num_ps_gprs = 192;
+		num_vs_gprs = 56;
+		num_temp_gprs = 4;
+		num_gs_gprs = 0;
+		num_es_gprs = 0;
+		num_ps_threads = 144;
+		num_vs_threads = 48;
+		num_gs_threads = 0;
+		num_es_threads = 0;
+		num_ps_stack_entries = 128;
+		num_vs_stack_entries = 128;
+		num_gs_stack_entries = 0;
+		num_es_stack_entries = 0;
+		break;
+	}
+
+	/*
+	 * VC_ENABLE stays clear on exactly the families for which
+	 * set_vtx_resource() syncs vertex data with TC_ACTION instead of
+	 * VC_ACTION. RS880 belongs to that set; it used to be missed here
+	 * because RS780 was tested twice.
+	 */
+	if ((rdev->family == CHIP_RV610) ||
+	    (rdev->family == CHIP_RV620) ||
+	    (rdev->family == CHIP_RS780) ||
+	    (rdev->family == CHIP_RS880) ||
+	    (rdev->family == CHIP_RV710))
+		sq_config = 0;
+	else
+		sq_config = VC_ENABLE;
+
+	sq_config |= (DX9_CONSTS |
+		      ALU_INST_PREFER_VECTOR |
+		      PS_PRIO(0) |
+		      VS_PRIO(1) |
+		      GS_PRIO(2) |
+		      ES_PRIO(3));
+
+	sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
+				  NUM_VS_GPRS(num_vs_gprs) |
+				  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+	sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
+				  NUM_ES_GPRS(num_es_gprs));
+	sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
+				   NUM_VS_THREADS(num_vs_threads) |
+				   NUM_GS_THREADS(num_gs_threads) |
+				   NUM_ES_THREADS(num_es_threads));
+	sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+				    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+	sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+				    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+
+	/* emit an IB pointing at default state; length is given in dwords */
+	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
+	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
+	radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
+	radeon_ring_write(rdev, (rdev->r600_blit.state_len / 4));
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+	/* SQ config: six consecutive registers starting at SQ_CONFIG */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
+	radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, sq_config);
+	radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
+	radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
+	radeon_ring_write(rdev, sq_thread_resource_mgmt);
+	radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
+	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
+}
+
+/*
+ * Convert a small unsigned integer to the bit pattern of the equal
+ * IEEE-754 single precision float, without using floating point.
+ *
+ * Only the low 14 bits of @input are looked at, so the result is exact
+ * for 0..16383 and wraps for anything larger. Zero maps to 0.
+ */
+static inline uint32_t i2f(uint32_t input)
+{
+	/* value placed so that bit 13 of the input lands on mantissa bit 23 */
+	uint32_t mantissa = (input & 0x3fff) << 10;
+	/* biased exponent (127 + 13) matching that placement */
+	uint32_t exponent = 140;
+	uint32_t step;
+
+	if (mantissa == 0)
+		return 0;
+
+	/* normalise: shift left until the leading one sits at bit 23 */
+	for (step = 0; step < 14 && !(mantissa & 0x800000); step++) {
+		mantissa <<= 1;
+		exponent--;
+	}
+
+	/* the leading one is implicit in the encoding, so mask it off */
+	return (exponent << 23) | (mantissa & 0x7fffff);
+}
+
+/*
+ * Allocate, pin and fill the VRAM object that holds everything the blit
+ * code needs on the GPU side: the default state stream at offset 0, then
+ * the vertex shader and the pixel shader, each starting on a 256-byte
+ * boundary.
+ *
+ * Returns 0 on success or the error from object creation, pinning or
+ * mapping.
+ *
+ * NOTE(review): the failure paths return without undoing the earlier
+ * steps (create / pin); presumably the caller is expected to run
+ * r600_blit_fini() - confirm.
+ */
+int r600_blit_init(struct radeon_device *rdev)
+{
+	u32 obj_size;
+	int r;
+	void *ptr;
+
+	rdev->r600_blit.state_offset = 0;
+
+	/* the *_size symbols count dwords, state_len is kept in bytes */
+	if (rdev->family >= CHIP_RV770)
+		rdev->r600_blit.state_len = r7xx_default_size * 4;
+	else
+		rdev->r600_blit.state_len = r6xx_default_size * 4;
+
+	/* lay out state, VS and PS back to back, each 256-byte aligned */
+	obj_size = rdev->r600_blit.state_len;
+	obj_size = ALIGN(obj_size, 256);
+
+	rdev->r600_blit.vs_offset = obj_size;
+	obj_size += r6xx_vs_size * 4;
+	obj_size = ALIGN(obj_size, 256);
+
+	rdev->r600_blit.ps_offset = obj_size;
+	obj_size += r6xx_ps_size * 4;
+	obj_size = ALIGN(obj_size, 256);
+
+	r = radeon_object_create(rdev, NULL, obj_size,
+				 true, RADEON_GEM_DOMAIN_VRAM,
+				 false, &rdev->r600_blit.shader_obj);
+	if (r) {
+		DRM_ERROR("r600 failed to allocate shader\n");
+		return r;
+	}
+
+	/* pinning also yields the GPU address used by the emit helpers */
+	r = radeon_object_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
+			     &rdev->r600_blit.shader_gpu_addr);
+	if (r) {
+		DRM_ERROR("failed to pin blit object %d\n", r);
+		return r;
+	}
+
+	DRM_DEBUG("r6xx blit allocated bo @ 0x%16llx %08x vs %08x ps %08x\n",
+		  rdev->r600_blit.shader_gpu_addr, obj_size,
+		  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);
+
+	r = radeon_object_kmap(rdev->r600_blit.shader_obj, &ptr);
+	if (r) {
+		DRM_ERROR("failed to map blit object %d\n", r);
+		return r;
+	}
+
+	/* the state stream differs per generation, the shaders are shared */
+	if (rdev->family >= CHIP_RV770)
+		memcpy_toio(ptr + rdev->r600_blit.state_offset, r7xx_default_state, rdev->r600_blit.state_len);
+	else
+		memcpy_toio(ptr + rdev->r600_blit.state_offset, r6xx_default_state, rdev->r600_blit.state_len);
+
+	/* NOTE(review): state uses memcpy_toio() but the shaders use plain memcpy() - confirm this is intended */
+	memcpy(ptr + rdev->r600_blit.vs_offset, r6xx_vs, r6xx_vs_size * 4);
+	memcpy(ptr + rdev->r600_blit.ps_offset, r6xx_ps, r6xx_ps_size * 4);
+
+	radeon_object_kunmap(rdev->r600_blit.shader_obj);
+	return 0;
+}
+
+/*
+ * Release the shader object set up by r600_blit_init(): unpin it, then
+ * drop the reference held in rdev->r600_blit.shader_obj.
+ */
+void r600_blit_fini(struct radeon_device *rdev)
+{
+	radeon_object_unpin(rdev->r600_blit.shader_obj);
+	radeon_object_unref(&rdev->r600_blit.shader_obj);
+}
+
+/*
+ * Grab an indirect buffer to be used as vertex buffer storage for the
+ * blit rectangles and reset the fill accounting (64 KiB total, 0 used).
+ *
+ * Returns 0 on success or the error reported by radeon_ib_get(), in which
+ * case the accounting is left untouched.
+ */
+int r600_vb_ib_get(struct radeon_device *rdev)
+{
+	int err = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
+
+	if (err == 0) {
+		rdev->r600_blit.vb_used = 0;
+		rdev->r600_blit.vb_total = 64*1024;
+		return 0;
+	}
+
+	DRM_ERROR("failed to get IB for vertex buffer\n");
+	return err;
+}
+
+/*
+ * Hand the vertex buffer IB back to the IB pool.
+ *
+ * Under the pool mutex the IB's fence is emitted on the ring and the IB
+ * is appended to the pool's scheduled list; radeon_ib_free() is then
+ * called on it outside the lock. Since a fence is emitted, this is only
+ * called while the caller is writing to the ring (see
+ * r600_blit_done_copy()).
+ */
+void r600_vb_ib_put(struct radeon_device *rdev)
+{
+	mutex_lock(&rdev->ib_pool.mutex);
+	radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
+	list_add_tail(&rdev->r600_blit.vb_ib->list, &rdev->ib_pool.scheduled_ibs);
+	mutex_unlock(&rdev->ib_pool.mutex);
+	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
+}
+
+/*
+ * Prepare the ring for a blit copy of @size_bytes bytes.
+ *
+ * Gets the vertex buffer IB, reserves enough ring space for every draw
+ * the copy can need plus the fixed overhead, then emits the default
+ * state and the shader setup. On success the ring is left locked; it is
+ * released by r600_blit_done_copy().
+ *
+ * Returns 0 on success. If the IB cannot be obtained or the ring cannot
+ * be locked, a warning is logged, nothing is written to the ring and the
+ * error is returned; the caller must then not emit the copy.
+ */
+int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
+{
+	const int max_size = 8192*8192;
+	int ring_size;
+	int r;
+
+	r = r600_vb_ib_get(rdev);
+	if (r) {
+		WARN_ON(1);
+		return r;
+	}
+
+	/* 78 ring dwords per draw loop, one loop per started max_size chunk */
+	ring_size = ((size_bytes + max_size) / max_size) * 78;
+	/* set default  + shaders */
+	ring_size += 40; /* shaders + def state */
+	ring_size += 3; /* fence emit for VB IB */
+	ring_size += 5; /* done copy */
+	ring_size += 3; /* fence emit for done copy */
+	r = radeon_ring_lock(rdev, ring_size);
+	if (r) {
+		/*
+		 * Writing packets without the reservation would corrupt the
+		 * ring. NOTE(review): the vertex buffer IB obtained above is
+		 * still held in rdev->r600_blit.vb_ib on this path.
+		 */
+		WARN_ON(1);
+		return r;
+	}
+
+	set_default_state(rdev); /* 14 */
+	set_shaders(rdev); /* 26 */
+	return 0;
+}
+
+/*
+ * Finish the blit sequence: flush, wait for the 3D engine, give the
+ * vertex buffer IB back, optionally emit @fence, then commit and unlock
+ * the ring.
+ *
+ * @fence may be NULL, in which case no completion fence is emitted. The
+ * result of emitting the fence is not checked.
+ */
+void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
+{
+	/* cache flush + invalidate event */
+	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+
+	/* wait for 3D idle clean */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
+
+	if (rdev->r600_blit.vb_ib)
+		r600_vb_ib_put(rdev);
+
+	if (fence)
+		radeon_fence_emit(rdev, fence);
+
+	radeon_ring_unlock_commit(rdev);
+}
+
+/*
+ * Emit the draws for one linear copy, treating memory as texels of @cpp
+ * bytes (@tex_format / @cb_format must describe that texel size).
+ *
+ * Each loop iteration copies one rectangle. The surfaces are based at the
+ * 256-byte aligned addresses; src_x / dst_x are the byte offsets of the
+ * data inside the first row. When both offsets are zero a multi-row
+ * rectangle of up to 8192 x 8192 texels is used, otherwise a single
+ * partial row. Every x coordinate and width handed to the GPU is in
+ * texels, i.e. the byte value divided by @cpp.
+ *
+ * Uses 78 ring dwords and 48 bytes of vertex buffer per iteration.
+ */
+static void
+r600_blit_emit_copy(struct radeon_device *rdev,
+		    u64 src_gpu_addr, u64 dst_gpu_addr, int size_bytes,
+		    int cpp, int tex_format, int cb_format)
+{
+	/* widest row that can be copied, in bytes */
+	const int max_bytes = 8192 * cpp;
+	u64 vb_gpu_addr;
+	u32 *vb;
+
+	/* NOTE(review): vb_used is a byte count; confirm vb_ib->ptr arithmetic is in bytes too */
+	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
+
+	while (size_bytes) {
+		int cur_size = size_bytes;
+		int src_x = src_gpu_addr & 255;
+		int dst_x = dst_gpu_addr & 255;
+		int h = 1;
+
+		src_gpu_addr &= ~255ULL;
+		dst_gpu_addr &= ~255ULL;
+
+		if (!src_x && !dst_x) {
+			/* aligned: as many full rows as fit, or one short row */
+			h = (cur_size / max_bytes);
+			if (h > 8192)
+				h = 8192;
+			if (h == 0)
+				h = 1;
+			else
+				cur_size = max_bytes;
+		} else {
+			/* unaligned: one row that ends at or before the row limit */
+			if (cur_size > max_bytes)
+				cur_size = max_bytes;
+			if (cur_size > (max_bytes - dst_x))
+				cur_size = (max_bytes - dst_x);
+			if (cur_size > (max_bytes - src_x))
+				cur_size = (max_bytes - src_x);
+		}
+
+		if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
+			/*
+			 * Out of vertex buffer space. Stop here instead of
+			 * writing the next rectangle past vb_total; the rest
+			 * of the copy is not emitted.
+			 */
+			WARN_ON(1);
+			return;
+		}
+
+		/* three corners of the rectangle: (dst x, dst y, src x, src y) each */
+		vb[0] = i2f(dst_x / cpp);
+		vb[1] = 0;
+		vb[2] = i2f(src_x / cpp);
+		vb[3] = 0;
+
+		vb[4] = i2f(dst_x / cpp);
+		vb[5] = i2f(h);
+		vb[6] = i2f(src_x / cpp);
+		vb[7] = i2f(h);
+
+		vb[8] = i2f((dst_x + cur_size) / cpp);
+		vb[9] = i2f(h);
+		vb[10] = i2f((src_x + cur_size) / cpp);
+		vb[11] = i2f(h);
+
+		/* src 9 */
+		set_tex_resource(rdev, tex_format,
+				 (src_x + cur_size) / cpp,
+				 h, (src_x + cur_size) / cpp,
+				 src_gpu_addr);
+
+		/* 5 */
+		cp_set_surface_sync(rdev,
+				    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+		/* dst 23: width in texels, like the texture and the vertices */
+		set_render_target(rdev, cb_format,
+				  (dst_x + cur_size) / cpp, h,
+				  dst_gpu_addr);
+
+		/* scissors 12 */
+		set_scissors(rdev, dst_x / cpp, 0, (dst_x + cur_size) / cpp, h);
+
+		/* Vertex buffer setup 14 */
+		vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+		set_vtx_resource(rdev, vb_gpu_addr);
+
+		/* draw 10 */
+		draw_auto(rdev);
+
+		/* 5 */
+		cp_set_surface_sync(rdev,
+				    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+				    cur_size * h, dst_gpu_addr);
+
+		/* 78 ring dwords per loop */
+		vb += 12;
+		rdev->r600_blit.vb_used += 12 * 4;
+
+		/*
+		 * The addresses were rounded down to 256 bytes above, so the
+		 * in-row offsets have to be added back when advancing.
+		 */
+		src_gpu_addr += src_x + cur_size * h;
+		dst_gpu_addr += dst_x + cur_size * h;
+		size_bytes -= cur_size * h;
+	}
+}
+
+/*
+ * Emit the packets that copy @size_bytes bytes from @src_gpu_addr to
+ * @dst_gpu_addr with the 3D engine.
+ *
+ * Must be called between r600_blit_prepare_copy() and
+ * r600_blit_done_copy(). If the size and both addresses are multiples of
+ * 4 the copy runs on 32-bit texels (FMT_8_8_8_8 / COLOR_8_8_8_8),
+ * otherwise on bytes (FMT_8 / COLOR_8).
+ */
+void r600_kms_blit_copy(struct radeon_device *rdev,
+			u64 src_gpu_addr, u64 dst_gpu_addr,
+			int size_bytes)
+{
+	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
+		  size_bytes, rdev->r600_blit.vb_used);
+
+	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3))
+		r600_blit_emit_copy(rdev, src_gpu_addr, dst_gpu_addr, size_bytes,
+				    1, FMT_8, COLOR_8);
+	else
+		r600_blit_emit_copy(rdev, src_gpu_addr, dst_gpu_addr, size_bytes,
+				    4, FMT_8_8_8_8, COLOR_8_8_8_8);
+}
+
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c
new file mode 100644
index 0000000..d745e81
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.c
@@ -0,0 +1,1072 @@
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+/*
+ * Default state command stream for families below CHIP_RV770.
+ *
+ * r600_blit_init() copies this table into the blit shader object at
+ * state_offset and set_default_state() runs it through an
+ * INDIRECT_BUFFER packet. The contents are raw command dwords and must be
+ * kept exactly as they are.
+ */
+const u32 r6xx_default_state[] =
+{
+	0xc0002400,
+	0x00000000,
+	0xc0012800,
+	0x80000000,
+	0x80000000,
+	0xc0004600,
+	0x00000016,
+	0xc0016800,
+	0x00000010,
+	0x00028000,
+	0xc0016800,
+	0x00000010,
+	0x00008000,
+	0xc0016800,
+	0x00000542,
+	0x07000003,
+	0xc0016800,
+	0x000005c5,
+	0x00000000,
+	0xc0016800,
+	0x00000363,
+	0x00000000,
+	0xc0016800,
+	0x0000060c,
+	0x82000000,
+	0xc0016800,
+	0x0000060e,
+	0x01020204,
+	0xc0016f00,
+	0x00000000,
+	0x00000000,
+	0xc0016f00,
+	0x00000001,
+	0x00000000,
+	0xc0096900,
+	0x0000022a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x00000004,
+	0x00000000,
+	0xc0016900,
+	0x0000000a,
+	0x00000000,
+	0xc0016900,
+	0x0000000b,
+	0x00000000,
+	0xc0016900,
+	0x0000010c,
+	0x00000000,
+	0xc0016900,
+	0x0000010d,
+	0x00000000,
+	0xc0016900,
+	0x00000200,
+	0x00000000,
+	0xc0016900,
+	0x00000343,
+	0x00000060,
+	0xc0016900,
+	0x00000344,
+	0x00000040,
+	0xc0016900,
+	0x00000351,
+	0x0000aa00,
+	0xc0016900,
+	0x00000104,
+	0x00000000,
+	0xc0016900,
+	0x0000010e,
+	0x00000000,
+	0xc0046900,
+	0x00000105,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0036900,
+	0x00000109,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0046900,
+	0x0000030c,
+	0x01000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0046900,
+	0x00000048,
+	0x3f800000,
+	0x00000000,
+	0x3f800000,
+	0x3f800000,
+	0xc0016900,
+	0x0000008e,
+	0x0000000f,
+	0xc0016900,
+	0x00000080,
+	0x00000000,
+	0xc0016900,
+	0x00000083,
+	0x0000ffff,
+	0xc0016900,
+	0x00000084,
+	0x00000000,
+	0xc0016900,
+	0x00000085,
+	0x20002000,
+	0xc0016900,
+	0x00000086,
+	0x00000000,
+	0xc0016900,
+	0x00000087,
+	0x20002000,
+	0xc0016900,
+	0x00000088,
+	0x00000000,
+	0xc0016900,
+	0x00000089,
+	0x20002000,
+	0xc0016900,
+	0x0000008a,
+	0x00000000,
+	0xc0016900,
+	0x0000008b,
+	0x20002000,
+	0xc0016900,
+	0x0000008c,
+	0x00000000,
+	0xc0016900,
+	0x00000094,
+	0x80000000,
+	0xc0016900,
+	0x00000095,
+	0x20002000,
+	0xc0026900,
+	0x000000b4,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000096,
+	0x80000000,
+	0xc0016900,
+	0x00000097,
+	0x20002000,
+	0xc0026900,
+	0x000000b6,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000098,
+	0x80000000,
+	0xc0016900,
+	0x00000099,
+	0x20002000,
+	0xc0026900,
+	0x000000b8,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009a,
+	0x80000000,
+	0xc0016900,
+	0x0000009b,
+	0x20002000,
+	0xc0026900,
+	0x000000ba,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009c,
+	0x80000000,
+	0xc0016900,
+	0x0000009d,
+	0x20002000,
+	0xc0026900,
+	0x000000bc,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009e,
+	0x80000000,
+	0xc0016900,
+	0x0000009f,
+	0x20002000,
+	0xc0026900,
+	0x000000be,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a0,
+	0x80000000,
+	0xc0016900,
+	0x000000a1,
+	0x20002000,
+	0xc0026900,
+	0x000000c0,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a2,
+	0x80000000,
+	0xc0016900,
+	0x000000a3,
+	0x20002000,
+	0xc0026900,
+	0x000000c2,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a4,
+	0x80000000,
+	0xc0016900,
+	0x000000a5,
+	0x20002000,
+	0xc0026900,
+	0x000000c4,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a6,
+	0x80000000,
+	0xc0016900,
+	0x000000a7,
+	0x20002000,
+	0xc0026900,
+	0x000000c6,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a8,
+	0x80000000,
+	0xc0016900,
+	0x000000a9,
+	0x20002000,
+	0xc0026900,
+	0x000000c8,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000aa,
+	0x80000000,
+	0xc0016900,
+	0x000000ab,
+	0x20002000,
+	0xc0026900,
+	0x000000ca,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000ac,
+	0x80000000,
+	0xc0016900,
+	0x000000ad,
+	0x20002000,
+	0xc0026900,
+	0x000000cc,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000ae,
+	0x80000000,
+	0xc0016900,
+	0x000000af,
+	0x20002000,
+	0xc0026900,
+	0x000000ce,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000b0,
+	0x80000000,
+	0xc0016900,
+	0x000000b1,
+	0x20002000,
+	0xc0026900,
+	0x000000d0,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000b2,
+	0x80000000,
+	0xc0016900,
+	0x000000b3,
+	0x20002000,
+	0xc0026900,
+	0x000000d2,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000293,
+	0x00004010,
+	0xc0016900,
+	0x00000300,
+	0x00000000,
+	0xc0016900,
+	0x00000301,
+	0x00000000,
+	0xc0016900,
+	0x00000312,
+	0xffffffff,
+	0xc0016900,
+	0x00000307,
+	0x00000000,
+	0xc0016900,
+	0x00000308,
+	0x00000000,
+	0xc0016900,
+	0x00000283,
+	0x00000000,
+	0xc0016900,
+	0x00000292,
+	0x00000000,
+	0xc0066900,
+	0x0000010f,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x00000206,
+	0x00000000,
+	0xc0016900,
+	0x00000207,
+	0x00000000,
+	0xc0016900,
+	0x00000208,
+	0x00000000,
+	0xc0046900,
+	0x00000303,
+	0x3f800000,
+	0x3f800000,
+	0x3f800000,
+	0x3f800000,
+	0xc0016900,
+	0x00000205,
+	0x00000004,
+	0xc0016900,
+	0x00000280,
+	0x00000000,
+	0xc0016900,
+	0x00000281,
+	0x00000000,
+	0xc0016900,
+	0x0000037e,
+	0x00000000,
+	0xc0016900,
+	0x00000382,
+	0x00000000,
+	0xc0016900,
+	0x00000380,
+	0x00000000,
+	0xc0016900,
+	0x00000383,
+	0x00000000,
+	0xc0016900,
+	0x00000381,
+	0x00000000,
+	0xc0016900,
+	0x00000282,
+	0x00000008,
+	0xc0016900,
+	0x00000302,
+	0x0000002d,
+	0xc0016900,
+	0x0000037f,
+	0x00000000,
+	0xc0016900,
+	0x000001b2,
+	0x00000000,
+	0xc0016900,
+	0x000001b6,
+	0x00000000,
+	0xc0016900,
+	0x000001b7,
+	0x00000000,
+	0xc0016900,
+	0x000001b8,
+	0x00000000,
+	0xc0016900,
+	0x000001b9,
+	0x00000000,
+	0xc0016900,
+	0x00000225,
+	0x00000000,
+	0xc0016900,
+	0x00000229,
+	0x00000000,
+	0xc0016900,
+	0x00000237,
+	0x00000000,
+	0xc0016900,
+	0x00000100,
+	0x00000800,
+	0xc0016900,
+	0x00000101,
+	0x00000000,
+	0xc0016900,
+	0x00000102,
+	0x00000000,
+	0xc0016900,
+	0x000002a8,
+	0x00000000,
+	0xc0016900,
+	0x000002a9,
+	0x00000000,
+	0xc0016900,
+	0x00000103,
+	0x00000000,
+	0xc0016900,
+	0x00000284,
+	0x00000000,
+	0xc0016900,
+	0x00000290,
+	0x00000000,
+	0xc0016900,
+	0x00000285,
+	0x00000000,
+	0xc0016900,
+	0x00000286,
+	0x00000000,
+	0xc0016900,
+	0x00000287,
+	0x00000000,
+	0xc0016900,
+	0x00000288,
+	0x00000000,
+	0xc0016900,
+	0x00000289,
+	0x00000000,
+	0xc0016900,
+	0x0000028a,
+	0x00000000,
+	0xc0016900,
+	0x0000028b,
+	0x00000000,
+	0xc0016900,
+	0x0000028c,
+	0x00000000,
+	0xc0016900,
+	0x0000028d,
+	0x00000000,
+	0xc0016900,
+	0x0000028e,
+	0x00000000,
+	0xc0016900,
+	0x0000028f,
+	0x00000000,
+	0xc0016900,
+	0x000002a1,
+	0x00000000,
+	0xc0016900,
+	0x000002a5,
+	0x00000000,
+	0xc0016900,
+	0x000002ac,
+	0x00000000,
+	0xc0016900,
+	0x000002ad,
+	0x00000000,
+	0xc0016900,
+	0x000002ae,
+	0x00000000,
+	0xc0016900,
+	0x000002c8,
+	0x00000000,
+	0xc0016900,
+	0x00000206,
+	0x00000100,
+	0xc0016900,
+	0x00000204,
+	0x00010000,
+	0xc0036e00,
+	0x00000000,
+	0x00000012,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x0000008f,
+	0x0000000f,
+	0xc0016900,
+	0x000001e8,
+	0x00000001,
+	0xc0016900,
+	0x00000202,
+	0x00cc0000,
+	0xc0016900,
+	0x00000205,
+	0x00000244,
+	0xc0016900,
+	0x00000203,
+	0x00000210,
+	0xc0016900,
+	0x000001b1,
+	0x00000000,
+	0xc0016900,
+	0x00000185,
+	0x00000000,
+	0xc0016900,
+	0x000001b3,
+	0x00000001,
+	0xc0016900,
+	0x000001b4,
+	0x00000000,
+	0xc0016900,
+	0x00000191,
+	0x00000b00,
+	0xc0016900,
+	0x000001b5,
+	0x00000000,
+};
+
+const u32 r7xx_default_state[] =
+{
+	0xc0012800,
+	0x80000000,
+	0x80000000,
+	0xc0004600,
+	0x00000016,
+	0xc0016800,
+	0x00000010,
+	0x00028000,
+	0xc0016800,
+	0x00000010,
+	0x00008000,
+	0xc0016800,
+	0x00000542,
+	0x07000002,
+	0xc0016800,
+	0x000005c5,
+	0x00000000,
+	0xc0016800,
+	0x00000363,
+	0x00004000,
+	0xc0016800,
+	0x0000060c,
+	0x00000000,
+	0xc0016800,
+	0x0000060e,
+	0x00420204,
+	0xc0016f00,
+	0x00000000,
+	0x00000000,
+	0xc0016f00,
+	0x00000001,
+	0x00000000,
+	0xc0096900,
+	0x0000022a,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x00000004,
+	0x00000000,
+	0xc0016900,
+	0x0000000a,
+	0x00000000,
+	0xc0016900,
+	0x0000000b,
+	0x00000000,
+	0xc0016900,
+	0x0000010c,
+	0x00000000,
+	0xc0016900,
+	0x0000010d,
+	0x00000000,
+	0xc0016900,
+	0x00000200,
+	0x00000000,
+	0xc0016900,
+	0x00000343,
+	0x00000060,
+	0xc0016900,
+	0x00000344,
+	0x00000000,
+	0xc0016900,
+	0x00000351,
+	0x0000aa00,
+	0xc0016900,
+	0x00000104,
+	0x00000000,
+	0xc0016900,
+	0x0000010e,
+	0x00000000,
+	0xc0046900,
+	0x00000105,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0046900,
+	0x0000030c,
+	0x01000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x0000008e,
+	0x0000000f,
+	0xc0016900,
+	0x00000080,
+	0x00000000,
+	0xc0016900,
+	0x00000083,
+	0x0000ffff,
+	0xc0016900,
+	0x00000084,
+	0x00000000,
+	0xc0016900,
+	0x00000085,
+	0x20002000,
+	0xc0016900,
+	0x00000086,
+	0x00000000,
+	0xc0016900,
+	0x00000087,
+	0x20002000,
+	0xc0016900,
+	0x00000088,
+	0x00000000,
+	0xc0016900,
+	0x00000089,
+	0x20002000,
+	0xc0016900,
+	0x0000008a,
+	0x00000000,
+	0xc0016900,
+	0x0000008b,
+	0x20002000,
+	0xc0016900,
+	0x0000008c,
+	0xaaaaaaaa,
+	0xc0016900,
+	0x00000094,
+	0x80000000,
+	0xc0016900,
+	0x00000095,
+	0x20002000,
+	0xc0026900,
+	0x000000b4,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000096,
+	0x80000000,
+	0xc0016900,
+	0x00000097,
+	0x20002000,
+	0xc0026900,
+	0x000000b6,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000098,
+	0x80000000,
+	0xc0016900,
+	0x00000099,
+	0x20002000,
+	0xc0026900,
+	0x000000b8,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009a,
+	0x80000000,
+	0xc0016900,
+	0x0000009b,
+	0x20002000,
+	0xc0026900,
+	0x000000ba,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009c,
+	0x80000000,
+	0xc0016900,
+	0x0000009d,
+	0x20002000,
+	0xc0026900,
+	0x000000bc,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x0000009e,
+	0x80000000,
+	0xc0016900,
+	0x0000009f,
+	0x20002000,
+	0xc0026900,
+	0x000000be,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a0,
+	0x80000000,
+	0xc0016900,
+	0x000000a1,
+	0x20002000,
+	0xc0026900,
+	0x000000c0,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a2,
+	0x80000000,
+	0xc0016900,
+	0x000000a3,
+	0x20002000,
+	0xc0026900,
+	0x000000c2,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a4,
+	0x80000000,
+	0xc0016900,
+	0x000000a5,
+	0x20002000,
+	0xc0026900,
+	0x000000c4,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a6,
+	0x80000000,
+	0xc0016900,
+	0x000000a7,
+	0x20002000,
+	0xc0026900,
+	0x000000c6,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000a8,
+	0x80000000,
+	0xc0016900,
+	0x000000a9,
+	0x20002000,
+	0xc0026900,
+	0x000000c8,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000aa,
+	0x80000000,
+	0xc0016900,
+	0x000000ab,
+	0x20002000,
+	0xc0026900,
+	0x000000ca,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000ac,
+	0x80000000,
+	0xc0016900,
+	0x000000ad,
+	0x20002000,
+	0xc0026900,
+	0x000000cc,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000ae,
+	0x80000000,
+	0xc0016900,
+	0x000000af,
+	0x20002000,
+	0xc0026900,
+	0x000000ce,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000b0,
+	0x80000000,
+	0xc0016900,
+	0x000000b1,
+	0x20002000,
+	0xc0026900,
+	0x000000d0,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x000000b2,
+	0x80000000,
+	0xc0016900,
+	0x000000b3,
+	0x20002000,
+	0xc0026900,
+	0x000000d2,
+	0x00000000,
+	0x3f800000,
+	0xc0016900,
+	0x00000293,
+	0x00514000,
+	0xc0016900,
+	0x00000300,
+	0x00000000,
+	0xc0016900,
+	0x00000301,
+	0x00000000,
+	0xc0016900,
+	0x00000312,
+	0xffffffff,
+	0xc0016900,
+	0x00000307,
+	0x00000000,
+	0xc0016900,
+	0x00000308,
+	0x00000000,
+	0xc0016900,
+	0x00000283,
+	0x00000000,
+	0xc0016900,
+	0x00000292,
+	0x00000000,
+	0xc0066900,
+	0x0000010f,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x00000206,
+	0x00000000,
+	0xc0016900,
+	0x00000207,
+	0x00000000,
+	0xc0016900,
+	0x00000208,
+	0x00000000,
+	0xc0046900,
+	0x00000303,
+	0x3f800000,
+	0x3f800000,
+	0x3f800000,
+	0x3f800000,
+	0xc0016900,
+	0x00000205,
+	0x00000004,
+	0xc0016900,
+	0x00000280,
+	0x00000000,
+	0xc0016900,
+	0x00000281,
+	0x00000000,
+	0xc0016900,
+	0x0000037e,
+	0x00000000,
+	0xc0016900,
+	0x00000382,
+	0x00000000,
+	0xc0016900,
+	0x00000380,
+	0x00000000,
+	0xc0016900,
+	0x00000383,
+	0x00000000,
+	0xc0016900,
+	0x00000381,
+	0x00000000,
+	0xc0016900,
+	0x00000282,
+	0x00000008,
+	0xc0016900,
+	0x00000302,
+	0x0000002d,
+	0xc0016900,
+	0x0000037f,
+	0x00000000,
+	0xc0016900,
+	0x000001b2,
+	0x00000001,
+	0xc0016900,
+	0x000001b6,
+	0x00000000,
+	0xc0016900,
+	0x000001b7,
+	0x00000000,
+	0xc0016900,
+	0x000001b8,
+	0x00000000,
+	0xc0016900,
+	0x000001b9,
+	0x00000000,
+	0xc0016900,
+	0x00000225,
+	0x00000000,
+	0xc0016900,
+	0x00000229,
+	0x00000000,
+	0xc0016900,
+	0x00000237,
+	0x00000000,
+	0xc0016900,
+	0x00000100,
+	0x00000800,
+	0xc0016900,
+	0x00000101,
+	0x00000000,
+	0xc0016900,
+	0x00000102,
+	0x00000000,
+	0xc0016900,
+	0x000002a8,
+	0x00000000,
+	0xc0016900,
+	0x000002a9,
+	0x00000000,
+	0xc0016900,
+	0x00000103,
+	0x00000000,
+	0xc0016900,
+	0x00000284,
+	0x00000000,
+	0xc0016900,
+	0x00000290,
+	0x00000000,
+	0xc0016900,
+	0x00000285,
+	0x00000000,
+	0xc0016900,
+	0x00000286,
+	0x00000000,
+	0xc0016900,
+	0x00000287,
+	0x00000000,
+	0xc0016900,
+	0x00000288,
+	0x00000000,
+	0xc0016900,
+	0x00000289,
+	0x00000000,
+	0xc0016900,
+	0x0000028a,
+	0x00000000,
+	0xc0016900,
+	0x0000028b,
+	0x00000000,
+	0xc0016900,
+	0x0000028c,
+	0x00000000,
+	0xc0016900,
+	0x0000028d,
+	0x00000000,
+	0xc0016900,
+	0x0000028e,
+	0x00000000,
+	0xc0016900,
+	0x0000028f,
+	0x00000000,
+	0xc0016900,
+	0x000002a1,
+	0x00000000,
+	0xc0016900,
+	0x000002a5,
+	0x00000000,
+	0xc0016900,
+	0x000002ac,
+	0x00000000,
+	0xc0016900,
+	0x000002ad,
+	0x00000000,
+	0xc0016900,
+	0x000002ae,
+	0x00000000,
+	0xc0016900,
+	0x000002c8,
+	0x00000000,
+	0xc0016900,
+	0x00000206,
+	0x00000100,
+	0xc0016900,
+	0x00000204,
+	0x00010000,
+	0xc0036e00,
+	0x00000000,
+	0x00000012,
+	0x00000000,
+	0x00000000,
+	0xc0016900,
+	0x0000008f,
+	0x0000000f,
+	0xc0016900,
+	0x000001e8,
+	0x00000001,
+	0xc0016900,
+	0x00000202,
+	0x00cc0000,
+	0xc0016900,
+	0x00000205,
+	0x00000244,
+	0xc0016900,
+	0x00000203,
+	0x00000210,
+	0xc0016900,
+	0x000001b1,
+	0x00000000,
+	0xc0016900,
+	0x00000185,
+	0x00000000,
+	0xc0016900,
+	0x000001b3,
+	0x00000001,
+	0xc0016900,
+	0x000001b4,
+	0x00000000,
+	0xc0016900,
+	0x00000191,
+	0x00000b00,
+	0xc0016900,
+	0x000001b5,
+	0x00000000,
+};
+
+/* r6xx_vs: 12 dwords, same for r6xx/r7xx (presumably the blit vertex shader - confirm) */
+const u32 r6xx_vs[] =
+{
+	0x00000004,
+	0x81000000,
+	0x0000203c,
+	0x94000b08,
+	0x00004000,
+	0x14200b1a,
+	0x00000000,
+	0x00000000,
+	0x3c000000,
+	0x68cd1000,
+	0x00080000,
+	0x00000000,
+};
+
+const u32 r6xx_ps[] =	/* 8 dwords (presumably the blit pixel shader - confirm) */
+{
+	0x00000002,
+	0x80800000,
+	0x00000000,
+	0x94200688,
+	0x00000010,
+	0x000d1000,
+	0xb0800000,
+	0x00000000,
+};
+
+const u32 r6xx_ps_size = ARRAY_SIZE(r6xx_ps);	/* all four sizes are element (u32) counts, not byte sizes */
+const u32 r6xx_vs_size = ARRAY_SIZE(r6xx_vs);
+const u32 r6xx_default_size = ARRAY_SIZE(r6xx_default_state);
+const u32 r7xx_default_size = ARRAY_SIZE(r7xx_default_state);
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h
new file mode 100644
index 0000000..fdc3b37
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.h
@@ -0,0 +1,14 @@
+
+#ifndef R600_BLIT_SHADERS_H
+#define R600_BLIT_SHADERS_H
+/* shader and default-state tables, each an array of 32-bit words */
+extern const u32 r6xx_ps[];
+extern const u32 r6xx_vs[];
+extern const u32 r7xx_default_state[];
+extern const u32 r6xx_default_state[];
+
+/* number of elements (not bytes) in the corresponding table */
+extern const u32 r6xx_ps_size, r6xx_vs_size;
+extern const u32 r6xx_default_size, r7xx_default_size;
+
+#endif /* R600_BLIT_SHADERS_H */
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 8327912..6d5a711 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -58,6 +58,12 @@
 MODULE_FIRMWARE("radeon/RV710_pfp.bin");
 MODULE_FIRMWARE("radeon/RV710_me.bin");
 
+/* legacy command stream checker entry points, implemented in r600_cs.c */
+int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
+			unsigned family, u32 *ib, int *l);
+void r600_cs_legacy_init(void);
+
+
 # define ATI_PCIGART_PAGE_SIZE		4096	/**< PCI GART page size */
 # define ATI_PCIGART_PAGE_MASK		(~(ATI_PCIGART_PAGE_SIZE-1))
 
@@ -1857,6 +1863,8 @@
 
 	DRM_DEBUG("\n");
 
+	mutex_init(&dev_priv->cs_mutex);
+	r600_cs_legacy_init();
 	/* if we require new memory map but we don't have it fail */
 	if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
 		DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
@@ -1888,7 +1896,7 @@
 	/* Enable vblank on CRTC1 for older X servers
 	 */
 	dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
-
+	dev_priv->do_boxes = 0;
 	dev_priv->cp_mode = init->cp_mode;
 
 	/* We don't support anything other than bus-mastering ring mode,
@@ -1974,11 +1982,11 @@
 	} else
 #endif
 	{
-		dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset;
+		dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset;
 		dev_priv->ring_rptr->handle =
-		    (void *)dev_priv->ring_rptr->offset;
+			(void *)(unsigned long)dev_priv->ring_rptr->offset;
 		dev->agp_buffer_map->handle =
-		    (void *)dev->agp_buffer_map->offset;
+			(void *)(unsigned long)dev->agp_buffer_map->offset;
 
 		DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
 			  dev_priv->cp_ring->handle);
@@ -2282,3 +2290,239 @@
 
 	return 0;
 }
+
+/* Blit every SAREA cliprect between the front and back buffers, then emit the incremented frame age. */
+void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct drm_master *master = file_priv->master;
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
+	int nbox = sarea_priv->nbox;
+	struct drm_clip_rect *pbox = sarea_priv->boxes;
+	int i, cpp, src_pitch, dst_pitch;
+	uint64_t src, dst;
+	RING_LOCALS;
+	DRM_DEBUG("\n");
+
+	/* cpp (presumably bytes per pixel): 4 for ARGB8888, 2 for any other colour format */
+	if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888)
+		cpp = 4;
+	else
+		cpp = 2;
+
+	/* pfCurrentPage == 0: back is the source and front the destination; otherwise the reverse */
+	if (sarea_priv->pfCurrentPage == 0) {
+		src_pitch = dev_priv->back_pitch;
+		dst_pitch = dev_priv->front_pitch;
+		src = dev_priv->back_offset + dev_priv->fb_location;
+		dst = dev_priv->front_offset + dev_priv->fb_location;
+	} else {
+		src_pitch = dev_priv->front_pitch;
+		dst_pitch = dev_priv->back_pitch;
+		src = dev_priv->front_offset + dev_priv->fb_location;
+		dst = dev_priv->back_offset + dev_priv->fb_location;
+	}
+
+	if (r600_prepare_blit_copy(dev, file_priv)) {
+		DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
+		return;
+	}
+	for (i = 0; i < nbox; i++) {
+		int x = pbox[i].x1;
+		int y = pbox[i].y1;
+		int w = pbox[i].x2 - x;
+		int h = pbox[i].y2 - y;
+		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
+		r600_blit_swap(dev,
+			       src, dst,
+			       x, y, x, y, w, h,
+			       src_pitch, dst_pitch, cpp);
+	}
+	r600_done_blit_copy(dev);
+	/* Increment the frame counter.  The client-side 3D driver must
+	 * throttle the framerate by waiting for this value before
+	 * performing the swapbuffer ioctl.
+	 */
+	sarea_priv->last_frame++;
+
+	BEGIN_RING(3);
+	R600_FRAME_AGE(sarea_priv->last_frame);
+	ADVANCE_RING();
+}
+
+/*
+ * Upload tex->height * tex->pitch bytes from user memory (image->data) to
+ * tex->offset: data is staged through freelist buffers and blitted chunk by chunk.
+ * Returns 0 on success, -EINVAL for a bad destination, -EAGAIN when no buffer is
+ * free (progress is written back to tex->image) or -EFAULT on a failed user copy.
+ */
+int r600_cp_dispatch_texture(struct drm_device *dev,
+			     struct drm_file *file_priv,
+			     drm_radeon_texture_t *tex,
+			     drm_radeon_tex_image_t *image)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct drm_buf *buf;
+	u32 *buffer;
+	const u8 __user *data;
+	int size, pass_size, ret = 0;
+	u64 src_offset, dst_offset;
+
+	if (!radeon_check_offset(dev_priv, tex->offset)) {
+		DRM_ERROR("Invalid destination offset\n");
+		return -EINVAL;
+	}
+	/* this might fail for zero-sized uploads - are those illegal? */
+	if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {
+		DRM_ERROR("Invalid final destination offset\n");
+		return -EINVAL;
+	}
+
+	size = tex->height * tex->pitch;
+	if (size == 0)
+		return 0;
+	dst_offset = tex->offset;
+
+	if (r600_prepare_blit_copy(dev, file_priv)) {
+		DRM_ERROR("unable to allocate vertex buffer for texture upload\n");
+		return -EAGAIN;
+	}
+	/* From here on every exit pairs the prepare above with r600_done_blit_copy(), as the success path does. */
+	do {
+		data = (const u8 __user *)image->data;
+		pass_size = size;
+
+		buf = radeon_freelist_get(dev);
+		if (!buf) {
+			DRM_DEBUG("EAGAIN\n");
+			ret = -EAGAIN;
+			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
+				ret = -EFAULT;
+			break;
+		}
+		if (pass_size > buf->total)
+			pass_size = buf->total;
+
+		/* Stage this chunk of user data in the buffer */
+		buffer = (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
+		if (DRM_COPY_FROM_USER(buffer, data, pass_size)) {
+			DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);
+			ret = -EFAULT;
+			break;
+		}
+
+		buf->file_priv = file_priv;
+		buf->used = pass_size;
+		src_offset = dev_priv->gart_buffers_offset + buf->offset;
+		r600_blit_copy(dev, src_offset, dst_offset, pass_size);
+		radeon_cp_discard_buffer(dev, file_priv->master, buf);
+
+		/* Update the input parameters for next time */
+		image->data = (const u8 __user *)image->data + pass_size;
+		dst_offset += pass_size;
+		size -= pass_size;
+	} while (size > 0);
+	r600_done_blit_copy(dev);
+
+	return ret;
+}
+
+/*
+ * Legacy cs ioctl
+ */
+static u32 radeon_cs_id_get(struct drm_radeon_private *radeon)
+{
+	/* FIXME: check if wrap affect last reported wrap & sequence */
+	u32 seq = (radeon->cs_id_scnt + 1) & 0x00FFFFFF;
+	if (seq == 0) {
+		/* sequence wrapped: bump the wrap counter and restart at 1, the first valid sequence */
+		radeon->cs_id_wcnt += 0x01000000;
+		seq = 1;
+	}
+	radeon->cs_id_scnt = seq;
+	return seq | radeon->cs_id_wcnt;
+}
+
+/* Allocate the next CS id, return it through @id and emit it on the ring. */
+static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id)
+{
+	RING_LOCALS;
+
+	*id = radeon_cs_id_get(dev_priv);
+	/* SCRATCH 2 */
+	BEGIN_RING(3);
+	R600_CLEAR_AGE(*id);
+	ADVANCE_RING();
+	COMMIT_RING();
+}
+
+/* Take a buffer from the freelist for use as an IB and tag it with the caller's file. */
+static int r600_ib_get(struct drm_device *dev,
+			struct drm_file *fpriv,
+			struct drm_buf **buffer)
+{
+	struct drm_buf *ib_buf = radeon_freelist_get(dev);
+
+	/* *buffer is always written: it ends up NULL when the freelist is empty */
+	*buffer = ib_buf;
+	if (ib_buf == NULL)
+		return -EBUSY;
+
+	ib_buf->file_priv = fpriv;
+	return 0;
+}
+
+static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf,
+			struct drm_file *fpriv, int l, int r)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	if (buf == NULL)
+		return;
+	/* the l dwords are dispatched only when the caller reports success (r == 0) */
+	if (r == 0)
+		r600_cp_dispatch_indirect(dev, buf, 0, l * 4);
+	radeon_cp_discard_buffer(dev, fpriv->master, buf);
+	COMMIT_RING();
+}
+
+/*
+ * Legacy CS ioctl: check the user command stream into a freelist buffer with r600_cs_legacy(),
+ * dispatch it only if the check passed, and always report a freshly emitted CS id in cs->cs_id.
+ */
+int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
+{
+	struct drm_radeon_private *dev_priv = dev->dev_private;
+	struct drm_radeon_cs *cs = data;
+	struct drm_buf *buf;
+	unsigned family;
+	int l = 0, r = 0;	/* l is passed to r600_ib_free() even when we bail out before the parser sets it */
+	u32 *ib, cs_id = 0;
+
+	if (dev_priv == NULL) {
+		DRM_ERROR("called with no initialization\n");
+		return -EINVAL;
+	}
+	family = dev_priv->flags & RADEON_FAMILY_MASK;
+	if (family < CHIP_R600) {
+		DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n");
+		return -EINVAL;
+	}
+	mutex_lock(&dev_priv->cs_mutex);
+	/* get ib */
+	r = r600_ib_get(dev, fpriv, &buf);
+	if (r) {
+		DRM_ERROR("ib_get failed\n");
+		goto out;
+	}
+	ib = dev->agp_buffer_map->handle + buf->offset;
+	/* now parse command stream; success and failure both continue at out with r set */
+	r = r600_cs_legacy(dev, data, fpriv, family, ib, &l);
+out:
+	r600_ib_free(dev, buf, fpriv, l, r);
+	/* emit cs id sequence */
+	r600_cs_id_emit(dev_priv, &cs_id);
+	cs->cs_id = cs_id;
+	mutex_unlock(&dev_priv->cs_mutex);
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
new file mode 100644
index 0000000..39bf634
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon.h"
+#include "radeon_share.h"
+#include "r600d.h"
+#include "avivod.h"
+
+static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
+					struct radeon_cs_reloc **cs_reloc);
+static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
+					struct radeon_cs_reloc **cs_reloc);
+typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**);
+static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm;	/* r600_cs_legacy_init() switches this to the _nomm variant */
+
+/**
+ * r600_cs_packet_parse() - decode the CP packet header found at @idx in the IB chunk
+ * @p:		parser structure holding parsing context.
+ * @pkt:	where to store packet informations
+ * @idx:	dword index of the packet header in the IB chunk
+ *
+ * Assumes chunk_ib_idx is set; p->idx is left alone. Returns -EINVAL if @idx or the packet end is past the IB, or the type is unknown.
+ **/
+int r600_cs_packet_parse(struct radeon_cs_parser *p,
+			struct radeon_cs_packet *pkt,
+			unsigned idx)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	uint32_t header;
+
+	if (idx >= ib_chunk->length_dw) {
+		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+			  idx, ib_chunk->length_dw);
+		return -EINVAL;
+	}
+	header = ib_chunk->kdata[idx];
+	pkt->idx = idx;
+	pkt->type = CP_PACKET_GET_TYPE(header);
+	pkt->count = CP_PACKET_GET_COUNT(header);
+	pkt->one_reg_wr = 0;
+	switch (pkt->type) {
+	case PACKET_TYPE0:
+		pkt->reg = CP_PACKET0_GET_REG(header);
+		break;
+	case PACKET_TYPE3:
+		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
+		break;
+	case PACKET_TYPE2:
+		pkt->count = -1;	/* makes count + 1 == 0, so callers stepping by count + 2 skip just the header */
+		break;
+	default:
+		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
+		return -EINVAL;
+	}
+	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
+		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
+			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3
+ * @p:			parser structure holding parsing context.
+ * @cs_reloc:		receives the matching entry of p->relocs_ptr on success
+ *
+ * Parses the packet at p->idx and advances p->idx past it. The packet must be
+ * a PACKET3_NOP whose first data dword is a dword offset into the relocation
+ * chunk; that offset divided by 4 (one reloc is assumed to be 4 dwords) indexes
+ * p->relocs_ptr.
+ * Returns 0 on success, -EINVAL (or the packet parser's error) otherwise.
+ **/
+static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
+					struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_chunk *relocs_chunk;
+	struct radeon_cs_packet p3reloc;
+	unsigned idx;
+	int r;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
+	if (r) {
+		return r;
+	}
+	p->idx += p3reloc.count + 2;
+	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+			  p3reloc.idx);
+		return -EINVAL;
+	}
+	idx = ib_chunk->kdata[p3reloc.idx + 1];
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+	/* FIXME: we assume reloc size is 4 dwords */
+	*cs_reloc = p->relocs_ptr[(idx / 4)];
+	return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3
+ * @p:			parser structure holding parsing context.
+ * @cs_reloc:		receives &p->relocs[0] with lobj.gpu_offset filled in on success
+ *
+ * Parses the packet at p->idx and advances p->idx past it. The packet must be
+ * a PACKET3_NOP whose first data dword is a dword offset into the relocation
+ * chunk. The GPU offset is read straight from that reloc entry: dword 0 is the
+ * low half and dword 3 the high half, so all four dwords must lie in the chunk.
+ * Returns 0 on success, -EINVAL (or the packet parser's error) otherwise.
+ **/
+static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
+					struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_chunk *relocs_chunk;
+	struct radeon_cs_packet p3reloc;
+	unsigned idx;
+	int r;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
+	if (r) {
+		return r;
+	}
+	p->idx += p3reloc.count + 2;
+	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+			  p3reloc.idx);
+		return -EINVAL;
+	}
+	idx = ib_chunk->kdata[p3reloc.idx + 1];
+	if (relocs_chunk->length_dw < 4 || idx > relocs_chunk->length_dw - 4) {	/* kdata[idx + 3] is read below */
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+	*cs_reloc = &p->relocs[0];
+	(*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32;
+	(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
+	return 0;
+}
+
+/* Type-0 register whitelist: only the two AVIVO vline start/end registers are accepted. */
+static int r600_packet0_check(struct radeon_cs_parser *p,
+				struct radeon_cs_packet *pkt,
+				unsigned idx, unsigned reg)
+{
+	const int allowed = reg == AVIVO_D1MODE_VLINE_START_END ||
+			    reg == AVIVO_D2MODE_VLINE_START_END;
+
+	if (allowed)
+		return 0;
+
+	printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
+	       reg, idx);
+	return -EINVAL;
+}
+
+/* Check each of the count + 1 registers a type-0 packet writes, starting at pkt->reg and 4 bytes apart. */
+static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
+				struct radeon_cs_packet *pkt)
+{
+	unsigned dw = pkt->idx + 1;	/* first data dword follows the header */
+	unsigned reg = pkt->reg;
+	unsigned n;
+
+	for (n = 0; n <= pkt->count; n++) {
+		int r = r600_packet0_check(p, pkt, dw + n, reg + 4 * n);
+
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/* Validate one type-3 packet and patch reloc addresses into the IB; validation reads the kernel copy (kdata), not the IB. */
+static int r600_packet3_check(struct radeon_cs_parser *p,
+				struct radeon_cs_packet *pkt)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_reloc *reloc;
+	volatile u32 *ib;
+	unsigned idx, i;
+	unsigned start_reg, end_reg, reg;
+	int r;
+
+	ib = p->ib->ptr;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	idx = pkt->idx + 1;
+	switch (pkt->opcode) {
+	case PACKET3_START_3D_CMDBUF:
+		if (p->family >= CHIP_RV770 || pkt->count) {
+			DRM_ERROR("bad START_3D\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_CONTEXT_CONTROL:
+		if (pkt->count != 1) {
+			DRM_ERROR("bad CONTEXT_CONTROL\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_INDEX_TYPE:
+	case PACKET3_NUM_INSTANCES:
+		if (pkt->count) {
+			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_DRAW_INDEX:
+		if (pkt->count != 3) {
+			DRM_ERROR("bad DRAW_INDEX\n");
+			return -EINVAL;
+		}
+		r = r600_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("bad DRAW_INDEX\n");
+			return -EINVAL;
+		}
+		ib[idx+0] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+		ib[idx+1] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+		break;
+	case PACKET3_DRAW_INDEX_AUTO:
+		if (pkt->count != 1) {
+			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_DRAW_INDEX_IMMD_BE:
+	case PACKET3_DRAW_INDEX_IMMD:
+		if (pkt->count < 2) {
+			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_WAIT_REG_MEM:
+		if (pkt->count != 5) {
+			DRM_ERROR("bad WAIT_REG_MEM\n");
+			return -EINVAL;
+		}
+		/* bit 4 is reg (0) or mem (1) */
+		if (ib_chunk->kdata[idx+0] & 0x10) {
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad WAIT_REG_MEM\n");
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+			ib[idx+2] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+		}
+		break;
+	case PACKET3_SURFACE_SYNC:
+		if (pkt->count != 3) {
+			DRM_ERROR("bad SURFACE_SYNC\n");
+			return -EINVAL;
+		}
+		/* 0xffffffff/0x0 is flush all cache flag */
+		if (ib_chunk->kdata[idx+1] != 0xffffffff ||
+		    ib_chunk->kdata[idx+2] != 0) {
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad SURFACE_SYNC\n");
+				return -EINVAL;
+			}
+			ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		}
+		break;
+	case PACKET3_EVENT_WRITE:
+		if (pkt->count != 2 && pkt->count != 0) {
+			DRM_ERROR("bad EVENT_WRITE\n");
+			return -EINVAL;
+		}
+		if (pkt->count) {
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad EVENT_WRITE\n");
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+			ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+		}
+		break;
+	case PACKET3_EVENT_WRITE_EOP:
+		if (pkt->count != 4) {
+			DRM_ERROR("bad EVENT_WRITE_EOP\n");
+			return -EINVAL;
+		}
+		r = r600_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("bad EVENT_WRITE_EOP\n");
+			return -EINVAL;
+		}
+		ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+		ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+		break;
+	case PACKET3_SET_CONFIG_REG:
+		start_reg = (ib_chunk->kdata[idx+0] << 2) + PACKET3_SET_CONFIG_REG_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) ||
+		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
+		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
+			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
+			return -EINVAL;
+		}
+		for (i = 0; i < pkt->count; i++) {
+			reg = start_reg + (4 * i);
+			switch (reg) {
+			case CP_COHER_BASE:
+				DRM_ERROR("bad SET_CONFIG_REG: CP_COHER_BASE, use PACKET3_SURFACE_SYNC\n");
+				return -EINVAL;
+			default:
+				break;
+			}
+		}
+		break;
+	case PACKET3_SET_CONTEXT_REG:
+		start_reg = (ib_chunk->kdata[idx+0] << 2) + PACKET3_SET_CONTEXT_REG_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) ||
+		    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
+		    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
+			DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
+			return -EINVAL;
+		}
+		for (i = 0; i < pkt->count; i++) {
+			reg = start_reg + (4 * i);
+			switch (reg) {
+			case DB_DEPTH_BASE:
+			case CB_COLOR0_BASE:
+			case CB_COLOR1_BASE:
+			case CB_COLOR2_BASE:
+			case CB_COLOR3_BASE:
+			case CB_COLOR4_BASE:
+			case CB_COLOR5_BASE:
+			case CB_COLOR6_BASE:
+			case CB_COLOR7_BASE:
+			case SQ_PGM_START_FS:
+			case SQ_PGM_START_ES:
+			case SQ_PGM_START_VS:
+			case SQ_PGM_START_GS:
+			case SQ_PGM_START_PS:
+				r = r600_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad SET_CONTEXT_REG "
+							"0x%04X\n", reg);
+					return -EINVAL;
+				}
+				ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				break;
+			case VGT_DMA_BASE:
+			case VGT_DMA_BASE_HI:
+				/* These should be handled by DRAW_INDEX packet 3 */
+			case VGT_STRMOUT_BASE_OFFSET_0:
+			case VGT_STRMOUT_BASE_OFFSET_1:
+			case VGT_STRMOUT_BASE_OFFSET_2:
+			case VGT_STRMOUT_BASE_OFFSET_3:
+			case VGT_STRMOUT_BASE_OFFSET_HI_0:
+			case VGT_STRMOUT_BASE_OFFSET_HI_1:
+			case VGT_STRMOUT_BASE_OFFSET_HI_2:
+			case VGT_STRMOUT_BASE_OFFSET_HI_3:
+			case VGT_STRMOUT_BUFFER_BASE_0:
+			case VGT_STRMOUT_BUFFER_BASE_1:
+			case VGT_STRMOUT_BUFFER_BASE_2:
+			case VGT_STRMOUT_BUFFER_BASE_3:
+			case VGT_STRMOUT_BUFFER_OFFSET_0:
+			case VGT_STRMOUT_BUFFER_OFFSET_1:
+			case VGT_STRMOUT_BUFFER_OFFSET_2:
+			case VGT_STRMOUT_BUFFER_OFFSET_3:
+				/* These should be handled by STRMOUT_BUFFER packet 3 */
+				DRM_ERROR("bad context reg: 0x%08x\n", reg);
+				return -EINVAL;
+			default:
+				break;
+			}
+		}
+		break;
+	case PACKET3_SET_RESOURCE:
+		if (pkt->count % 7) {
+			DRM_ERROR("bad SET_RESOURCE\n");
+			return -EINVAL;
+		}
+		start_reg = (ib_chunk->kdata[idx+0] << 2) + PACKET3_SET_RESOURCE_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) ||
+		    (start_reg >= PACKET3_SET_RESOURCE_END) ||
+		    (end_reg >= PACKET3_SET_RESOURCE_END)) {
+			DRM_ERROR("bad SET_RESOURCE\n");
+			return -EINVAL;
+		}
+		for (i = 0; i < (pkt->count / 7); i++) {
+			switch (G__SQ_VTX_CONSTANT_TYPE(ib_chunk->kdata[idx+(i*7)+6+1])) {
+			case SQ_TEX_VTX_VALID_TEXTURE:
+				/* tex base */
+				r = r600_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad SET_RESOURCE\n");
+					return -EINVAL;
+				}
+				ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				/* tex mip base */
+				r = r600_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad SET_RESOURCE\n");
+					return -EINVAL;
+				}
+				ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				break;
+			case SQ_TEX_VTX_VALID_BUFFER:
+				/* vtx base */
+				r = r600_cs_packet_next_reloc(p, &reloc);
+				if (r) {
+					DRM_ERROR("bad SET_RESOURCE\n");
+					return -EINVAL;
+				}
+				ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff);
+				ib[idx+1+(i*7)+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+				break;
+			case SQ_TEX_VTX_INVALID_TEXTURE:
+			case SQ_TEX_VTX_INVALID_BUFFER:
+			default:
+				DRM_ERROR("bad SET_RESOURCE\n");
+				return -EINVAL;
+			}
+		}
+		break;
+	case PACKET3_SET_ALU_CONST:
+		start_reg = (ib_chunk->kdata[idx+0] << 2) + PACKET3_SET_ALU_CONST_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) ||
+		    (start_reg >= PACKET3_SET_ALU_CONST_END) ||
+		    (end_reg >= PACKET3_SET_ALU_CONST_END)) {
+			DRM_ERROR("bad SET_ALU_CONST\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SET_BOOL_CONST:
+		start_reg = (ib_chunk->kdata[idx+0] << 2) + PACKET3_SET_BOOL_CONST_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) ||
+		    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
+		    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
+			DRM_ERROR("bad SET_BOOL_CONST\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SET_LOOP_CONST:
+		start_reg = (ib_chunk->kdata[idx+0] << 2) + PACKET3_SET_LOOP_CONST_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) ||
+		    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
+		    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
+			DRM_ERROR("bad SET_LOOP_CONST\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SET_CTL_CONST:
+		start_reg = (ib_chunk->kdata[idx+0] << 2) + PACKET3_SET_CTL_CONST_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) ||
+		    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
+		    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
+			DRM_ERROR("bad SET_CTL_CONST\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SET_SAMPLER:
+		if (pkt->count % 3) {
+			DRM_ERROR("bad SET_SAMPLER\n");
+			return -EINVAL;
+		}
+		start_reg = (ib_chunk->kdata[idx+0] << 2) + PACKET3_SET_SAMPLER_OFFSET;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) ||
+		    (start_reg >= PACKET3_SET_SAMPLER_END) ||
+		    (end_reg >= PACKET3_SET_SAMPLER_END)) {
+			DRM_ERROR("bad SET_SAMPLER\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_SURFACE_BASE_UPDATE:
+		if (p->family >= CHIP_RV770 || p->family == CHIP_R600) {
+			DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
+			return -EINVAL;
+		}
+		if (pkt->count) {
+			DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
+			return -EINVAL;
+		}
+		break;
+	case PACKET3_NOP:
+		break;
+	default:
+		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Walk the IB chunk packet by packet from p->idx, checking each one; returns 0 or the first error. */
+int r600_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_packet pkt;
+	int r;
+	do {
+		r = r600_cs_packet_parse(p, &pkt, p->idx);
+		if (r) {
+			return r;
+		}
+		p->idx += pkt.count + 2;	/* step past the packet first, so reloc lookups in the checks parse the following packet */
+		switch (pkt.type) {
+		case PACKET_TYPE0:
+			r = r600_cs_parse_packet0(p, &pkt);
+			break;
+		case PACKET_TYPE2:
+			break;
+		case PACKET_TYPE3:
+			r = r600_packet3_check(p, &pkt);
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
+			return -EINVAL;
+		}
+		if (r) {
+			return r;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+	for (r = 0; r < p->ib->length_dw; r++) {
+		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib->ptr[r]);
+		mdelay(1);
+	}
+#endif
+	return 0;
+}
+
+/* Legacy path: allocate the single zeroed reloc that r600_cs_packet_next_reloc_nomm() hands out. */
+static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
+{
+	/* without a relocation chunk there is nothing to allocate */
+	if (p->chunk_relocs_idx == -1)
+		return 0;
+
+	p->relocs = kcalloc(1, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+
+	return p->relocs ? 0 : -ENOMEM;
+}
+
+/**
+ * r600_cs_parser_fini() - release the memory held by a legacy parser context
+ * @parser:	parser structure holding parsing context.
+ * @error:	error number (not used by this function)
+ *
+ * Frees the reloc array, every chunk's kdata, the chunk array and
+ * chunks_array. Nothing else is undone here.
+ **/
+static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
+{
+	unsigned i;
+
+	kfree(parser->relocs);
+	for (i = 0; i < parser->nchunks; i++) {
+		kfree(parser->chunks[i].kdata);
+	}
+	kfree(parser->chunks);
+	kfree(parser->chunks_array);
+}
+
+/* Check a legacy command stream: copy its IB chunk into @ib, store the dword count in *l (only once the copy is done), then parse. */
+int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
+			unsigned family, u32 *ib, int *l)
+{
+	struct radeon_cs_parser parser;
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_ib	fake_ib;
+	int r;
+	/* initialize parser */
+	memset(&parser, 0, sizeof(struct radeon_cs_parser));
+	parser.filp = filp;
+	parser.rdev = NULL;
+	parser.family = family;
+	parser.ib = &fake_ib;
+	fake_ib.ptr = ib;
+	r = radeon_cs_parser_init(&parser, data);
+	if (r) {
+		DRM_ERROR("Failed to initialize parser !\n");
+		r600_cs_parser_fini(&parser, r);
+		return r;
+	}
+	r = r600_cs_parser_relocs_legacy(&parser);
+	if (r) {
+		DRM_ERROR("Failed to parse relocation !\n");
+		r600_cs_parser_fini(&parser, r);
+		return r;
+	}
+	/* Copy the packet into the IB, the parser will read from the
+	 * input memory (cached) and write to the IB (which can be uncached).
+	 * NOTE(review): length_dw is not checked against the size of the buffer behind @ib in this function - confirm it is bounded elsewhere. */
+	ib_chunk = &parser.chunks[parser.chunk_ib_idx];
+	parser.ib->length_dw = ib_chunk->length_dw;
+	memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4);
+	*l = parser.ib->length_dw;
+	r = r600_cs_parse(&parser);
+	if (r) {
+		DRM_ERROR("Invalid command stream !\n");
+		r600_cs_parser_fini(&parser, r);
+		return r;
+	}
+	r600_cs_parser_fini(&parser, r);
+	return r;
+}
+
+void r600_cs_legacy_init(void)
+{
+	r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;	/* from now on relocs are resolved from the offsets stored in the reloc chunk */
+}
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
new file mode 100644
index 0000000..723295f
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -0,0 +1,661 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef R600D_H
+#define R600D_H
+
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define R6XX_MAX_SH_GPRS			256
+#define R6XX_MAX_TEMP_GPRS			16
+#define R6XX_MAX_SH_THREADS			256
+#define R6XX_MAX_SH_STACK_ENTRIES		4096
+#define R6XX_MAX_BACKENDS			8
+#define R6XX_MAX_BACKENDS_MASK			0xff
+#define R6XX_MAX_SIMDS				8
+#define R6XX_MAX_SIMDS_MASK			0xff
+#define R6XX_MAX_PIPES				8
+#define R6XX_MAX_PIPES_MASK			0xff
+
+/* PTE flags */
+#define PTE_VALID				(1 << 0)
+#define PTE_SYSTEM				(1 << 1)
+#define PTE_SNOOPED				(1 << 2)
+#define PTE_READABLE				(1 << 5)
+#define PTE_WRITEABLE				(1 << 6)
+
+/* Registers */
+#define	ARB_POP						0x2418
+#define 	ENABLE_TC128					(1 << 30)
+#define	ARB_GDEC_RD_CNTL				0x246C
+
+#define	CC_GC_SHADER_PIPE_CONFIG			0x8950
+#define	CC_RB_BACKEND_DISABLE				0x98F4
+#define		BACKEND_DISABLE(x)				((x) << 16)
+
+#define	CB_COLOR0_BASE					0x28040
+#define	CB_COLOR1_BASE					0x28044
+#define	CB_COLOR2_BASE					0x28048
+#define	CB_COLOR3_BASE					0x2804C
+#define	CB_COLOR4_BASE					0x28050
+#define	CB_COLOR5_BASE					0x28054
+#define	CB_COLOR6_BASE					0x28058
+#define	CB_COLOR7_BASE					0x2805C
+#define	CB_COLOR7_FRAG					0x280FC
+
+#define CB_COLOR0_SIZE                                  0x28060
+#define CB_COLOR0_VIEW                                  0x28080
+#define CB_COLOR0_INFO                                  0x280a0
+#define CB_COLOR0_TILE                                  0x280c0
+#define CB_COLOR0_FRAG                                  0x280e0
+#define CB_COLOR0_MASK                                  0x28100
+
+#define	CONFIG_MEMSIZE					0x5428
+#define	CP_STAT						0x8680
+#define	CP_COHER_BASE					0x85F8
+#define	CP_DEBUG					0xC1FC
+#define	R_0086D8_CP_ME_CNTL			0x86D8
+#define		S_0086D8_CP_ME_HALT(x)			(((x) & 1)<<28)
+#define		C_0086D8_CP_ME_HALT(x)			((x) & 0xEFFFFFFF)
+#define	CP_ME_RAM_DATA					0xC160
+#define	CP_ME_RAM_RADDR					0xC158
+#define	CP_ME_RAM_WADDR					0xC15C
+#define CP_MEQ_THRESHOLDS				0x8764
+#define		MEQ_END(x)					((x) << 16)
+#define		ROQ_END(x)					((x) << 24)
+#define	CP_PERFMON_CNTL					0x87FC
+#define	CP_PFP_UCODE_ADDR				0xC150
+#define	CP_PFP_UCODE_DATA				0xC154
+#define	CP_QUEUE_THRESHOLDS				0x8760
+#define		ROQ_IB1_START(x)				((x) << 0)
+#define		ROQ_IB2_START(x)				((x) << 8)
+#define	CP_RB_BASE					0xC100
+#define	CP_RB_CNTL					0xC104
+#define		RB_BUFSZ(x)					((x)<<0)
+#define		RB_BLKSZ(x)					((x)<<8)
+#define		RB_NO_UPDATE					(1<<27)
+#define		RB_RPTR_WR_ENA					(1<<31)
+#define		BUF_SWAP_32BIT					(2 << 16)
+#define	CP_RB_RPTR					0x8700
+#define	CP_RB_RPTR_ADDR					0xC10C
+#define	CP_RB_RPTR_ADDR_HI				0xC110
+#define	CP_RB_RPTR_WR					0xC108
+#define	CP_RB_WPTR					0xC114
+#define	CP_RB_WPTR_ADDR					0xC118
+#define	CP_RB_WPTR_ADDR_HI				0xC11C
+#define	CP_RB_WPTR_DELAY				0x8704
+#define	CP_ROQ_IB1_STAT					0x8784
+#define	CP_ROQ_IB2_STAT					0x8788
+#define	CP_SEM_WAIT_TIMER				0x85BC
+
+#define	DB_DEBUG					0x9830
+#define		PREZ_MUST_WAIT_FOR_POSTZ_DONE			(1 << 31)
+#define	DB_DEPTH_BASE					0x2800C
+#define	DB_WATERMARKS					0x9838
+#define		DEPTH_FREE(x)					((x) << 0)
+#define		DEPTH_FLUSH(x)					((x) << 5)
+#define		DEPTH_PENDING_FREE(x)				((x) << 15)
+#define		DEPTH_CACHELINE_FREE(x)				((x) << 20)
+
+#define	DCP_TILING_CONFIG				0x6CA0
+#define		PIPE_TILING(x)					((x) << 1)
+#define 	BANK_TILING(x)					((x) << 4)
+#define		GROUP_SIZE(x)					((x) << 6)
+#define		ROW_TILING(x)					((x) << 8)
+#define		BANK_SWAPS(x)					((x) << 11)
+#define		SAMPLE_SPLIT(x)					((x) << 14)
+#define		BACKEND_MAP(x)					((x) << 16)
+
+#define GB_TILING_CONFIG				0x98F0
+
+#define	GC_USER_SHADER_PIPE_CONFIG			0x8954
+#define		INACTIVE_QD_PIPES(x)				((x) << 8)
+#define		INACTIVE_QD_PIPES_MASK				0x0000FF00
+#define		INACTIVE_SIMDS(x)				((x) << 16)
+#define		INACTIVE_SIMDS_MASK				0x00FF0000
+
+#define SQ_CONFIG                                         0x8c00
+#       define VC_ENABLE                                  (1 << 0)
+#       define EXPORT_SRC_C                               (1 << 1)
+#       define DX9_CONSTS                                 (1 << 2)
+#       define ALU_INST_PREFER_VECTOR                     (1 << 3)
+#       define DX10_CLAMP                                 (1 << 4)
+#       define CLAUSE_SEQ_PRIO(x)                         ((x) << 8)
+#       define PS_PRIO(x)                                 ((x) << 24)
+#       define VS_PRIO(x)                                 ((x) << 26)
+#       define GS_PRIO(x)                                 ((x) << 28)
+#       define ES_PRIO(x)                                 ((x) << 30)
+#define SQ_GPR_RESOURCE_MGMT_1                            0x8c04
+#       define NUM_PS_GPRS(x)                             ((x) << 0)
+#       define NUM_VS_GPRS(x)                             ((x) << 16)
+#       define NUM_CLAUSE_TEMP_GPRS(x)                    ((x) << 28)
+#define SQ_GPR_RESOURCE_MGMT_2                            0x8c08
+#       define NUM_GS_GPRS(x)                             ((x) << 0)
+#       define NUM_ES_GPRS(x)                             ((x) << 16)
+#define SQ_THREAD_RESOURCE_MGMT                           0x8c0c
+#       define NUM_PS_THREADS(x)                          ((x) << 0)
+#       define NUM_VS_THREADS(x)                          ((x) << 8)
+#       define NUM_GS_THREADS(x)                          ((x) << 16)
+#       define NUM_ES_THREADS(x)                          ((x) << 24)
+#define SQ_STACK_RESOURCE_MGMT_1                          0x8c10
+#       define NUM_PS_STACK_ENTRIES(x)                    ((x) << 0)
+#       define NUM_VS_STACK_ENTRIES(x)                    ((x) << 16)
+#define SQ_STACK_RESOURCE_MGMT_2                          0x8c14
+#       define NUM_GS_STACK_ENTRIES(x)                    ((x) << 0)
+#       define NUM_ES_STACK_ENTRIES(x)                    ((x) << 16)
+
+#define GRBM_CNTL                                       0x8000
+#       define GRBM_READ_TIMEOUT(x)                     ((x) << 0)
+#define	GRBM_STATUS					0x8010
+#define		CMDFIFO_AVAIL_MASK				0x0000001F
+#define		GUI_ACTIVE					(1<<31)
+#define	GRBM_STATUS2					0x8014
+#define	GRBM_SOFT_RESET					0x8020
+#define		SOFT_RESET_CP					(1<<0)
+
+#define	HDP_HOST_PATH_CNTL				0x2C00
+#define	HDP_NONSURFACE_BASE				0x2C04
+#define	HDP_NONSURFACE_INFO				0x2C08
+#define	HDP_NONSURFACE_SIZE				0x2C0C
+#define HDP_REG_COHERENCY_FLUSH_CNTL			0x54A0
+#define	HDP_TILING_CONFIG				0x2F3C
+
+#define MC_VM_AGP_TOP					0x2184
+#define MC_VM_AGP_BOT					0x2188
+#define	MC_VM_AGP_BASE					0x218C
+#define MC_VM_FB_LOCATION				0x2180
+#define MC_VM_L1_TLB_MCD_RD_A_CNTL			0x219C
+#define 	ENABLE_L1_TLB					(1 << 0)
+#define		ENABLE_L1_FRAGMENT_PROCESSING			(1 << 1)
+#define		ENABLE_L1_STRICT_ORDERING			(1 << 2)
+#define		SYSTEM_ACCESS_MODE_MASK				0x000000C0
+#define		SYSTEM_ACCESS_MODE_SHIFT			6
+#define		SYSTEM_ACCESS_MODE_PA_ONLY			(0 << 6)
+#define		SYSTEM_ACCESS_MODE_USE_SYS_MAP			(1 << 6)
+#define		SYSTEM_ACCESS_MODE_IN_SYS			(2 << 6)
+#define		SYSTEM_ACCESS_MODE_NOT_IN_SYS			(3 << 6)
+#define		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU	(0 << 8)
+#define		SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE	(1 << 8)
+#define		ENABLE_SEMAPHORE_MODE				(1 << 10)
+#define		ENABLE_WAIT_L2_QUERY				(1 << 11)
+#define		EFFECTIVE_L1_TLB_SIZE(x)			(((x) & 7) << 12)
+#define		EFFECTIVE_L1_TLB_SIZE_MASK			0x00007000
+#define		EFFECTIVE_L1_TLB_SIZE_SHIFT			12
+#define		EFFECTIVE_L1_QUEUE_SIZE(x)			(((x) & 7) << 15)
+#define		EFFECTIVE_L1_QUEUE_SIZE_MASK			0x00038000
+#define		EFFECTIVE_L1_QUEUE_SIZE_SHIFT			15
+#define MC_VM_L1_TLB_MCD_RD_B_CNTL			0x21A0
+#define MC_VM_L1_TLB_MCB_RD_GFX_CNTL			0x21FC
+#define MC_VM_L1_TLB_MCB_RD_HDP_CNTL			0x2204
+#define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL			0x2208
+#define MC_VM_L1_TLB_MCB_RD_SEM_CNTL			0x220C
+#define	MC_VM_L1_TLB_MCB_RD_SYS_CNTL			0x2200
+#define MC_VM_L1_TLB_MCD_WR_A_CNTL			0x21A4
+#define MC_VM_L1_TLB_MCD_WR_B_CNTL			0x21A8
+#define MC_VM_L1_TLB_MCB_WR_GFX_CNTL			0x2210
+#define MC_VM_L1_TLB_MCB_WR_HDP_CNTL			0x2218
+#define MC_VM_L1_TLB_MCB_WR_PDMA_CNTL			0x221C
+#define MC_VM_L1_TLB_MCB_WR_SEM_CNTL			0x2220
+#define MC_VM_L1_TLB_MCB_WR_SYS_CNTL			0x2214
+#define MC_VM_SYSTEM_APERTURE_LOW_ADDR			0x2190
+#define		LOGICAL_PAGE_NUMBER_MASK			0x000FFFFF
+#define		LOGICAL_PAGE_NUMBER_SHIFT			0
+#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR			0x2194
+#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR		0x2198
+
+#define	PA_CL_ENHANCE					0x8A14
+#define		CLIP_VTX_REORDER_ENA				(1 << 0)
+#define		NUM_CLIP_SEQ(x)					((x) << 1)
+#define PA_SC_AA_CONFIG					0x28C04
+#define	PA_SC_AA_SAMPLE_LOCS_2S				0x8B40
+#define	PA_SC_AA_SAMPLE_LOCS_4S				0x8B44
+#define	PA_SC_AA_SAMPLE_LOCS_8S_WD0			0x8B48
+#define	PA_SC_AA_SAMPLE_LOCS_8S_WD1			0x8B4C
+#define		S0_X(x)						((x) << 0)
+#define		S0_Y(x)						((x) << 4)
+#define		S1_X(x)						((x) << 8)
+#define		S1_Y(x)						((x) << 12)
+#define		S2_X(x)						((x) << 16)
+#define		S2_Y(x)						((x) << 20)
+#define		S3_X(x)						((x) << 24)
+#define		S3_Y(x)						((x) << 28)
+#define		S4_X(x)						((x) << 0)
+#define		S4_Y(x)						((x) << 4)
+#define		S5_X(x)						((x) << 8)
+#define		S5_Y(x)						((x) << 12)
+#define		S6_X(x)						((x) << 16)
+#define		S6_Y(x)						((x) << 20)
+#define		S7_X(x)						((x) << 24)
+#define		S7_Y(x)						((x) << 28)
+#define PA_SC_CLIPRECT_RULE				0x2820c
+#define	PA_SC_ENHANCE					0x8BF0
+#define		FORCE_EOV_MAX_CLK_CNT(x)			((x) << 0)
+#define		FORCE_EOV_MAX_TILE_CNT(x)			((x) << 12)
+#define PA_SC_LINE_STIPPLE				0x28A0C
+#define	PA_SC_LINE_STIPPLE_STATE			0x8B10
+#define PA_SC_MODE_CNTL					0x28A4C
+#define	PA_SC_MULTI_CHIP_CNTL				0x8B20
+
+#define PA_SC_SCREEN_SCISSOR_TL                         0x28030
+#define PA_SC_GENERIC_SCISSOR_TL                        0x28240
+#define PA_SC_WINDOW_SCISSOR_TL                         0x28204
+
+#define	PCIE_PORT_INDEX					0x0038
+#define	PCIE_PORT_DATA					0x003C
+
+#define RAMCFG						0x2408
+#define		NOOFBANK_SHIFT					0
+#define		NOOFBANK_MASK					0x00000001
+#define		NOOFRANK_SHIFT					1
+#define		NOOFRANK_MASK					0x00000002
+#define		NOOFROWS_SHIFT					2
+#define		NOOFROWS_MASK					0x0000001C
+#define		NOOFCOLS_SHIFT					5
+#define		NOOFCOLS_MASK					0x00000060
+#define		CHANSIZE_SHIFT					7
+#define		CHANSIZE_MASK					0x00000080
+#define		BURSTLENGTH_SHIFT				8
+#define		BURSTLENGTH_MASK				0x00000100
+#define		CHANSIZE_OVERRIDE				(1 << 10)
+
+#define	SCRATCH_REG0					0x8500
+#define	SCRATCH_REG1					0x8504
+#define	SCRATCH_REG2					0x8508
+#define	SCRATCH_REG3					0x850C
+#define	SCRATCH_REG4					0x8510
+#define	SCRATCH_REG5					0x8514
+#define	SCRATCH_REG6					0x8518
+#define	SCRATCH_REG7					0x851C
+#define	SCRATCH_UMSK					0x8540
+#define	SCRATCH_ADDR					0x8544
+
+#define	SPI_CONFIG_CNTL					0x9100
+#define		GPR_WRITE_PRIORITY(x)				((x) << 0)
+#define		DISABLE_INTERP_1				(1 << 5)
+#define	SPI_CONFIG_CNTL_1				0x913C
+#define		VTX_DONE_DELAY(x)				((x) << 0)
+#define		INTERP_ONE_PRIM_PER_ROW				(1 << 4)
+#define	SPI_INPUT_Z					0x286D8
+#define	SPI_PS_IN_CONTROL_0				0x286CC
+#define		NUM_INTERP(x)					((x)<<0)
+#define		POSITION_ENA					(1<<8)
+#define		POSITION_CENTROID				(1<<9)
+#define		POSITION_ADDR(x)				((x)<<10)
+#define		PARAM_GEN(x)					((x)<<15)
+#define		PARAM_GEN_ADDR(x)				((x)<<19)
+#define		BARYC_SAMPLE_CNTL(x)				((x)<<26)
+#define		PERSP_GRADIENT_ENA				(1<<28)
+#define		LINEAR_GRADIENT_ENA				(1<<29)
+#define		POSITION_SAMPLE					(1<<30)
+#define		BARYC_AT_SAMPLE_ENA				(1<<31)
+#define	SPI_PS_IN_CONTROL_1				0x286D0
+#define		GEN_INDEX_PIX					(1<<0)
+#define		GEN_INDEX_PIX_ADDR(x)				((x)<<1)
+#define		FRONT_FACE_ENA					(1<<8)
+#define		FRONT_FACE_CHAN(x)				((x)<<9)
+#define		FRONT_FACE_ALL_BITS				(1<<11)
+#define		FRONT_FACE_ADDR(x)				((x)<<12)
+#define		FOG_ADDR(x)					((x)<<17)
+#define		FIXED_PT_POSITION_ENA				(1<<24)
+#define		FIXED_PT_POSITION_ADDR(x)			((x)<<25)
+
+#define	SQ_MS_FIFO_SIZES				0x8CF0
+#define		CACHE_FIFO_SIZE(x)				((x) << 0)
+#define		FETCH_FIFO_HIWATER(x)				((x) << 8)
+#define		DONE_FIFO_HIWATER(x)				((x) << 16)
+#define		ALU_UPDATE_FIFO_HIWATER(x)			((x) << 24)
+#define	SQ_PGM_START_ES					0x28880
+#define	SQ_PGM_START_FS					0x28894
+#define	SQ_PGM_START_GS					0x2886C
+#define	SQ_PGM_START_PS					0x28840
+#define SQ_PGM_RESOURCES_PS                             0x28850
+#define SQ_PGM_EXPORTS_PS                               0x28854
+#define SQ_PGM_CF_OFFSET_PS                             0x288cc
+#define	SQ_PGM_START_VS					0x28858
+#define SQ_PGM_RESOURCES_VS                             0x28868
+#define SQ_PGM_CF_OFFSET_VS                             0x288d0
+#define	SQ_VTX_CONSTANT_WORD6_0				0x38018
+#define		S__SQ_VTX_CONSTANT_TYPE(x)			(((x) & 3) << 30)
+#define		G__SQ_VTX_CONSTANT_TYPE(x)			(((x) >> 30) & 3)
+#define			SQ_TEX_VTX_INVALID_TEXTURE			0x0
+#define			SQ_TEX_VTX_INVALID_BUFFER			0x1
+#define			SQ_TEX_VTX_VALID_TEXTURE			0x2
+#define			SQ_TEX_VTX_VALID_BUFFER				0x3
+
+
+#define	SX_MISC						0x28350
+#define	SX_DEBUG_1					0x9054
+#define		SMX_EVENT_RELEASE				(1 << 0)
+#define		ENABLE_NEW_SMX_ADDRESS				(1 << 16)
+
+#define	TA_CNTL_AUX					0x9508
+#define		DISABLE_CUBE_WRAP				(1 << 0)
+#define		DISABLE_CUBE_ANISO				(1 << 1)
+#define		SYNC_GRADIENT					(1 << 24)
+#define		SYNC_WALKER					(1 << 25)
+#define		SYNC_ALIGNER					(1 << 26)
+#define		BILINEAR_PRECISION_6_BIT			(0 << 31)
+#define		BILINEAR_PRECISION_8_BIT			(1 << 31)
+
+#define	TC_CNTL						0x9608
+#define		TC_L2_SIZE(x)					((x)<<5)
+#define		L2_DISABLE_LATE_HIT				(1<<9)
+
+
+#define	VGT_CACHE_INVALIDATION				0x88C4
+#define		CACHE_INVALIDATION(x)				((x)<<0)
+#define			VC_ONLY						0
+#define			TC_ONLY						1
+#define			VC_AND_TC					2
+#define	VGT_DMA_BASE					0x287E8
+#define	VGT_DMA_BASE_HI					0x287E4
+#define	VGT_ES_PER_GS					0x88CC
+#define	VGT_GS_PER_ES					0x88C8
+#define	VGT_GS_PER_VS					0x88E8
+#define	VGT_GS_VERTEX_REUSE				0x88D4
+#define VGT_PRIMITIVE_TYPE                              0x8958
+#define	VGT_NUM_INSTANCES				0x8974
+#define	VGT_OUT_DEALLOC_CNTL				0x28C5C
+#define		DEALLOC_DIST_MASK				0x0000007F
+#define	VGT_STRMOUT_BASE_OFFSET_0			0x28B10
+#define	VGT_STRMOUT_BASE_OFFSET_1			0x28B14
+#define	VGT_STRMOUT_BASE_OFFSET_2			0x28B18
+#define	VGT_STRMOUT_BASE_OFFSET_3			0x28B1c
+#define	VGT_STRMOUT_BASE_OFFSET_HI_0			0x28B44
+#define	VGT_STRMOUT_BASE_OFFSET_HI_1			0x28B48
+#define	VGT_STRMOUT_BASE_OFFSET_HI_2			0x28B4c
+#define	VGT_STRMOUT_BASE_OFFSET_HI_3			0x28B50
+#define	VGT_STRMOUT_BUFFER_BASE_0			0x28AD8
+#define	VGT_STRMOUT_BUFFER_BASE_1			0x28AE8
+#define	VGT_STRMOUT_BUFFER_BASE_2			0x28AF8
+#define	VGT_STRMOUT_BUFFER_BASE_3			0x28B08
+#define	VGT_STRMOUT_BUFFER_OFFSET_0			0x28ADC
+#define	VGT_STRMOUT_BUFFER_OFFSET_1			0x28AEC
+#define	VGT_STRMOUT_BUFFER_OFFSET_2			0x28AFC
+#define	VGT_STRMOUT_BUFFER_OFFSET_3			0x28B0C
+#define	VGT_STRMOUT_EN					0x28AB0
+#define	VGT_VERTEX_REUSE_BLOCK_CNTL			0x28C58
+#define		VTX_REUSE_DEPTH_MASK				0x000000FF
+#define VGT_EVENT_INITIATOR                             0x28a90
+#       define CACHE_FLUSH_AND_INV_EVENT                        (0x16 << 0)
+
+#define VM_CONTEXT0_CNTL				0x1410
+#define		ENABLE_CONTEXT					(1 << 0)
+#define		PAGE_TABLE_DEPTH(x)				(((x) & 3) << 1)
+#define		RANGE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 4)
+#define VM_CONTEXT0_INVALIDATION_LOW_ADDR		0x1490
+#define VM_CONTEXT0_INVALIDATION_HIGH_ADDR		0x14B0
+#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR		0x1574
+#define VM_CONTEXT0_PAGE_TABLE_START_ADDR		0x1594
+#define VM_CONTEXT0_PAGE_TABLE_END_ADDR			0x15B4
+#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR	0x1554
+#define VM_CONTEXT0_REQUEST_RESPONSE			0x1470
+#define		REQUEST_TYPE(x)					(((x) & 0xf) << 0)
+#define		RESPONSE_TYPE_MASK				0x000000F0
+#define		RESPONSE_TYPE_SHIFT				4
+#define VM_L2_CNTL					0x1400
+#define		ENABLE_L2_CACHE					(1 << 0)
+#define		ENABLE_L2_FRAGMENT_PROCESSING			(1 << 1)
+#define		ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE		(1 << 9)
+#define		EFFECTIVE_L2_QUEUE_SIZE(x)			(((x) & 7) << 13)
+#define VM_L2_CNTL2					0x1404
+#define		INVALIDATE_ALL_L1_TLBS				(1 << 0)
+#define		INVALIDATE_L2_CACHE				(1 << 1)
+#define VM_L2_CNTL3					0x1408
+#define		BANK_SELECT_0(x)				(((x) & 0x1f) << 0)
+#define		BANK_SELECT_1(x)				(((x) & 0x1f) << 5)
+#define		L2_CACHE_UPDATE_MODE(x)				(((x) & 3) << 10)
+#define	VM_L2_STATUS					0x140C
+#define		L2_BUSY						(1 << 0)
+
+#define	WAIT_UNTIL					0x8040
+#define         WAIT_2D_IDLE_bit                                (1 << 14)
+#define         WAIT_3D_IDLE_bit                                (1 << 15)
+#define         WAIT_2D_IDLECLEAN_bit                           (1 << 16)
+#define         WAIT_3D_IDLECLEAN_bit                           (1 << 17)
+
+
+
+/* PM4 packet headers: type in bits 31:30, count in bits 29:16. Builders
+ * cast the type to u32 so that 3 << 30 cannot overflow a signed int.
+ */
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n)	(((u32)PACKET_TYPE0 << 30) |			\
+			 (((reg) >> 2) & 0xFFFF) |			\
+			 (((n) & 0x3FFF) << 16))
+#define PACKET3(op, n)	(((u32)PACKET_TYPE3 << 30) |			\
+			 (((op) & 0xFF) << 8) |				\
+			 (((n) & 0x3FFF) << 16))
+
+/* Packet 3 types */
+#define	PACKET3_NOP					0x10
+#define	PACKET3_INDIRECT_BUFFER_END			0x17
+#define	PACKET3_SET_PREDICATION				0x20
+#define	PACKET3_REG_RMW					0x21
+#define	PACKET3_COND_EXEC				0x22
+#define	PACKET3_PRED_EXEC				0x23
+#define	PACKET3_START_3D_CMDBUF				0x24
+#define	PACKET3_DRAW_INDEX_2				0x27
+#define	PACKET3_CONTEXT_CONTROL				0x28
+#define	PACKET3_DRAW_INDEX_IMMD_BE			0x29
+#define	PACKET3_INDEX_TYPE				0x2A
+#define	PACKET3_DRAW_INDEX				0x2B
+#define	PACKET3_DRAW_INDEX_AUTO				0x2D
+#define	PACKET3_DRAW_INDEX_IMMD				0x2E
+#define	PACKET3_NUM_INSTANCES				0x2F
+#define	PACKET3_STRMOUT_BUFFER_UPDATE			0x34
+#define	PACKET3_INDIRECT_BUFFER_MP			0x38
+#define	PACKET3_MEM_SEMAPHORE				0x39
+#define	PACKET3_MPEG_INDEX				0x3A
+#define	PACKET3_WAIT_REG_MEM				0x3C
+#define	PACKET3_MEM_WRITE				0x3D
+#define	PACKET3_INDIRECT_BUFFER				0x32
+#define	PACKET3_CP_INTERRUPT				0x40
+#define	PACKET3_SURFACE_SYNC				0x43
+#              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
+#              define PACKET3_TC_ACTION_ENA        (1 << 23)
+#              define PACKET3_VC_ACTION_ENA        (1 << 24)
+#              define PACKET3_CB_ACTION_ENA        (1 << 25)
+#              define PACKET3_DB_ACTION_ENA        (1 << 26)
+#              define PACKET3_SH_ACTION_ENA        (1 << 27)
+#              define PACKET3_SMX_ACTION_ENA       (1 << 28)
+#define	PACKET3_ME_INITIALIZE				0x44
+#define		PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
+#define	PACKET3_COND_WRITE				0x45
+#define	PACKET3_EVENT_WRITE				0x46
+#define	PACKET3_EVENT_WRITE_EOP				0x47
+#define	PACKET3_ONE_REG_WRITE				0x57
+#define	PACKET3_SET_CONFIG_REG				0x68
+#define		PACKET3_SET_CONFIG_REG_OFFSET			0x00008000
+#define		PACKET3_SET_CONFIG_REG_END			0x0000ac00
+#define	PACKET3_SET_CONTEXT_REG				0x69
+#define		PACKET3_SET_CONTEXT_REG_OFFSET			0x00028000
+#define		PACKET3_SET_CONTEXT_REG_END			0x00029000
+#define	PACKET3_SET_ALU_CONST				0x6A
+#define		PACKET3_SET_ALU_CONST_OFFSET			0x00030000
+#define		PACKET3_SET_ALU_CONST_END			0x00032000
+#define	PACKET3_SET_BOOL_CONST				0x6B
+#define		PACKET3_SET_BOOL_CONST_OFFSET			0x0003e380
+#define		PACKET3_SET_BOOL_CONST_END			0x00040000
+#define	PACKET3_SET_LOOP_CONST				0x6C
+#define		PACKET3_SET_LOOP_CONST_OFFSET			0x0003e200
+#define		PACKET3_SET_LOOP_CONST_END			0x0003e380
+#define	PACKET3_SET_RESOURCE				0x6D
+#define		PACKET3_SET_RESOURCE_OFFSET			0x00038000
+#define		PACKET3_SET_RESOURCE_END			0x0003c000
+#define	PACKET3_SET_SAMPLER				0x6E
+#define		PACKET3_SET_SAMPLER_OFFSET			0x0003c000
+#define		PACKET3_SET_SAMPLER_END				0x0003cff0
+#define	PACKET3_SET_CTL_CONST				0x6F
+#define		PACKET3_SET_CTL_CONST_OFFSET			0x0003cff0
+#define		PACKET3_SET_CTL_CONST_END			0x0003e200
+#define	PACKET3_SURFACE_BASE_UPDATE			0x73
+
+
+#define	R_008020_GRBM_SOFT_RESET		0x8020
+#define		S_008020_SOFT_RESET_CP(x)		(((x) & 1) << 0)
+#define		S_008020_SOFT_RESET_CB(x)		(((x) & 1) << 1)
+#define		S_008020_SOFT_RESET_CR(x)		(((x) & 1) << 2)
+#define		S_008020_SOFT_RESET_DB(x)		(((x) & 1) << 3)
+#define		S_008020_SOFT_RESET_PA(x)		(((x) & 1) << 5)
+#define		S_008020_SOFT_RESET_SC(x)		(((x) & 1) << 6)
+#define		S_008020_SOFT_RESET_SMX(x)		(((x) & 1) << 7)
+#define		S_008020_SOFT_RESET_SPI(x)		(((x) & 1) << 8)
+#define		S_008020_SOFT_RESET_SH(x)		(((x) & 1) << 9)
+#define		S_008020_SOFT_RESET_SX(x)		(((x) & 1) << 10)
+#define		S_008020_SOFT_RESET_TC(x)		(((x) & 1) << 11)
+#define		S_008020_SOFT_RESET_TA(x)		(((x) & 1) << 12)
+#define		S_008020_SOFT_RESET_VC(x)		(((x) & 1) << 13)
+#define		S_008020_SOFT_RESET_VGT(x)		(((x) & 1) << 14)
+#define	R_008010_GRBM_STATUS			0x8010
+#define		S_008010_CMDFIFO_AVAIL(x)		(((x) & 0x1F) << 0)
+#define		S_008010_CP_RQ_PENDING(x)		(((x) & 1) << 6)
+#define		S_008010_CF_RQ_PENDING(x)		(((x) & 1) << 7)
+#define		S_008010_PF_RQ_PENDING(x)		(((x) & 1) << 8)
+#define		S_008010_GRBM_EE_BUSY(x)		(((x) & 1) << 10)
+#define		S_008010_VC_BUSY(x)			(((x) & 1) << 11)
+#define		S_008010_DB03_CLEAN(x)			(((x) & 1) << 12)
+#define		S_008010_CB03_CLEAN(x)			(((x) & 1) << 13)
+#define		S_008010_VGT_BUSY_NO_DMA(x)		(((x) & 1) << 16)
+#define		S_008010_VGT_BUSY(x)			(((x) & 1) << 17)
+#define		S_008010_TA03_BUSY(x)			(((x) & 1) << 18)
+#define		S_008010_TC_BUSY(x)			(((x) & 1) << 19)
+#define		S_008010_SX_BUSY(x)			(((x) & 1) << 20)
+#define		S_008010_SH_BUSY(x)			(((x) & 1) << 21)
+#define		S_008010_SPI03_BUSY(x)			(((x) & 1) << 22)
+#define		S_008010_SMX_BUSY(x)			(((x) & 1) << 23)
+#define		S_008010_SC_BUSY(x)			(((x) & 1) << 24)
+#define		S_008010_PA_BUSY(x)			(((x) & 1) << 25)
+#define		S_008010_DB03_BUSY(x)			(((x) & 1) << 26)
+#define		S_008010_CR_BUSY(x)			(((x) & 1) << 27)
+#define		S_008010_CP_COHERENCY_BUSY(x)		(((x) & 1) << 28)
+#define		S_008010_CP_BUSY(x)			(((x) & 1) << 29)
+#define		S_008010_CB03_BUSY(x)			(((x) & 1) << 30)
+#define		S_008010_GUI_ACTIVE(x)			(((x) & 1) << 31)
+#define		G_008010_CMDFIFO_AVAIL(x)		(((x) >> 0) & 0x1F)
+#define		G_008010_CP_RQ_PENDING(x)		(((x) >> 6) & 1)
+#define		G_008010_CF_RQ_PENDING(x)		(((x) >> 7) & 1)
+#define		G_008010_PF_RQ_PENDING(x)		(((x) >> 8) & 1)
+#define		G_008010_GRBM_EE_BUSY(x)		(((x) >> 10) & 1)
+#define		G_008010_VC_BUSY(x)			(((x) >> 11) & 1)
+#define		G_008010_DB03_CLEAN(x)			(((x) >> 12) & 1)
+#define		G_008010_CB03_CLEAN(x)			(((x) >> 13) & 1)
+#define		G_008010_VGT_BUSY_NO_DMA(x)		(((x) >> 16) & 1)
+#define		G_008010_VGT_BUSY(x)			(((x) >> 17) & 1)
+#define		G_008010_TA03_BUSY(x)			(((x) >> 18) & 1)
+#define		G_008010_TC_BUSY(x)			(((x) >> 19) & 1)
+#define		G_008010_SX_BUSY(x)			(((x) >> 20) & 1)
+#define		G_008010_SH_BUSY(x)			(((x) >> 21) & 1)
+#define		G_008010_SPI03_BUSY(x)			(((x) >> 22) & 1)
+#define		G_008010_SMX_BUSY(x)			(((x) >> 23) & 1)
+#define		G_008010_SC_BUSY(x)			(((x) >> 24) & 1)
+#define		G_008010_PA_BUSY(x)			(((x) >> 25) & 1)
+#define		G_008010_DB03_BUSY(x)			(((x) >> 26) & 1)
+#define		G_008010_CR_BUSY(x)			(((x) >> 27) & 1)
+#define		G_008010_CP_COHERENCY_BUSY(x)		(((x) >> 28) & 1)
+#define		G_008010_CP_BUSY(x)			(((x) >> 29) & 1)
+#define		G_008010_CB03_BUSY(x)			(((x) >> 30) & 1)
+#define		G_008010_GUI_ACTIVE(x)			(((x) >> 31) & 1)
+#define	R_008014_GRBM_STATUS2			0x8014
+#define		S_008014_CR_CLEAN(x)			(((x) & 1) << 0)
+#define		S_008014_SMX_CLEAN(x)			(((x) & 1) << 1)
+#define		S_008014_SPI0_BUSY(x)			(((x) & 1) << 8)
+#define		S_008014_SPI1_BUSY(x)			(((x) & 1) << 9)
+#define		S_008014_SPI2_BUSY(x)			(((x) & 1) << 10)
+#define		S_008014_SPI3_BUSY(x)			(((x) & 1) << 11)
+#define		S_008014_TA0_BUSY(x)			(((x) & 1) << 12)
+#define		S_008014_TA1_BUSY(x)			(((x) & 1) << 13)
+#define		S_008014_TA2_BUSY(x)			(((x) & 1) << 14)
+#define		S_008014_TA3_BUSY(x)			(((x) & 1) << 15)
+#define		S_008014_DB0_BUSY(x)			(((x) & 1) << 16)
+#define		S_008014_DB1_BUSY(x)			(((x) & 1) << 17)
+#define		S_008014_DB2_BUSY(x)			(((x) & 1) << 18)
+#define		S_008014_DB3_BUSY(x)			(((x) & 1) << 19)
+#define		S_008014_CB0_BUSY(x)			(((x) & 1) << 20)
+#define		S_008014_CB1_BUSY(x)			(((x) & 1) << 21)
+#define		S_008014_CB2_BUSY(x)			(((x) & 1) << 22)
+#define		S_008014_CB3_BUSY(x)			(((x) & 1) << 23)
+#define		G_008014_CR_CLEAN(x)			(((x) >> 0) & 1)
+#define		G_008014_SMX_CLEAN(x)			(((x) >> 1) & 1)
+#define		G_008014_SPI0_BUSY(x)			(((x) >> 8) & 1)
+#define		G_008014_SPI1_BUSY(x)			(((x) >> 9) & 1)
+#define		G_008014_SPI2_BUSY(x)			(((x) >> 10) & 1)
+#define		G_008014_SPI3_BUSY(x)			(((x) >> 11) & 1)
+#define		G_008014_TA0_BUSY(x)			(((x) >> 12) & 1)
+#define		G_008014_TA1_BUSY(x)			(((x) >> 13) & 1)
+#define		G_008014_TA2_BUSY(x)			(((x) >> 14) & 1)
+#define		G_008014_TA3_BUSY(x)			(((x) >> 15) & 1)
+#define		G_008014_DB0_BUSY(x)			(((x) >> 16) & 1)
+#define		G_008014_DB1_BUSY(x)			(((x) >> 17) & 1)
+#define		G_008014_DB2_BUSY(x)			(((x) >> 18) & 1)
+#define		G_008014_DB3_BUSY(x)			(((x) >> 19) & 1)
+#define		G_008014_CB0_BUSY(x)			(((x) >> 20) & 1)
+#define		G_008014_CB1_BUSY(x)			(((x) >> 21) & 1)
+#define		G_008014_CB2_BUSY(x)			(((x) >> 22) & 1)
+#define		G_008014_CB3_BUSY(x)			(((x) >> 23) & 1)
+#define	R_000E50_SRBM_STATUS				0x0E50
+#define		G_000E50_RLC_RQ_PENDING(x)		(((x) >> 3) & 1)
+#define		G_000E50_RCU_RQ_PENDING(x)		(((x) >> 4) & 1)
+#define		G_000E50_GRBM_RQ_PENDING(x)		(((x) >> 5) & 1)
+#define		G_000E50_HI_RQ_PENDING(x)		(((x) >> 6) & 1)
+#define		G_000E50_IO_EXTERN_SIGNAL(x)		(((x) >> 7) & 1)
+#define		G_000E50_VMC_BUSY(x)			(((x) >> 8) & 1)
+#define		G_000E50_MCB_BUSY(x)			(((x) >> 9) & 1)
+#define		G_000E50_MCDZ_BUSY(x)			(((x) >> 10) & 1)
+#define		G_000E50_MCDY_BUSY(x)			(((x) >> 11) & 1)
+#define		G_000E50_MCDX_BUSY(x)			(((x) >> 12) & 1)
+#define		G_000E50_MCDW_BUSY(x)			(((x) >> 13) & 1)
+#define		G_000E50_SEM_BUSY(x)			(((x) >> 14) & 1)
+#define		G_000E50_RLC_BUSY(x)			(((x) >> 15) & 1)
+#define	R_000E60_SRBM_SOFT_RESET			0x0E60
+#define		S_000E60_SOFT_RESET_BIF(x)		(((x) & 1) << 1)
+#define		S_000E60_SOFT_RESET_CG(x)		(((x) & 1) << 2)
+#define		S_000E60_SOFT_RESET_CMC(x)		(((x) & 1) << 3)
+#define		S_000E60_SOFT_RESET_CSC(x)		(((x) & 1) << 4)
+#define		S_000E60_SOFT_RESET_DC(x)		(((x) & 1) << 5)
+#define		S_000E60_SOFT_RESET_GRBM(x)		(((x) & 1) << 8)
+#define		S_000E60_SOFT_RESET_HDP(x)		(((x) & 1) << 9)
+#define		S_000E60_SOFT_RESET_IH(x)		(((x) & 1) << 10)
+#define		S_000E60_SOFT_RESET_MC(x)		(((x) & 1) << 11)
+#define		S_000E60_SOFT_RESET_RLC(x)		(((x) & 1) << 13)
+#define		S_000E60_SOFT_RESET_ROM(x)		(((x) & 1) << 14)
+#define		S_000E60_SOFT_RESET_SEM(x)		(((x) & 1) << 15)
+#define		S_000E60_SOFT_RESET_TSC(x)		(((x) & 1) << 16)
+#define		S_000E60_SOFT_RESET_VMC(x)		(((x) & 1) << 17)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index e47f2fc..3299733 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -50,8 +50,8 @@
 #include <linux/kref.h>
 
 #include "radeon_mode.h"
+#include "radeon_share.h"
 #include "radeon_reg.h"
-#include "r300.h"
 
 /*
  * Modules parameters.
@@ -112,10 +112,11 @@
 	CHIP_RV635,
 	CHIP_RV670,
 	CHIP_RS780,
+	CHIP_RS880,
 	CHIP_RV770,
 	CHIP_RV730,
 	CHIP_RV710,
-	CHIP_RS880,
+	CHIP_RV740,
 	CHIP_LAST,
 };
 
@@ -152,10 +153,21 @@
  */
 bool radeon_get_bios(struct radeon_device *rdev);
 
+
+/*
+ * Dummy page
+ */
+struct radeon_dummy_page {
+	struct page	*page;	/* the page; presumably set up by radeon_dummy_page_init() */
+	dma_addr_t	addr;	/* DMA address, presumably of @page -- confirm */
+};
+int radeon_dummy_page_init(struct radeon_device *rdev);
+void radeon_dummy_page_fini(struct radeon_device *rdev);
+
+
 /*
  * Clocks
  */
-
 struct radeon_clock {
 	struct radeon_pll p1pll;
 	struct radeon_pll p2pll;
@@ -166,6 +178,7 @@
 	uint32_t default_sclk;
 };
 
+
 /*
  * Fences.
  */
@@ -332,14 +345,18 @@
 	resource_size_t		aper_size;
 	resource_size_t		aper_base;
 	resource_size_t		agp_base;
-	unsigned		gtt_location;
-	unsigned		gtt_size;
-	unsigned		vram_location;
 	/* for some chips with <= 32MB we need to lie
 	 * about vram size near mc fb location */
-	unsigned		mc_vram_size;
+	u64			mc_vram_size;
+	u64			gtt_location;
+	u64			gtt_size;
+	u64			gtt_start;
+	u64			gtt_end;
+	u64			vram_location;
+	u64			vram_start;
+	u64			vram_end;
 	unsigned		vram_width;
-	unsigned		real_vram_size;
+	u64			real_vram_size;
 	int			vram_mtrr;
 	bool			vram_is_ddr;
 };
@@ -411,6 +428,16 @@
 	bool			ready;
 };
 
+struct r600_blit {
+	struct radeon_object	*shader_obj;	/* presumably the object holding the blit shaders */
+	u64 shader_gpu_addr;		/* presumably the GPU address of shader_obj -- confirm */
+	u32 vs_offset, ps_offset;	/* presumably VS/PS offsets inside shader_obj */
+	u32 state_offset;		/* NOTE(review): units not visible here */
+	u32 state_len;			/* NOTE(review): units not visible here */
+	u32 vb_used, vb_total;		/* presumably vertex-buffer usage vs. capacity */
+	struct radeon_ib *vb_ib;	/* presumably the IB used as vertex buffer */
+};
+
 int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -463,6 +490,7 @@
 	int			chunk_relocs_idx;
 	struct radeon_ib	*ib;
 	void			*track;
+	unsigned		family;
 };
 
 struct radeon_cs_packet {
@@ -559,6 +587,9 @@
  */
 struct radeon_asic {
 	int (*init)(struct radeon_device *rdev);
+	void (*fini)(struct radeon_device *rdev);
+	int (*resume)(struct radeon_device *rdev);
+	int (*suspend)(struct radeon_device *rdev);
 	void (*errata)(struct radeon_device *rdev);
 	void (*vram_info)(struct radeon_device *rdev);
 	int (*gpu_reset)(struct radeon_device *rdev);
@@ -573,7 +604,11 @@
 	int (*cp_init)(struct radeon_device *rdev, unsigned ring_size);
 	void (*cp_fini)(struct radeon_device *rdev);
 	void (*cp_disable)(struct radeon_device *rdev);
+	void (*cp_commit)(struct radeon_device *rdev);
 	void (*ring_start)(struct radeon_device *rdev);
+	int (*ring_test)(struct radeon_device *rdev);
+	void (*ring_ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
+	int (*ib_test)(struct radeon_device *rdev);
 	int (*irq_set)(struct radeon_device *rdev);
 	int (*irq_process)(struct radeon_device *rdev);
 	u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc);
@@ -613,6 +648,8 @@
 union radeon_asic_config {
 	struct r300_asic	r300;
 	struct r100_asic	r100;
+	struct r600_asic	r600;
+	struct rv770_asic	rv770;
 };
 
 
@@ -698,12 +735,16 @@
 	struct radeon_pm		pm;
 	struct mutex			cs_mutex;
 	struct radeon_wb		wb;
+	struct radeon_dummy_page	dummy_page;
 	bool				gpu_lockup;
 	bool				shutdown;
 	bool				suspend;
 	bool				need_dma32;
+	bool				new_init_path;
 	struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
-	const struct firmware *fw;	/* firmware */
+	const struct firmware *me_fw;	/* all family ME firmware */
+	const struct firmware *pfp_fw;	/* r6/700 PFP firmware */
+	struct r600_blit r600_blit;
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -713,6 +754,13 @@
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
+/* r600 blit */
+int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
+void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
+void r600_kms_blit_copy(struct radeon_device *rdev,
+			u64 src_gpu_addr, u64 dst_gpu_addr,
+			int size_bytes);
+
 static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
 {
 	if (reg < 0x10000)
@@ -740,6 +788,7 @@
 #define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg))
 #define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg))
 #define RREG32(reg) r100_mm_rreg(rdev, (reg))
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg)))
 #define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
@@ -763,6 +812,7 @@
 		tmp_ |= ((val) & ~(mask));			\
 		WREG32_PLL(reg, tmp_);				\
 	} while (0)
+#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg)))
 
 /*
  * Indirect registers accessor
@@ -827,51 +877,6 @@
 /*
  * RING helpers.
  */
-#define CP_PACKET0			0x00000000
-#define		PACKET0_BASE_INDEX_SHIFT	0
-#define		PACKET0_BASE_INDEX_MASK		(0x1ffff << 0)
-#define		PACKET0_COUNT_SHIFT		16
-#define		PACKET0_COUNT_MASK		(0x3fff << 16)
-#define CP_PACKET1			0x40000000
-#define CP_PACKET2			0x80000000
-#define		PACKET2_PAD_SHIFT		0
-#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
-#define CP_PACKET3			0xC0000000
-#define		PACKET3_IT_OPCODE_SHIFT		8
-#define		PACKET3_IT_OPCODE_MASK		(0xff << 8)
-#define		PACKET3_COUNT_SHIFT		16
-#define		PACKET3_COUNT_MASK		(0x3fff << 16)
-/* PACKET3 op code */
-#define		PACKET3_NOP			0x10
-#define		PACKET3_3D_DRAW_VBUF		0x28
-#define		PACKET3_3D_DRAW_IMMD		0x29
-#define		PACKET3_3D_DRAW_INDX		0x2A
-#define		PACKET3_3D_LOAD_VBPNTR		0x2F
-#define		PACKET3_INDX_BUFFER		0x33
-#define		PACKET3_3D_DRAW_VBUF_2		0x34
-#define		PACKET3_3D_DRAW_IMMD_2		0x35
-#define		PACKET3_3D_DRAW_INDX_2		0x36
-#define		PACKET3_BITBLT_MULTI		0x9B
-
-#define PACKET0(reg, n)	(CP_PACKET0 |					\
-			 REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |	\
-			 REG_SET(PACKET0_COUNT, (n)))
-#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
-#define PACKET3(op, n)	(CP_PACKET3 |					\
-			 REG_SET(PACKET3_IT_OPCODE, (op)) |		\
-			 REG_SET(PACKET3_COUNT, (n)))
-
-#define	PACKET_TYPE0	0
-#define	PACKET_TYPE1	1
-#define	PACKET_TYPE2	2
-#define	PACKET_TYPE3	3
-
-#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
-#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
-#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
-#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
-#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
-
 static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 {
 #if DRM_DEBUG_CODE
@@ -890,6 +895,9 @@
  * ASICs macro.
  */
 #define radeon_init(rdev) (rdev)->asic->init((rdev))
+#define radeon_fini(rdev) (rdev)->asic->fini((rdev))
+#define radeon_resume(rdev) (rdev)->asic->resume((rdev))
+#define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
 #define radeon_cs_parse(p) rdev->asic->cs_parse((p))
 #define radeon_errata(rdev) (rdev)->asic->errata((rdev))
 #define radeon_vram_info(rdev) (rdev)->asic->vram_info((rdev))
@@ -905,7 +913,11 @@
 #define radeon_cp_init(rdev,rsize) (rdev)->asic->cp_init((rdev), (rsize))
 #define radeon_cp_fini(rdev) (rdev)->asic->cp_fini((rdev))
 #define radeon_cp_disable(rdev) (rdev)->asic->cp_disable((rdev))
+#define radeon_cp_commit(rdev) (rdev)->asic->cp_commit((rdev))
 #define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev))
+#define radeon_ring_test(rdev) (rdev)->asic->ring_test((rdev))
+#define radeon_ring_ib_execute(rdev, ib) (rdev)->asic->ring_ib_execute((rdev), (ib))
+#define radeon_ib_test(rdev) (rdev)->asic->ib_test((rdev))
 #define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev))
 #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index c9cbd8a..e87bb91 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -60,6 +60,7 @@
 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
 void r100_cp_fini(struct radeon_device *rdev);
 void r100_cp_disable(struct radeon_device *rdev);
+void r100_cp_commit(struct radeon_device *rdev);
 void r100_ring_start(struct radeon_device *rdev);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@@ -78,6 +79,9 @@
 			 uint32_t offset, uint32_t obj_size);
 int r100_clear_surface_reg(struct radeon_device *rdev, int reg);
 void r100_bandwidth_update(struct radeon_device *rdev);
+void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+int r100_ib_test(struct radeon_device *rdev);
+int r100_ring_test(struct radeon_device *rdev);
 
 static struct radeon_asic r100_asic = {
 	.init = &r100_init,
@@ -95,7 +99,11 @@
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r100_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
@@ -156,7 +164,11 @@
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
@@ -197,7 +209,11 @@
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
@@ -245,7 +261,11 @@
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &r100_irq_set,
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
@@ -298,7 +318,11 @@
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
@@ -341,7 +365,11 @@
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
@@ -391,7 +419,11 @@
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &rv515_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
@@ -434,7 +466,11 @@
 	.cp_init = &r100_cp_init,
 	.cp_fini = &r100_cp_fini,
 	.cp_disable = &r100_cp_disable,
+	.cp_commit = &r100_cp_commit,
 	.ring_start = &rv515_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.ib_test = &r100_ib_test,
 	.irq_set = &rs600_irq_set,
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
@@ -453,9 +489,127 @@
 };
 
 /*
- * r600,rv610,rv630,rv620,rv635,rv670,rs780,rv770,rv730,rv710
+ * r600,rv610,rv630,rv620,rv635,rv670,rs780,rs880
  */
+int r600_init(struct radeon_device *rdev);
+void r600_fini(struct radeon_device *rdev);
+int r600_suspend(struct radeon_device *rdev);
+int r600_resume(struct radeon_device *rdev);
+int r600_wb_init(struct radeon_device *rdev);
+void r600_wb_fini(struct radeon_device *rdev);
+void r600_cp_commit(struct radeon_device *rdev);
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
 uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+int r600_cs_parse(struct radeon_cs_parser *p);
+void r600_fence_ring_emit(struct radeon_device *rdev,
+			  struct radeon_fence *fence);
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset,
+		  uint64_t dst_offset,
+		  unsigned num_pages,
+		  struct radeon_fence *fence);
+int r600_irq_process(struct radeon_device *rdev);
+int r600_irq_set(struct radeon_device *rdev);
+int r600_gpu_reset(struct radeon_device *rdev);
+int r600_set_surface_reg(struct radeon_device *rdev, int reg,
+			 uint32_t tiling_flags, uint32_t pitch,
+			 uint32_t offset, uint32_t obj_size);
+int r600_clear_surface_reg(struct radeon_device *rdev, int reg);
+void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+int r600_ib_test(struct radeon_device *rdev);
+int r600_ring_test(struct radeon_device *rdev);
+int r600_copy_blit(struct radeon_device *rdev,
+		   uint64_t src_offset, uint64_t dst_offset,
+		   unsigned num_pages, struct radeon_fence *fence);
+
+static struct radeon_asic r600_asic = {
+	.init = &r600_init,	/* reached through radeon_init() */
+	.fini = &r600_fini,	/* reached through radeon_fini() */
+	.resume = &r600_resume,	/* reached through radeon_resume() */
+	.suspend = &r600_suspend,	/* reached through radeon_suspend() */
+	.errata = NULL,	/* NULL hooks: only the !new_init_path code calls radeon_errata() etc. */
+	.vram_info = NULL,
+	.gpu_reset = &r600_gpu_reset,
+	.mc_init = NULL,	/* NOTE(review): presumably handled inside r600_init() - confirm */
+	.mc_fini = NULL,
+	.wb_init = &r600_wb_init,
+	.wb_fini = &r600_wb_fini,
+	.gart_enable = NULL,
+	.gart_disable = NULL,
+	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
+	.gart_set_page = &rs600_gart_set_page,	/* rs600 helper reused */
+	.cp_init = NULL,
+	.cp_fini = NULL,
+	.cp_disable = NULL,
+	.cp_commit = &r600_cp_commit,
+	.ring_start = NULL,
+	.ring_test = &r600_ring_test,
+	.ring_ib_execute = &r600_ring_ib_execute,
+	.ib_test = &r600_ib_test,
+	.irq_set = &r600_irq_set,
+	.irq_process = &r600_irq_process,
+	.fence_ring_emit = &r600_fence_ring_emit,
+	.cs_parse = &r600_cs_parse,
+	.copy_blit = &r600_copy_blit,
+	.copy_dma = &r600_copy_blit,	/* NOTE(review): blit used for DMA copies too; r600_copy_dma is not wired */
+	.copy = NULL,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = &r600_set_surface_reg,
+	.clear_surface_reg = &r600_clear_surface_reg,
+	.bandwidth_update = &r520_bandwidth_update,	/* r520 helper reused */
+};
+
+/*
+ * rv770,rv730,rv710,rv740
+ */
+int rv770_init(struct radeon_device *rdev);
+void rv770_fini(struct radeon_device *rdev);
+int rv770_suspend(struct radeon_device *rdev);
+int rv770_resume(struct radeon_device *rdev);
+int rv770_gpu_reset(struct radeon_device *rdev);
+
+static struct radeon_asic rv770_asic = {
+	.init = &rv770_init,	/* reached through radeon_init() */
+	.fini = &rv770_fini,	/* reached through radeon_fini() */
+	.resume = &rv770_resume,	/* reached through radeon_resume() */
+	.suspend = &rv770_suspend,	/* reached through radeon_suspend() */
+	.errata = NULL,	/* NULL hooks: only the !new_init_path code calls radeon_errata() etc. */
+	.vram_info = NULL,
+	.gpu_reset = &rv770_gpu_reset,
+	.mc_init = NULL,	/* NOTE(review): presumably handled inside rv770_init() - confirm */
+	.mc_fini = NULL,
+	.wb_init = &r600_wb_init,	/* from here on the r600 handlers are shared */
+	.wb_fini = &r600_wb_fini,
+	.gart_enable = NULL,
+	.gart_disable = NULL,
+	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
+	.gart_set_page = &rs600_gart_set_page,	/* rs600 helper reused */
+	.cp_init = NULL,
+	.cp_fini = NULL,
+	.cp_disable = NULL,
+	.cp_commit = &r600_cp_commit,
+	.ring_start = NULL,
+	.ring_test = &r600_ring_test,
+	.ring_ib_execute = &r600_ring_ib_execute,
+	.ib_test = &r600_ib_test,
+	.irq_set = &r600_irq_set,
+	.irq_process = &r600_irq_process,
+	.fence_ring_emit = &r600_fence_ring_emit,
+	.cs_parse = &r600_cs_parse,
+	.copy_blit = &r600_copy_blit,
+	.copy_dma = &r600_copy_blit,	/* NOTE(review): blit used for DMA copies too; r600_copy_dma is not wired */
+	.copy = NULL,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = &r600_set_surface_reg,
+	.clear_surface_reg = &r600_clear_surface_reg,
+	.bandwidth_update = &r520_bandwidth_update,	/* r520 helper reused */
+};
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bba9b4b..a8fb392 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -370,10 +370,6 @@
 						       && record->
 						       ucRecordType <=
 						       ATOM_MAX_OBJECT_RECORD_NUMBER) {
-							DRM_ERROR
-							    ("record type %d\n",
-							     record->
-							     ucRecordType);
 							switch (record->
 								ucRecordType) {
 							case ATOM_I2C_RECORD_TYPE:
diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c
index a37cbce..152eef1 100644
--- a/drivers/gpu/drm/radeon/radeon_clocks.c
+++ b/drivers/gpu/drm/radeon/radeon_clocks.c
@@ -102,10 +102,12 @@
 			p1pll->reference_div = 12;
 		if (p2pll->reference_div < 2)
 			p2pll->reference_div = 12;
-		if (spll->reference_div < 2)
-			spll->reference_div =
-			    RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
-			    RADEON_M_SPLL_REF_DIV_MASK;
+		if (rdev->family < CHIP_RS600) {
+			if (spll->reference_div < 2)
+				spll->reference_div =
+					RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
+					RADEON_M_SPLL_REF_DIV_MASK;
+		}
 		if (mpll->reference_div < 2)
 			mpll->reference_div = spll->reference_div;
 	} else {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 7693f7c..f2469c5 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -37,7 +37,7 @@
 /*
  * Clear GPU surface registers.
  */
-static void radeon_surface_init(struct radeon_device *rdev)
+void radeon_surface_init(struct radeon_device *rdev)
 {
 	/* FIXME: check this out */
 	if (rdev->family < CHIP_R600) {
@@ -56,7 +56,7 @@
 /*
  * GPU scratch registers helpers function.
  */
-static void radeon_scratch_init(struct radeon_device *rdev)
+void radeon_scratch_init(struct radeon_device *rdev)
 {
 	int i;
 
@@ -156,16 +156,14 @@
 		tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
 		rdev->mc.gtt_location = tmp;
 	}
-	DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20);
+	DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.mc_vram_size >> 20));
 	DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n",
-		 rdev->mc.vram_location,
-		 rdev->mc.vram_location + rdev->mc.mc_vram_size - 1);
-	if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size)
-		DRM_INFO("radeon: VRAM less than aperture workaround enabled\n");
-	DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20);
+		 (unsigned)rdev->mc.vram_location,
+		 (unsigned)(rdev->mc.vram_location + rdev->mc.mc_vram_size - 1));
+	DRM_INFO("radeon: GTT %uM\n", (unsigned)(rdev->mc.gtt_size >> 20));
 	DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n",
-		 rdev->mc.gtt_location,
-		 rdev->mc.gtt_location + rdev->mc.gtt_size - 1);
+		 (unsigned)rdev->mc.gtt_location,
+		 (unsigned)(rdev->mc.gtt_location + rdev->mc.gtt_size - 1));
 	return 0;
 }
 
@@ -205,6 +203,31 @@
 
 }
 
+int radeon_dummy_page_init(struct radeon_device *rdev)
+{
+	rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
+	if (rdev->dummy_page.page == NULL)
+		return -ENOMEM;	/* no zeroed DMA32 page available */
+	rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page,
+					0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	if (pci_dma_mapping_error(rdev->pdev, rdev->dummy_page.addr)) {
+		__free_page(rdev->dummy_page.page);	/* mapping failed: undo the allocation */
+		rdev->dummy_page.page = NULL;	/* radeon_dummy_page_fini() then has nothing to free */
+		return -ENOMEM;	/* address 0 may be valid, so only the DMA API can report failure */
+	}
+	return 0;	/* dummy_page.page and dummy_page.addr are both valid */
+}
+
+void radeon_dummy_page_fini(struct radeon_device *rdev)
+{
+	if (rdev->dummy_page.page != NULL) {	/* NULL: init failed or fini already ran */
+		pci_unmap_page(rdev->pdev, rdev->dummy_page.addr,
+				PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		__free_page(rdev->dummy_page.page);	/* unmap first, then release the page */
+		rdev->dummy_page.page = NULL;	/* makes a repeated call a no-op */
+	}
+}
+
 
 /*
  * Registers accessors functions.
@@ -323,9 +346,15 @@
 	case CHIP_RV635:
 	case CHIP_RV670:
 	case CHIP_RS780:
+	case CHIP_RS880:
+		rdev->asic = &r600_asic;
+		break;
 	case CHIP_RV770:
 	case CHIP_RV730:
 	case CHIP_RV710:
+	case CHIP_RV740:
+		rdev->asic = &rv770_asic;
+		break;
 	default:
 		/* FIXME: not supported yet */
 		return -EINVAL;
@@ -448,7 +477,7 @@
 		       struct pci_dev *pdev,
 		       uint32_t flags)
 {
-	int r, ret;
+	int r, ret = 0;
 	int dma_bits;
 
 	DRM_INFO("radeon: Initializing kernel modesetting.\n");
@@ -487,10 +516,6 @@
 	if (r) {
 		return r;
 	}
-	r = radeon_init(rdev);
-	if (r) {
-		return r;
-	}
 
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
@@ -521,111 +546,118 @@
 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)rdev->rmmio_base);
 	DRM_INFO("register mmio size: %u\n", (unsigned)rdev->rmmio_size);
 
-	/* Setup errata flags */
-	radeon_errata(rdev);
-	/* Initialize scratch registers */
-	radeon_scratch_init(rdev);
-	/* Initialize surface registers */
-	radeon_surface_init(rdev);
+	rdev->new_init_path = false;
+	r = radeon_init(rdev);
+	if (r) {
+		return r;
+	}
+	if (!rdev->new_init_path) {
+		/* Setup errata flags */
+		radeon_errata(rdev);
+		/* Initialize scratch registers */
+		radeon_scratch_init(rdev);
+		/* Initialize surface registers */
+		radeon_surface_init(rdev);
 
-	/* TODO: disable VGA need to use VGA request */
-	/* BIOS*/
-	if (!radeon_get_bios(rdev)) {
-		if (ASIC_IS_AVIVO(rdev))
-			return -EINVAL;
-	}
-	if (rdev->is_atom_bios) {
-		r = radeon_atombios_init(rdev);
-		if (r) {
-			return r;
+		/* TODO: disable VGA need to use VGA request */
+		/* BIOS*/
+		if (!radeon_get_bios(rdev)) {
+			if (ASIC_IS_AVIVO(rdev))
+				return -EINVAL;
 		}
-	} else {
-		r = radeon_combios_init(rdev);
-		if (r) {
-			return r;
-		}
-	}
-	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-	if (radeon_gpu_reset(rdev)) {
-		/* FIXME: what do we want to do here ? */
-	}
-	/* check if cards are posted or not */
-	if (!radeon_card_posted(rdev) && rdev->bios) {
-		DRM_INFO("GPU not posted. posting now...\n");
 		if (rdev->is_atom_bios) {
-			atom_asic_init(rdev->mode_info.atom_context);
+			r = radeon_atombios_init(rdev);
+			if (r) {
+				return r;
+			}
 		} else {
-			radeon_combios_asic_init(rdev->ddev);
+			r = radeon_combios_init(rdev);
+			if (r) {
+				return r;
+			}
 		}
-	}
-	/* Initialize clocks */
-	r = radeon_clocks_init(rdev);
-	if (r) {
-		return r;
-	}
-	/* Get vram informations */
-	radeon_vram_info(rdev);
+		/* Reset gpu before posting otherwise ATOM will enter infinite loop */
+		if (radeon_gpu_reset(rdev)) {
+			/* FIXME: what do we want to do here ? */
+		}
+		/* check if cards are posted or not */
+		if (!radeon_card_posted(rdev) && rdev->bios) {
+			DRM_INFO("GPU not posted. posting now...\n");
+			if (rdev->is_atom_bios) {
+				atom_asic_init(rdev->mode_info.atom_context);
+			} else {
+				radeon_combios_asic_init(rdev->ddev);
+			}
+		}
+		/* Initialize clocks */
+		r = radeon_clocks_init(rdev);
+		if (r) {
+			return r;
+		}
+		/* Get vram informations */
+		radeon_vram_info(rdev);
 
-	/* Add an MTRR for the VRAM */
-	rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
-				      MTRR_TYPE_WRCOMB, 1);
-	DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
-		 rdev->mc.real_vram_size >> 20,
-		 (unsigned)rdev->mc.aper_size >> 20);
-	DRM_INFO("RAM width %dbits %cDR\n",
-		 rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
-	/* Initialize memory controller (also test AGP) */
-	r = radeon_mc_init(rdev);
-	if (r) {
-		return r;
-	}
-	/* Fence driver */
-	r = radeon_fence_driver_init(rdev);
-	if (r) {
-		return r;
-	}
-	r = radeon_irq_kms_init(rdev);
-	if (r) {
-		return r;
-	}
-	/* Memory manager */
-	r = radeon_object_init(rdev);
-	if (r) {
-		return r;
-	}
-	/* Initialize GART (initialize after TTM so we can allocate
-	 * memory through TTM but finalize after TTM) */
-	r = radeon_gart_enable(rdev);
-	if (!r) {
-		r = radeon_gem_init(rdev);
-	}
+		/* Add an MTRR for the VRAM */
+		rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
+				MTRR_TYPE_WRCOMB, 1);
+		DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
+				(unsigned)(rdev->mc.mc_vram_size >> 20),
+				(unsigned)(rdev->mc.aper_size >> 20));
+		DRM_INFO("RAM width %dbits %cDR\n",
+				rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
+		/* Initialize memory controller (also test AGP) */
+		r = radeon_mc_init(rdev);
+		if (r) {
+			return r;
+		}
+		/* Fence driver */
+		r = radeon_fence_driver_init(rdev);
+		if (r) {
+			return r;
+		}
+		r = radeon_irq_kms_init(rdev);
+		if (r) {
+			return r;
+		}
+		/* Memory manager */
+		r = radeon_object_init(rdev);
+		if (r) {
+			return r;
+		}
+		/* Initialize GART (initialize after TTM so we can allocate
+		 * memory through TTM but finalize after TTM) */
+		r = radeon_gart_enable(rdev);
+		if (!r) {
+			r = radeon_gem_init(rdev);
+		}
 
-	/* 1M ring buffer */
-	if (!r) {
-		r = radeon_cp_init(rdev, 1024 * 1024);
-	}
-	if (!r) {
-		r = radeon_wb_init(rdev);
-		if (r) {
-			DRM_ERROR("radeon: failled initializing WB (%d).\n", r);
-			return r;
+		/* 1M ring buffer */
+		if (!r) {
+			r = radeon_cp_init(rdev, 1024 * 1024);
 		}
-	}
-	if (!r) {
-		r = radeon_ib_pool_init(rdev);
-		if (r) {
-			DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
-			return r;
+		if (!r) {
+			r = radeon_wb_init(rdev);
+			if (r) {
+				DRM_ERROR("radeon: failled initializing WB (%d).\n", r);
+				return r;
+			}
 		}
-	}
-	if (!r) {
-		r = radeon_ib_test(rdev);
-		if (r) {
-			DRM_ERROR("radeon: failled testing IB (%d).\n", r);
-			return r;
+		if (!r) {
+			r = radeon_ib_pool_init(rdev);
+			if (r) {
+				DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+				return r;
+			}
 		}
+		if (!r) {
+			r = radeon_ib_test(rdev);
+			if (r) {
+				DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+				return r;
+			}
+		}
+		ret = r;
 	}
-	ret = r;
 	r = radeon_modeset_init(rdev);
 	if (r) {
 		return r;
@@ -651,26 +683,29 @@
 	rdev->shutdown = true;
 	/* Order matter so becarefull if you rearrange anythings */
 	radeon_modeset_fini(rdev);
-	radeon_ib_pool_fini(rdev);
-	radeon_cp_fini(rdev);
-	radeon_wb_fini(rdev);
-	radeon_gem_fini(rdev);
-	radeon_object_fini(rdev);
-	/* mc_fini must be after object_fini */
-	radeon_mc_fini(rdev);
+	if (!rdev->new_init_path) {
+		radeon_ib_pool_fini(rdev);
+		radeon_cp_fini(rdev);
+		radeon_wb_fini(rdev);
+		radeon_gem_fini(rdev);
+		radeon_mc_fini(rdev);
 #if __OS_HAS_AGP
-	radeon_agp_fini(rdev);
+		radeon_agp_fini(rdev);
 #endif
-	radeon_irq_kms_fini(rdev);
-	radeon_fence_driver_fini(rdev);
-	radeon_clocks_fini(rdev);
-	if (rdev->is_atom_bios) {
-		radeon_atombios_fini(rdev);
+		radeon_irq_kms_fini(rdev);
+		radeon_fence_driver_fini(rdev);
+		radeon_clocks_fini(rdev);
+		radeon_object_fini(rdev);
+		if (rdev->is_atom_bios) {
+			radeon_atombios_fini(rdev);
+		} else {
+			radeon_combios_fini(rdev);
+		}
+		kfree(rdev->bios);
+		rdev->bios = NULL;
 	} else {
-		radeon_combios_fini(rdev);
+		radeon_fini(rdev);
 	}
-	kfree(rdev->bios);
-	rdev->bios = NULL;
 	iounmap(rdev->rmmio);
 	rdev->rmmio = NULL;
 }
@@ -708,9 +743,12 @@
 	/* wait for gpu to finish processing current batch */
 	radeon_fence_wait_last(rdev);
 
-	radeon_cp_disable(rdev);
-	radeon_gart_disable(rdev);
-
+	if (!rdev->new_init_path) {
+		radeon_cp_disable(rdev);
+		radeon_gart_disable(rdev);
+	} else {
+		radeon_suspend(rdev);
+	}
 	/* evict remaining vram memory */
 	radeon_object_evict_vram(rdev);
 
@@ -746,33 +784,37 @@
 	if (radeon_gpu_reset(rdev)) {
 		/* FIXME: what do we want to do here ? */
 	}
-	/* post card */
-	if (rdev->is_atom_bios) {
-		atom_asic_init(rdev->mode_info.atom_context);
+	if (!rdev->new_init_path) {
+		/* post card */
+		if (rdev->is_atom_bios) {
+			atom_asic_init(rdev->mode_info.atom_context);
+		} else {
+			radeon_combios_asic_init(rdev->ddev);
+		}
+		/* Initialize clocks */
+		r = radeon_clocks_init(rdev);
+		if (r) {
+			release_console_sem();
+			return r;
+		}
+		/* Enable IRQ */
+		rdev->irq.sw_int = true;
+		radeon_irq_set(rdev);
+		/* Initialize GPU Memory Controller */
+		r = radeon_mc_init(rdev);
+		if (r) {
+			goto out;
+		}
+		r = radeon_gart_enable(rdev);
+		if (r) {
+			goto out;
+		}
+		r = radeon_cp_init(rdev, rdev->cp.ring_size);
+		if (r) {
+			goto out;
+		}
 	} else {
-		radeon_combios_asic_init(rdev->ddev);
-	}
-	/* Initialize clocks */
-	r = radeon_clocks_init(rdev);
-	if (r) {
-		release_console_sem();
-		return r;
-	}
-	/* Enable IRQ */
-	rdev->irq.sw_int = true;
-	radeon_irq_set(rdev);
-	/* Initialize GPU Memory Controller */
-	r = radeon_mc_init(rdev);
-	if (r) {
-		goto out;
-	}
-	r = radeon_gart_enable(rdev);
-	if (r) {
-		goto out;
-	}
-	r = radeon_cp_init(rdev, rdev->cp.ring_size);
-	if (r) {
-		goto out;
+		radeon_resume(rdev);
 	}
 out:
 	fb_set_suspend(rdev->fbdev_info, 0);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 40294a0..c7b1859 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -356,6 +356,12 @@
 	int r700_sc_hiz_tile_fifo_size;
 	int r700_sc_earlyz_tile_fifo_fize;
 
+	struct mutex cs_mutex;
+	u32 cs_id_scnt;
+	u32 cs_id_wcnt;
+	/* r6xx/r7xx drm blit vertex buffer */
+	struct drm_buf *blit_vb;
+
 	/* firmware */
 	const struct firmware *me_fw, *pfp_fw;
 } drm_radeon_private_t;
@@ -396,6 +402,9 @@
 		(off >= gart_start && off <= gart_end));
 }
 
+/* radeon_state.c */
+extern void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf);
+
 				/* radeon_cp.c */
 extern int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_priv);
 extern int radeon_cp_start(struct drm_device *dev, void *data, struct drm_file *file_priv);
@@ -487,6 +496,22 @@
 				     struct drm_buf *buf, int start, int end);
 extern int r600_page_table_init(struct drm_device *dev);
 extern void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info);
+extern int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv);
+extern void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv);
+extern int r600_cp_dispatch_texture(struct drm_device *dev,
+				    struct drm_file *file_priv,
+				    drm_radeon_texture_t *tex,
+				    drm_radeon_tex_image_t *image);
+/* r600_blit.c */
+extern int r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv);
+extern void r600_done_blit_copy(struct drm_device *dev);
+extern void r600_blit_copy(struct drm_device *dev,
+			   uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+			   int size_bytes);
+extern void r600_blit_swap(struct drm_device *dev,
+			   uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+			   int sx, int sy, int dx, int dy,
+			   int w, int h, int src_pitch, int dst_pitch, int cpp);
 
 /* Flags for stats.boxes
  */
@@ -1114,13 +1139,71 @@
 #	define RADEON_CNTL_BITBLT_MULTI		0x00009B00
 #	define RADEON_CNTL_SET_SCISSORS		0xC0001E00
 
-#	define R600_IT_INDIRECT_BUFFER		0x00003200
-#	define R600_IT_ME_INITIALIZE		0x00004400
+#       define R600_IT_INDIRECT_BUFFER_END      0x00001700
+#       define R600_IT_SET_PREDICATION          0x00002000
+#       define R600_IT_REG_RMW                  0x00002100
+#       define R600_IT_COND_EXEC                0x00002200
+#       define R600_IT_PRED_EXEC                0x00002300
+#       define R600_IT_START_3D_CMDBUF          0x00002400
+#       define R600_IT_DRAW_INDEX_2             0x00002700
+#       define R600_IT_CONTEXT_CONTROL          0x00002800
+#       define R600_IT_DRAW_INDEX_IMMD_BE       0x00002900
+#       define R600_IT_INDEX_TYPE               0x00002A00
+#       define R600_IT_DRAW_INDEX               0x00002B00
+#       define R600_IT_DRAW_INDEX_AUTO          0x00002D00
+#       define R600_IT_DRAW_INDEX_IMMD          0x00002E00
+#       define R600_IT_NUM_INSTANCES            0x00002F00
+#       define R600_IT_STRMOUT_BUFFER_UPDATE    0x00003400
+#       define R600_IT_INDIRECT_BUFFER_MP       0x00003800
+#       define R600_IT_MEM_SEMAPHORE            0x00003900
+#       define R600_IT_MPEG_INDEX               0x00003A00
+#       define R600_IT_WAIT_REG_MEM             0x00003C00
+#       define R600_IT_MEM_WRITE                0x00003D00
+#       define R600_IT_INDIRECT_BUFFER          0x00003200
+#       define R600_IT_CP_INTERRUPT             0x00004000
+#       define R600_IT_SURFACE_SYNC             0x00004300
+#              define R600_CB0_DEST_BASE_ENA    (1 << 6)
+#              define R600_TC_ACTION_ENA        (1 << 23)
+#              define R600_VC_ACTION_ENA        (1 << 24)
+#              define R600_CB_ACTION_ENA        (1 << 25)
+#              define R600_DB_ACTION_ENA        (1 << 26)
+#              define R600_SH_ACTION_ENA        (1 << 27)
+#              define R600_SMX_ACTION_ENA       (1 << 28)
+#       define R600_IT_ME_INITIALIZE            0x00004400
 #	       define R600_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
-#	define R600_IT_EVENT_WRITE		0x00004600
-#	define R600_IT_SET_CONFIG_REG		0x00006800
-#	define R600_SET_CONFIG_REG_OFFSET       0x00008000
-#	define R600_SET_CONFIG_REG_END          0x0000ac00
+#       define R600_IT_COND_WRITE               0x00004500
+#       define R600_IT_EVENT_WRITE              0x00004600
+#       define R600_IT_EVENT_WRITE_EOP          0x00004700
+#       define R600_IT_ONE_REG_WRITE            0x00005700
+#       define R600_IT_SET_CONFIG_REG           0x00006800
+#              define R600_SET_CONFIG_REG_OFFSET 0x00008000
+#              define R600_SET_CONFIG_REG_END   0x0000ac00
+#       define R600_IT_SET_CONTEXT_REG          0x00006900
+#              define R600_SET_CONTEXT_REG_OFFSET 0x00028000
+#              define R600_SET_CONTEXT_REG_END  0x00029000
+#       define R600_IT_SET_ALU_CONST            0x00006A00
+#              define R600_SET_ALU_CONST_OFFSET 0x00030000
+#              define R600_SET_ALU_CONST_END    0x00032000
+#       define R600_IT_SET_BOOL_CONST           0x00006B00
+#              define R600_SET_BOOL_CONST_OFFSET 0x0003e380
+#              define R600_SET_BOOL_CONST_END   0x00040000
+#       define R600_IT_SET_LOOP_CONST           0x00006C00
+#              define R600_SET_LOOP_CONST_OFFSET 0x0003e200
+#              define R600_SET_LOOP_CONST_END   0x0003e380
+#       define R600_IT_SET_RESOURCE             0x00006D00
+#              define R600_SET_RESOURCE_OFFSET  0x00038000
+#              define R600_SET_RESOURCE_END     0x0003c000
+#              define R600_SQ_TEX_VTX_INVALID_TEXTURE  0x0
+#              define R600_SQ_TEX_VTX_INVALID_BUFFER   0x1
+#              define R600_SQ_TEX_VTX_VALID_TEXTURE    0x2
+#              define R600_SQ_TEX_VTX_VALID_BUFFER     0x3
+#       define R600_IT_SET_SAMPLER              0x00006E00
+#              define R600_SET_SAMPLER_OFFSET   0x0003c000
+#              define R600_SET_SAMPLER_END      0x0003cff0
+#       define R600_IT_SET_CTL_CONST            0x00006F00
+#              define R600_SET_CTL_CONST_OFFSET 0x0003cff0
+#              define R600_SET_CTL_CONST_END    0x0003e200
+#       define R600_IT_SURFACE_BASE_UPDATE      0x00007300
 
 #define RADEON_CP_PACKET_MASK		0xC0000000
 #define RADEON_CP_PACKET_COUNT_MASK	0x3fff0000
@@ -1598,6 +1681,52 @@
 #define R600_CB_COLOR7_BASE                                    0x2805c
 #define R600_CB_COLOR7_FRAG                                    0x280fc
 
+#define R600_CB_COLOR0_SIZE                                    0x28060
+#define R600_CB_COLOR0_VIEW                                    0x28080
+#define R600_CB_COLOR0_INFO                                    0x280a0
+#define R600_CB_COLOR0_TILE                                    0x280c0
+#define R600_CB_COLOR0_FRAG                                    0x280e0
+#define R600_CB_COLOR0_MASK                                    0x28100
+
+#define AVIVO_D1MODE_VLINE_START_END                           0x6538
+#define AVIVO_D2MODE_VLINE_START_END                           0x6d38
+#define R600_CP_COHER_BASE                                     0x85f8
+#define R600_DB_DEPTH_BASE                                     0x2800c
+#define R600_SQ_PGM_START_FS                                   0x28894
+#define R600_SQ_PGM_START_ES                                   0x28880
+#define R600_SQ_PGM_START_VS                                   0x28858
+#define R600_SQ_PGM_RESOURCES_VS                               0x28868
+#define R600_SQ_PGM_CF_OFFSET_VS                               0x288d0
+#define R600_SQ_PGM_START_GS                                   0x2886c
+#define R600_SQ_PGM_START_PS                                   0x28840
+#define R600_SQ_PGM_RESOURCES_PS                               0x28850
+#define R600_SQ_PGM_EXPORTS_PS                                 0x28854
+#define R600_SQ_PGM_CF_OFFSET_PS                               0x288cc
+#define R600_VGT_DMA_BASE                                      0x287e8
+#define R600_VGT_DMA_BASE_HI                                   0x287e4
+#define R600_VGT_STRMOUT_BASE_OFFSET_0                         0x28b10
+#define R600_VGT_STRMOUT_BASE_OFFSET_1                         0x28b14
+#define R600_VGT_STRMOUT_BASE_OFFSET_2                         0x28b18
+#define R600_VGT_STRMOUT_BASE_OFFSET_3                         0x28b1c
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_0                      0x28b44
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_1                      0x28b48
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_2                      0x28b4c
+#define R600_VGT_STRMOUT_BASE_OFFSET_HI_3                      0x28b50
+#define R600_VGT_STRMOUT_BUFFER_BASE_0                         0x28ad8
+#define R600_VGT_STRMOUT_BUFFER_BASE_1                         0x28ae8
+#define R600_VGT_STRMOUT_BUFFER_BASE_2                         0x28af8
+#define R600_VGT_STRMOUT_BUFFER_BASE_3                         0x28b08
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_0                       0x28adc
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_1                       0x28aec
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_2                       0x28afc
+#define R600_VGT_STRMOUT_BUFFER_OFFSET_3                       0x28b0c
+
+#define R600_VGT_PRIMITIVE_TYPE                                0x8958
+
+#define R600_PA_SC_SCREEN_SCISSOR_TL                           0x28030
+#define R600_PA_SC_GENERIC_SCISSOR_TL                          0x28240
+#define R600_PA_SC_WINDOW_SCISSOR_TL                           0x28204
+
 #define R600_TC_CNTL                                           0x9608
 #       define R600_TC_L2_SIZE(x)                              ((x) << 5)
 #       define R600_L2_DISABLE_LATE_HIT                        (1 << 9)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index b4e48dd..506dd4d 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -53,9 +53,9 @@
 		 * away
 		 */
 		WREG32(rdev->fence_drv.scratch_reg, fence->seq);
-	} else {
+	} else
 		radeon_fence_ring_emit(rdev, fence);
-	}
+
 	fence->emited = true;
 	fence->timeout = jiffies + ((2000 * HZ) / 1000);
 	list_del(&fence->list);
@@ -168,7 +168,47 @@
 	return signaled;
 }
 
-int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
+/*
+ * Polled fence wait: spin until @fence signals or its deadline passes.
+ * Returns 0 once signaled, -EBUSY on timeout, -ERESTART if @intr is set and a
+ * signal is pending.  With @lazy, sleeps one jiffy between polls.
+ */
+int r600_fence_wait(struct radeon_fence *fence,  bool intr, bool lazy)
+{
+	unsigned long deadline;
+	int ret = 0;
+
+	/* Absolute expiry: fence->timeout, or HZ/100 from now if already past. */
+	deadline = jiffies + HZ / 100;
+	if (time_after(fence->timeout, jiffies))
+		deadline = fence->timeout;
+
+	while (1) {
+		if (radeon_fence_signaled(fence))
+			break;
+
+		if (time_after_eq(jiffies, deadline)) {
+			ret = -EBUSY;
+			break;
+		}
+
+		/* The helpers set the task state themselves before sleeping. */
+		if (lazy && intr)
+			schedule_timeout_interruptible(1);
+		else if (lazy)
+			schedule_timeout_uninterruptible(1);
+
+		if (intr && signal_pending(current)) {
+			ret = -ERESTART;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+
+int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
 	struct radeon_device *rdev;
 	unsigned long cur_jiffies;
@@ -176,7 +216,6 @@
 	bool expired = false;
 	int r;
 
-
 	if (fence == NULL) {
 		WARN(1, "Querying an invalid fence : %p !\n", fence);
 		return 0;
@@ -185,13 +224,18 @@
 	if (radeon_fence_signaled(fence)) {
 		return 0;
 	}
+
+	if (rdev->family >= CHIP_R600)
+		return r600_fence_wait(fence, intr, 0);
+
 retry:
 	cur_jiffies = jiffies;
 	timeout = HZ / 100;
 	if (time_after(fence->timeout, cur_jiffies)) {
 		timeout = fence->timeout - cur_jiffies;
 	}
-	if (interruptible) {
+
+	if (intr) {
 		r = wait_event_interruptible_timeout(rdev->fence_drv.queue,
 				radeon_fence_signaled(fence), timeout);
 		if (unlikely(r == -ERESTARTSYS)) {
diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h
index 28be2f1..21da871 100644
--- a/drivers/gpu/drm/radeon/radeon_reg.h
+++ b/drivers/gpu/drm/radeon/radeon_reg.h
@@ -3255,6 +3255,24 @@
 #define RADEON_CP_RB_WPTR                   0x0714
 #define RADEON_CP_RB_RPTR_WR                0x071c
 
+#define RADEON_SCRATCH_UMSK		    0x0770
+#define RADEON_SCRATCH_ADDR		    0x0774
+
+#define R600_CP_RB_BASE                     0xc100
+#define R600_CP_RB_CNTL                     0xc104
+#       define R600_RB_BUFSZ(x)             ((x) << 0)
+#       define R600_RB_BLKSZ(x)             ((x) << 8)
+#       define R600_RB_NO_UPDATE            (1 << 27)
+#       define R600_RB_RPTR_WR_ENA          (1 << 31)
+#define R600_CP_RB_RPTR_WR                  0xc108
+#define R600_CP_RB_RPTR_ADDR                0xc10c
+#define R600_CP_RB_RPTR_ADDR_HI             0xc110
+#define R600_CP_RB_WPTR                     0xc114
+#define R600_CP_RB_WPTR_ADDR                0xc118
+#define R600_CP_RB_WPTR_ADDR_HI             0xc11c
+#define R600_CP_RB_RPTR                     0x8700
+#define R600_CP_RB_WPTR_DELAY               0x8704
+
 #define RADEON_CP_IB_BASE                   0x0738
 #define RADEON_CP_IB_BUFSZ                  0x073c
 
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 60d1593..aa9837a 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -110,7 +110,6 @@
 		return;
 	}
 	list_del(&tmp->list);
-	INIT_LIST_HEAD(&tmp->list);
 	if (tmp->fence) {
 		radeon_fence_unref(&tmp->fence);
 	}
@@ -119,19 +118,11 @@
 	mutex_unlock(&rdev->ib_pool.mutex);
 }
 
-static void radeon_ib_align(struct radeon_device *rdev, struct radeon_ib *ib)
-{
-	while ((ib->length_dw & rdev->cp.align_mask)) {
-		ib->ptr[ib->length_dw++] = PACKET2(0);
-	}
-}
-
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	int r = 0;
 
 	mutex_lock(&rdev->ib_pool.mutex);
-	radeon_ib_align(rdev, ib);
 	if (!ib->length_dw || !rdev->cp.ready) {
 		/* TODO: Nothings in the ib we should report. */
 		mutex_unlock(&rdev->ib_pool.mutex);
@@ -145,9 +136,7 @@
 		mutex_unlock(&rdev->ib_pool.mutex);
 		return r;
 	}
-	radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
-	radeon_ring_write(rdev, ib->gpu_addr);
-	radeon_ring_write(rdev, ib->length_dw);
+	radeon_ring_ib_execute(rdev, ib);
 	radeon_fence_emit(rdev, ib->fence);
 	radeon_ring_unlock_commit(rdev);
 	list_add_tail(&ib->list, &rdev->ib_pool.scheduled_ibs);
@@ -215,69 +204,16 @@
 	mutex_unlock(&rdev->ib_pool.mutex);
 }
 
-int radeon_ib_test(struct radeon_device *rdev)
-{
-	struct radeon_ib *ib;
-	uint32_t scratch;
-	uint32_t tmp = 0;
-	unsigned i;
-	int r;
-
-	r = radeon_scratch_get(rdev, &scratch);
-	if (r) {
-		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
-		return r;
-	}
-	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ib_get(rdev, &ib);
-	if (r) {
-		return r;
-	}
-	ib->ptr[0] = PACKET0(scratch, 0);
-	ib->ptr[1] = 0xDEADBEEF;
-	ib->ptr[2] = PACKET2(0);
-	ib->ptr[3] = PACKET2(0);
-	ib->ptr[4] = PACKET2(0);
-	ib->ptr[5] = PACKET2(0);
-	ib->ptr[6] = PACKET2(0);
-	ib->ptr[7] = PACKET2(0);
-	ib->length_dw = 8;
-	r = radeon_ib_schedule(rdev, ib);
-	if (r) {
-		radeon_scratch_free(rdev, scratch);
-		radeon_ib_free(rdev, &ib);
-		return r;
-	}
-	r = radeon_fence_wait(ib->fence, false);
-	if (r) {
-		return r;
-	}
-	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF) {
-			break;
-		}
-		DRM_UDELAY(1);
-	}
-	if (i < rdev->usec_timeout) {
-		DRM_INFO("ib test succeeded in %u usecs\n", i);
-	} else {
-		DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
-			  scratch, tmp);
-		r = -EINVAL;
-	}
-	radeon_scratch_free(rdev, scratch);
-	radeon_ib_free(rdev, &ib);
-	return r;
-}
-
 
 /*
  * Ring.
  */
 void radeon_ring_free_size(struct radeon_device *rdev)
 {
-	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
+	if (rdev->family >= CHIP_R600)
+		rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
+	else
+		rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
 	/* This works because ring_size is a power of 2 */
 	rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4));
 	rdev->cp.ring_free_dw -= rdev->cp.wptr;
@@ -320,11 +256,10 @@
 	count_dw_pad = (rdev->cp.align_mask + 1) -
 		       (rdev->cp.wptr & rdev->cp.align_mask);
 	for (i = 0; i < count_dw_pad; i++) {
-		radeon_ring_write(rdev, PACKET2(0));
+		radeon_ring_write(rdev, 2 << 30);
 	}
 	DRM_MEMORYBARRIER();
-	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
-	(void)RREG32(RADEON_CP_RB_WPTR);
+	radeon_cp_commit(rdev);
 	mutex_unlock(&rdev->cp.mutex);
 }
 
@@ -334,46 +269,6 @@
 	mutex_unlock(&rdev->cp.mutex);
 }
 
-int radeon_ring_test(struct radeon_device *rdev)
-{
-	uint32_t scratch;
-	uint32_t tmp = 0;
-	unsigned i;
-	int r;
-
-	r = radeon_scratch_get(rdev, &scratch);
-	if (r) {
-		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
-		return r;
-	}
-	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ring_lock(rdev, 2);
-	if (r) {
-		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
-		radeon_scratch_free(rdev, scratch);
-		return r;
-	}
-	radeon_ring_write(rdev, PACKET0(scratch, 0));
-	radeon_ring_write(rdev, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev);
-	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF) {
-			break;
-		}
-		DRM_UDELAY(1);
-	}
-	if (i < rdev->usec_timeout) {
-		DRM_INFO("ring test succeeded in %d usecs\n", i);
-	} else {
-		DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
-			  scratch, tmp);
-		r = -EINVAL;
-	}
-	radeon_scratch_free(rdev, scratch);
-	return r;
-}
-
 int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size)
 {
 	int r;
diff --git a/drivers/gpu/drm/radeon/radeon_share.h b/drivers/gpu/drm/radeon/radeon_share.h
index 63a7735..5f9e358 100644
--- a/drivers/gpu/drm/radeon/radeon_share.h
+++ b/drivers/gpu/drm/radeon/radeon_share.h
@@ -28,12 +28,89 @@
 #ifndef __RADEON_SHARE_H__
 #define __RADEON_SHARE_H__
 
+/* Common */
+struct radeon_device;
+struct radeon_cs_parser;
+int radeon_clocks_init(struct radeon_device *rdev);
+void radeon_clocks_fini(struct radeon_device *rdev);
+void radeon_scratch_init(struct radeon_device *rdev);
+void radeon_surface_init(struct radeon_device *rdev);
+int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
+
+
+/* R100, RV100, RS100, RV200, RS200, R200, RV250, RS300, RV280 */
 void r100_vram_init_sizes(struct radeon_device *rdev);
 
+
+/* R300, R350, RV350, RV380 */
+struct r300_asic {
+	const unsigned	*reg_safe_bm;
+	unsigned	reg_safe_bm_size;
+};
+
+
+/* RS690, RS740 */
 void rs690_line_buffer_adjust(struct radeon_device *rdev,
 			      struct drm_display_mode *mode1,
 			      struct drm_display_mode *mode2);
 
+
+/* RV515 */
 void rv515_bandwidth_avivo_update(struct radeon_device *rdev);
 
+
+/* R600, RV610, RV630, RV620, RV635, RV670, RS780, RS880 */
+bool r600_card_posted(struct radeon_device *rdev);
+void r600_cp_stop(struct radeon_device *rdev);
+void r600_ring_init(struct radeon_device *rdev, unsigned ring_size);
+int r600_cp_resume(struct radeon_device *rdev);
+int r600_count_pipe_bits(uint32_t val);
+int r600_gart_clear_page(struct radeon_device *rdev, int i);
+int r600_mc_wait_for_idle(struct radeon_device *rdev);
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
+int r600_ib_test(struct radeon_device *rdev);
+int r600_ring_test(struct radeon_device *rdev);
+int r600_wb_init(struct radeon_device *rdev);
+void r600_wb_fini(struct radeon_device *rdev);
+void r600_scratch_init(struct radeon_device *rdev);
+int r600_blit_init(struct radeon_device *rdev);
+void r600_blit_fini(struct radeon_device *rdev);
+int r600_cp_init_microcode(struct radeon_device *rdev);
+struct r600_asic {
+	unsigned max_pipes;
+	unsigned max_tile_pipes;
+	unsigned max_simds;
+	unsigned max_backends;
+	unsigned max_gprs;
+	unsigned max_threads;
+	unsigned max_stack_entries;
+	unsigned max_hw_contexts;
+	unsigned max_gs_threads;
+	unsigned sx_max_export_size;
+	unsigned sx_max_export_pos_size;
+	unsigned sx_max_export_smx_size;
+	unsigned sq_num_cf_insts;
+};
+
+/* RV770, RV730, RV710 */
+struct rv770_asic {
+	unsigned max_pipes;
+	unsigned max_tile_pipes;
+	unsigned max_simds;
+	unsigned max_backends;
+	unsigned max_gprs;
+	unsigned max_threads;
+	unsigned max_stack_entries;
+	unsigned max_hw_contexts;
+	unsigned max_gs_threads;
+	unsigned sx_max_export_size;
+	unsigned sx_max_export_pos_size;
+	unsigned sx_max_export_smx_size;
+	unsigned sq_num_cf_insts;
+	unsigned sx_num_of_sets;
+	unsigned sc_prim_fifo_size;
+	unsigned sc_hiz_tile_fifo_size;
+	unsigned sc_earlyz_tile_fifo_fize;
+};
+
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 2882f40..aad0c6f 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -1546,7 +1546,7 @@
 	} while (i < nbox);
 }
 
-static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
+void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	struct drm_radeon_master_private *master_priv = master->driver_priv;
@@ -2213,7 +2213,10 @@
 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
 
-	radeon_cp_dispatch_swap(dev, file_priv->master);
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
+		r600_cp_dispatch_swap(dev, file_priv);
+	else
+		radeon_cp_dispatch_swap(dev, file_priv->master);
 	sarea_priv->ctx_owner = 0;
 
 	COMMIT_RING();
@@ -2412,7 +2415,10 @@
 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
 	VB_AGE_TEST_WITH_RETURN(dev_priv);
 
-	ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
+		ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
+	else
+		ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
 
 	return ret;
 }
@@ -2495,8 +2501,9 @@
 		radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
 	}
 
-	if (indirect->discard)
+	if (indirect->discard) {
 		radeon_cp_discard_buffer(dev, file_priv->master, buf);
+	}
 
 	COMMIT_RING();
 	return 0;
@@ -3227,7 +3234,8 @@
 	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH)
+	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
 };
 
 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index dc7a442..acd889c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -376,9 +376,8 @@
 		radeon_move_null(bo, new_mem);
 		return 0;
 	}
-	if (!rdev->cp.ready) {
+	if (!rdev->cp.ready || rdev->asic->copy == NULL) {
 		/* use memcpy */
-		DRM_ERROR("CP is not ready use memcpy.\n");
 		goto memcpy;
 	}
 
@@ -495,7 +494,7 @@
 		return r;
 	}
 	DRM_INFO("radeon: %uM of VRAM memory ready\n",
-		 rdev->mc.real_vram_size / (1024 * 1024));
+		 (unsigned)(rdev->mc.real_vram_size / (1024 * 1024)));
 	r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0,
 			   ((rdev->mc.gtt_size) >> PAGE_SHIFT));
 	if (r) {
@@ -503,7 +502,7 @@
 		return r;
 	}
 	DRM_INFO("radeon: %uM of GTT memory ready.\n",
-		 rdev->mc.gtt_size / (1024 * 1024));
+		 (unsigned)(rdev->mc.gtt_size / (1024 * 1024)));
 	if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) {
 		rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping;
 	}
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index b29affd..8c3ea7e 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -63,7 +63,7 @@
 		break;
 	default:
 		DRM_ERROR("Unable to use IGP GART size %uM\n",
-			  rdev->mc.gtt_size >> 20);
+			  (unsigned)(rdev->mc.gtt_size >> 20));
 		DRM_ERROR("Valid GART size for IGP are 32M,64M,128M,256M,512M,1G,2G\n");
 		DRM_ERROR("Forcing to 32M GART size\n");
 		rdev->mc.gtt_size = 32 * 1024 * 1024;
diff --git a/drivers/gpu/drm/radeon/rs780.c b/drivers/gpu/drm/radeon/rs780.c
deleted file mode 100644
index 0affcff..0000000
--- a/drivers/gpu/drm/radeon/rs780.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright 2008 Advanced Micro Devices, Inc.
- * Copyright 2008 Red Hat Inc.
- * Copyright 2009 Jerome Glisse.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Dave Airlie
- *          Alex Deucher
- *          Jerome Glisse
- */
-#include "drmP.h"
-#include "radeon_reg.h"
-#include "radeon.h"
-
-/* rs780  depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
-
-/* This files gather functions specifics to:
- * rs780
- *
- * Some of these functions might be used by newer ASICs.
- */
-int rs780_mc_wait_for_idle(struct radeon_device *rdev);
-void rs780_gpu_init(struct radeon_device *rdev);
-
-
-/*
- * MC
- */
-int rs780_mc_init(struct radeon_device *rdev)
-{
-	rs780_gpu_init(rdev);
-	/* FIXME: implement */
-
-	rs600_mc_disable_clients(rdev);
-	if (rs780_mc_wait_for_idle(rdev)) {
-		printk(KERN_WARNING "Failed to wait MC idle while "
-		       "programming pipes. Bad things might happen.\n");
-	}
-	return 0;
-}
-
-void rs780_mc_fini(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
-}
-
-
-/*
- * Global GPU functions
- */
-void rs780_errata(struct radeon_device *rdev)
-{
-	rdev->pll_errata = 0;
-}
-
-int rs780_mc_wait_for_idle(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
-	return 0;
-}
-
-void rs780_gpu_init(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
-}
-
-
-/*
- * VRAM info
- */
-void rs780_vram_get_type(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
-}
-
-void rs780_vram_info(struct radeon_device *rdev)
-{
-	rs780_vram_get_type(rdev);
-
-	/* FIXME: implement */
-	/* Could aper size report 0 ? */
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
-}
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 97965c4..99e397f 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -27,7 +27,7 @@
  */
 #include <linux/seq_file.h>
 #include "drmP.h"
-#include "rv515r.h"
+#include "rv515d.h"
 #include "radeon.h"
 #include "radeon_share.h"
 
diff --git a/drivers/gpu/drm/radeon/rv515r.h b/drivers/gpu/drm/radeon/rv515d.h
similarity index 78%
rename from drivers/gpu/drm/radeon/rv515r.h
rename to drivers/gpu/drm/radeon/rv515d.h
index f3cf840..a65e17e 100644
--- a/drivers/gpu/drm/radeon/rv515r.h
+++ b/drivers/gpu/drm/radeon/rv515d.h
@@ -25,10 +25,12 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#ifndef RV515R_H
-#define RV515R_H
+#ifndef __RV515D_H__
+#define __RV515D_H__
 
-/* RV515 registers */
+/*
+ * RV515 registers
+ */
 #define PCIE_INDEX			0x0030
 #define PCIE_DATA			0x0034
 #define	MC_IND_INDEX			0x0070
@@ -166,5 +168,53 @@
 #define		MC_GLOBW_INIT_LAT_MASK			0xF0000000
 
 
+/*
+ * PM4 packet
+ */
+#define CP_PACKET0			0x00000000
+#define		PACKET0_BASE_INDEX_SHIFT	0
+#define		PACKET0_BASE_INDEX_MASK		(0x1ffff << 0)
+#define		PACKET0_COUNT_SHIFT		16
+#define		PACKET0_COUNT_MASK		(0x3fff << 16)
+#define CP_PACKET1			0x40000000
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+#define CP_PACKET3			0xC0000000
+#define		PACKET3_IT_OPCODE_SHIFT		8
+#define		PACKET3_IT_OPCODE_MASK		(0xff << 8)
+#define		PACKET3_COUNT_SHIFT		16
+#define		PACKET3_COUNT_MASK		(0x3fff << 16)
+/* PACKET3 op code */
+#define		PACKET3_NOP			0x10
+#define		PACKET3_3D_DRAW_VBUF		0x28
+#define		PACKET3_3D_DRAW_IMMD		0x29
+#define		PACKET3_3D_DRAW_INDX		0x2A
+#define		PACKET3_3D_LOAD_VBPNTR		0x2F
+#define		PACKET3_INDX_BUFFER		0x33
+#define		PACKET3_3D_DRAW_VBUF_2		0x34
+#define		PACKET3_3D_DRAW_IMMD_2		0x35
+#define		PACKET3_3D_DRAW_INDX_2		0x36
+#define		PACKET3_BITBLT_MULTI		0x9B
+
+#define PACKET0(reg, n)	(CP_PACKET0 |					\
+			 REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |	\
+			 REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n)	(CP_PACKET3 |					\
+			 REG_SET(PACKET3_IT_OPCODE, (op)) |		\
+			 REG_SET(PACKET3_COUNT, (n)))
+
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
 #endif
 
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 21d8ffd..57765f6 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -25,100 +25,975 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
 #include "drmP.h"
-#include "radeon_reg.h"
 #include "radeon.h"
+#include "radeon_share.h"
+#include "rv770d.h"
+#include "avivod.h"
+#include "atom.h"
 
-/* rv770,rv730,rv710  depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
+#define R700_PFP_UCODE_SIZE 848
+#define R700_PM4_UCODE_SIZE 1360
 
-/* This files gather functions specifics to:
- * rv770,rv730,rv710
- *
- * Some of these functions might be used by newer ASICs.
+static void rv770_gpu_init(struct radeon_device *rdev);
+void rv770_fini(struct radeon_device *rdev);
+
+
+/*
+ * GART
  */
-int rv770_mc_wait_for_idle(struct radeon_device *rdev);
-void rv770_gpu_init(struct radeon_device *rdev);
+int rv770_pcie_gart_enable(struct radeon_device *rdev)
+{
+	u32 tmp;
+	int r, i;
+
+	/* Initialize common gart structure */
+	r = radeon_gart_init(rdev);
+	if (r) {
+		return r;
+	}
+	rdev->gart.table_size = rdev->gart.num_gpu_pages * 8;
+	r = radeon_gart_table_vram_alloc(rdev);
+	if (r) {
+		return r;
+	}
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		r600_gart_clear_page(rdev, i);
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
+				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
+	/* Setup TLB control */
+	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
+		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
+		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
+	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+			(u32)(rdev->dummy_page.addr >> 12));
+	for (i = 1; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
+
+	r600_pcie_gart_tlb_flush(rdev);
+	rdev->gart.ready = true;
+	return 0;
+}
+
+void rv770_pcie_gart_disable(struct radeon_device *rdev)
+{
+	u32 tmp;
+	int i;
+
+	/* Clear ptes*/
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		r600_gart_clear_page(rdev, i);
+	r600_pcie_gart_tlb_flush(rdev);
+	/* Disable all tables */
+	for (i = 0; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
+
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
+	/* Setup TLB control */
+	tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
+	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
+}
 
 
 /*
  * MC
  */
-int rv770_mc_init(struct radeon_device *rdev)
+static void rv770_mc_resume(struct radeon_device *rdev)
 {
-	uint32_t tmp;
+	u32 d1vga_control, d2vga_control;
+	u32 vga_render_control, vga_hdp_control;
+	u32 d1crtc_control, d2crtc_control;
+	u32 new_d1grph_primary, new_d1grph_secondary;
+	u32 new_d2grph_primary, new_d2grph_secondary;
+	u64 old_vram_start;
+	u32 tmp;
+	int i, j;
 
-	rv770_gpu_init(rdev);
+	/* Initialize HDP */
+	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+	}
+	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
-	/* setup the gart before changing location so we can ask to
-	 * discard unmapped mc request
-	 */
-	/* FIXME: disable out of gart access */
-	tmp = rdev->mc.gtt_location / 4096;
-	tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp);
-	WREG32(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp);
-	tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096;
-	tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp);
-	WREG32(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp);
+	d1vga_control = RREG32(D1VGA_CONTROL);
+	d2vga_control = RREG32(D2VGA_CONTROL);
+	vga_render_control = RREG32(VGA_RENDER_CONTROL);
+	vga_hdp_control = RREG32(VGA_HDP_CONTROL);
+	d1crtc_control = RREG32(D1CRTC_CONTROL);
+	d2crtc_control = RREG32(D2CRTC_CONTROL);
+	old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;
+	new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS);
+	new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS);
+	new_d1grph_primary += rdev->mc.vram_start - old_vram_start;
+	new_d1grph_secondary += rdev->mc.vram_start - old_vram_start;
+	new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS);
+	new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS);
+	new_d2grph_primary += rdev->mc.vram_start - old_vram_start;
+	new_d2grph_secondary += rdev->mc.vram_start - old_vram_start;
 
-	rs600_mc_disable_clients(rdev);
-	if (rv770_mc_wait_for_idle(rdev)) {
-		printk(KERN_WARNING "Failed to wait MC idle while "
-		       "programming pipes. Bad things might happen.\n");
+	/* Stop all video */
+	WREG32(D1VGA_CONTROL, 0);
+	WREG32(D2VGA_CONTROL, 0);
+	WREG32(VGA_RENDER_CONTROL, 0);
+	WREG32(D1CRTC_UPDATE_LOCK, 1);
+	WREG32(D2CRTC_UPDATE_LOCK, 1);
+	WREG32(D1CRTC_CONTROL, 0);
+	WREG32(D2CRTC_CONTROL, 0);
+	WREG32(D1CRTC_UPDATE_LOCK, 0);
+	WREG32(D2CRTC_UPDATE_LOCK, 0);
+
+	mdelay(1);
+	if (r600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "[drm] MC not idle !\n");
 	}
 
-	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
-	tmp = REG_SET(R700_MC_FB_TOP, tmp >> 24);
-	tmp |= REG_SET(R700_MC_FB_BASE, rdev->mc.vram_location >> 24);
-	WREG32(R700_MC_VM_FB_LOCATION, tmp);
-	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
-	tmp = REG_SET(R700_MC_AGP_TOP, tmp >> 22);
-	WREG32(R700_MC_VM_AGP_TOP, tmp);
-	tmp = REG_SET(R700_MC_AGP_BOT, rdev->mc.gtt_location >> 22);
-	WREG32(R700_MC_VM_AGP_BOT, tmp);
-	return 0;
-}
+	/* Lockout access through VGA aperture*/
+	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
 
-void rv770_mc_fini(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
+	/* Update configuration */
+	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
+	tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16;
+	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+	WREG32(MC_VM_FB_LOCATION, tmp);
+	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+	WREG32(HDP_NONSURFACE_INFO, (2 << 7));
+	WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF);
+	if (rdev->flags & RADEON_IS_AGP) {
+		WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16);
+		WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
+		WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
+	} else {
+		WREG32(MC_VM_AGP_BASE, 0);
+		WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+		WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+	}
+	WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary);
+	WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary);
+	WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary);
+	WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary);
+	WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
+
+	/* Unlock host access */
+	WREG32(VGA_HDP_CONTROL, vga_hdp_control);
+
+	mdelay(1);
+	if (r600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "[drm] MC not idle !\n");
+	}
+
+	/* Restore video state */
+	WREG32(D1CRTC_UPDATE_LOCK, 1);
+	WREG32(D2CRTC_UPDATE_LOCK, 1);
+	WREG32(D1CRTC_CONTROL, d1crtc_control);
+	WREG32(D2CRTC_CONTROL, d2crtc_control);
+	WREG32(D1CRTC_UPDATE_LOCK, 0);
+	WREG32(D2CRTC_UPDATE_LOCK, 0);
+	WREG32(D1VGA_CONTROL, d1vga_control);
+	WREG32(D2VGA_CONTROL, d2vga_control);
+	WREG32(VGA_RENDER_CONTROL, vga_render_control);
 }
 
 
 /*
- * Global GPU functions
+ * CP.
  */
-void rv770_errata(struct radeon_device *rdev)
+void r700_cp_stop(struct radeon_device *rdev)	/* halt the command processor */
 {
-	rdev->pll_errata = 0;
+	WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));	/* set both the ME and the PFP halt bits in a single write */
 }
 
-int rv770_mc_wait_for_idle(struct radeon_device *rdev)
+
+static int rv770_cp_load_microcode(struct radeon_device *rdev)	/* upload PFP and ME microcode; 0 on success, -EINVAL if a firmware image is missing */
 {
-	/* FIXME: implement */
+	const __be32 *fw_data;	/* firmware words are stored big-endian and converted on upload */
+	int i;
+
+	if (!rdev->me_fw || !rdev->pfp_fw)	/* both images must already be present */
+		return -EINVAL;
+
+	r700_cp_stop(rdev);	/* halt the CP before touching its microcode RAM */
+	WREG32(CP_RB_CNTL, RB_NO_UPDATE | (15 << 8) | (3 << 0));	/* same bit positions as RB_BLKSZ(15) | RB_BUFSZ(3) */
+
+	/* Reset cp: assert the soft reset, read the register back, wait 15 ms, then deassert */
+	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+	RREG32(GRBM_SOFT_RESET);	/* presumably posts the write before the delay - TODO confirm */
+	mdelay(15);
+	WREG32(GRBM_SOFT_RESET, 0);
+
+	fw_data = (const __be32 *)rdev->pfp_fw->data;	/* NOTE(review): image length is not checked here; presumably validated at load time - confirm */
+	WREG32(CP_PFP_UCODE_ADDR, 0);	/* start the PFP upload at address 0 */
+	for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
+		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));	/* one CPU-endian word per data-register write */
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+
+	fw_data = (const __be32 *)rdev->me_fw->data;
+	WREG32(CP_ME_RAM_WADDR, 0);	/* start the ME upload at write address 0 */
+	for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
+		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
+
+	WREG32(CP_PFP_UCODE_ADDR, 0);	/* leave all three address registers at 0 after the upload */
+	WREG32(CP_ME_RAM_WADDR, 0);
+	WREG32(CP_ME_RAM_RADDR, 0);
 	return 0;
 }
 
-void rv770_gpu_init(struct radeon_device *rdev)
-{
-	/* FIXME: implement */
-}
-
 
 /*
- * VRAM info
+ * Core functions
  */
-void rv770_vram_get_type(struct radeon_device *rdev)
+static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes,	/* returns a map with one 2-bit backend index per tile pipe */
+						u32 num_backends,
+						u32 backend_disable_mask)
 {
-	/* FIXME: implement */
+	u32 backend_map = 0;
+	u32 enabled_backends_mask;
+	u32 enabled_backends_count;
+	u32 cur_pipe;
+	u32 swizzle_pipe[R7XX_MAX_PIPES];	/* bit-slot (in units of 2 bits) assigned to each pipe */
+	u32 cur_backend;
+	u32 i;
+
+	if (num_tile_pipes > R7XX_MAX_PIPES)	/* clamp both counts to [1, max] */
+		num_tile_pipes = R7XX_MAX_PIPES;
+	if (num_tile_pipes < 1)
+		num_tile_pipes = 1;
+	if (num_backends > R7XX_MAX_BACKENDS)
+		num_backends = R7XX_MAX_BACKENDS;
+	if (num_backends < 1)
+		num_backends = 1;
+
+	enabled_backends_mask = 0;
+	enabled_backends_count = 0;
+	for (i = 0; i < R7XX_MAX_BACKENDS; ++i) {	/* take the lowest-numbered backends whose disable bit is clear */
+		if (((backend_disable_mask >> i) & 1) == 0) {
+			enabled_backends_mask |= (1 << i);
+			++enabled_backends_count;
+		}
+		if (enabled_backends_count == num_backends)	/* stop once the requested number has been collected */
+			break;
+	}
+
+	if (enabled_backends_count == 0) {	/* everything disabled: fall back to backend 0 */
+		enabled_backends_mask = 1;
+		enabled_backends_count = 1;
+	}
+
+	if (enabled_backends_count != num_backends)
+		num_backends = enabled_backends_count;	/* num_backends is not read again below */
+
+	memset(swizzle_pipe, 0, sizeof(swizzle_pipe));	/* size taken from the array itself; no cast needed */
+	switch (num_tile_pipes) {	/* fixed pipe -> slot permutation for each pipe count */
+	case 1:
+		swizzle_pipe[0] = 0;
+		break;
+	case 2:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		break;
+	case 3:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 1;
+		break;
+	case 4:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 3;
+		swizzle_pipe[3] = 1;
+		break;
+	case 5:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 1;
+		swizzle_pipe[4] = 3;
+		break;
+	case 6:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 5;
+		swizzle_pipe[4] = 3;
+		swizzle_pipe[5] = 1;
+		break;
+	case 7:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 6;
+		swizzle_pipe[4] = 3;
+		swizzle_pipe[5] = 1;
+		swizzle_pipe[6] = 5;
+		break;
+	case 8:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 2;
+		swizzle_pipe[2] = 4;
+		swizzle_pipe[3] = 6;
+		swizzle_pipe[4] = 3;
+		swizzle_pipe[5] = 1;
+		swizzle_pipe[6] = 7;
+		swizzle_pipe[7] = 5;
+		break;
+	}
+
+	cur_backend = 0;
+	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {	/* hand out enabled backends to pipes round-robin */
+		while (((1 << cur_backend) & enabled_backends_mask) == 0)	/* skip disabled backends, wrapping around */
+			cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
+
+		backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2)));	/* only the low 2 bits of the index are stored */
+
+		cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS;
+	}
+
+	return backend_map;
 }
 
-void rv770_vram_info(struct radeon_device *rdev)
+static void rv770_gpu_init(struct radeon_device *rdev)	/* per-family limits, tiling/backend setup, 3D engine register defaults */
 {
-	rv770_vram_get_type(rdev);
+	int i, j, num_qd_pipes;
+	u32 sx_debug_1;
+	u32 smx_dc_ctl0;
+	u32 num_gs_verts_per_thread;
+	u32 vgt_gs_per_es;
+	u32 gs_prim_buffer_depth = 0;
+	u32 sq_ms_fifo_sizes;
+	u32 sq_config;
+	u32 sq_thread_resource_mgmt;
+	u32 hdp_host_path_cntl;
+	u32 sq_dyn_gpr_size_simd_ab_0;
+	u32 backend_map;
+	u32 gb_tiling_config = 0;
+	u32 cc_rb_backend_disable = 0;
+	u32 cc_gc_shader_pipe_config = 0;
+	u32 mc_arb_ramcfg;
+	u32 db_debug4;
 
-	/* FIXME: implement */
+	/* setup chip specs: fill rdev->config.rv770 for the detected family; unknown families leave it untouched */
+	switch (rdev->family) {
+	case CHIP_RV770:
+		rdev->config.rv770.max_pipes = 4;
+		rdev->config.rv770.max_tile_pipes = 8;
+		rdev->config.rv770.max_simds = 10;
+		rdev->config.rv770.max_backends = 4;
+		rdev->config.rv770.max_gprs = 256;
+		rdev->config.rv770.max_threads = 248;
+		rdev->config.rv770.max_stack_entries = 512;
+		rdev->config.rv770.max_hw_contexts = 8;
+		rdev->config.rv770.max_gs_threads = 16 * 2;
+		rdev->config.rv770.sx_max_export_size = 128;
+		rdev->config.rv770.sx_max_export_pos_size = 16;
+		rdev->config.rv770.sx_max_export_smx_size = 112;
+		rdev->config.rv770.sq_num_cf_insts = 2;
+
+		rdev->config.rv770.sx_num_of_sets = 7;
+		rdev->config.rv770.sc_prim_fifo_size = 0xF9;
+		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+		break;
+	case CHIP_RV730:
+		rdev->config.rv770.max_pipes = 2;
+		rdev->config.rv770.max_tile_pipes = 4;
+		rdev->config.rv770.max_simds = 8;
+		rdev->config.rv770.max_backends = 2;
+		rdev->config.rv770.max_gprs = 128;
+		rdev->config.rv770.max_threads = 248;
+		rdev->config.rv770.max_stack_entries = 256;
+		rdev->config.rv770.max_hw_contexts = 8;
+		rdev->config.rv770.max_gs_threads = 16 * 2;
+		rdev->config.rv770.sx_max_export_size = 256;
+		rdev->config.rv770.sx_max_export_pos_size = 32;
+		rdev->config.rv770.sx_max_export_smx_size = 224;
+		rdev->config.rv770.sq_num_cf_insts = 2;
+
+		rdev->config.rv770.sx_num_of_sets = 7;
+		rdev->config.rv770.sc_prim_fifo_size = 0xf9;
+		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+		if (rdev->config.rv770.sx_max_export_pos_size > 16) {	/* move 16 entries from the position buffer to the SMX buffer */
+			rdev->config.rv770.sx_max_export_pos_size -= 16;
+			rdev->config.rv770.sx_max_export_smx_size += 16;
+		}
+		break;
+	case CHIP_RV710:
+		rdev->config.rv770.max_pipes = 2;
+		rdev->config.rv770.max_tile_pipes = 2;
+		rdev->config.rv770.max_simds = 2;
+		rdev->config.rv770.max_backends = 1;
+		rdev->config.rv770.max_gprs = 256;
+		rdev->config.rv770.max_threads = 192;
+		rdev->config.rv770.max_stack_entries = 256;
+		rdev->config.rv770.max_hw_contexts = 4;
+		rdev->config.rv770.max_gs_threads = 8 * 2;
+		rdev->config.rv770.sx_max_export_size = 128;
+		rdev->config.rv770.sx_max_export_pos_size = 16;
+		rdev->config.rv770.sx_max_export_smx_size = 112;
+		rdev->config.rv770.sq_num_cf_insts = 1;
+
+		rdev->config.rv770.sx_num_of_sets = 7;
+		rdev->config.rv770.sc_prim_fifo_size = 0x40;
+		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+		break;
+	case CHIP_RV740:
+		rdev->config.rv770.max_pipes = 4;
+		rdev->config.rv770.max_tile_pipes = 4;
+		rdev->config.rv770.max_simds = 8;
+		rdev->config.rv770.max_backends = 4;
+		rdev->config.rv770.max_gprs = 256;
+		rdev->config.rv770.max_threads = 248;
+		rdev->config.rv770.max_stack_entries = 512;
+		rdev->config.rv770.max_hw_contexts = 8;
+		rdev->config.rv770.max_gs_threads = 16 * 2;
+		rdev->config.rv770.sx_max_export_size = 256;
+		rdev->config.rv770.sx_max_export_pos_size = 32;
+		rdev->config.rv770.sx_max_export_smx_size = 224;
+		rdev->config.rv770.sq_num_cf_insts = 2;
+
+		rdev->config.rv770.sx_num_of_sets = 7;
+		rdev->config.rv770.sc_prim_fifo_size = 0x100;
+		rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130;
+
+		if (rdev->config.rv770.sx_max_export_pos_size > 16) {	/* same position -> SMX adjustment as RV730 */
+			rdev->config.rv770.sx_max_export_pos_size -= 16;
+			rdev->config.rv770.sx_max_export_smx_size += 16;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* Initialize HDP: zero five registers in each of 32 blocks spaced 0x18 bytes apart, starting at 0x2c14 */
+	j = 0;
+	for (i = 0; i < 32; i++) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+		j += 0x18;
+	}
+
+	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
+
+	/* setup tiling, simd, pipe config; bank and row fields come from MC_ARB_RAMCFG */
+	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
+
+	switch (rdev->config.rv770.max_tile_pipes) {	/* PIPE_TILING takes log2 of the tile pipe count */
+	case 1:
+		gb_tiling_config |= PIPE_TILING(0);
+		break;
+	case 2:
+		gb_tiling_config |= PIPE_TILING(1);
+		break;
+	case 4:
+		gb_tiling_config |= PIPE_TILING(2);
+		break;
+	case 8:
+		gb_tiling_config |= PIPE_TILING(3);
+		break;
+	default:
+		break;
+	}
+
+	if (rdev->family == CHIP_RV770)
+		gb_tiling_config |= BANK_TILING(1);
+	else
+		gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);	/* extract NOOFBANK: mask first, then shift */
+
+	gb_tiling_config |= GROUP_SIZE(0);
+
+	if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) {	/* NOOFROWS values above 3 are clamped to 3 */
+		gb_tiling_config |= ROW_TILING(3);
+		gb_tiling_config |= SAMPLE_SPLIT(3);
+	} else {
+		gb_tiling_config |=
+			ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
+		gb_tiling_config |=
+			SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT));
+	}
+
+	gb_tiling_config |= BANK_SWAPS(1);
+
+	backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes,
+							rdev->config.rv770.max_backends,
+							(0xff << rdev->config.rv770.max_backends) & 0xff);	/* disable mask: 1 for every index >= max_backends */
+	gb_tiling_config |= BACKEND_MAP(backend_map);
+
+	cc_gc_shader_pipe_config =
+		INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK);
+	cc_gc_shader_pipe_config |=
+		INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK);
+
+	cc_rb_backend_disable =
+		BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK);
+
+	WREG32(GB_TILING_CONFIG, gb_tiling_config);
+	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));	/* DCP and HDP receive only the low 16 bits (no backend map) */
+	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
+
+	WREG32(CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
+	WREG32(CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
+	WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
+
+	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+	WREG32(CGTS_SYS_TCC_DISABLE, 0);
+	WREG32(CGTS_TCC_DISABLE, 0);
+	WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
+	WREG32(CGTS_USER_TCC_DISABLE, 0);
+
+	num_qd_pipes =
+		R7XX_MAX_PIPES - r600_count_pipe_bits(cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK);	/* pipes not marked inactive */
+	WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
+	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);
+
+	/* set HW defaults for 3D engine */
+	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
+						ROQ_IB2_START(0x2b)));
+
+	WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30));
+
+	WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO |
+					SYNC_GRADIENT |
+					SYNC_WALKER |
+					SYNC_ALIGNER));
+
+	sx_debug_1 = RREG32(SX_DEBUG_1);
+	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
+	WREG32(SX_DEBUG_1, sx_debug_1);
+
+	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
+	smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff);	/* clear the old cache depth field before setting the new one */
+	smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1);
+	WREG32(SMX_DC_CTL0, smx_dc_ctl0);
+
+	WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
+					  GS_FLUSH_CTL(4) |
+					  ACK_FLUSH_CTL(3) |
+					  SYNC_FLUSH_CTL));
+
+	if (rdev->family == CHIP_RV770)
+		WREG32(DB_DEBUG3, DB_CLK_OFF_DELAY(0x1f));
+	else {
+		db_debug4 = RREG32(DB_DEBUG4);
+		db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER;
+		WREG32(DB_DEBUG4, db_debug4);
+	}
+
+	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) |
+						   POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) |
+						   SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1)));
+
+	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) |
+						 SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) |
+						 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize)));
+
+	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+
+	WREG32(VGT_NUM_INSTANCES, 1);
+
+	WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
+
+	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
+
+	WREG32(CP_PERFMON_CNTL, 0);
+
+	sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) |
+			    DONE_FIFO_HIWATER(0xe0) |
+			    ALU_UPDATE_FIFO_HIWATER(0x8));
+	switch (rdev->family) {
+	case CHIP_RV770:
+		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
+		break;
+	case CHIP_RV730:
+	case CHIP_RV710:
+	case CHIP_RV740:
+	default:
+		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4);
+		break;
+	}
+	WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);
+
+	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
+	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
+	 */
+	sq_config = RREG32(SQ_CONFIG);
+	sq_config &= ~(PS_PRIO(3) |
+		       VS_PRIO(3) |
+		       GS_PRIO(3) |
+		       ES_PRIO(3));
+	sq_config |= (DX9_CONSTS |
+		      VC_ENABLE |
+		      EXPORT_SRC_C |
+		      PS_PRIO(0) |
+		      VS_PRIO(1) |
+		      GS_PRIO(2) |
+		      ES_PRIO(3));
+	if (rdev->family == CHIP_RV710)
+		/* no vertex cache */
+		sq_config &= ~VC_ENABLE;
+
+	WREG32(SQ_CONFIG, sq_config);
+
+	WREG32(SQ_GPR_RESOURCE_MGMT_1,  (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
+						    NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) |
+						    NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2)));
+
+	WREG32(SQ_GPR_RESOURCE_MGMT_2,  (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) |
+						    NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64)));
+
+	sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) |
+				   NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) |
+				   NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8));
+	if (((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads)
+		sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads);
+	else
+		sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8);	/* NOTE(review): condition tests max_threads/8 but this uses max_gs_threads/8 - confirm intent */
+	WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
+
+	WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
+						     NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
+
+	WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) |
+						     NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4)));
+
+	sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) |
+				     SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) |
+				     SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) |
+				     SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64));
+
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);	/* the same value goes to all eight SIMD_AB registers */
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
+	WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);
+
+	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
+						     FORCE_EOV_MAX_REZ_CNT(255)));
+
+	if (rdev->family == CHIP_RV710)
+		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) |
+							   AUTO_INVLD_EN(ES_AND_GS_AUTO)));
+	else
+		WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) |
+							   AUTO_INVLD_EN(ES_AND_GS_AUTO)));
+
+	switch (rdev->family) {
+	case CHIP_RV770:
+	case CHIP_RV730:
+	case CHIP_RV740:
+		gs_prim_buffer_depth = 384;
+		break;
+	case CHIP_RV710:
+		gs_prim_buffer_depth = 128;
+		break;
+	default:
+		break;
+	}
+
+	num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16;
+	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
+	/* Max value for this is 256 */
+	if (vgt_gs_per_es > 256)
+		vgt_gs_per_es = 256;
+
+	WREG32(VGT_ES_PER_GS, 128);
+	WREG32(VGT_GS_PER_ES, vgt_gs_per_es);
+	WREG32(VGT_GS_PER_VS, 2);
+
+	/* more default values. 2D/3D driver should adjust as needed */
+	WREG32(VGT_GS_VERTEX_REUSE, 16);
+	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
+	WREG32(VGT_STRMOUT_EN, 0);
+	WREG32(SX_MISC, 0);
+	WREG32(PA_SC_MODE_CNTL, 0);
+	WREG32(PA_SC_EDGERULE, 0xaaaaaaaa);
+	WREG32(PA_SC_AA_CONFIG, 0);
+	WREG32(PA_SC_CLIPRECT_RULE, 0xffff);
+	WREG32(PA_SC_LINE_STIPPLE, 0);
+	WREG32(SPI_INPUT_Z, 0);
+	WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
+	WREG32(CB_COLOR7_FRAG, 0);
+
+	/* clear render buffer base addresses */
+	WREG32(CB_COLOR0_BASE, 0);
+	WREG32(CB_COLOR1_BASE, 0);
+	WREG32(CB_COLOR2_BASE, 0);
+	WREG32(CB_COLOR3_BASE, 0);
+	WREG32(CB_COLOR4_BASE, 0);
+	WREG32(CB_COLOR5_BASE, 0);
+	WREG32(CB_COLOR6_BASE, 0);
+	WREG32(CB_COLOR7_BASE, 0);
+
+	WREG32(TCP_CNTL, 0);
+
+	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
+	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);	/* value is written back unmodified */
+
+	WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+
+	WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
+					  NUM_CLIP_SEQ(3)));
+
+}
+
+int rv770_mc_init(struct radeon_device *rdev)	/* lay out VRAM and GTT in the GPU address space; returns 0 or the radeon_agp_init() error */
+{
+	fixed20_12 a;
+	u32 tmp;
+	int r;
+
+	/* Get VRAM information */
+	/* FIXME: Don't know how to determine vram width, need to check
+	 * vram_width usage
+	 */
+	rdev->mc.vram_width = 128;
+	rdev->mc.vram_is_ddr = true;
 	/* Could aper size report 0 ? */
 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	/* Setup GPU memory space; both VRAM sizes are read from CONFIG_MEMSIZE */
+	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
+	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r)
+			return r;
+		/* gtt_size is setup by radeon_agp_init */
+		rdev->mc.gtt_location = rdev->mc.agp_base;
+		tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;	/* space left above the AGP aperture, up to 0xFFFFFFFF */
+		/* Try to put VRAM directly before or after AGP because
+		 * we want SYSTEM_APERTURE to cover both VRAM and
+		 * AGP so that GPU can catch out of VRAM/AGP access
+		 */
+		if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
+			/* Enough room before AGP: VRAM ends where GTT starts */
+			rdev->mc.vram_location = rdev->mc.gtt_location -
+							rdev->mc.mc_vram_size;
+		} else if (tmp > rdev->mc.mc_vram_size) {
+			/* Enough room after AGP: VRAM starts where GTT ends */
+			rdev->mc.vram_location = rdev->mc.gtt_location +
+							rdev->mc.gtt_size;
+		} else {
+			/* Put VRAM at 0 and GTT right after it; AGP might
+			 * not work on some cards
+			 */
+			rdev->mc.vram_location = 0x00000000UL;
+			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+		}
+	} else {
+		rdev->mc.vram_location = 0x00000000UL;	/* non-AGP: VRAM at 0, GTT immediately after it */
+		rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+		rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;	/* radeon_gart_size is scaled by 1 MiB here */
+	}
+	rdev->mc.vram_start = rdev->mc.vram_location;
+	rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size;	/* end values are exclusive (start + size) */
+	rdev->mc.gtt_start = rdev->mc.gtt_location;
+	rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size;
+	/* FIXME: we should enforce default clock in case GPU is not in
+	 * default setup
+	 */
+	a.full = rfixed_const(100);
+	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);	/* pm.sclk = default_sclk / 100 in fixed point */
+	return 0;
+}
+int rv770_gpu_reset(struct radeon_device *rdev)	/* stub: no reset is performed yet */
+{
+	/* FIXME: implement; until then success (0) is always reported */
+	return 0;
+}
+
+int rv770_resume(struct radeon_device *rdev)	/* bring the hardware up step by step; returns 0 or the first failing step's error */
+{
+	int r;
+
+	rv770_mc_resume(rdev);	/* memory controller first; its result is not checked */
+	r = rv770_pcie_gart_enable(rdev);
+	if (r)
+		return r;
+	rv770_gpu_init(rdev);
+	r = radeon_ring_init(rdev, rdev->cp.ring_size);
+	if (r)
+		return r;
+	r = rv770_cp_load_microcode(rdev);	/* fails with -EINVAL if firmware has not been loaded */
+	if (r)
+		return r;
+	r = r600_cp_resume(rdev);
+	if (r)
+		return r;
+	r = r600_wb_init(rdev);
+	if (r)
+		return r;	/* NOTE(review): earlier steps are not undone on failure here; presumably left to the caller - confirm */
+	return 0;
+}
+
+int rv770_suspend(struct radeon_device *rdev)	/* halt the CP; always returns 0 */
+{
+	/* FIXME: we should wait for ring to be empty before halting the CP */
+	r700_cp_stop(rdev);
+	return 0;
+}
+
+/* The plan is to move initialization into this function and use
+ * helper functions, so that radeon_device_init does little more
+ * than call the ASIC-specific init function. This should also
+ * allow removing a number of callback functions, such as
+ * vram_info.
+ */
+int rv770_init(struct radeon_device *rdev)	/* full ASIC bring-up; returns 0 or the first error, retrying once without AGP where noted */
+{
+	int r;
+
+	rdev->new_init_path = true;
+	r = radeon_dummy_page_init(rdev);
+	if (r)
+		return r;
+	/* This doesn't do much */
+	r = radeon_gem_init(rdev);
+	if (r)
+		return r;
+	/* Read BIOS; a missing BIOS is fatal on AVIVO parts */
+	if (!radeon_get_bios(rdev)) {
+		if (ASIC_IS_AVIVO(rdev))
+			return -EINVAL;
+	}
+	/* Must be an ATOMBIOS */
+	if (!rdev->is_atom_bios)
+		return -EINVAL;
+	r = radeon_atombios_init(rdev);
+	if (r)
+		return r;
+	/* Post card if necessary (only when a BIOS image is available) */
+	if (!r600_card_posted(rdev) && rdev->bios) {
+		DRM_INFO("GPU not posted. posting now...\n");
+		atom_asic_init(rdev->mode_info.atom_context);
+	}
+	/* Initialize scratch registers */
+	r600_scratch_init(rdev);
+	/* Initialize surface registers */
+	radeon_surface_init(rdev);
+	r = radeon_clocks_init(rdev);
+	if (r)
+		return r;
+	/* Fence driver */
+	r = radeon_fence_driver_init(rdev);
+	if (r)
+		return r;
+	r = rv770_mc_init(rdev);
+	if (r) {
+		if (rdev->flags & RADEON_IS_AGP) {
+			/* Retry with AGP disabled: tear down, clear the flag, restart init */
+			rv770_fini(rdev);
+			rdev->flags &= ~RADEON_IS_AGP;
+			return rv770_init(rdev);
+		}
+		return r;
+	}
+	/* Memory manager */
+	r = radeon_object_init(rdev);
+	if (r)
+		return r;
+	rdev->cp.ring_obj = NULL;
+	r600_ring_init(rdev, 1024 * 1024);
+
+	if (!rdev->me_fw || !rdev->pfp_fw) {	/* fetch microcode only if it is not already loaded */
+		r = r600_cp_init_microcode(rdev);
+		if (r) {
+			DRM_ERROR("Failed to load firmware!\n");
+			return r;
+		}
+	}
+
+	r = rv770_resume(rdev);
+	if (r) {
+		if (rdev->flags & RADEON_IS_AGP) {
+			/* Retry with AGP disabled, same fallback as for rv770_mc_init */
+			rv770_fini(rdev);
+			rdev->flags &= ~RADEON_IS_AGP;
+			return rv770_init(rdev);
+		}
+		return r;
+	}
+	r = r600_blit_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled blitter (%d).\n", r);
+		return r;
+	}
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+		return r;
+	}
+	r = radeon_ib_test(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+			return r;
+	}
+	return 0;
+}
+
+void rv770_fini(struct radeon_device *rdev)	/* tear down what rv770_init set up; also used by its AGP-disable retry path */
+{
+	r600_blit_fini(rdev);
+	radeon_ring_fini(rdev);
+	rv770_pcie_gart_disable(rdev);	/* disable the GART before freeing its table */
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+	radeon_gem_fini(rdev);
+	radeon_fence_driver_fini(rdev);
+	radeon_clocks_fini(rdev);
+#if __OS_HAS_AGP
+	if (rdev->flags & RADEON_IS_AGP)	/* AGP teardown only when the device is still flagged as AGP */
+		radeon_agp_fini(rdev);
+#endif
+	radeon_object_fini(rdev);
+	if (rdev->is_atom_bios) {
+		radeon_atombios_fini(rdev);
+	} else {
+		radeon_combios_fini(rdev);
+	}
+	kfree(rdev->bios);
+	rdev->bios = NULL;	/* cleared so a later rv770_init retry does not see a stale pointer */
+	radeon_dummy_page_fini(rdev);
 }
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
new file mode 100644
index 0000000..4b9c3d6
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef RV770_H
+#define RV770_H
+
+#define R7XX_MAX_SH_GPRS           256
+#define R7XX_MAX_TEMP_GPRS         16
+#define R7XX_MAX_SH_THREADS        256
+#define R7XX_MAX_SH_STACK_ENTRIES  4096
+#define R7XX_MAX_BACKENDS          8
+#define R7XX_MAX_BACKENDS_MASK     0xff
+#define R7XX_MAX_SIMDS             16
+#define R7XX_MAX_SIMDS_MASK        0xffff
+#define R7XX_MAX_PIPES             8
+#define R7XX_MAX_PIPES_MASK        0xff
+
+/* Registers */
+#define	CB_COLOR0_BASE					0x28040
+#define	CB_COLOR1_BASE					0x28044
+#define	CB_COLOR2_BASE					0x28048
+#define	CB_COLOR3_BASE					0x2804C
+#define	CB_COLOR4_BASE					0x28050
+#define	CB_COLOR5_BASE					0x28054
+#define	CB_COLOR6_BASE					0x28058
+#define	CB_COLOR7_BASE					0x2805C
+#define	CB_COLOR7_FRAG					0x280FC
+
+#define	CC_GC_SHADER_PIPE_CONFIG			0x8950
+#define	CC_RB_BACKEND_DISABLE				0x98F4
+#define		BACKEND_DISABLE(x)				((x) << 16)
+#define	CC_SYS_RB_BACKEND_DISABLE			0x3F88
+
+#define	CGTS_SYS_TCC_DISABLE				0x3F90
+#define	CGTS_TCC_DISABLE				0x9148
+#define	CGTS_USER_SYS_TCC_DISABLE			0x3F94
+#define	CGTS_USER_TCC_DISABLE				0x914C
+
+#define	CONFIG_MEMSIZE					0x5428
+
+#define	CP_ME_CNTL					0x86D8
+#define		CP_ME_HALT					(1<<28)
+#define		CP_PFP_HALT					(1<<26)
+#define	CP_ME_RAM_DATA					0xC160
+#define	CP_ME_RAM_RADDR					0xC158
+#define	CP_ME_RAM_WADDR					0xC15C
+#define CP_MEQ_THRESHOLDS				0x8764
+#define		STQ_SPLIT(x)					((x) << 0)
+#define	CP_PERFMON_CNTL					0x87FC
+#define	CP_PFP_UCODE_ADDR				0xC150
+#define	CP_PFP_UCODE_DATA				0xC154
+#define	CP_QUEUE_THRESHOLDS				0x8760
+#define		ROQ_IB1_START(x)				((x) << 0)
+#define		ROQ_IB2_START(x)				((x) << 8)
+#define	CP_RB_CNTL					0xC104
+#define		RB_BUFSZ(x)					((x)<<0)
+#define		RB_BLKSZ(x)					((x)<<8)
+#define		RB_NO_UPDATE					(1<<27)
+#define		RB_RPTR_WR_ENA					(1<<31)
+#define		BUF_SWAP_32BIT					(2 << 16)
+#define	CP_RB_RPTR					0x8700
+#define	CP_RB_RPTR_ADDR					0xC10C
+#define	CP_RB_RPTR_ADDR_HI				0xC110
+#define	CP_RB_RPTR_WR					0xC108
+#define	CP_RB_WPTR					0xC114
+#define	CP_RB_WPTR_ADDR					0xC118
+#define	CP_RB_WPTR_ADDR_HI				0xC11C
+#define	CP_RB_WPTR_DELAY				0x8704
+#define	CP_SEM_WAIT_TIMER				0x85BC
+
+#define	DB_DEBUG3					0x98B0
+#define		DB_CLK_OFF_DELAY(x)				((x) << 11)
+#define DB_DEBUG4					0x9B8C
+#define		DISABLE_TILE_COVERED_FOR_PS_ITER		(1 << 6)
+
+#define	DCP_TILING_CONFIG				0x6CA0
+#define		PIPE_TILING(x)					((x) << 1)
+#define 	BANK_TILING(x)					((x) << 4)
+#define		GROUP_SIZE(x)					((x) << 6)
+#define		ROW_TILING(x)					((x) << 8)
+#define		BANK_SWAPS(x)					((x) << 11)
+#define		SAMPLE_SPLIT(x)					((x) << 14)
+#define		BACKEND_MAP(x)					((x) << 16)
+
+#define GB_TILING_CONFIG				0x98F0
+
+#define	GC_USER_SHADER_PIPE_CONFIG			0x8954
+#define		INACTIVE_QD_PIPES(x)				((x) << 8)
+#define		INACTIVE_QD_PIPES_MASK				0x0000FF00
+#define		INACTIVE_SIMDS(x)				((x) << 16)
+#define		INACTIVE_SIMDS_MASK				0x00FF0000
+
+#define	GRBM_CNTL					0x8000
+#define		GRBM_READ_TIMEOUT(x)				((x) << 0)
+#define	GRBM_SOFT_RESET					0x8020
+#define		SOFT_RESET_CP					(1<<0)
+#define	GRBM_STATUS					0x8010
+#define		CMDFIFO_AVAIL_MASK				0x0000000F
+#define		GUI_ACTIVE					(1<<31)
+#define	GRBM_STATUS2					0x8014
+
+#define	HDP_HOST_PATH_CNTL				0x2C00
+#define	HDP_NONSURFACE_BASE				0x2C04
+#define	HDP_NONSURFACE_INFO				0x2C08
+#define	HDP_NONSURFACE_SIZE				0x2C0C
+#define HDP_REG_COHERENCY_FLUSH_CNTL			0x54A0
+#define	HDP_TILING_CONFIG				0x2F3C
+
+#define	MC_ARB_RAMCFG					0x2760
+#define		NOOFBANK_SHIFT					0
+#define		NOOFBANK_MASK					0x00000003
+#define		NOOFRANK_SHIFT					2
+#define		NOOFRANK_MASK					0x00000004
+#define		NOOFROWS_SHIFT					3
+#define		NOOFROWS_MASK					0x00000038
+#define		NOOFCOLS_SHIFT					6
+#define		NOOFCOLS_MASK					0x000000C0
+#define		CHANSIZE_SHIFT					8
+#define		CHANSIZE_MASK					0x00000100
+#define		BURSTLENGTH_SHIFT				9
+#define		BURSTLENGTH_MASK				0x00000200
+#define	MC_VM_AGP_TOP					0x2028
+#define	MC_VM_AGP_BOT					0x202C
+#define	MC_VM_AGP_BASE					0x2030
+#define	MC_VM_FB_LOCATION				0x2024
+#define	MC_VM_MB_L1_TLB0_CNTL				0x2234
+#define	MC_VM_MB_L1_TLB1_CNTL				0x2238
+#define	MC_VM_MB_L1_TLB2_CNTL				0x223C
+#define	MC_VM_MB_L1_TLB3_CNTL				0x2240
+#define		ENABLE_L1_TLB					(1 << 0)
+#define		ENABLE_L1_FRAGMENT_PROCESSING			(1 << 1)
+#define		SYSTEM_ACCESS_MODE_PA_ONLY			(0 << 3)
+#define		SYSTEM_ACCESS_MODE_USE_SYS_MAP			(1 << 3)
+#define		SYSTEM_ACCESS_MODE_IN_SYS			(2 << 3)
+#define		SYSTEM_ACCESS_MODE_NOT_IN_SYS			(3 << 3)
+#define		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU	(0 << 5)
+#define		EFFECTIVE_L1_TLB_SIZE(x)			((x)<<15)
+#define		EFFECTIVE_L1_QUEUE_SIZE(x)			((x)<<18)
+#define	MC_VM_MD_L1_TLB0_CNTL				0x2654
+#define	MC_VM_MD_L1_TLB1_CNTL				0x2658
+#define	MC_VM_MD_L1_TLB2_CNTL				0x265C
+#define	MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR		0x203C
+#define	MC_VM_SYSTEM_APERTURE_HIGH_ADDR			0x2038
+#define	MC_VM_SYSTEM_APERTURE_LOW_ADDR			0x2034
+
+#define	PA_CL_ENHANCE					0x8A14
+#define		CLIP_VTX_REORDER_ENA				(1 << 0)
+#define		NUM_CLIP_SEQ(x)					((x) << 1)
+#define PA_SC_AA_CONFIG					0x28C04
+#define PA_SC_CLIPRECT_RULE				0x2820C
+#define	PA_SC_EDGERULE					0x28230
+#define	PA_SC_FIFO_SIZE					0x8BCC
+#define		SC_PRIM_FIFO_SIZE(x)				((x) << 0)
+#define		SC_HIZ_TILE_FIFO_SIZE(x)			((x) << 12)
+#define	PA_SC_FORCE_EOV_MAX_CNTS			0x8B24
+#define		FORCE_EOV_MAX_CLK_CNT(x)			((x)<<0)
+#define		FORCE_EOV_MAX_REZ_CNT(x)			((x)<<16)
+#define PA_SC_LINE_STIPPLE				0x28A0C
+#define	PA_SC_LINE_STIPPLE_STATE			0x8B10
+#define PA_SC_MODE_CNTL					0x28A4C
+#define	PA_SC_MULTI_CHIP_CNTL				0x8B20
+#define		SC_EARLYZ_TILE_FIFO_SIZE(x)			((x) << 20)
+
+#define	SCRATCH_REG0					0x8500
+#define	SCRATCH_REG1					0x8504
+#define	SCRATCH_REG2					0x8508
+#define	SCRATCH_REG3					0x850C
+#define	SCRATCH_REG4					0x8510
+#define	SCRATCH_REG5					0x8514
+#define	SCRATCH_REG6					0x8518
+#define	SCRATCH_REG7					0x851C
+#define	SCRATCH_UMSK					0x8540
+#define	SCRATCH_ADDR					0x8544
+
+#define	SMX_DC_CTL0					0xA020
+#define		USE_HASH_FUNCTION				(1 << 0)
+#define		CACHE_DEPTH(x)					((x) << 1)
+#define		FLUSH_ALL_ON_EVENT				(1 << 10)
+#define		STALL_ON_EVENT					(1 << 11)
+#define	SMX_EVENT_CTL					0xA02C
+#define		ES_FLUSH_CTL(x)					((x) << 0)
+#define		GS_FLUSH_CTL(x)					((x) << 3)
+#define		ACK_FLUSH_CTL(x)				((x) << 6)
+#define		SYNC_FLUSH_CTL					(1 << 8)
+
+#define	SPI_CONFIG_CNTL					0x9100
+#define		GPR_WRITE_PRIORITY(x)				((x) << 0)
+#define		DISABLE_INTERP_1				(1 << 5)
+#define	SPI_CONFIG_CNTL_1				0x913C
+#define		VTX_DONE_DELAY(x)				((x) << 0)
+#define		INTERP_ONE_PRIM_PER_ROW				(1 << 4)
+#define	SPI_INPUT_Z					0x286D8
+#define	SPI_PS_IN_CONTROL_0				0x286CC
+#define		NUM_INTERP(x)					((x)<<0)
+#define		POSITION_ENA					(1<<8)
+#define		POSITION_CENTROID				(1<<9)
+#define		POSITION_ADDR(x)				((x)<<10)
+#define		PARAM_GEN(x)					((x)<<15)
+#define		PARAM_GEN_ADDR(x)				((x)<<19)
+#define		BARYC_SAMPLE_CNTL(x)				((x)<<26)
+#define		PERSP_GRADIENT_ENA				(1<<28)
+#define		LINEAR_GRADIENT_ENA				(1<<29)
+#define		POSITION_SAMPLE					(1<<30)
+#define		BARYC_AT_SAMPLE_ENA				(1<<31)
+
+#define	SQ_CONFIG					0x8C00
+#define		VC_ENABLE					(1 << 0)
+#define		EXPORT_SRC_C					(1 << 1)
+#define		DX9_CONSTS					(1 << 2)
+#define		ALU_INST_PREFER_VECTOR				(1 << 3)
+#define		DX10_CLAMP					(1 << 4)
+#define		CLAUSE_SEQ_PRIO(x)				((x) << 8)
+#define		PS_PRIO(x)					((x) << 24)	/* the four *_PRIO helpers start at bits 24, 26, 28 and 30 */
+#define		VS_PRIO(x)					((x) << 26)
+#define		GS_PRIO(x)					((x) << 28)
+#define		ES_PRIO(x)					((x) << 30)	/* NOTE(review): a signed int argument > 1 overflows here; pass an unsigned value */
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_0			0x8DB0
+#define		SIMDA_RING0(x)					((x)<<0)
+#define		SIMDA_RING1(x)					((x)<<8)
+#define		SIMDB_RING0(x)					((x)<<16)
+#define		SIMDB_RING1(x)					((x)<<24)
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_1			0x8DB4	/* _AB_0.._AB_7 are eight offsets, 4 bytes apart */
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_2			0x8DB8
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_3			0x8DBC
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_4			0x8DC0
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_5			0x8DC4
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_6			0x8DC8
+#define	SQ_DYN_GPR_SIZE_SIMD_AB_7			0x8DCC
+#define	SQ_GPR_RESOURCE_MGMT_1				0x8C04
+#define		NUM_PS_GPRS(x)					((x) << 0)
+#define		NUM_VS_GPRS(x)					((x) << 16)
+#define		DYN_GPR_ENABLE					(1 << 27)
+#define		NUM_CLAUSE_TEMP_GPRS(x)				((x) << 28)
+#define	SQ_GPR_RESOURCE_MGMT_2				0x8C08
+#define		NUM_GS_GPRS(x)					((x) << 0)
+#define		NUM_ES_GPRS(x)					((x) << 16)
+#define	SQ_MS_FIFO_SIZES				0x8CF0
+#define		CACHE_FIFO_SIZE(x)				((x) << 0)
+#define		FETCH_FIFO_HIWATER(x)				((x) << 8)
+#define		DONE_FIFO_HIWATER(x)				((x) << 16)
+#define		ALU_UPDATE_FIFO_HIWATER(x)			((x) << 24)
+#define	SQ_STACK_RESOURCE_MGMT_1			0x8C10
+#define		NUM_PS_STACK_ENTRIES(x)				((x) << 0)
+#define		NUM_VS_STACK_ENTRIES(x)				((x) << 16)
+#define	SQ_STACK_RESOURCE_MGMT_2			0x8C14
+#define		NUM_GS_STACK_ENTRIES(x)				((x) << 0)
+#define		NUM_ES_STACK_ENTRIES(x)				((x) << 16)
+#define	SQ_THREAD_RESOURCE_MGMT				0x8C0C
+#define		NUM_PS_THREADS(x)				((x) << 0)
+#define		NUM_VS_THREADS(x)				((x) << 8)
+#define		NUM_GS_THREADS(x)				((x) << 16)
+#define		NUM_ES_THREADS(x)				((x) << 24)
+
+#define	SX_DEBUG_1					0x9058
+#define		ENABLE_NEW_SMX_ADDRESS				(1 << 16)
+#define	SX_EXPORT_BUFFER_SIZES				0x900C	/* size helpers below start at bits 0, 8 and 16; x is not masked */
+#define		COLOR_BUFFER_SIZE(x)				((x) << 0)
+#define		POSITION_BUFFER_SIZE(x)				((x) << 8)
+#define		SMX_BUFFER_SIZE(x)				((x) << 16)
+#define	SX_MISC						0x28350	/* offset only; no field macros are defined for it here */
+
+#define	TA_CNTL_AUX					0x9508
+#define		DISABLE_CUBE_WRAP				(1 << 0)
+#define		DISABLE_CUBE_ANISO				(1 << 1)
+#define		SYNC_GRADIENT					(1 << 24)
+#define		SYNC_WALKER					(1 << 25)
+#define		SYNC_ALIGNER					(1 << 26)
+#define		BILINEAR_PRECISION_6_BIT			(0U << 31)	/* bit 31 clear; unsigned to match the 8-bit alternative */
+#define		BILINEAR_PRECISION_8_BIT			(1U << 31)	/* bit 31 set; unsigned because 1 << 31 overflows a 32-bit signed int */
+
+#define	TCP_CNTL					0x9610	/* offset only; no field macros are defined for it here */
+
+#define	VGT_CACHE_INVALIDATION				0x88C4
+#define		CACHE_INVALIDATION(x)				((x) << 0)	/* x: presumably one of the three values listed below */
+#define			VC_ONLY						0
+#define			TC_ONLY						1
+#define			VC_AND_TC					2
+#define		AUTO_INVLD_EN(x)				((x) << 6)	/* x: presumably one of the four values listed below */
+#define			NO_AUTO						0
+#define			ES_AUTO						1
+#define			GS_AUTO						2
+#define			ES_AND_GS_AUTO					3
+#define	VGT_ES_PER_GS					0x88CC
+#define	VGT_GS_PER_ES					0x88C8
+#define	VGT_GS_PER_VS					0x88E8
+#define	VGT_GS_VERTEX_REUSE				0x88D4
+#define	VGT_NUM_INSTANCES				0x8974
+#define	VGT_OUT_DEALLOC_CNTL				0x28C5C
+#define		DEALLOC_DIST_MASK				0x0000007F	/* low 7 bits */
+#define	VGT_STRMOUT_EN					0x28AB0
+#define	VGT_VERTEX_REUSE_BLOCK_CNTL			0x28C58
+#define		VTX_REUSE_DEPTH_MASK				0x000000FF	/* low 8 bits */
+
+#define	VM_CONTEXT0_CNTL				0x1410
+#define		ENABLE_CONTEXT					(1 << 0)
+#define		PAGE_TABLE_DEPTH(x)				(((x) & 3) << 1)	/* x is masked to 2 bits before the shift */
+#define		RANGE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 4)
+#define	VM_CONTEXT0_PAGE_TABLE_BASE_ADDR		0x153C
+#define	VM_CONTEXT0_PAGE_TABLE_END_ADDR			0x157C
+#define	VM_CONTEXT0_PAGE_TABLE_START_ADDR		0x155C
+#define	VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR	0x1518
+#define	VM_L2_CNTL					0x1400
+#define		ENABLE_L2_CACHE					(1 << 0)
+#define		ENABLE_L2_FRAGMENT_PROCESSING			(1 << 1)
+#define		ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE		(1 << 9)
+#define		EFFECTIVE_L2_QUEUE_SIZE(x)			(((x) & 7) << 14)	/* x is masked to 3 bits before the shift */
+#define	VM_L2_CNTL2					0x1404
+#define		INVALIDATE_ALL_L1_TLBS				(1 << 0)
+#define		INVALIDATE_L2_CACHE				(1 << 1)
+#define	VM_L2_CNTL3					0x1408
+#define		BANK_SELECT(x)					((x) << 0)	/* unlike the masked helpers in this group, x is used as-is */
+#define		CACHE_UPDATE_MODE(x)				((x) << 6)
+#define	VM_L2_STATUS					0x140C
+#define		L2_BUSY						(1 << 0)
+
+#define	WAIT_UNTIL					0x8040	/* offset only; no field macros are defined for it here */
+
+#endif