drm/radeon: Add asic hook for dma copy to r200 cards.

r200 cards have dma engine which can be used to tranfer data
between vram and system memory.

r300 dma engine registers match r200 dma engine. Enabling
dma copy for r200 is simple as hooking r200 asic to already
existing function r300_copy_dma.

Rename r300_dma_copy to r200_dma_copyto reflect that supports
starts from r200 cards.

v2: Created a new asic object for r200 cards.

Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index ff1e0cd..1146c99 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -31,6 +31,7 @@
 #include "radeon_reg.h"
 #include "radeon.h"
 
+#include "r100d.h"
 #include "r200_reg_safe.h"
 
 #include "r100_track.h"
@@ -79,6 +80,51 @@
 	return vtx_size;
 }
 
+int r200_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset,
+		  uint64_t dst_offset,
+		  unsigned num_pages,
+		  struct radeon_fence *fence)
+{
+	uint32_t size;
+	uint32_t cur_size;
+	int i, num_loops;
+	int r = 0;
+
+	/* radeon pitch is /64 */
+	size = num_pages << PAGE_SHIFT;
+	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
+	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+	/* Must wait for 2D idle & clean before DMA or hangs might happen */
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, (1 << 16));
+	for (i = 0; i < num_loops; i++) {
+		cur_size = size;
+		if (cur_size > 0x1FFFFF) {
+			cur_size = 0x1FFFFF;
+		}
+		size -= cur_size;
+		radeon_ring_write(rdev, PACKET0(0x720, 2));
+		radeon_ring_write(rdev, src_offset);
+		radeon_ring_write(rdev, dst_offset);
+		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
+		src_offset += cur_size;
+		dst_offset += cur_size;
+	}
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
+	if (fence) {
+		r = radeon_fence_emit(rdev, fence);
+	}
+	radeon_ring_unlock_commit(rdev);
+	return r;
+}
+
+
 static int r200_get_vtx_size_1(uint32_t vtx_fmt_1)
 {
 	int vtx_size, i, tex_size;
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 654aca1..a4f3952 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -201,50 +201,6 @@
 	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
 }
 
-int r300_copy_dma(struct radeon_device *rdev,
-		  uint64_t src_offset,
-		  uint64_t dst_offset,
-		  unsigned num_pages,
-		  struct radeon_fence *fence)
-{
-	uint32_t size;
-	uint32_t cur_size;
-	int i, num_loops;
-	int r = 0;
-
-	/* radeon pitch is /64 */
-	size = num_pages << PAGE_SHIFT;
-	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
-	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
-	}
-	/* Must wait for 2D idle & clean before DMA or hangs might happen */
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0 ));
-	radeon_ring_write(rdev, RADEON_WAIT_2D_IDLECLEAN);
-	for (i = 0; i < num_loops; i++) {
-		cur_size = size;
-		if (cur_size > 0x1FFFFF) {
-			cur_size = 0x1FFFFF;
-		}
-		size -= cur_size;
-		radeon_ring_write(rdev, PACKET0(0x720, 2));
-		radeon_ring_write(rdev, src_offset);
-		radeon_ring_write(rdev, dst_offset);
-		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
-		src_offset += cur_size;
-		dst_offset += cur_size;
-	}
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
-	if (fence) {
-		r = radeon_fence_emit(rdev, fence);
-	}
-	radeon_ring_unlock_commit(rdev);
-	return r;
-}
-
 void r300_ring_start(struct radeon_device *rdev)
 {
 	unsigned gb_tile_config;
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 9d2a113..b7030d7 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -43,7 +43,7 @@
 void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
 
 /*
- * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
+ * r100,rv100,rs100,rv200,rs200
  */
 extern int r100_init(struct radeon_device *rdev);
 extern void r100_fini(struct radeon_device *rdev);
@@ -121,6 +121,51 @@
 	.ioctl_wait_idle = NULL,
 };
 
+/*
+ * r200,rv250,rs300,rv280
+ */
+extern int r200_copy_dma(struct radeon_device *rdev,
+			uint64_t src_offset,
+			uint64_t dst_offset,
+			unsigned num_pages,
+			struct radeon_fence *fence);
+static struct radeon_asic r200_asic = {
+	.init = &r100_init,
+	.fini = &r100_fini,
+	.suspend = &r100_suspend,
+	.resume = &r100_resume,
+	.vga_set_state = &r100_vga_set_state,
+	.gpu_reset = &r100_gpu_reset,
+	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
+	.gart_set_page = &r100_pci_gart_set_page,
+	.cp_commit = &r100_cp_commit,
+	.ring_start = &r100_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.irq_set = &r100_irq_set,
+	.irq_process = &r100_irq_process,
+	.get_vblank_counter = &r100_get_vblank_counter,
+	.fence_ring_emit = &r100_fence_ring_emit,
+	.cs_parse = &r100_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r200_copy_dma,
+	.copy = &r100_copy_blit,
+	.get_engine_clock = &radeon_legacy_get_engine_clock,
+	.set_engine_clock = &radeon_legacy_set_engine_clock,
+	.get_memory_clock = &radeon_legacy_get_memory_clock,
+	.set_memory_clock = NULL,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_legacy_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &r100_bandwidth_update,
+	.hpd_init = &r100_hpd_init,
+	.hpd_fini = &r100_hpd_fini,
+	.hpd_sense = &r100_hpd_sense,
+	.hpd_set_polarity = &r100_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
+};
+
 
 /*
  * r300,r350,rv350,rv380
@@ -140,11 +185,7 @@
 extern void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
 extern int rv370_get_pcie_lanes(struct radeon_device *rdev);
-extern int r300_copy_dma(struct radeon_device *rdev,
-			uint64_t src_offset,
-			uint64_t dst_offset,
-			unsigned num_pages,
-			struct radeon_fence *fence);
+
 static struct radeon_asic r300_asic = {
 	.init = &r300_init,
 	.fini = &r300_fini,
@@ -164,7 +205,7 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_legacy_get_engine_clock,
 	.set_engine_clock = &radeon_legacy_set_engine_clock,
@@ -247,7 +288,7 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
@@ -297,7 +338,7 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_legacy_get_engine_clock,
 	.set_engine_clock = &radeon_legacy_set_engine_clock,
@@ -357,7 +398,7 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
@@ -404,8 +445,8 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
-	.copy = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
+	.copy = &r200_copy_dma,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -457,7 +498,7 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
@@ -501,7 +542,7 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 7a30f69..7be3a69 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -340,11 +340,13 @@
 	case CHIP_RS100:
 	case CHIP_RV200:
 	case CHIP_RS200:
+		rdev->asic = &r100_asic;
+		break;
 	case CHIP_R200:
 	case CHIP_RV250:
 	case CHIP_RS300:
 	case CHIP_RV280:
-		rdev->asic = &r100_asic;
+		rdev->asic = &r200_asic;
 		break;
 	case CHIP_R300:
 	case CHIP_R350: