drm: Use register writes instead of BITBLT_MULTI packets for buffer swap blits

This takes up two more ring buffer entries per rectangle blitted but makes sure
the blit is performed top to bottom, reducing the likelyhood of tearing.

Signed-off-by: Dave Airlie <airlied@linux.ie>
diff --git a/drivers/char/drm/radeon_drv.h b/drivers/char/drm/radeon_drv.h
index a3190fc..f45cd7f 100644
--- a/drivers/char/drm/radeon_drv.h
+++ b/drivers/char/drm/radeon_drv.h
@@ -425,6 +425,8 @@
 #define RADEON_RB3D_COLOROFFSET		0x1c40
 #define RADEON_RB3D_COLORPITCH		0x1c48
 
+#define	RADEON_SRC_X_Y			0x1590
+
 #define RADEON_DP_GUI_MASTER_CNTL	0x146c
 #	define RADEON_GMC_SRC_PITCH_OFFSET_CNTL	(1 << 0)
 #	define RADEON_GMC_DST_PITCH_OFFSET_CNTL	(1 << 1)
@@ -442,6 +444,7 @@
 #	define RADEON_ROP3_S			0x00cc0000
 #	define RADEON_ROP3_P			0x00f00000
 #define RADEON_DP_WRITE_MASK		0x16cc
+#define RADEON_SRC_PITCH_OFFSET		0x1428
 #define RADEON_DST_PITCH_OFFSET		0x142c
 #define RADEON_DST_PITCH_OFFSET_C	0x1c80
 #	define RADEON_DST_TILE_LINEAR		(0 << 30)
diff --git a/drivers/char/drm/radeon_state.c b/drivers/char/drm/radeon_state.c
index bb4b2e1..feac5f0 100644
--- a/drivers/char/drm/radeon_state.c
+++ b/drivers/char/drm/radeon_state.c
@@ -1269,9 +1269,9 @@
 
 		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
 
-		BEGIN_RING(7);
+		BEGIN_RING(9);
 
-		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
+		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
 		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 			 RADEON_GMC_BRUSH_NONE |
@@ -1283,6 +1283,7 @@
 
 		/* Make this work even if front & back are flipped:
 		 */
+		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
 		if (dev_priv->current_page == 0) {
 			OUT_RING(dev_priv->back_pitch_offset);
 			OUT_RING(dev_priv->front_pitch_offset);
@@ -1291,6 +1292,7 @@
 			OUT_RING(dev_priv->back_pitch_offset);
 		}
 
+		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
 		OUT_RING((x << 16) | y);
 		OUT_RING((x << 16) | y);
 		OUT_RING((w << 16) | h);