drm/radeon/kms/r600: reduce gpu cache flushing

Only flush the gpu caches before we submit a fence.
This leads to a small performance boost when we take
the extra gpu cache flushes out of the ddx and mesa.
Once this patch is in and the drm version is bumped,
we can remove the flushes from the ddx and drm.

Also, remove the extra cache flushes from the blit
routine.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 9661a46..7c32a23 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1783,6 +1783,13 @@
 			  struct radeon_fence *fence)
 {
 	/* Also consider EVENT_WRITE_EOP.  it handles the interrupts + timestamps + events */
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+	/* wait for 3D idle clean */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
 	/* Emit fence sequence & fire IRQ */
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 	radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index af1c3ca..2d7d16e 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -403,8 +403,6 @@
 	radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
 	radeon_ring_write(rdev, dwords);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
-	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
 	/* SQ config */
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
 	radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
@@ -581,9 +579,9 @@
 	ring_size = num_loops * dwords_per_loop;
 	/* set default  + shaders */
 	ring_size += 40; /* shaders + def state */
-	ring_size += 7; /* fence emit for VB IB */
+	ring_size += 12; /* fence emit for VB IB */
 	ring_size += 5; /* done copy */
-	ring_size += 7; /* fence emit for done copy */
+	ring_size += 12; /* fence emit for done copy */
 	r = radeon_ring_lock(rdev, ring_size);
 	if (r)
 		return r;
@@ -597,13 +595,6 @@
 {
 	int r;
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
-	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
-	/* wait for 3D idle clean */
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
-
 	if (rdev->r600_blit.vb_ib)
 		r600_vb_ib_put(rdev);
 
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c
index d745e81..a112c59 100644
--- a/drivers/gpu/drm/radeon/r600_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.c
@@ -9,11 +9,6 @@
 	0xc0012800,
 	0x80000000,
 	0x80000000,
-	0xc0004600,
-	0x00000016,
-	0xc0016800,
-	0x00000010,
-	0x00028000,
 	0xc0016800,
 	0x00000010,
 	0x00008000,
@@ -531,11 +526,6 @@
 	0xc0012800,
 	0x80000000,
 	0x80000000,
-	0xc0004600,
-	0x00000016,
-	0xc0016800,
-	0x00000010,
-	0x00028000,
 	0xc0016800,
 	0x00000010,
 	0x00008000,