drm/radeon: record what is next valid wptr for each ring v4

Before emitting any indirect buffer, emit the offset of the next
valid ring content if any. This allow code that want to resume
ring to resume ring right after ib that caused GPU lockup.

v2: use scratch registers instead of storing it into memory
v3: skip over the surface sync for ni and si as well
v4: use SET_CONFIG_REG instead of PACKET0

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index c808fa9..3156d25 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2155,18 +2155,27 @@
 void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size)
 {
 	u32 rb_bufsz;
+	int r;
 
 	/* Align ring size */
 	rb_bufsz = drm_order(ring_size / 8);
 	ring_size = (1 << (rb_bufsz + 1)) * 4;
 	ring->ring_size = ring_size;
 	ring->align_mask = 16 - 1;
+
+	r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
+	if (r) {
+		DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
+		ring->rptr_save_reg = 0;
+	}
 }
 
 void r600_cp_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r600_cp_stop(rdev);
-	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 
@@ -2568,7 +2577,14 @@
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
 
-	/* FIXME: implement */
+	if (ring->rptr_save_reg) {
+		uint32_t next_rptr = ring->wptr + 3 + 4;
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, ((ring->rptr_save_reg -
+					 PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+		radeon_ring_write(ring, next_rptr);
+	}
+
 	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN