drm/radeon/kms: fence cleanup + more reliable GPU lockup detection V4

This patch cleanup the fence code, it drops the timeout field of
fence as the time to complete each IB is unpredictable and shouldn't
be bound.

The fence cleanup lead to GPU lockup detection improvement, this
patch introduce a callback, allowing to do asic specific test for
lockup detection. In this patch the CP is use as a first indicator
of GPU lockup. If CP doesn't make progress during 1second we assume
we are facing a GPU lockup.

To avoid overhead of testing GPU lockup frequently due to fence
taking time to be signaled we query the lockup callback every
500msec. There is plenty code comment explaining the design & choise
inside the code.

This have been tested mostly on R3XX/R5XX hw, in normal running
destkop (compiz firefox, quake3 running) the lockup callback wasn't
call once (1 hour session). Also tested with forcing GPU lockup and
lockup was reported after the 1s CP activity timeout.

V2 switch to 500ms timeout so GPU lockup get call at least 2 times
   in less than 2sec.
V3 store last jiffies in fence struct so on ERESTART, EBUSY we keep
   track of how long we already wait for a given fence
V4 make sure we got up to date cp read pointer so we don't have
   false positive

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 5509354..a09c062 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -784,7 +784,7 @@
 		dev_info(rdev->dev, "  R_008020_GRBM_SOFT_RESET=0x%08X\n", tmp);
 		WREG32(R_008020_GRBM_SOFT_RESET, tmp);
 		(void)RREG32(R_008020_GRBM_SOFT_RESET);
-		udelay(50);
+		mdelay(1);
 		WREG32(R_008020_GRBM_SOFT_RESET, 0);
 		(void)RREG32(R_008020_GRBM_SOFT_RESET);
 	}
@@ -824,16 +824,16 @@
 	dev_info(rdev->dev, "  R_000E60_SRBM_SOFT_RESET=0x%08X\n", srbm_reset);
 	WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset);
 	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
-	udelay(50);
+	mdelay(1);
 	WREG32(R_000E60_SRBM_SOFT_RESET, 0);
 	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
 	WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset);
 	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
-	udelay(50);
+	mdelay(1);
 	WREG32(R_000E60_SRBM_SOFT_RESET, 0);
 	(void)RREG32(R_000E60_SRBM_SOFT_RESET);
 	/* Wait a little for things to settle down */
-	udelay(50);
+	mdelay(1);
 	dev_info(rdev->dev, "  R_008010_GRBM_STATUS=0x%08X\n",
 		RREG32(R_008010_GRBM_STATUS));
 	dev_info(rdev->dev, "  R_008014_GRBM_STATUS2=0x%08X\n",
@@ -848,6 +848,32 @@
 	return 0;
 }
 
+bool r600_gpu_is_lockup(struct radeon_device *rdev)
+{
+	u32 srbm_status;
+	u32 grbm_status;
+	u32 grbm_status2;
+	int r;
+
+	srbm_status = RREG32(R_000E50_SRBM_STATUS);
+	grbm_status = RREG32(R_008010_GRBM_STATUS);
+	grbm_status2 = RREG32(R_008014_GRBM_STATUS2);
+	if (!G_008010_GUI_ACTIVE(grbm_status)) {
+		r100_gpu_lockup_update(&rdev->config.r300.lockup, &rdev->cp);
+		return false;
+	}
+	/* force CP activities */
+	r = radeon_ring_lock(rdev, 2);
+	if (!r) {
+		/* PACKET2 NOP */
+		radeon_ring_write(rdev, 0x80000000);
+		radeon_ring_write(rdev, 0x80000000);
+		radeon_ring_unlock_commit(rdev);
+	}
+	rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
+	return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, &rdev->cp);
+}
+
 int r600_gpu_reset(struct radeon_device *rdev)
 {
 	return r600_gpu_soft_reset(rdev);