msm: kgsl: Do not crash on context detach timeout failure

If context wait for global timeouts in detach path there is
no use to process its pending commands. Hence to handle context
detach timeout failure, invalidate the context and remove all
the pending commands from that context. So that other context
commands proceed successfully after the context detach timeout
fault recovery.

Change-Id: Ie4ff0ed5d08312d345b248a2404ce085552b0b09
Signed-off-by: Hareesh Gundu <hareeshg@codeaurora.org>
Signed-off-by: Lynus Vaz <lvaz@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 8349a9f..670c2a1 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -208,6 +208,7 @@
 #define ADRENO_IOMMU_PAGE_FAULT BIT(3)
 #define ADRENO_PREEMPT_FAULT BIT(4)
 #define ADRENO_GMU_FAULT BIT(5)
+#define ADRENO_CTX_DETATCH_TIMEOUT_FAULT BIT(6)
 
 #define ADRENO_SPTP_PC_CTRL 0
 #define ADRENO_PPD_CTRL     1
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
index 6bdc486..6d2e8457 100644
--- a/drivers/gpu/msm/adreno_dispatch.c
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -1836,7 +1836,8 @@
 	 * because we won't see this cmdobj again
 	 */
 
-	if (fault & ADRENO_TIMEOUT_FAULT)
+	if ((fault & ADRENO_TIMEOUT_FAULT) ||
+				(fault & ADRENO_CTX_DETATCH_TIMEOUT_FAULT))
 		bitmap_zero(&cmdobj->fault_policy, BITS_PER_LONG);
 
 	/*
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index 2615d44..d984c6d 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -304,6 +304,7 @@
 	/* Give the bad news to everybody waiting around */
 	wake_up_all(&drawctxt->waiting);
 	wake_up_all(&drawctxt->wq);
+	wake_up_all(&drawctxt->timeout);
 }
 
 /*
@@ -398,6 +399,7 @@
 	spin_lock_init(&drawctxt->lock);
 	init_waitqueue_head(&drawctxt->wq);
 	init_waitqueue_head(&drawctxt->waiting);
+	init_waitqueue_head(&drawctxt->timeout);
 
 	/* Set the context priority */
 	_set_context_priority(drawctxt);
@@ -520,21 +522,32 @@
 		drawctxt->internal_timestamp, 30 * 1000);
 
 	/*
-	 * If the wait for global fails due to timeout then nothing after this
-	 * point is likely to work very well - Get GPU snapshot and BUG_ON()
-	 * so we can take advantage of the debug tools to figure out what the
-	 * h - e - double hockey sticks happened. If EAGAIN error is returned
+	 * If the wait for global fails due to timeout then mark it as
+	 * context detach timeout fault and schedule dispatcher to kick
+	 * in GPU recovery. For a ADRENO_CTX_DETATCH_TIMEOUT_FAULT we clear
+	 * the policy and invalidate the context. If EAGAIN error is returned
 	 * then recovery will kick in and there will be no more commands in the
-	 * RB pipe from this context which is waht we are waiting for, so ignore
-	 * -EAGAIN error
+	 * RB pipe from this context which is what we are waiting for, so ignore
+	 * -EAGAIN error.
 	 */
 	if (ret && ret != -EAGAIN) {
-		KGSL_DRV_ERR(device, "Wait for global ts=%d type=%d error=%d\n",
-				drawctxt->internal_timestamp,
+		KGSL_DRV_ERR(device,
+				"Wait for global ctx=%d ts=%d type=%d error=%d\n",
+				drawctxt->base.id, drawctxt->internal_timestamp,
 				drawctxt->type, ret);
-		device->force_panic = 1;
-		kgsl_device_snapshot(device, context,
-				adreno_gmu_gpu_fault(adreno_dev));
+
+		adreno_set_gpu_fault(adreno_dev,
+				ADRENO_CTX_DETATCH_TIMEOUT_FAULT);
+		mutex_unlock(&device->mutex);
+
+		/* Schedule dispatcher to kick in recovery */
+		adreno_dispatcher_schedule(device);
+
+		/* Wait for context to be invalidated and release context */
+		wait_event_interruptible_timeout(drawctxt->timeout,
+					kgsl_context_invalid(&drawctxt->base),
+					msecs_to_jiffies(5000));
+		return;
 	}
 
 	kgsl_sharedmem_writel(device, &device->memstore,
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index aeacf09..eef506f 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -40,6 +40,7 @@
  * @pending: Priority list node for the dispatcher list of pending contexts
  * @wq: Workqueue structure for contexts to sleep pending room in the queue
  * @waiting: Workqueue structure for contexts waiting for a timestamp or event
+ * @timeout: Workqueue structure for contexts waiting to invalidate
  * @queued: Number of commands queued in the drawqueue
  * @fault_policy: GFT fault policy set in _skip_cmd();
  * @debug_root: debugfs entry for this context.
@@ -68,6 +69,7 @@
 	struct plist_node pending;
 	wait_queue_head_t wq;
 	wait_queue_head_t waiting;
+	wait_queue_head_t timeout;
 
 	int queued;
 	unsigned int fault_policy;