Merge "msm: kgsl: Update GPU HARD RESET processing" into msm-4.9
diff --git a/drivers/gpu/msm/a6xx_reg.h b/drivers/gpu/msm/a6xx_reg.h
index 04c8408..e982afe 100644
--- a/drivers/gpu/msm/a6xx_reg.h
+++ b/drivers/gpu/msm/a6xx_reg.h
@@ -783,6 +783,7 @@
 #define A6XX_GMU_GX_SPTPRAC_POWER_CONTROL	0x1A881
 #define A6XX_GMU_CM3_ITCM_START			0x1B400
 #define A6XX_GMU_CM3_DTCM_START			0x1C400
+#define A6XX_GMU_NMI_CONTROL_STATUS		0x1CBF0
 #define A6XX_GMU_BOOT_SLUMBER_OPTION		0x1CBF8
 #define A6XX_GMU_GX_VOTE_IDX			0x1CBF9
 #define A6XX_GMU_MX_VOTE_IDX			0x1CBFA
@@ -794,6 +795,7 @@
 #define A6XX_GMU_CM3_BOOT_CONFIG		0x1F801
 #define A6XX_GMU_CM3_FW_BUSY			0x1F81A
 #define A6XX_GMU_CM3_FW_INIT_RESULT		0x1F81C
+#define A6XX_GMU_CM3_CFG			0x1F82D
 #define A6XX_GMU_PWR_COL_INTER_FRAME_CTRL	0x1F8C0
 #define A6XX_GMU_PWR_COL_INTER_FRAME_HYST	0x1F8C1
 #define A6XX_GMU_PWR_COL_SPTPRAC_HYST		0x1F8C2
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index ec9d863..627b351 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -56,9 +56,6 @@
 #define DRIVER_VERSION_MAJOR   3
 #define DRIVER_VERSION_MINOR   1
 
-/* Number of times to try hard reset */
-#define NUM_TIMES_RESET_RETRY 5
-
 #define KGSL_LOG_LEVEL_DEFAULT 3
 
 static void adreno_input_work(struct work_struct *work);
@@ -514,8 +511,6 @@
 	.id_table = adreno_input_ids,
 };
 
-static int adreno_soft_reset(struct kgsl_device *device);
-
 /*
  * _soft_reset() - Soft reset GPU
  * @adreno_dev: Pointer to adreno device
@@ -1625,7 +1620,7 @@
  * Power up the GPU and initialize it.  If priority is specified then elevate
  * the thread priority for the duration of the start operation
  */
-static int adreno_start(struct kgsl_device *device, int priority)
+int adreno_start(struct kgsl_device *device, int priority)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	int nice = task_nice(current);
@@ -2310,7 +2305,7 @@
  * The GPU hardware is reset but we never pull power so we can skip
  * a lot of the standard adreno_stop/adreno_start sequence
  */
-static int adreno_soft_reset(struct kgsl_device *device)
+int adreno_soft_reset(struct kgsl_device *device)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index f35f645..91f03d0 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -159,10 +159,12 @@
 #define KGSL_END_OF_PROFILE_IDENTIFIER	0x2DEFADE2
 #define KGSL_PWRON_FIXUP_IDENTIFIER	0x2AFAFAFA
 
+/* Number of times to try hard reset */
+#define NUM_TIMES_RESET_RETRY 5
+
 /* One cannot wait forever for the core to idle, so set an upper limit to the
  * amount of time to wait for the core to go idle
  */
-
 #define ADRENO_IDLE_TIMEOUT (20 * 1000)
 
 #define ADRENO_UCHE_GMEM_BASE	0x100000
@@ -204,6 +206,7 @@
 #define ADRENO_TIMEOUT_FAULT BIT(2)
 #define ADRENO_IOMMU_PAGE_FAULT BIT(3)
 #define ADRENO_PREEMPT_FAULT BIT(4)
+#define ADRENO_GMU_FAULT BIT(5)
 
 #define ADRENO_SPTP_PC_CTRL 0
 #define ADRENO_PPD_CTRL     1
@@ -866,6 +869,7 @@
 	int (*wait_for_gmu_idle)(struct adreno_device *);
 	const char *(*iommu_fault_block)(struct adreno_device *adreno_dev,
 				unsigned int fsynr1);
+	int (*reset)(struct kgsl_device *, int fault);
 	int (*soft_reset)(struct adreno_device *);
 };
 
@@ -955,6 +959,8 @@
 extern int adreno_wake_nice;
 extern unsigned int adreno_wake_timeout;
 
+int adreno_start(struct kgsl_device *device, int priority);
+int adreno_soft_reset(struct kgsl_device *device);
 long adreno_ioctl(struct kgsl_device_private *dev_priv,
 		unsigned int cmd, unsigned long arg);
 
diff --git a/drivers/gpu/msm/adreno_a6xx.c b/drivers/gpu/msm/adreno_a6xx.c
index 2631ec1..6e025c8 100644
--- a/drivers/gpu/msm/adreno_a6xx.c
+++ b/drivers/gpu/msm/adreno_a6xx.c
@@ -1562,7 +1562,8 @@
 	 * For the soft reset case with GMU enabled this part is done
 	 * by the GMU firmware
 	 */
-	if (kgsl_gmu_isenabled(device))
+	if (kgsl_gmu_isenabled(device) &&
+		!test_bit(ADRENO_DEVICE_HARD_RESET, &adreno_dev->priv))
 		return 0;
 
 
@@ -1750,6 +1751,89 @@
 	return ret;
 }
 
+/**
+ * a6xx_reset() - Helper function to reset the GPU
+ * @device: Pointer to the KGSL device structure for the GPU
+ * @fault: Type of fault. Needed to skip soft reset for MMU fault
+ *
+ * Try to reset the GPU to recover from a fault.  First, try to do a low latency
+ * soft reset.  If the soft reset fails for some reason, then bring out the big
+ * guns and toggle the footswitch.
+ */
+static int a6xx_reset(struct kgsl_device *device, int fault)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret = -EINVAL;
+	int i = 0;
+
+	/* Use the regular reset sequence for No GMU */
+	if (!kgsl_gmu_isenabled(device))
+		return adreno_reset(device, fault);
+
+	/* Transition from ACTIVE to RESET state */
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_RESET);
+
+	/* Try soft reset first */
+	if (!(fault & ADRENO_IOMMU_PAGE_FAULT)) {
+		int acked;
+
+		/* NMI */
+		kgsl_gmu_regwrite(device, A6XX_GMU_NMI_CONTROL_STATUS, 0);
+		kgsl_gmu_regwrite(device, A6XX_GMU_CM3_CFG, (1 << 9));
+
+		for (i = 0; i < 10; i++) {
+			kgsl_gmu_regread(device,
+					A6XX_GMU_NMI_CONTROL_STATUS, &acked);
+
+			/* NMI FW ACK recevied */
+			if (acked == 0x1)
+				break;
+
+			udelay(100);
+		}
+
+		if (acked)
+			ret = adreno_soft_reset(device);
+		if (ret)
+			KGSL_DEV_ERR_ONCE(device, "Device soft reset failed\n");
+	}
+	if (ret) {
+		/* If soft reset failed/skipped, then pull the power */
+		set_bit(ADRENO_DEVICE_HARD_RESET, &adreno_dev->priv);
+		/* since device is officially off now clear start bit */
+		clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+		/* Keep trying to start the device until it works */
+		for (i = 0; i < NUM_TIMES_RESET_RETRY; i++) {
+			ret = adreno_start(device, 0);
+			if (!ret)
+				break;
+
+			msleep(20);
+		}
+	}
+
+	clear_bit(ADRENO_DEVICE_HARD_RESET, &adreno_dev->priv);
+
+	if (ret)
+		return ret;
+
+	if (i != 0)
+		KGSL_DRV_WARN(device, "Device hard reset tried %d tries\n", i);
+
+	/*
+	 * If active_cnt is non-zero then the system was active before
+	 * going into a reset - put it back in that state
+	 */
+
+	if (atomic_read(&device->active_cnt))
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+	else
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_NAP);
+
+	return ret;
+}
+
 static void a6xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit)
 {
 	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
@@ -2500,5 +2584,6 @@
 	.hw_isidle = a6xx_hw_isidle, /* Replaced by NULL if GMU is disabled */
 	.wait_for_gmu_idle = a6xx_wait_for_gmu_idle,
 	.iommu_fault_block = a6xx_iommu_fault_block,
+	.reset = a6xx_reset,
 	.soft_reset = a6xx_soft_reset,
 };
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
index b831d0d..d01a5e9 100644
--- a/drivers/gpu/msm/adreno_dispatch.c
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -2183,7 +2183,11 @@
 		kgsl_process_event_group(device, &hung_rb->events);
 	}
 
-	ret = adreno_reset(device, fault);
+	if (gpudev->reset)
+		ret = gpudev->reset(device, fault);
+	else
+		ret = adreno_reset(device, fault);
+
 	mutex_unlock(&device->mutex);
 	/* if any other fault got in until reset then ignore */
 	atomic_set(&dispatcher->fault, 0);
diff --git a/drivers/gpu/msm/kgsl_gmu.c b/drivers/gpu/msm/kgsl_gmu.c
index 3d9e1be..7354e82 100644
--- a/drivers/gpu/msm/kgsl_gmu.c
+++ b/drivers/gpu/msm/kgsl_gmu.c
@@ -748,6 +748,7 @@
 {
 	struct gmu_device *gmu = data;
 	struct kgsl_device *device = container_of(gmu, struct kgsl_device, gmu);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	unsigned int status = 0;
 
 	adreno_read_gmureg(ADRENO_DEVICE(device),
@@ -756,9 +757,12 @@
 			ADRENO_REG_GMU_AO_HOST_INTERRUPT_CLR, status);
 
 	/* Ignore GMU_INT_RSCC_COMP and GMU_INT_DBD WAKEUP interrupts */
-	if (status & GMU_INT_WDOG_BITE)
+	if (status & GMU_INT_WDOG_BITE) {
 		dev_err_ratelimited(&gmu->pdev->dev,
 				"GMU watchdog expired interrupt received\n");
+		adreno_set_gpu_fault(adreno_dev, ADRENO_GMU_FAULT);
+		adreno_dispatcher_schedule(device);
+	}
 	if (status & GMU_INT_HOST_AHB_BUS_ERR)
 		dev_err_ratelimited(&gmu->pdev->dev,
 				"AHB bus error interrupt received\n");
@@ -775,6 +779,7 @@
 	struct kgsl_hfi *hfi = data;
 	struct gmu_device *gmu = container_of(hfi, struct gmu_device, hfi);
 	struct kgsl_device *device = container_of(gmu, struct kgsl_device, gmu);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	unsigned int status = 0;
 
 	adreno_read_gmureg(ADRENO_DEVICE(device),
@@ -784,9 +789,12 @@
 
 	if (status & HFI_IRQ_MSGQ_MASK)
 		tasklet_hi_schedule(&hfi->tasklet);
-	if (status & HFI_IRQ_CM3_FAULT_MASK)
+	if (status & HFI_IRQ_CM3_FAULT_MASK) {
 		dev_err_ratelimited(&gmu->pdev->dev,
 				"GMU CM3 fault interrupt received\n");
+		adreno_set_gpu_fault(adreno_dev, ADRENO_GMU_FAULT);
+		adreno_dispatcher_schedule(device);
+	}
 	if (status & ~HFI_IRQ_MASK)
 		dev_err_ratelimited(&gmu->pdev->dev,
 				"Unhandled HFI interrupts 0x%lx\n",