msm: kgsl: Add exceptions to snapshot based on GX and SPTP status

It is possible that during a GPU/GMU hang either the entire GX
is powered off or just the SPTPRAC headswitch is turned off
by the GMU firmware. Therefore, while taking a snapshot, check
that the blocks are powered on before trying to dump them.

In addition, mask all GMU interrupts prior to dumping the
snapshot in order to reduce instability.

CRs-Fixed: 2062271
Change-Id: I7bf7d27bc6ebdc642e5675a4c9645957051273d5
Signed-off-by: Shrenuj Bansal <shrenujb@codeaurora.org>
Signed-off-by: George Shen <sqiao@codeaurora.org>
diff --git a/drivers/gpu/msm/a6xx_reg.h b/drivers/gpu/msm/a6xx_reg.h
index f4552b6..32ebe0c 100644
--- a/drivers/gpu/msm/a6xx_reg.h
+++ b/drivers/gpu/msm/a6xx_reg.h
@@ -875,6 +875,7 @@
 #define A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS	0x23B0C
 #define A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2	0x23B0D
 #define A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK	0x23B0E
+#define A6XX_GMU_AO_AHB_FENCE_CTRL		0x23B10
 #define A6XX_GMU_AHB_FENCE_STATUS		0x23B13
 #define A6XX_GMU_RBBM_INT_UNMASKED_STATUS	0x23B15
 #define A6XX_GMU_AO_SPARE_CNTL			0x23B16
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index da8951b..4900b3a 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -876,6 +876,8 @@
 				unsigned int fsynr1);
 	int (*reset)(struct kgsl_device *, int fault);
 	int (*soft_reset)(struct adreno_device *);
+	bool (*gx_is_on)(struct adreno_device *);
+	bool (*sptprac_is_on)(struct adreno_device *);
 };
 
 /**
diff --git a/drivers/gpu/msm/adreno_a6xx.c b/drivers/gpu/msm/adreno_a6xx.c
index 30ada8f..a513cf8 100644
--- a/drivers/gpu/msm/adreno_a6xx.c
+++ b/drivers/gpu/msm/adreno_a6xx.c
@@ -1188,6 +1188,56 @@
 	return regulator_disable(gmu->gx_gdsc);
 }
 
+#define SPTPRAC_POWER_OFF	BIT(2)	/* tested by a6xx_sptprac_is_on() */
+#define SP_CLK_OFF		BIT(4)	/* tested by a6xx_sptprac_is_on() */
+#define GX_GDSC_POWER_OFF	BIT(6)	/* tested by a6xx_gx_is_on() */
+#define GX_CLK_OFF		BIT(7)	/* tested by a6xx_gx_is_on() */
+
+/*
+ * a6xx_gx_is_on() - Check whether GX is on and not fenced off by the GMU
+ * @adreno_dev: Pointer to adreno_device
+ * Only call this while the keepalive bit is set, or when the GPU power
+ * state is otherwise guaranteed not to change underneath the caller.
+ * Returns true if the GMU is disabled; otherwise true only when the GX
+ * power/clock off bits are clear and the AHB fence control reads zero.
+ */
+static bool a6xx_gx_is_on(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int pwr, fence;
+
+	if (!kgsl_gmu_isenabled(device))
+		return true;
+
+	/* Read both registers up front: pwr status first, then the fence */
+	kgsl_gmu_regread(device, A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &pwr);
+	kgsl_gmu_regread(device, A6XX_GMU_AO_AHB_FENCE_CTRL, &fence);
+
+	/* If GMU is holding on to the fence then we cannot dump any GX stuff */
+	if (fence)
+		return false;
+
+	return (pwr & (GX_GDSC_POWER_OFF | GX_CLK_OFF)) == 0;
+}
+
+/*
+ * a6xx_sptprac_is_on() - Report SPTPRAC state from the pwr status register
+ * @adreno_dev: Pointer to adreno_device
+ * Only call this while the keepalive bit is set, or when the GPU power
+ * state is otherwise guaranteed not to change underneath the caller.
+ */
+static bool a6xx_sptprac_is_on(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	if (!kgsl_gmu_isenabled(device))
+		return true;
+
+	kgsl_gmu_regread(device, A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &status);
+	return (status & (SPTPRAC_POWER_OFF | SP_CLK_OFF)) == 0;
+}
+
 /*
  * a6xx_hm_sptprac_enable() - Turn on HM and SPTPRAC
  * @device: Pointer to KGSL device
@@ -2778,4 +2828,6 @@
 	.preemption_set_marker = a6xx_preemption_set_marker,
 	.preemption_context_init = a6xx_preemption_context_init,
 	.preemption_context_destroy = a6xx_preemption_context_destroy,
+	.gx_is_on = a6xx_gx_is_on,
+	.sptprac_is_on = a6xx_sptprac_is_on,
 };
diff --git a/drivers/gpu/msm/adreno_a6xx_snapshot.c b/drivers/gpu/msm/adreno_a6xx_snapshot.c
index 2161083..ed0129f 100644
--- a/drivers/gpu/msm/adreno_a6xx_snapshot.c
+++ b/drivers/gpu/msm/adreno_a6xx_snapshot.c
@@ -1408,6 +1408,18 @@
 	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
 	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
 	struct adreno_snapshot_data *snap_data = gpudev->snapshot_data;
+	bool sptprac_on;
+
+	/* GMU TCM data dumped through AHB */
+	a6xx_snapshot_gmu(device, snapshot);
+
+	sptprac_on = gpudev->sptprac_is_on(adreno_dev);
+
+	/* Return if the GX is off */
+	if (!gpudev->gx_is_on(adreno_dev)) {
+		pr_err("GX is off. Only dumping GMU data in snapshot\n");
+		return;
+	}
 
 	/* Dump the registers which get affected by crash dumper trigger */
 	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
@@ -1419,7 +1431,8 @@
 		ARRAY_SIZE(a6xx_vbif_snapshot_registers));
 
 	/* Try to run the crash dumper */
-	_a6xx_do_crashdump(device);
+	if (sptprac_on)
+		_a6xx_do_crashdump(device);
 
 	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
 		snapshot, a6xx_snapshot_registers, NULL);
@@ -1451,19 +1464,19 @@
 	/* Mempool debug data */
 	a6xx_snapshot_mempool(device, snapshot);
 
-	/* Shader memory */
-	a6xx_snapshot_shader(device, snapshot);
+	if (sptprac_on) {
+		/* Shader memory */
+		a6xx_snapshot_shader(device, snapshot);
 
-	/* MVC register section */
-	a6xx_snapshot_mvc_regs(device, snapshot);
+		/* MVC register section */
+		a6xx_snapshot_mvc_regs(device, snapshot);
 
-	/* registers dumped through DBG AHB */
-	a6xx_snapshot_dbgahb_regs(device, snapshot);
+		/* registers dumped through DBG AHB */
+		a6xx_snapshot_dbgahb_regs(device, snapshot);
+	}
 
 	a6xx_snapshot_debugbus(device, snapshot);
 
-	/* GMU TCM data dumped through AHB */
-	a6xx_snapshot_gmu(device, snapshot);
 }
 
 static int _a6xx_crashdump_init_mvc(uint64_t *ptr, uint64_t *offset)
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
index e8b1c67..422c434 100644
--- a/drivers/gpu/msm/adreno_dispatch.c
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -2060,11 +2060,25 @@
 	int ret, i;
 	int fault;
 	int halt;
+	bool gx_on = true;
 
 	fault = atomic_xchg(&dispatcher->fault, 0);
 	if (fault == 0)
 		return 0;
 
+	/* Mask all GMU interrupts */
+	if (kgsl_gmu_isenabled(device)) {
+		adreno_write_gmureg(adreno_dev,
+			ADRENO_REG_GMU_AO_HOST_INTERRUPT_MASK,
+			0xFFFFFFFF);
+		adreno_write_gmureg(adreno_dev,
+			ADRENO_REG_GMU_GMU2HOST_INTR_MASK,
+			0xFFFFFFFF);
+	}
+
+	if (gpudev->gx_is_on)
+		gx_on = gpudev->gx_is_on(adreno_dev);
+
 	/*
 	 * In the very unlikely case that the power is off, do nothing - the
 	 * state will be reset on power up and everybody will be happy
@@ -2084,7 +2098,8 @@
 	 * else return early to give the fault handler a chance to run.
 	 */
 	if (!(fault & ADRENO_IOMMU_PAGE_FAULT) &&
-		(adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev))) {
+		(adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev)) &&
+		gx_on) {
 		unsigned int val;
 
 		mutex_lock(&device->mutex);
@@ -2106,14 +2121,15 @@
 
 	mutex_lock(&device->mutex);
 
-	adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE,
-		ADRENO_REG_CP_RB_BASE_HI, &base);
+	if (gx_on)
+		adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE,
+			ADRENO_REG_CP_RB_BASE_HI, &base);
 
 	/*
 	 * Force the CP off for anything but a hard fault to make sure it is
 	 * good and stopped
 	 */
-	if (!(fault & ADRENO_HARD_FAULT)) {
+	if (!(fault & ADRENO_HARD_FAULT) && gx_on) {
 		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, &reg);
 		if (adreno_is_a5xx(adreno_dev) || adreno_is_a6xx(adreno_dev))
 			reg |= 1 | (1 << 1);
@@ -2149,8 +2165,9 @@
 		trace_adreno_cmdbatch_fault(cmdobj, fault);
 	}
 
-	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
-		ADRENO_REG_CP_IB1_BASE_HI, &base);
+	if (gx_on)
+		adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+			ADRENO_REG_CP_IB1_BASE_HI, &base);
 
 	do_header_and_snapshot(device, hung_rb, cmdobj);
 
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
index 92b541d..0840aba 100644
--- a/drivers/gpu/msm/adreno_snapshot.c
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -840,6 +840,15 @@
 	setup_fault_process(device, snapshot,
 			context ? context->proc_priv : NULL);
 
+	/* Add GPU specific sections - registers mainly, but other stuff too */
+	if (gpudev->snapshot)
+		gpudev->snapshot(adreno_dev, snapshot);
+
+	/* Dumping these buffers is useless if the GX is not on */
+	if (gpudev->gx_is_on)
+		if (!gpudev->gx_is_on(adreno_dev))
+			return;
+
 	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
 			ADRENO_REG_CP_IB1_BASE_HI, &snapshot->ib1base);
 	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &snapshot->ib1size);
@@ -862,10 +871,6 @@
 		adreno_snapshot_ringbuffer(device, snapshot,
 			adreno_dev->next_rb);
 
-	/* Add GPU specific sections - registers mainly, but other stuff too */
-	if (gpudev->snapshot)
-		gpudev->snapshot(adreno_dev, snapshot);
-
 	/* Dump selected global buffers */
 	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
 			snapshot, snapshot_global, &device->memstore);