msm: kgsl: Only snapshot GMU on GMU boot failure

GMU boot failure is a special case so treat it as one.
Since most of adreno is not yet up and running there is no
point in trying to snapshot it all.
Dump as much GMU information as is possible and then return.

Change-Id: I8ad9b8406a7518d2c7fe76292843b4cda8e89ecd
Signed-off-by: Carter Cooper <ccooper@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 780cefe..506f6ad 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -3091,6 +3091,7 @@
 	.irq_handler = adreno_irq_handler,
 	.drain = adreno_drain,
 	/* Optional functions */
+	.snapshot_gmu = adreno_snapshot_gmu,
 	.drawctxt_create = adreno_drawctxt_create,
 	.drawctxt_detach = adreno_drawctxt_detach,
 	.drawctxt_destroy = adreno_drawctxt_destroy,
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 49dc5ed..d7d2cf9 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -832,6 +832,7 @@
 	/* GPU specific function hooks */
 	void (*irq_trace)(struct adreno_device *, unsigned int status);
 	void (*snapshot)(struct adreno_device *, struct kgsl_snapshot *);
+	void (*snapshot_gmu)(struct adreno_device *, struct kgsl_snapshot *);
 	void (*platform_setup)(struct adreno_device *);
 	void (*init)(struct adreno_device *);
 	void (*remove)(struct adreno_device *);
@@ -1006,6 +1007,9 @@
 		struct kgsl_snapshot *snapshot,
 		struct kgsl_context *context);
 
+void adreno_snapshot_gmu(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot);
+
 int adreno_reset(struct kgsl_device *device, int fault);
 
 void adreno_fault_skipcmd_detached(struct adreno_device *adreno_dev,
diff --git a/drivers/gpu/msm/adreno_a6xx.c b/drivers/gpu/msm/adreno_a6xx.c
index b9df4ec..88c15eb 100644
--- a/drivers/gpu/msm/adreno_a6xx.c
+++ b/drivers/gpu/msm/adreno_a6xx.c
@@ -2915,6 +2915,7 @@
 	.reg_offsets = &a6xx_reg_offsets,
 	.start = a6xx_start,
 	.snapshot = a6xx_snapshot,
+	.snapshot_gmu = a6xx_snapshot_gmu,
 	.irq = &a6xx_irq,
 	.snapshot_data = &a6xx_snapshot_data,
 	.irq_trace = trace_kgsl_a5xx_irq_status,
diff --git a/drivers/gpu/msm/adreno_a6xx.h b/drivers/gpu/msm/adreno_a6xx.h
index ee2fd71..dd8af80 100644
--- a/drivers/gpu/msm/adreno_a6xx.h
+++ b/drivers/gpu/msm/adreno_a6xx.h
@@ -110,6 +110,8 @@
 
 void a6xx_snapshot(struct adreno_device *adreno_dev,
 		struct kgsl_snapshot *snapshot);
+void a6xx_snapshot_gmu(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
 
 void a6xx_crashdump_init(struct adreno_device *adreno_dev);
 #endif
diff --git a/drivers/gpu/msm/adreno_a6xx_snapshot.c b/drivers/gpu/msm/adreno_a6xx_snapshot.c
index 80c9a61..f63e824d 100644
--- a/drivers/gpu/msm/adreno_a6xx_snapshot.c
+++ b/drivers/gpu/msm/adreno_a6xx_snapshot.c
@@ -1471,10 +1471,18 @@
 	}
 }
 
-static void a6xx_snapshot_gmu(struct kgsl_device *device,
+/*
+ * a6xx_snapshot_gmu() - A6XX GMU snapshot function
+ * @adreno_dev: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the A6XX GMU specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+void a6xx_snapshot_gmu(struct adreno_device *adreno_dev,
 		struct kgsl_snapshot *snapshot)
 {
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
 	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
 
 	if (!kgsl_gmu_isenabled(device))
@@ -1571,16 +1579,11 @@
 	struct adreno_snapshot_data *snap_data = gpudev->snapshot_data;
 	bool sptprac_on;
 
-	/* GMU TCM data dumped through AHB */
-	a6xx_snapshot_gmu(device, snapshot);
-
 	sptprac_on = gpudev->sptprac_is_on(adreno_dev);
 
 	/* Return if the GX is off */
-	if (!gpudev->gx_is_on(adreno_dev)) {
-		pr_err("GX is off. Only dumping GMU data in snapshot\n");
+	if (!gpudev->gx_is_on(adreno_dev))
 		return;
-	}
 
 	/* Dump the registers which get affected by crash dumper trigger */
 	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
index f608927..b5999e6 100644
--- a/drivers/gpu/msm/adreno_snapshot.c
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -837,8 +837,7 @@
 
 	snapshot_frozen_objsize = 0;
 
-	if (!IS_ERR(context))
-		setup_fault_process(device, snapshot,
+	setup_fault_process(device, snapshot,
 			context ? context->proc_priv : NULL);
 
 	/* Add GPU specific sections - registers mainly, but other stuff too */
@@ -946,6 +945,24 @@
 
 }
 
+/* adreno_snapshot_gmu - Snapshot the Adreno GMU state
+ * @device - KGSL device to snapshot
+ * @snapshot - Pointer to the snapshot instance
+ * This is a hook function called by kgsl_snapshot to snapshot the
+ * Adreno specific information for the GMU snapshot.  In turn, this function
+ * calls the GMU specific snapshot function to get core specific information.
+ */
+void adreno_snapshot_gmu(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	/* Add GMU specific sections */
+	if (gpudev->snapshot_gmu)
+		gpudev->snapshot_gmu(adreno_dev, snapshot);
+}
+
 /*
  * adreno_snapshot_cp_roq - Dump CP merciu data in snapshot
  * @device: Device being snapshotted
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index f8d5f5b..0ab775a 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -151,6 +151,8 @@
 	unsigned int (*gpuid)(struct kgsl_device *device, unsigned int *chipid);
 	void (*snapshot)(struct kgsl_device *device,
 		struct kgsl_snapshot *snapshot, struct kgsl_context *context);
+	void (*snapshot_gmu)(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot);
 	irqreturn_t (*irq_handler)(struct kgsl_device *device);
 	int (*drain)(struct kgsl_device *device);
 	/*
@@ -706,7 +708,6 @@
 void kgsl_device_snapshot(struct kgsl_device *device,
 			struct kgsl_context *context, bool gmu_fault);
 void kgsl_device_snapshot_close(struct kgsl_device *device);
-void kgsl_snapshot_save_frozen_objs(struct work_struct *work);
 
 void kgsl_events_init(void);
 void kgsl_events_exit(void);
diff --git a/drivers/gpu/msm/kgsl_gmu.c b/drivers/gpu/msm/kgsl_gmu.c
index 3bd7cf3..7ea2255 100644
--- a/drivers/gpu/msm/kgsl_gmu.c
+++ b/drivers/gpu/msm/kgsl_gmu.c
@@ -1366,7 +1366,7 @@
 		/* Wait for the NMI to be handled */
 		wmb();
 		udelay(100);
-		kgsl_device_snapshot(device, ERR_PTR(-EINVAL), true);
+		kgsl_device_snapshot(device, NULL, true);
 
 		adreno_write_gmureg(adreno_dev,
 				ADRENO_REG_GMU_GMU2HOST_INTR_CLR, 0xFFFFFFFF);
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
index f9494a4..f710d8f 100644
--- a/drivers/gpu/msm/kgsl_snapshot.c
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -24,6 +24,8 @@
 #include "kgsl_snapshot.h"
 #include "adreno_cp_parser.h"
 
+static void kgsl_snapshot_save_frozen_objs(struct work_struct *work);
+
 /* Placeholder for list of ib objects that contain all objects in that IB */
 
 struct kgsl_snapshot_cp_obj {
@@ -182,8 +184,7 @@
 	context = kgsl_context_get(device, header->current_context);
 
 	/* Get the current PT base */
-	if (!IS_ERR(priv))
-		header->ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu);
+	header->ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu);
 
 	/* And the PID for the task leader */
 	if (context) {
@@ -207,6 +208,44 @@
 	return size;
 }
 
+/* Snapshot the Linux specific information */
+static size_t snapshot_os_no_ctxt(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_linux_v2 *header =
+		(struct kgsl_snapshot_linux_v2 *)buf;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	size_t size = sizeof(*header);
+
+	/* Make sure there is enough room for the data */
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "OS");
+		return 0;
+	}
+
+	memset(header, 0, sizeof(*header));
+
+	header->osid = KGSL_SNAPSHOT_OS_LINUX_V3;
+
+	/* Get the kernel build information */
+	strlcpy(header->release, init_utsname()->release,
+			sizeof(header->release));
+	strlcpy(header->version, init_utsname()->version,
+			sizeof(header->version));
+
+	/* Get the Unix time for the timestamp */
+	header->seconds = get_seconds();
+
+	/* Remember the power information */
+	header->power_flags = pwr->power_flags;
+	header->power_level = pwr->active_pwrlevel;
+	header->power_interval_timeout = pwr->interval_timeout;
+	header->grpclk = kgsl_get_clkrate(pwr->grp_clks[0]);
+
+	/* Return the size of the data segment */
+	return size;
+}
+
 static void kgsl_snapshot_put_object(struct kgsl_snapshot_object *obj)
 {
 	list_del(&obj->node);
@@ -663,14 +702,24 @@
 	snapshot->size += sizeof(*header);
 
 	/* Build the Linux specific header */
-	/* Context err is implied a GMU fault, so limit dump */
-	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS,
-			snapshot, snapshot_os,
-			IS_ERR(context) ? context : NULL);
+	/* We either want to only dump GMU, or we want to dump GPU and GMU */
+	if (gmu_fault) {
+		/* Dump only the GMU */
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS,
+				snapshot, snapshot_os_no_ctxt, NULL);
 
-	/* Get the device specific sections */
-	if (device->ftbl->snapshot)
-		device->ftbl->snapshot(device, snapshot, context);
+		if (device->ftbl->snapshot_gmu)
+			device->ftbl->snapshot_gmu(device, snapshot);
+	} else {
+		/* Dump GPU and GMU */
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS,
+				snapshot, snapshot_os, NULL);
+
+		if (device->ftbl->snapshot)
+			device->ftbl->snapshot(device, snapshot, context);
+		if (device->ftbl->snapshot_gmu)
+			device->ftbl->snapshot_gmu(device, snapshot);
+	}
 
 	/*
 	 * The timestamp is the seconds since boot so it is easier to match to
@@ -1193,7 +1242,7 @@
  * is taken
  * @work: The work item that scheduled this work
  */
-void kgsl_snapshot_save_frozen_objs(struct work_struct *work)
+static void kgsl_snapshot_save_frozen_objs(struct work_struct *work)
 {
 	struct kgsl_snapshot *snapshot = container_of(work,
 				struct kgsl_snapshot, work);
@@ -1205,6 +1254,9 @@
 	if (IS_ERR_OR_NULL(device))
 		return;
 
+	if (snapshot->gmu_fault)
+		goto gmu_only;
+
 	kgsl_snapshot_process_ib_obj_list(snapshot);
 
 	list_for_each_entry(obj, &snapshot->obj_list, node) {
@@ -1251,6 +1303,7 @@
 			       "snapshot: Active IB2:%016llx not dumped\n",
 				snapshot->ib2base);
 
+gmu_only:
 	complete_all(&snapshot->dump_gate);
 	BUG_ON(device->force_panic);
 }