drm/i915: Record batch buffer following GPU error

In order to improve our diagnostic capabilities following a GPU hang
and subsequent reset, we need to record the batch buffer that triggered
the error. We assume that the current batch buffer, plus a few details
about what else is on the active list, will be sufficient -- at the very
least an improvement over nothing.

The extra information is stored in /debug/dri/.../i915_error_state
following an error, and may be decoded using
intel_gpu_tools/tools/intel_error_decode.

v2: Avoid excessive work under spinlocks.
v3: Include ringbuffer for later analysis.
v4: Use kunmap correctly and record more buffer state.
v5: Search ringbuffer for current batch buffer
v6: Use a work fn for the impossible IRQ error case.
v7: Avoid non-atomic paths whilst in IRQ context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 56095b3..5eed463 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -350,6 +350,36 @@
 	return 0;
 }
 
+static const char *pin_flag(int pinned)
+{
+	if (pinned > 0)
+		return " P";
+	else if (pinned < 0)
+		return " p";
+	else
+		return "";
+}
+
+static const char *tiling_flag(int tiling)
+{
+	switch (tiling) {
+	default:
+	case I915_TILING_NONE: return "";
+	case I915_TILING_X: return " X";
+	case I915_TILING_Y: return " Y";
+	}
+}
+
+static const char *dirty_flag(int dirty)
+{
+	return dirty ? " dirty" : "";
+}
+
+static const char *purgeable_flag(int purgeable)
+{
+	return purgeable ? " purgeable" : "";
+}
+
 static int i915_error_state(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -357,6 +387,7 @@
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_error_state *error;
 	unsigned long flags;
+	int i, page, offset, elt;
 
 	spin_lock_irqsave(&dev_priv->error_lock, flags);
 	if (!dev_priv->first_error) {
@@ -368,6 +399,7 @@
 
 	seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
 		   error->time.tv_usec);
+	seq_printf(m, "PCI ID: 0x%04x\n", dev->pci_device);
 	seq_printf(m, "EIR: 0x%08x\n", error->eir);
 	seq_printf(m, "  PGTBL_ER: 0x%08x\n", error->pgtbl_er);
 	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
@@ -379,6 +411,59 @@
 		seq_printf(m, "  INSTPS: 0x%08x\n", error->instps);
 		seq_printf(m, "  INSTDONE1: 0x%08x\n", error->instdone1);
 	}
+	seq_printf(m, "seqno: 0x%08x\n", error->seqno);
+
+	if (error->active_bo_count) {
+		seq_printf(m, "Buffers [%d]:\n", error->active_bo_count);
+
+		for (i = 0; i < error->active_bo_count; i++) {
+			seq_printf(m, "  %08x %8zd %08x %08x %08x%s%s%s%s",
+				   error->active_bo[i].gtt_offset,
+				   error->active_bo[i].size,
+				   error->active_bo[i].read_domains,
+				   error->active_bo[i].write_domain,
+				   error->active_bo[i].seqno,
+				   pin_flag(error->active_bo[i].pinned),
+				   tiling_flag(error->active_bo[i].tiling),
+				   dirty_flag(error->active_bo[i].dirty),
+				   purgeable_flag(error->active_bo[i].purgeable));
+
+			if (error->active_bo[i].name)
+				seq_printf(m, " (name: %d)", error->active_bo[i].name);
+			if (error->active_bo[i].fence_reg != I915_FENCE_REG_NONE)
+				seq_printf(m, " (fence: %d)", error->active_bo[i].fence_reg);
+
+			seq_printf(m, "\n");
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(error->batchbuffer); i++) {
+		if (error->batchbuffer[i]) {
+			struct drm_i915_error_object *obj = error->batchbuffer[i];
+
+			seq_printf(m, "--- gtt_offset = 0x%08x\n", obj->gtt_offset);
+			offset = 0;
+			for (page = 0; page < obj->page_count; page++) {
+				for (elt = 0; elt < PAGE_SIZE/4; elt++) {
+					seq_printf(m, "%08x :  %08x\n", offset, obj->pages[page][elt]);
+					offset += 4;
+				}
+			}
+		}
+	}
+
+	if (error->ringbuffer) {
+		struct drm_i915_error_object *obj = error->ringbuffer;
+
+		seq_printf(m, "--- ringbuffer = 0x%08x\n", obj->gtt_offset);
+		offset = 0;
+		for (page = 0; page < obj->page_count; page++) {
+			for (elt = 0; elt < PAGE_SIZE/4; elt++) {
+				seq_printf(m, "%08x :  %08x\n", offset, obj->pages[page][elt]);
+				offset += 4;
+			}
+		}
+	}
 
 out:
 	spin_unlock_irqrestore(&dev_priv->error_lock, flags);