drm/i915: Record batch buffer following GPU error

In order to improve our diagnostic capabilities following a GPU hang
and subsequent reset, we need to record the batch buffer that triggered
the error. We assume that the current batch buffer, plus a few details
about what else is on the active list, will be sufficient -- at the very
least an improvement over nothing.

The extra information is stored in /debug/dri/.../i915_error_state
following an error, and may be decoded using
intel_gpu_tools/tools/intel_error_decode.

v2: Avoid excessive work under spinlocks.
v3: Include ringbuffer for later analysis.
v4: Use kunmap correctly and record more buffer state.
v5: Search ringbuffer for current batch buffer
v6: Use a work fn for the impossible IRQ error case.
v7: Avoid non-atomic paths whilst in IRQ context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 40b0da3..ec06d486 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -150,7 +150,27 @@
 	u32 instps;
 	u32 instdone1;
 	u32 seqno;
+	u64 bbaddr;
 	struct timeval time;
+	struct drm_i915_error_object {
+		int page_count;
+		u32 gtt_offset;
+		u32 *pages[0];
+	} *ringbuffer, *batchbuffer[2];
+	struct drm_i915_error_buffer {
+		size_t size;
+		u32 name;
+		u32 seqno;
+		u32 gtt_offset;
+		u32 read_domains;
+		u32 write_domain;
+		u32 fence_reg;
+		s32 pinned:2;
+		u32 tiling:2;
+		u32 dirty:1;
+		u32 purgeable:1;
+	} *active_bo;
+	u32 active_bo_count;
 };
 
 struct drm_i915_display_funcs {
@@ -778,6 +798,7 @@
 
 /* i915_irq.c */
 void i915_hangcheck_elapsed(unsigned long data);
+void i915_destroy_error_state(struct drm_device *dev);
 extern int i915_irq_emit(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 extern int i915_irq_wait(struct drm_device *dev, void *data,