drm/i915: Record batch buffer following GPU error
In order to improve our diagnostic capabilities following a GPU hang
and subsequent reset, we need to record the batch buffer that triggered
the error. We assume that the current batch buffer, plus a few details
about what else is on the active list, will be sufficient -- at the very
least an improvement over nothing.
The extra information is stored in /debug/dri/.../i915_error_state
following an error, and may be decoded using
intel_gpu_tools/tools/intel_error_decode.
v2: Avoid excessive work under spinlocks.
v3: Include ringbuffer for later analysis.
v4: Use kunmap correctly and record more buffer state.
v5: Search ringbuffer for current batch buffer
v6: Use a work fn for the impossible IRQ error case.
v7: Avoid non-atomic paths whilst in IRQ context.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 40b0da3..ec06d48 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -150,7 +150,27 @@
u32 instps;
u32 instdone1;
u32 seqno;
+ u64 bbaddr;
struct timeval time;
+ struct drm_i915_error_object {
+ int page_count;
+ u32 gtt_offset;
+ u32 *pages[0];
+ } *ringbuffer, *batchbuffer[2];
+ struct drm_i915_error_buffer {
+ size_t size;
+ u32 name;
+ u32 seqno;
+ u32 gtt_offset;
+ u32 read_domains;
+ u32 write_domain;
+ u32 fence_reg;
+ s32 pinned:2;
+ u32 tiling:2;
+ u32 dirty:1;
+ u32 purgeable:1;
+ } *active_bo;
+ u32 active_bo_count;
};
struct drm_i915_display_funcs {
@@ -778,6 +798,7 @@
/* i915_irq.c */
void i915_hangcheck_elapsed(unsigned long data);
+void i915_destroy_error_state(struct drm_device *dev);
extern int i915_irq_emit(struct drm_device *dev, void *data,
struct drm_file *file_priv);
extern int i915_irq_wait(struct drm_device *dev, void *data,