drm/i915: More accurately track last fence usage by the GPU

Instead of a single boolean on the fence register to indicate that the
GPU may have used the fence, track the exact access on the object
itself: whether the GPU is using (or pending use of) a fence for the
buffer, along with the seqno and ring of the last fenced access.
Releasing a fence register then only has to wait for the last fenced
access to retire, rather than for all outstanding rendering on the
object, and the fence LRU can be updated as the object is moved onto
the active list.

Based on a patch by Daniel Vetter.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index addb939..5faae47 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -110,14 +110,15 @@
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
-	seq_printf(m, "%p: %s%s %8zd %08x %08x %d%s%s",
+	seq_printf(m, "%p: %s%s %8zd %04x %04x %d %d%s%s",
		   &obj->base,
		   get_pin_flag(obj),
		   get_tiling_flag(obj),
		   obj->base.size,
 		   obj->base.read_domains,
 		   obj->base.write_domain,
 		   obj->last_rendering_seqno,
+		   obj->last_fenced_seqno,
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 42d3e90..ee7df1d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -124,9 +124,8 @@
 #define I915_FENCE_REG_NONE -1
 
 struct drm_i915_fence_reg {
-	struct drm_i915_gem_object *obj;
 	struct list_head lru_list;
-	bool gpu;
+	struct drm_i915_gem_object *obj;
 };
 
 struct sdvo_device_mapping {
@@ -787,6 +786,16 @@
 	unsigned int fault_mappable : 1;
 	unsigned int pin_mappable : 1;
 
+	/*
+	 * Is the GPU currently using a fence to access this buffer?
+	 *
+	 * pending_fenced_gpu_access is staged whilst an execbuffer is
+	 * being processed and is latched into fenced_gpu_access once
+	 * the request has been submitted to the ring.
+	 */
+	unsigned int pending_fenced_gpu_access:1;
+	unsigned int fenced_gpu_access:1;
+
 	struct page **pages;
 
 	/**
@@ -802,11 +807,13 @@
 	 */
 	uint32_t gtt_offset;
 
-	/* Which ring is refering to is this object */
-	struct intel_ring_buffer *ring;
-
 	/** Breadcrumb of last rendering to the buffer. */
 	uint32_t last_rendering_seqno;
+	struct intel_ring_buffer *ring;
+
+	/** Breadcrumb of last fenced GPU access to the buffer. */
+	uint32_t last_fenced_seqno;
+	struct intel_ring_buffer *last_fenced_ring;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	uint32_t stride;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 061426e..2cfdee8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1688,7 +1688,34 @@
 	/* Move from whatever list we were on to the tail of execution. */
 	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
 	list_move_tail(&obj->ring_list, &ring->active_list);
+
 	obj->last_rendering_seqno = seqno;
+	if (obj->fenced_gpu_access) {
+		struct drm_i915_fence_reg *reg;
+
+		BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);
+
+		obj->last_fenced_seqno = seqno;
+		obj->last_fenced_ring = ring;
+
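+		/* Update the fence LRU: this register is now the
+		 * most recently used by the GPU.
+		 */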
+		reg = &dev_priv->fence_regs[obj->fence_reg];
+		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
+	}
+}
+
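+/*
+ * Unlink the object from its ring and clear the seqnos of its last
+ * (fenced) rendering; common to both the flushing and inactive paths.
+ */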
+static void
+i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
+{
+	list_del_init(&obj->ring_list);
+	obj->last_rendering_seqno = 0;
+	obj->last_fenced_seqno = 0;
 }
 
 static void
@@ -1699,8 +1719,34 @@
 
 	BUG_ON(!obj->active);
 	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
-	list_del_init(&obj->ring_list);
-	obj->last_rendering_seqno = 0;
+
+	i915_gem_object_move_off_active(obj);
+}
+
+static void
+i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (obj->pin_count != 0)
+		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
+	else
+		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+
+	BUG_ON(!list_empty(&obj->gpu_write_list));
+	BUG_ON(!obj->active);
+	obj->ring = NULL;
+
+	i915_gem_object_move_off_active(obj);
+	obj->fenced_gpu_access = false;
+	obj->last_fenced_ring = NULL;
+
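+	/* Drop the reference taken when the object was first made active. */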
+	obj->active = 0;
+	drm_gem_object_unreference(&obj->base);
+
+	WARN_ON(i915_verify_lists(dev));
 }
 
 /* Immediately discard the backing storage */
@@ -1730,34 +1775,10 @@
 }
 
 static void
-i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
-
-	if (obj->pin_count != 0)
-		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
-	else
-		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
-	list_del_init(&obj->ring_list);
-
-	BUG_ON(!list_empty(&obj->gpu_write_list));
-
-	obj->last_rendering_seqno = 0;
-	obj->ring = NULL;
-	if (obj->active) {
-		obj->active = 0;
-		drm_gem_object_unreference(&obj->base);
-	}
-	WARN_ON(i915_verify_lists(dev));
-}
-
-static void
 i915_gem_process_flushing_list(struct drm_device *dev,
 			       uint32_t flush_domains,
 			       struct intel_ring_buffer *ring)
 {
-	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj, *next;
 
 	list_for_each_entry_safe(obj, next,
@@ -1770,14 +1791,6 @@
 			list_del_init(&obj->gpu_write_list);
 			i915_gem_object_move_to_active(obj, ring);
 
-			/* update the fence lru list */
-			if (obj->fence_reg != I915_FENCE_REG_NONE) {
-				struct drm_i915_fence_reg *reg =
-					&dev_priv->fence_regs[obj->fence_reg];
-				list_move_tail(&reg->lru_list,
-						&dev_priv->mm.fence_list);
-			}
-
 			trace_i915_gem_object_change_domain(obj,
 							    obj->base.read_domains,
 							    old_write_domain);
@@ -2615,8 +2628,7 @@
 			      bool interruptible)
 {
 	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_fence_reg *reg;
+	int ret;
 
 	if (obj->fence_reg == I915_FENCE_REG_NONE)
 		return 0;
@@ -2631,19 +2643,26 @@
 	 * therefore we must wait for any outstanding access to complete
 	 * before clearing the fence.
 	 */
-	reg = &dev_priv->fence_regs[obj->fence_reg];
-	if (reg->gpu) {
-		int ret;
-
+	if (obj->fenced_gpu_access) {
 		ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
 		if (ret)
 			return ret;
 
-		ret = i915_gem_object_wait_rendering(obj, interruptible);
+		obj->fenced_gpu_access = false;
+	}
+
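+	/* Wait only upon the last fenced access, rather than upon all
+	 * outstanding rendering to the object.
+	 */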
+	if (obj->last_fenced_seqno) {
+		ret = i915_do_wait_request(dev,
+					   obj->last_fenced_seqno,
+					   interruptible,
+					   obj->last_fenced_ring);
 		if (ret)
 			return ret;
 
-		reg->gpu = false;
+		obj->last_fenced_seqno = 0;
 	}
 
 	i915_gem_object_flush_gtt_write_domain(obj);
@@ -3166,8 +3182,9 @@
 	 * write domain
 	 */
 	if (obj->base.write_domain &&
-	    (obj->base.write_domain != obj->base.pending_read_domains ||
-	     obj->ring != ring)) {
+	    ((obj->base.write_domain != obj->base.pending_read_domains ||
+	      obj->ring != ring) ||
+	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
 		flush_domains |= obj->base.write_domain;
 		invalidate_domains |=
 			obj->base.pending_read_domains & ~obj->base.write_domain;
@@ -3528,7 +3545,6 @@
 			    struct drm_i915_gem_exec_object2 *exec_list,
 			    int count)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret, i, retry;
 
 	/* Attempt to pin all of the buffers into the GTT.
@@ -3601,7 +3617,7 @@
 				if (ret)
 					break;
 
-				dev_priv->fence_regs[obj->fence_reg].gpu = true;
+				obj->pending_fenced_gpu_access = true;
 			}
 
 			entry->offset = obj->gtt_offset;
@@ -3981,6 +3997,8 @@
 			goto err;
 		}
 		obj->in_execbuffer = true;
+		obj->pending_fenced_gpu_access = false;
 	}
 
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
@@ -4085,6 +4102,8 @@
 
 		obj->base.read_domains = obj->base.pending_read_domains;
 		obj->base.write_domain = obj->base.pending_write_domain;
+		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
 		i915_gem_object_move_to_active(obj, ring);
 		if (obj->base.write_domain) {