drm/i915: Fix CPU-spinning hangs related to fence usage by using an LRU.

The lack of a proper LRU was partially worked around by taking the fence
from the object containing the oldest seqno.  But if there are multiple
objects inactive, then they don't have seqnos and the first fence reg
among them would be chosen.  If you were trying to copy data between two
mappings, this could result in each page fault stealing the fence from
the other argument, and your application hanging.

https://bugs.freedesktop.org/show_bug.cgi?id=23566
https://bugs.freedesktop.org/show_bug.cgi?id=23220
https://bugs.freedesktop.org/show_bug.cgi?id=23253
https://bugs.freedesktop.org/show_bug.cgi?id=23366

Cc: Stable Team <stable@kernel.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 140bee1..0c07a75 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -978,6 +978,7 @@
 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_set_domain *args = data;
 	struct drm_gem_object *obj;
 	uint32_t read_domains = args->read_domains;
@@ -1010,8 +1011,18 @@
 		 obj, obj->size, read_domains, write_domain);
 #endif
 	if (read_domains & I915_GEM_DOMAIN_GTT) {
+		struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 
+		/* Update the LRU on the fence for the CPU access that's
+		 * about to occur.
+		 */
+		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+			list_move_tail(&obj_priv->fence_list,
+				       &dev_priv->mm.fence_list);
+		}
+
 		/* Silently promote "you're not bound, there was nothing to do"
 		 * to success, since the client was just asking us to
 		 * make sure everything was done.
@@ -1155,8 +1166,7 @@
 	}
 
 	/* Need a new fence register? */
-	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
-	    obj_priv->tiling_mode != I915_TILING_NONE) {
+	if (obj_priv->tiling_mode != I915_TILING_NONE) {
 		ret = i915_gem_object_get_fence_reg(obj);
 		if (ret) {
 			mutex_unlock(&dev->struct_mutex);
@@ -2208,6 +2218,12 @@
 	struct drm_i915_gem_object *old_obj_priv = NULL;
 	int i, ret, avail;
 
+	/* Just update our place in the LRU if our fence is getting used. */
+	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+		list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
+		return 0;
+	}
+
 	switch (obj_priv->tiling_mode) {
 	case I915_TILING_NONE:
 		WARN(1, "allocating a fence for non-tiled object?\n");
@@ -2229,7 +2245,6 @@
 	}
 
 	/* First try to find a free reg */
-try_again:
 	avail = 0;
 	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
 		reg = &dev_priv->fence_regs[i];
@@ -2243,52 +2258,41 @@
 
 	/* None available, try to steal one or wait for a user to finish */
 	if (i == dev_priv->num_fence_regs) {
-		uint32_t seqno = dev_priv->mm.next_gem_seqno;
+		struct drm_gem_object *old_obj = NULL;
 
 		if (avail == 0)
 			return -ENOSPC;
 
-		for (i = dev_priv->fence_reg_start;
-		     i < dev_priv->num_fence_regs; i++) {
-			uint32_t this_seqno;
+		list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
+				    fence_list) {
+			old_obj = old_obj_priv->obj;
 
-			reg = &dev_priv->fence_regs[i];
-			old_obj_priv = reg->obj->driver_private;
+			reg = &dev_priv->fence_regs[old_obj_priv->fence_reg];
 
 			if (old_obj_priv->pin_count)
 				continue;
 
+			/* Take a reference, as otherwise the wait_rendering
+			 * below may cause the object to get freed out from
+			 * under us.
+			 */
+			drm_gem_object_reference(old_obj);
+
 			/* i915 uses fences for GPU access to tiled buffers */
 			if (IS_I965G(dev) || !old_obj_priv->active)
 				break;
 
-			/* find the seqno of the first available fence */
-			this_seqno = old_obj_priv->last_rendering_seqno;
-			if (this_seqno != 0 &&
-			    reg->obj->write_domain == 0 &&
-			    i915_seqno_passed(seqno, this_seqno))
-				seqno = this_seqno;
-		}
-
-		/*
-		 * Now things get ugly... we have to wait for one of the
-		 * objects to finish before trying again.
-		 */
-		if (i == dev_priv->num_fence_regs) {
-			if (seqno == dev_priv->mm.next_gem_seqno) {
-				i915_gem_flush(dev,
-					       I915_GEM_GPU_DOMAINS,
-					       I915_GEM_GPU_DOMAINS);
-				seqno = i915_add_request(dev, NULL,
-							 I915_GEM_GPU_DOMAINS);
-				if (seqno == 0)
-					return -ENOMEM;
-			}
-
-			ret = i915_wait_request(dev, seqno);
-			if (ret)
+			/* This brings the object to the head of the LRU if it
+			 * had been written to.  The only way this should
+			 * result in us waiting longer than the expected
+			 * optimal amount of time is if there was a
+			 * fence-using buffer later that was read-only.
+			 */
+			i915_gem_object_flush_gpu_write_domain(old_obj);
+			ret = i915_gem_object_wait_rendering(old_obj);
+			if (ret != 0)
 				return ret;
-			goto try_again;
+			break;
 		}
 
 		/*
@@ -2296,10 +2300,15 @@
 		 * for this object next time we need it.
 		 */
 		i915_gem_release_mmap(reg->obj);
+		i = old_obj_priv->fence_reg;
 		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
+		list_del_init(&old_obj_priv->fence_list);
+		drm_gem_object_unreference(old_obj);
 	}
 
 	obj_priv->fence_reg = i;
+	list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
+
 	reg->obj = obj;
 
 	if (IS_I965G(dev))
@@ -2342,6 +2351,7 @@
 
 	dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
+	list_del_init(&obj_priv->fence_list);
 }
 
 /**
@@ -3595,9 +3605,7 @@
 	 * Pre-965 chips need a fence register set up in order to
 	 * properly handle tiled surfaces.
 	 */
-	if (!IS_I965G(dev) &&
-	    obj_priv->fence_reg == I915_FENCE_REG_NONE &&
-	    obj_priv->tiling_mode != I915_TILING_NONE) {
+	if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) {
 		ret = i915_gem_object_get_fence_reg(obj);
 		if (ret != 0) {
 			if (ret != -EBUSY && ret != -ERESTARTSYS)
@@ -3806,6 +3814,7 @@
 	obj_priv->obj = obj;
 	obj_priv->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj_priv->list);
+	INIT_LIST_HEAD(&obj_priv->fence_list);
 
 	return 0;
 }
@@ -4253,6 +4262,7 @@
 	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.request_list);
+	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
 			  i915_gem_retire_work_handler);
 	dev_priv->mm.next_gem_seqno = 1;