drm/i915: add fence register management to execbuf

Adds code to set up fence registers at execbuf time on pre-965 chips as
necessary.  Also fixes up a few bugs in the pre-965 tile register support
(get_order != ffs).  The number of fences available to the kernel defaults
to the hw limit minus 3 (for legacy X front/back/depth), but a new parameter
allows userspace to override that as needed.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@linux.ie>
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e1f831f..6a9e3a8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 					   unsigned alignment);
-static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
+static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_evict_something(struct drm_device *dev);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
@@ -567,6 +567,7 @@
 	pgoff_t page_offset;
 	unsigned long pfn;
 	int ret = 0;
+	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 
 	/* We don't use vmf->pgoff since that has the fake offset */
 	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
@@ -586,7 +587,7 @@
 	/* Need a new fence register? */
 	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
 	    obj_priv->tiling_mode != I915_TILING_NONE) {
-		ret = i915_gem_object_get_fence_reg(obj);
+		ret = i915_gem_object_get_fence_reg(obj, write);
 		if (ret != 0)
 			return VM_FAULT_SIGBUS;
 	}
@@ -1214,7 +1215,7 @@
 /**
  * Unbinds an object from the GTT aperture.
  */
-static int
+int
 i915_gem_object_unbind(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
@@ -1448,21 +1449,26 @@
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	int regnum = obj_priv->fence_reg;
+	int tile_width;
 	uint32_t val;
 	uint32_t pitch_val;
 
 	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
 	    (obj_priv->gtt_offset & (obj->size - 1))) {
-		WARN(1, "%s: object not 1M or size aligned\n", __func__);
+		WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n",
+		     __func__, obj_priv->gtt_offset, obj->size);
 		return;
 	}
 
-	if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) ||
-						       IS_I945GM(dev) ||
-						       IS_G33(dev)))
-		pitch_val = (obj_priv->stride / 128) - 1;
+	if (obj_priv->tiling_mode == I915_TILING_Y &&
+	    HAS_128_BYTE_Y_TILING(dev))
+		tile_width = 128;
 	else
-		pitch_val = (obj_priv->stride / 512) - 1;
+		tile_width = 512;
+
+	/* Note: pitch better be a power of two tile widths */
+	pitch_val = obj_priv->stride / tile_width;
+	pitch_val = ffs(pitch_val) - 1;
 
 	val = obj_priv->gtt_offset;
 	if (obj_priv->tiling_mode == I915_TILING_Y)
@@ -1486,7 +1492,8 @@
 
 	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
 	    (obj_priv->gtt_offset & (obj->size - 1))) {
-		WARN(1, "%s: object not 1M or size aligned\n", __func__);
+		WARN(1, "%s: object 0x%08x not 1M or size aligned\n",
+		     __func__, obj_priv->gtt_offset);
 		return;
 	}
 
@@ -1506,6 +1513,7 @@
 /**
  * i915_gem_object_get_fence_reg - set up a fence reg for an object
  * @obj: object to map through a fence reg
+ * @write: object is about to be written
  *
  * When mapping objects through the GTT, userspace wants to be able to write
  * to them without having to worry about swizzling if the object is tiled.
@@ -1517,7 +1525,7 @@
  * and tiling format.
  */
 static int
-i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
+i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write)
 {
 	struct drm_device *dev = obj->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1530,12 +1538,18 @@
 		WARN(1, "allocating a fence for non-tiled object?\n");
 		break;
 	case I915_TILING_X:
-		WARN(obj_priv->stride & (512 - 1),
-		     "object is X tiled but has non-512B pitch\n");
+		if (!obj_priv->stride)
+			return -EINVAL;
+		WARN((obj_priv->stride & (512 - 1)),
+		     "object 0x%08x is X tiled but has non-512B pitch\n",
+		     obj_priv->gtt_offset);
 		break;
 	case I915_TILING_Y:
-		WARN(obj_priv->stride & (128 - 1),
-		     "object is Y tiled but has non-128B pitch\n");
+		if (!obj_priv->stride)
+			return -EINVAL;
+		WARN((obj_priv->stride & (128 - 1)),
+		     "object 0x%08x is Y tiled but has non-128B pitch\n",
+		     obj_priv->gtt_offset);
 		break;
 	}
 
@@ -1637,7 +1651,7 @@
 	if (dev_priv->mm.suspended)
 		return -EBUSY;
 	if (alignment == 0)
-		alignment = PAGE_SIZE;
+		alignment = i915_gem_get_gtt_alignment(obj);
 	if (alignment & (PAGE_SIZE - 1)) {
 		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
 		return -EINVAL;
@@ -2658,6 +2672,14 @@
 				DRM_ERROR("Failure to bind: %d", ret);
 			return ret;
 		}
+		/*
+		 * Pre-965 chips need a fence register set up in order to
+		 * properly handle tiled surfaces.
+		 */
+		if (!IS_I965G(dev) &&
+		    obj_priv->fence_reg == I915_FENCE_REG_NONE &&
+		    obj_priv->tiling_mode != I915_TILING_NONE)
+			i915_gem_object_get_fence_reg(obj, true);
 	}
 	obj_priv->pin_count++;
 
@@ -3297,7 +3319,7 @@
 	/* Old X drivers will take 0-2 for front, back, depth buffers */
 	dev_priv->fence_reg_start = 3;
 
-	if (IS_I965G(dev))
+	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 		dev_priv->num_fence_regs = 16;
 	else
 		dev_priv->num_fence_regs = 8;