drm/i915: Only enforce fence limits inside the GTT.

So long as we adhere to the fence registers rules for alignment and no
overlaps (including with unfenced accesses to linear memory) and account
for the tiled access in our size allocation, we do not have to allocate
the full fenced region for the object. This allows us to fight the bloat
tiling imposed on pre-i965 chipsets and frees up RAM for real use. [Inside
the GTT we still suffer the additional alignment constraints, so it doesn't
magic allow us to render larger scenes without stalls -- we need the
expanded GTT and fence pipelining to overcome those...]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4fc1e05..ba2af4e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -130,7 +130,8 @@
 	if (obj->fence_reg != I915_FENCE_REG_NONE)
 		seq_printf(m, " (fence: %d)", obj->fence_reg);
 	if (obj->gtt_space != NULL)
-		seq_printf(m, " (gtt_offset: %08x)", obj->gtt_offset);
+		seq_printf(m, " (gtt offset: %08x, size: %08x)",
+			   obj->gtt_offset, (unsigned int)obj->gtt_space->size);
 	if (obj->pin_mappable || obj->fault_mappable)
 		seq_printf(m, " (mappable)");
 	if (obj->ring != NULL)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index dddca00..00d8fb3 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -770,6 +770,9 @@
 	case I915_PARAM_HAS_BLT:
 		value = HAS_BLT(dev);
 		break;
+	case I915_PARAM_HAS_RELAXED_FENCING:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3df8a62..7aa7f8a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -746,6 +746,8 @@
 	 * Advice: are the backing pages purgeable?
 	 */
 	unsigned int madv : 2;
+	unsigned int fenceable : 1;
+	unsigned int mappable : 1;
 
 	/**
 	 * Current tiling mode for the object.
@@ -1005,7 +1007,7 @@
 					      size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
 int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment,
-			bool mappable);
+			bool mappable, bool need_fence);
 void i915_gem_object_unpin(struct drm_gem_object *obj);
 int i915_gem_object_unbind(struct drm_gem_object *obj);
 void i915_gem_release_mmap(struct drm_gem_object *obj);
@@ -1068,10 +1070,6 @@
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
 void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
 void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj);
-bool i915_tiling_ok(struct drm_device *dev, int stride, int size,
-		    int tiling_mode);
-bool i915_gem_object_fence_offset_ok(struct drm_gem_object *obj,
-				     int tiling_mode);
 
 /* i915_gem_debug.c */
 void i915_gem_dump_object(struct drm_gem_object *obj, int len,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 08f57ae..07ad1e3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -36,7 +36,8 @@
 #include <linux/pci.h>
 #include <linux/intel-gtt.h>
 
-static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
+static uint32_t i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj_priv);
+static uint32_t i915_gem_get_gtt_size(struct drm_i915_gem_object *obj_priv);
 
 static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
 						  bool pipelined);
@@ -51,7 +52,9 @@
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj,
 					  bool interruptible);
 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
-				       unsigned alignment, bool mappable);
+				       unsigned alignment,
+				       bool mappable,
+				       bool need_fence);
 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
@@ -79,30 +82,26 @@
 }
 
 static void i915_gem_info_add_gtt(struct drm_i915_private *dev_priv,
-				  struct drm_gem_object *obj)
+				  struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	dev_priv->mm.gtt_count++;
-	dev_priv->mm.gtt_memory += obj->size;
-	if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
+	dev_priv->mm.gtt_memory += obj->gtt_space->size;
+	if (obj->gtt_offset < dev_priv->mm.gtt_mappable_end) {
 		dev_priv->mm.mappable_gtt_used +=
-			min_t(size_t, obj->size,
-			      dev_priv->mm.gtt_mappable_end
-					- obj_priv->gtt_offset);
+			min_t(size_t, obj->gtt_space->size,
+			      dev_priv->mm.gtt_mappable_end - obj->gtt_offset);
 	}
 }
 
 static void i915_gem_info_remove_gtt(struct drm_i915_private *dev_priv,
-				     struct drm_gem_object *obj)
+				     struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	dev_priv->mm.gtt_count--;
-	dev_priv->mm.gtt_memory -= obj->size;
-	if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
+	dev_priv->mm.gtt_memory -= obj->gtt_space->size;
+	if (obj->gtt_offset < dev_priv->mm.gtt_mappable_end) {
 		dev_priv->mm.mappable_gtt_used -=
-			min_t(size_t, obj->size,
-			      dev_priv->mm.gtt_mappable_end
-					- obj_priv->gtt_offset);
+			min_t(size_t, obj->gtt_space->size,
+			      dev_priv->mm.gtt_mappable_end - obj->gtt_offset);
 	}
 }
 
@@ -113,47 +112,43 @@
  */
 static void
 i915_gem_info_update_mappable(struct drm_i915_private *dev_priv,
-			      struct drm_gem_object *obj,
+			      struct drm_i915_gem_object *obj,
 			      bool mappable)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
-
 	if (mappable) {
-		if (obj_priv->pin_mappable && obj_priv->fault_mappable)
+		if (obj->pin_mappable && obj->fault_mappable)
 			/* Combined state was already mappable. */
 			return;
 		dev_priv->mm.gtt_mappable_count++;
-		dev_priv->mm.gtt_mappable_memory += obj->size;
+		dev_priv->mm.gtt_mappable_memory += obj->gtt_space->size;
 	} else {
-		if (obj_priv->pin_mappable || obj_priv->fault_mappable)
+		if (obj->pin_mappable || obj->fault_mappable)
 			/* Combined state still mappable. */
 			return;
 		dev_priv->mm.gtt_mappable_count--;
-		dev_priv->mm.gtt_mappable_memory -= obj->size;
+		dev_priv->mm.gtt_mappable_memory -= obj->gtt_space->size;
 	}
 }
 
 static void i915_gem_info_add_pin(struct drm_i915_private *dev_priv,
-				  struct drm_gem_object *obj,
+				  struct drm_i915_gem_object *obj,
 				  bool mappable)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	dev_priv->mm.pin_count++;
-	dev_priv->mm.pin_memory += obj->size;
+	dev_priv->mm.pin_memory += obj->gtt_space->size;
 	if (mappable) {
-		obj_priv->pin_mappable = true;
+		obj->pin_mappable = true;
 		i915_gem_info_update_mappable(dev_priv, obj, true);
 	}
 }
 
 static void i915_gem_info_remove_pin(struct drm_i915_private *dev_priv,
-				     struct drm_gem_object *obj)
+				     struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	dev_priv->mm.pin_count--;
-	dev_priv->mm.pin_memory -= obj->size;
-	if (obj_priv->pin_mappable) {
-		obj_priv->pin_mappable = false;
+	dev_priv->mm.pin_memory -= obj->gtt_space->size;
+	if (obj->pin_mappable) {
+		obj->pin_mappable = false;
 		i915_gem_info_update_mappable(dev_priv, obj, false);
 	}
 }
@@ -309,16 +304,6 @@
 	return 0;
 }
 
-static bool
-i915_gem_object_cpu_accessible(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
-
-	return obj->gtt_space == NULL ||
-		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
-}
-
 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
 {
 	drm_i915_private_t *dev_priv = obj->dev->dev_private;
@@ -1083,7 +1068,7 @@
 	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
 		 obj_priv->gtt_space &&
 		 obj->write_domain != I915_GEM_DOMAIN_CPU) {
-		ret = i915_gem_object_pin(obj, 0, true);
+		ret = i915_gem_object_pin(obj, 0, true, false);
 		if (ret)
 			goto out;
 
@@ -1307,11 +1292,19 @@
 	/* Now bind it into the GTT if needed */
 	mutex_lock(&dev->struct_mutex);
 	BUG_ON(obj_priv->pin_count && !obj_priv->pin_mappable);
-	if (!i915_gem_object_cpu_accessible(obj_priv))
-		i915_gem_object_unbind(obj);
+
+	if (obj_priv->gtt_space) {
+		if (!obj_priv->mappable ||
+		    (obj_priv->tiling_mode && !obj_priv->fenceable)) {
+			ret = i915_gem_object_unbind(obj);
+			if (ret)
+				goto unlock;
+		}
+	}
 
 	if (!obj_priv->gtt_space) {
-		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
+		ret = i915_gem_object_bind_to_gtt(obj, 0,
+						  true, obj_priv->tiling_mode);
 		if (ret)
 			goto unlock;
 	}
@@ -1322,7 +1315,7 @@
 
 	if (!obj_priv->fault_mappable) {
 		obj_priv->fault_mappable = true;
-		i915_gem_info_update_mappable(dev_priv, obj, true);
+		i915_gem_info_update_mappable(dev_priv, obj_priv, true);
 	}
 
 	/* Need a new fence register? */
@@ -1448,7 +1441,7 @@
 
 	if (obj_priv->fault_mappable) {
 		obj_priv->fault_mappable = false;
-		i915_gem_info_update_mappable(dev_priv, obj, false);
+		i915_gem_info_update_mappable(dev_priv, obj_priv, false);
 	}
 }
 
@@ -1473,32 +1466,51 @@
  * potential fence register mapping if needed.
  */
 static uint32_t
-i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
+i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj_priv)
 {
-	struct drm_device *dev = obj->dev;
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
-	int start, i;
+	struct drm_device *dev = obj_priv->base.dev;
 
 	/*
 	 * Minimum alignment is 4k (GTT page size), but might be greater
 	 * if a fence register is needed for the object.
 	 */
-	if (INTEL_INFO(dev)->gen >= 4 || obj_priv->tiling_mode == I915_TILING_NONE)
+	if (INTEL_INFO(dev)->gen >= 4 ||
+	    obj_priv->tiling_mode == I915_TILING_NONE)
 		return 4096;
 
 	/*
 	 * Previous chips need to be aligned to the size of the smallest
 	 * fence register that can contain the object.
 	 */
+	return i915_gem_get_gtt_size(obj_priv);
+}
+
+static uint32_t
+i915_gem_get_gtt_size(struct drm_i915_gem_object *obj_priv)
+{
+	struct drm_device *dev = obj_priv->base.dev;
+	uint32_t size;
+
+	/*
+	 * Minimum alignment is 4k (GTT page size), but might be greater
+	 * if a fence register is needed for the object.
+	 */
+	if (INTEL_INFO(dev)->gen >= 4)
+		return obj_priv->base.size;
+
+	/*
+	 * Previous chips need to be aligned to the size of the smallest
+	 * fence register that can contain the object.
+	 */
 	if (INTEL_INFO(dev)->gen == 3)
-		start = 1024*1024;
+		size = 1024*1024;
 	else
-		start = 512*1024;
+		size = 512*1024;
 
-	for (i = start; i < obj->size; i <<= 1)
-		;
+	while (size < obj_priv->base.size)
+		size <<= 1;
 
-	return i;
+	return size;
 }
 
 /**
@@ -2253,8 +2265,10 @@
 
 	i915_gem_object_put_pages_gtt(obj);
 
-	i915_gem_info_remove_gtt(dev_priv, obj);
+	i915_gem_info_remove_gtt(dev_priv, obj_priv);
 	list_del_init(&obj_priv->mm_list);
+	obj_priv->fenceable = true;
+	obj_priv->mappable = true;
 
 	drm_mm_put_block(obj_priv->gtt_space);
 	obj_priv->gtt_space = NULL;
@@ -2311,16 +2325,16 @@
 	return 0;
 }
 
-static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void sandybridge_write_fence_reg(struct drm_gem_object *obj)
 {
-	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	u32 size = i915_gem_get_gtt_size(obj_priv);
 	int regnum = obj_priv->fence_reg;
 	uint64_t val;
 
-	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
+	val = (uint64_t)((obj_priv->gtt_offset + size - 4096) &
 		    0xfffff000) << 32;
 	val |= obj_priv->gtt_offset & 0xfffff000;
 	val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
@@ -2333,16 +2347,16 @@
 	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
 }
 
-static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void i965_write_fence_reg(struct drm_gem_object *obj)
 {
-	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	u32 size = i915_gem_get_gtt_size(obj_priv);
 	int regnum = obj_priv->fence_reg;
 	uint64_t val;
 
-	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
+	val = (uint64_t)((obj_priv->gtt_offset + size - 4096) &
 		    0xfffff000) << 32;
 	val |= obj_priv->gtt_offset & 0xfffff000;
 	val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
@@ -2353,21 +2367,20 @@
 	I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
 }
 
-static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void i915_write_fence_reg(struct drm_gem_object *obj)
 {
-	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
-	int regnum = obj_priv->fence_reg;
+	u32 size = i915_gem_get_gtt_size(obj_priv);
+	uint32_t fence_reg, val, pitch_val;
 	int tile_width;
-	uint32_t fence_reg, val;
-	uint32_t pitch_val;
 
 	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
-	    (obj_priv->gtt_offset & (obj->size - 1))) {
-		WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
-		     __func__, obj_priv->gtt_offset, obj->size);
+	    (obj_priv->gtt_offset & (size - 1))) {
+		WARN(1, "%s: object 0x%08x [fenceable? %d] not 1M or size (0x%08x) aligned [gtt_space offset=%lx, size=%lx]\n",
+		     __func__, obj_priv->gtt_offset, obj_priv->fenceable, size,
+		     obj_priv->gtt_space->start, obj_priv->gtt_space->size);
 		return;
 	}
 
@@ -2390,23 +2403,24 @@
 	val = obj_priv->gtt_offset;
 	if (obj_priv->tiling_mode == I915_TILING_Y)
 		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-	val |= I915_FENCE_SIZE_BITS(obj->size);
+	val |= I915_FENCE_SIZE_BITS(size);
 	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
 	val |= I830_FENCE_REG_VALID;
 
-	if (regnum < 8)
-		fence_reg = FENCE_REG_830_0 + (regnum * 4);
+	fence_reg = obj_priv->fence_reg;
+	if (fence_reg < 8)
+		fence_reg = FENCE_REG_830_0 + fence_reg * 4;
 	else
-		fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
+		fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
 	I915_WRITE(fence_reg, val);
 }
 
-static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void i830_write_fence_reg(struct drm_gem_object *obj)
 {
-	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	u32 size = i915_gem_get_gtt_size(obj_priv);
 	int regnum = obj_priv->fence_reg;
 	uint32_t val;
 	uint32_t pitch_val;
@@ -2426,7 +2440,7 @@
 	val = obj_priv->gtt_offset;
 	if (obj_priv->tiling_mode == I915_TILING_Y)
 		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-	fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
+	fence_size_bits = I830_FENCE_SIZE_BITS(size);
 	WARN_ON(fence_size_bits & ~0x00000f00);
 	val |= fence_size_bits;
 	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
@@ -2438,10 +2452,9 @@
 static int i915_find_fence_reg(struct drm_device *dev,
 			       bool interruptible)
 {
-	struct drm_i915_fence_reg *reg = NULL;
-	struct drm_i915_gem_object *obj_priv = NULL;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_gem_object *obj = NULL;
+	struct drm_i915_fence_reg *reg;
+	struct drm_i915_gem_object *obj_priv = NULL;
 	int i, avail, ret;
 
 	/* First try to find a free reg */
@@ -2460,33 +2473,31 @@
 		return -ENOSPC;
 
 	/* None available, try to steal one or wait for a user to finish */
-	i = I915_FENCE_REG_NONE;
+	avail = I915_FENCE_REG_NONE;
 	list_for_each_entry(reg, &dev_priv->mm.fence_list,
 			    lru_list) {
-		obj = reg->obj;
-		obj_priv = to_intel_bo(obj);
-
+		obj_priv = to_intel_bo(reg->obj);
 		if (obj_priv->pin_count)
 			continue;
 
 		/* found one! */
-		i = obj_priv->fence_reg;
+		avail = obj_priv->fence_reg;
 		break;
 	}
 
-	BUG_ON(i == I915_FENCE_REG_NONE);
+	BUG_ON(avail == I915_FENCE_REG_NONE);
 
 	/* We only have a reference on obj from the active list. put_fence_reg
 	 * might drop that one, causing a use-after-free in it. So hold a
 	 * private reference to obj like the other callers of put_fence_reg
 	 * (set_tiling ioctl) do. */
-	drm_gem_object_reference(obj);
-	ret = i915_gem_object_put_fence_reg(obj, interruptible);
-	drm_gem_object_unreference(obj);
+	drm_gem_object_reference(&obj_priv->base);
+	ret = i915_gem_object_put_fence_reg(&obj_priv->base, interruptible);
+	drm_gem_object_unreference(&obj_priv->base);
 	if (ret != 0)
 		return ret;
 
-	return i;
+	return avail;
 }
 
 /**
@@ -2551,22 +2562,23 @@
 
 	switch (INTEL_INFO(dev)->gen) {
 	case 6:
-		sandybridge_write_fence_reg(reg);
+		sandybridge_write_fence_reg(obj);
 		break;
 	case 5:
 	case 4:
-		i965_write_fence_reg(reg);
+		i965_write_fence_reg(obj);
 		break;
 	case 3:
-		i915_write_fence_reg(reg);
+		i915_write_fence_reg(obj);
 		break;
 	case 2:
-		i830_write_fence_reg(reg);
+		i830_write_fence_reg(obj);
 		break;
 	}
 
-	trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
-			obj_priv->tiling_mode);
+	trace_i915_gem_object_get_fence(obj,
+					obj_priv->fence_reg,
+					obj_priv->tiling_mode);
 
 	return 0;
 }
@@ -2671,13 +2683,15 @@
 static int
 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 			    unsigned alignment,
-			    bool mappable)
+			    bool mappable,
+			    bool need_fence)
 {
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	struct drm_mm_node *free_space;
-	gfp_t gfpmask =  __GFP_NORETRY | __GFP_NOWARN;
+	gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
+	u32 size, fence_size, fence_alignment;
 	int ret;
 
 	if (obj_priv->madv != I915_MADV_WILLNEED) {
@@ -2685,13 +2699,18 @@
 		return -EINVAL;
 	}
 
+	fence_size = i915_gem_get_gtt_size(obj_priv);
+	fence_alignment = i915_gem_get_gtt_alignment(obj_priv);
+
 	if (alignment == 0)
-		alignment = i915_gem_get_gtt_alignment(obj);
-	if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
+		alignment = need_fence ? fence_alignment : 4096;
+	if (need_fence && alignment & (fence_alignment - 1)) {
 		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
 		return -EINVAL;
 	}
 
+	size = need_fence ? fence_size : obj->size;
+
 	/* If the object is bigger than the entire aperture, reject it early
 	 * before evicting everything in a vain attempt to find space.
 	 */
@@ -2705,32 +2724,29 @@
 	if (mappable)
 		free_space =
 			drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
-						    obj->size, alignment, 0,
+						    size, alignment, 0,
 						    dev_priv->mm.gtt_mappable_end,
 						    0);
 	else
 		free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
-						obj->size, alignment, 0);
+						size, alignment, 0);
 
 	if (free_space != NULL) {
 		if (mappable)
 			obj_priv->gtt_space =
 				drm_mm_get_block_range_generic(free_space,
-							       obj->size,
-							       alignment, 0,
+							       size, alignment, 0,
 							       dev_priv->mm.gtt_mappable_end,
 							       0);
 		else
 			obj_priv->gtt_space =
-				drm_mm_get_block(free_space, obj->size,
-						 alignment);
+				drm_mm_get_block(free_space, size, alignment);
 	}
 	if (obj_priv->gtt_space == NULL) {
 		/* If the gtt is empty and we're still having trouble
 		 * fitting our object in, we're out of memory.
 		 */
-		ret = i915_gem_evict_something(dev, obj->size, alignment,
-					       mappable);
+		ret = i915_gem_evict_something(dev, size, alignment, mappable);
 		if (ret)
 			return ret;
 
@@ -2744,7 +2760,7 @@
 
 		if (ret == -ENOMEM) {
 			/* first try to clear up some space from the GTT */
-			ret = i915_gem_evict_something(dev, obj->size,
+			ret = i915_gem_evict_something(dev, size,
 						       alignment, mappable);
 			if (ret) {
 				/* now try to shrink everyone else */
@@ -2775,8 +2791,8 @@
 		drm_mm_put_block(obj_priv->gtt_space);
 		obj_priv->gtt_space = NULL;
 
-		ret = i915_gem_evict_something(dev, obj->size, alignment,
-					       mappable);
+		ret = i915_gem_evict_something(dev, size,
+					       alignment, mappable);
 		if (ret)
 			return ret;
 
@@ -2787,7 +2803,7 @@
 
 	/* keep track of bounds object by adding it to the inactive list */
 	list_add_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
-	i915_gem_info_add_gtt(dev_priv, obj);
+	i915_gem_info_add_gtt(dev_priv, obj_priv);
 
 	/* Assert that the object is not currently in any GPU domain. As it
 	 * wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2798,6 +2814,13 @@
 
 	trace_i915_gem_object_bind(obj, obj_priv->gtt_offset, mappable);
 
+	obj_priv->fenceable =
+		obj_priv->gtt_space->size == fence_size &&
+		(obj_priv->gtt_space->start & (fence_alignment -1)) == 0;
+
+	obj_priv->mappable =
+		obj_priv->gtt_offset + obj->size <= dev_priv->mm.gtt_mappable_end;
+
 	return 0;
 }
 
@@ -3516,9 +3539,8 @@
 				entry->relocation_count ? true : need_fence;
 
 			/* Check fence reg constraints and rebind if necessary */
-			if (need_fence &&
-			    !i915_gem_object_fence_offset_ok(&obj->base,
-							     obj->tiling_mode)) {
+			if ((need_fence && !obj->fenceable) ||
+			    (need_mappable && !obj->mappable)) {
 				ret = i915_gem_object_unbind(&obj->base);
 				if (ret)
 					break;
@@ -3526,7 +3548,8 @@
 
 			ret = i915_gem_object_pin(&obj->base,
 						  entry->alignment,
-						  need_mappable);
+						  need_mappable,
+						  need_fence);
 			if (ret)
 				break;
 
@@ -4097,7 +4120,7 @@
 
 int
 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment,
-		    bool mappable)
+		    bool mappable, bool need_fence)
 {
 	struct drm_device *dev = obj->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4108,14 +4131,15 @@
 	WARN_ON(i915_verify_lists(dev));
 
 	if (obj_priv->gtt_space != NULL) {
-		if (alignment == 0)
-			alignment = i915_gem_get_gtt_alignment(obj);
-		if (obj_priv->gtt_offset & (alignment - 1) ||
-		    (mappable && !i915_gem_object_cpu_accessible(obj_priv))) {
+		if ((alignment && obj_priv->gtt_offset & (alignment - 1)) ||
+		    (need_fence && !obj_priv->fenceable) ||
+		    (mappable && !obj_priv->mappable)) {
 			WARN(obj_priv->pin_count,
 			     "bo is already pinned with incorrect alignment:"
-			     " offset=%x, req.alignment=%x\n",
-			     obj_priv->gtt_offset, alignment);
+			     " offset=%x, req.alignment=%x, need_fence=%d, fenceable=%d, mappable=%d, cpu_accessible=%d\n",
+			     obj_priv->gtt_offset, alignment,
+			     need_fence, obj_priv->fenceable,
+			     mappable, obj_priv->mappable);
 			ret = i915_gem_object_unbind(obj);
 			if (ret)
 				return ret;
@@ -4123,13 +4147,14 @@
 	}
 
 	if (obj_priv->gtt_space == NULL) {
-		ret = i915_gem_object_bind_to_gtt(obj, alignment, mappable);
+		ret = i915_gem_object_bind_to_gtt(obj, alignment,
+						  mappable, need_fence);
 		if (ret)
 			return ret;
 	}
 
 	if (obj_priv->pin_count++ == 0) {
-		i915_gem_info_add_pin(dev_priv, obj, mappable);
+		i915_gem_info_add_pin(dev_priv, obj_priv, mappable);
 		if (!obj_priv->active)
 			list_move_tail(&obj_priv->mm_list,
 				       &dev_priv->mm.pinned_list);
@@ -4155,7 +4180,7 @@
 		if (!obj_priv->active)
 			list_move_tail(&obj_priv->mm_list,
 				       &dev_priv->mm.inactive_list);
-		i915_gem_info_remove_pin(dev_priv, obj);
+		i915_gem_info_remove_pin(dev_priv, obj_priv);
 	}
 	WARN_ON(i915_verify_lists(dev));
 }
@@ -4196,7 +4221,8 @@
 	obj_priv->user_pin_count++;
 	obj_priv->pin_filp = file_priv;
 	if (obj_priv->user_pin_count == 1) {
-		ret = i915_gem_object_pin(obj, args->alignment, true);
+		ret = i915_gem_object_pin(obj, args->alignment,
+					  true, obj_priv->tiling_mode);
 		if (ret)
 			goto out;
 	}
@@ -4389,6 +4415,8 @@
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->gpu_write_list);
 	obj->madv = I915_MADV_WILLNEED;
+	obj->fenceable = true;
+	obj->mappable = true;
 
 	return &obj->base;
 }
@@ -4508,7 +4536,7 @@
 	obj_priv = to_intel_bo(obj);
 	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
 
-	ret = i915_gem_object_pin(obj, 4096, true);
+	ret = i915_gem_object_pin(obj, 4096, true, false);
 	if (ret)
 		goto err_unref;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index af352de..0597a73 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -181,7 +181,7 @@
 }
 
 /* Check pitch constriants for all chips & tiling formats */
-bool
+static bool
 i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 {
 	int tile_width;
@@ -232,25 +232,35 @@
 	return true;
 }
 
-bool
-i915_gem_object_fence_offset_ok(struct drm_gem_object *obj, int tiling_mode)
+/* Is the current GTT allocation valid for the change in tiling? */
+static bool
+i915_gem_object_fence_ok(struct drm_gem_object *obj, int tiling_mode)
 {
-	struct drm_device *dev = obj->dev;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
-
-	if (obj_priv->gtt_space == NULL)
-		return true;
+	u32 size;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return true;
 
-	if (INTEL_INFO(dev)->gen >= 4)
+	if (INTEL_INFO(obj->dev)->gen >= 4)
 		return true;
 
-	if (obj_priv->gtt_offset & (obj->size - 1))
+	/*
+	 * Previous chips need to be aligned to the size of the smallest
+	 * fence register that can contain the object.
+	 */
+	if (INTEL_INFO(obj->dev)->gen == 3)
+		size = 1024*1024;
+	else
+		size = 512*1024;
+
+	while (size < obj_priv->base.size)
+		size <<= 1;
+
+	if (obj_priv->gtt_offset & (size - 1))
 		return false;
 
-	if (IS_GEN3(dev)) {
+	if (INTEL_INFO(obj->dev)->gen == 3) {
 		if (obj_priv->gtt_offset & ~I915_FENCE_START_MASK)
 			return false;
 	} else {
@@ -331,7 +341,7 @@
 		 * tiling mode. Otherwise we can just leave it alone, but
 		 * need to ensure that any fence register is cleared.
 		 */
-		if (!i915_gem_object_fence_offset_ok(obj, args->tiling_mode))
+		if (!i915_gem_object_fence_ok(obj, args->tiling_mode))
 			ret = i915_gem_object_unbind(obj);
 		else if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
 			ret = i915_gem_object_put_fence_reg(obj, true);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c9c4c70..4954af2 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1461,7 +1461,8 @@
 		BUG();
 	}
 
-	ret = i915_gem_object_pin(obj, alignment, true);
+	ret = i915_gem_object_pin(obj, alignment,
+				  !pipelined, obj_priv->tiling_mode);
 	if (ret)
 		return ret;
 
@@ -4353,7 +4354,7 @@
 	/* we only need to pin inside GTT if cursor is non-phy */
 	mutex_lock(&dev->struct_mutex);
 	if (!dev_priv->info->cursor_needs_physical) {
-		ret = i915_gem_object_pin(bo, PAGE_SIZE, true);
+		ret = i915_gem_object_pin(bo, PAGE_SIZE, true, false);
 		if (ret) {
 			DRM_ERROR("failed to pin cursor bo\n");
 			goto fail_locked;
@@ -5517,7 +5518,7 @@
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	ret = i915_gem_object_pin(ctx, 4096, true);
+	ret = i915_gem_object_pin(ctx, 4096, false, false);
 	if (ret) {
 		DRM_ERROR("failed to pin power context: %d\n", ret);
 		goto err_unref;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index beda201..e62e1b3 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -781,7 +781,7 @@
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_object_pin(new_bo, PAGE_SIZE, true);
+	ret = i915_gem_object_pin(new_bo, PAGE_SIZE, false, false);
 	if (ret != 0)
 		return ret;
 
@@ -1423,7 +1423,7 @@
                 }
 		overlay->flip_addr = overlay->reg_bo->phys_obj->handle->busaddr;
 	} else {
-		ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true);
+		ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true, false);
 		if (ret) {
                         DRM_ERROR("failed to pin overlay register bo\n");
                         goto out_free_bo;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e88214e..632a98e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -547,7 +547,7 @@
 	obj_priv = to_intel_bo(obj);
 	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
 
-	ret = i915_gem_object_pin(obj, 4096, true);
+	ret = i915_gem_object_pin(obj, 4096, true, false);
 	if (ret != 0) {
 		goto err_unref;
 	}
@@ -603,7 +603,7 @@
 
 	ring->gem_object = obj;
 
-	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
+	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
 	if (ret)
 		goto err_unref;
 
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8c641be..b20dbb2 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -287,6 +287,7 @@
 #define I915_PARAM_HAS_EXECBUF2          9
 #define I915_PARAM_HAS_BSD		 10
 #define I915_PARAM_HAS_BLT		 11
+#define I915_PARAM_HAS_RELAXED_FENCING	 12
 
 typedef struct drm_i915_getparam {
 	int param;