Merge branch 'topic/bxt-stage1' into drm-intel-next-queued

The separate topic branch for bxt didn't work out, since we needed to
refactor the gmbus code a bit to make it look decent. So backmerge.

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index b3989e2..b49dc51 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1289,6 +1289,29 @@
 EXPORT_SYMBOL(drm_plane_index);
 
 /**
+ * drm_plane_from_index - find the registered plane at an index
+ * @dev: DRM device
+ * @idx: index of the registered plane to find
+ *
+ * Given a plane index, return the registered plane from the DRM device's
+ * list of planes with a matching index.
+ */
+struct drm_plane *
+drm_plane_from_index(struct drm_device *dev, int idx)
+{
+	struct drm_plane *plane;
+	unsigned int i = 0;
+
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+		if (i == idx)
+			return plane;
+		i++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(drm_plane_from_index);
+
+/**
  * drm_plane_force_disable - Forcibly disable a plane
  * @plane: plane to disable
  *
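
A minimal usage sketch for drm_plane_from_index() (hypothetical caller, not part of this patch): resolving a plane mask, such as one built from drm_plane_index(), back into plane pointers.

static void example_log_planes_in_mask(struct drm_device *dev, u32 plane_mask)
{
	int i;

	for (i = 0; i < 32; i++) {
		struct drm_plane *plane;

		if (!(plane_mask & (1u << i)))
			continue;

		/* The bit position matches what drm_plane_index() returned. */
		plane = drm_plane_from_index(dev, i);
		if (plane)
			DRM_DEBUG_KMS("plane %d -> object id %u\n",
				      i, plane->base.id);
	}
}
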
diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/dvo_ivch.c
index 0f2587ff..89b08a8 100644
--- a/drivers/gpu/drm/i915/dvo_ivch.c
+++ b/drivers/gpu/drm/i915/dvo_ivch.c
@@ -23,6 +23,9 @@
  * Authors:
  *    Eric Anholt <eric@anholt.net>
  *
+ * Minor modifications (Dithering enable):
+ *    Thomas Richter <thor@math.tu-berlin.de>
+ *
  */
 
 #include "dvo.h"
@@ -59,6 +62,8 @@
 # define VR01_DVO_BYPASS_ENABLE		(1 << 1)
 /** Enables the DVO clock */
 # define VR01_DVO_ENABLE		(1 << 0)
+/** Enable dithering for 18bpp panels. Not documented. */
+# define VR01_DITHER_ENABLE             (1 << 4)
 
 /*
  * LCD Interface Format
@@ -74,6 +79,8 @@
 # define VR10_INTERFACE_2X18		(2 << 2)
 /** Enables 2x24-bit LVDS output */
 # define VR10_INTERFACE_2X24		(3 << 2)
+/** Mask that defines the depth of the pipeline */
+# define VR10_INTERFACE_DEPTH_MASK      (3 << 2)
 
 /*
  * VR20 LCD Horizontal Display Size
@@ -342,9 +349,15 @@
 			  struct drm_display_mode *adjusted_mode)
 {
 	uint16_t vr40 = 0;
-	uint16_t vr01;
+	uint16_t vr01 = 0;
+	uint16_t vr10;
 
-	vr01 = 0;
+	ivch_read(dvo, VR10, &vr10);
+	/* Enable dithering for 18 bpp pipelines */
+	vr10 &= VR10_INTERFACE_DEPTH_MASK;
+	if (vr10 == VR10_INTERFACE_2X18 || vr10 == VR10_INTERFACE_1X18)
+		vr01 = VR01_DITHER_ENABLE;
+
 	vr40 = (VR40_STALL_ENABLE | VR40_VERTICAL_INTERP_ENABLE |
 		VR40_HORIZONTAL_INTERP_ENABLE);
 
@@ -353,7 +366,7 @@
 		uint16_t x_ratio, y_ratio;
 
 		vr01 |= VR01_PANEL_FIT_ENABLE;
-		vr40 |= VR40_CLOCK_GATING_ENABLE;
+		vr40 |= VR40_CLOCK_GATING_ENABLE | VR40_ENHANCED_PANEL_FITTING;
 		x_ratio = (((mode->hdisplay - 1) << 16) /
 			   (adjusted_mode->hdisplay - 1)) >> 2;
 		y_ratio = (((mode->vdisplay - 1) << 16) /
@@ -380,6 +393,8 @@
 	DRM_DEBUG_KMS("VR00: 0x%04x\n", val);
 	ivch_read(dvo, VR01, &val);
 	DRM_DEBUG_KMS("VR01: 0x%04x\n", val);
+	ivch_read(dvo, VR10, &val);
+	DRM_DEBUG_KMS("VR10: 0x%04x\n", val);
 	ivch_read(dvo, VR30, &val);
 	DRM_DEBUG_KMS("VR30: 0x%04x\n", val);
 	ivch_read(dvo, VR40, &val);
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 61ae8ff..9605ff8 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -869,6 +869,9 @@
 	    batch_len + batch_start_offset > src_obj->base.size)
 		return ERR_PTR(-E2BIG);
 
+	if (WARN_ON(dest_obj->pages_pin_count == 0))
+		return ERR_PTR(-ENODEV);
+
 	ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
@@ -882,13 +885,6 @@
 		goto unpin_src;
 	}
 
-	ret = i915_gem_object_get_pages(dest_obj);
-	if (ret) {
-		DRM_DEBUG_DRIVER("CMD: Failed to get pages for shadow batch\n");
-		goto unmap_src;
-	}
-	i915_gem_object_pin_pages(dest_obj);
-
 	ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
@@ -898,7 +894,6 @@
 	dst = vmap_batch(dest_obj, 0, batch_len);
 	if (!dst) {
 		DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
-		i915_gem_object_unpin_pages(dest_obj);
 		ret = -ENOMEM;
 		goto unmap_src;
 	}
@@ -1129,7 +1124,6 @@
 	}
 
 	vunmap(batch_base);
-	i915_gem_object_unpin_pages(shadow_batch_obj);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index effb7fc..2394924 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -123,8 +123,9 @@
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
+	seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
 		   &obj->base,
+		   obj->active ? "*" : " ",
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
 		   get_global_flag(obj),
@@ -159,9 +160,9 @@
 	}
 	if (obj->stolen)
 		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
-	if (obj->pin_mappable || obj->fault_mappable) {
+	if (obj->pin_display || obj->fault_mappable) {
 		char s[3], *t = s;
-		if (obj->pin_mappable)
+		if (obj->pin_display)
 			*t++ = 'p';
 		if (obj->fault_mappable)
 			*t++ = 'f';
@@ -361,31 +362,39 @@
 	return 0;
 }
 
-#define print_file_stats(m, name, stats) \
-	seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive, %zu global, %zu shared, %zu unbound)\n", \
-		   name, \
-		   stats.count, \
-		   stats.total, \
-		   stats.active, \
-		   stats.inactive, \
-		   stats.global, \
-		   stats.shared, \
-		   stats.unbound)
+#define print_file_stats(m, name, stats) do { \
+	if (stats.count) \
+		seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive, %zu global, %zu shared, %zu unbound)\n", \
+			   name, \
+			   stats.count, \
+			   stats.total, \
+			   stats.active, \
+			   stats.inactive, \
+			   stats.global, \
+			   stats.shared, \
+			   stats.unbound); \
+} while (0)
 
 static void print_batch_pool_stats(struct seq_file *m,
 				   struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj;
 	struct file_stats stats;
+	struct intel_engine_cs *ring;
+	int i, j;
 
 	memset(&stats, 0, sizeof(stats));
 
-	list_for_each_entry(obj,
-			    &dev_priv->mm.batch_pool.cache_list,
-			    batch_pool_list)
-		per_file_stats(0, obj, &stats);
+	for_each_ring(ring, dev_priv, i) {
+		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link)
+				per_file_stats(0, obj, &stats);
+		}
+	}
 
-	print_file_stats(m, "batch pool", stats);
+	print_file_stats(m, "[k]batch pool", stats);
 }
 
 #define count_vmas(list, member) do { \
@@ -449,7 +458,7 @@
 			size += i915_gem_obj_ggtt_size(obj);
 			++count;
 		}
-		if (obj->pin_mappable) {
+		if (obj->pin_display) {
 			mappable_size += i915_gem_obj_ggtt_size(obj);
 			++mappable_count;
 		}
@@ -471,8 +480,6 @@
 
 	seq_putc(m, '\n');
 	print_batch_pool_stats(m, dev_priv);
-
-	seq_putc(m, '\n');
 	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
 		struct file_stats stats;
 		struct task_struct *task;
@@ -613,24 +620,39 @@
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
-	int count = 0;
-	int ret;
+	struct intel_engine_cs *ring;
+	int total = 0;
+	int ret, i, j;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	seq_puts(m, "cache:\n");
-	list_for_each_entry(obj,
-			    &dev_priv->mm.batch_pool.cache_list,
-			    batch_pool_list) {
-		seq_puts(m, "   ");
-		describe_obj(m, obj);
-		seq_putc(m, '\n');
-		count++;
+	for_each_ring(ring, dev_priv, i) {
+		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+			int count;
+
+			count = 0;
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link)
+				count++;
+			seq_printf(m, "%s cache[%d]: %d objects\n",
+				   ring->name, j, count);
+
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link) {
+				seq_puts(m, "   ");
+				describe_obj(m, obj);
+				seq_putc(m, '\n');
+			}
+
+			total += count;
+		}
 	}
 
-	seq_printf(m, "total: %d\n", count);
+	seq_printf(m, "total: %d\n", total);
 
 	mutex_unlock(&dev->struct_mutex);
 
@@ -643,31 +665,44 @@
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	struct drm_i915_gem_request *gem_request;
-	int ret, count, i;
+	struct drm_i915_gem_request *rq;
+	int ret, any, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	count = 0;
+	any = 0;
 	for_each_ring(ring, dev_priv, i) {
-		if (list_empty(&ring->request_list))
+		int count;
+
+		count = 0;
+		list_for_each_entry(rq, &ring->request_list, list)
+			count++;
+		if (count == 0)
 			continue;
 
-		seq_printf(m, "%s requests:\n", ring->name);
-		list_for_each_entry(gem_request,
-				    &ring->request_list,
-				    list) {
-			seq_printf(m, "    %x @ %d\n",
-				   gem_request->seqno,
-				   (int) (jiffies - gem_request->emitted_jiffies));
+		seq_printf(m, "%s requests: %d\n", ring->name, count);
+		list_for_each_entry(rq, &ring->request_list, list) {
+			struct task_struct *task;
+
+			rcu_read_lock();
+			task = NULL;
+			if (rq->pid)
+				task = pid_task(rq->pid, PIDTYPE_PID);
+			seq_printf(m, "    %x @ %d: %s [%d]\n",
+				   rq->seqno,
+				   (int) (jiffies - rq->emitted_jiffies),
+				   task ? task->comm : "<unknown>",
+				   task ? task->pid : -1);
+			rcu_read_unlock();
 		}
-		count++;
+
+		any++;
 	}
 	mutex_unlock(&dev->struct_mutex);
 
-	if (count == 0)
+	if (any == 0)
 		seq_puts(m, "No requests\n");
 
 	return 0;
@@ -2153,8 +2188,6 @@
 	if (!ppgtt)
 		return;
 
-	seq_printf(m, "Page directories: %d\n", ppgtt->num_pd_pages);
-	seq_printf(m, "Page tables: %d\n", ppgtt->num_pd_entries);
 	for_each_ring(ring, dev_priv, unused) {
 		seq_printf(m, "%s\n", ring->name);
 		for (i = 0; i < 4; i++) {
@@ -2226,6 +2259,44 @@
 	return 0;
 }
 
+static int i915_rps_boost_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_file *file;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
+	if (ret)
+		goto unlock;
+
+	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
+		struct drm_i915_file_private *file_priv = file->driver_priv;
+		struct task_struct *task;
+
+		rcu_read_lock();
+		task = pid_task(file->pid, PIDTYPE_PID);
+		seq_printf(m, "%s [%d]: %d boosts%s\n",
+			   task ? task->comm : "<unknown>",
+			   task ? task->pid : -1,
+			   file_priv->rps_boosts,
+			   list_empty(&file_priv->rps_boost) ? "" : ", active");
+		rcu_read_unlock();
+	}
+	seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+
+	mutex_unlock(&dev_priv->rps.hw_lock);
+unlock:
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+
 static int i915_llc(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = m->private;
@@ -4738,6 +4809,7 @@
 	{"i915_ddb_info", i915_ddb_info, 0},
 	{"i915_sseu_status", i915_sseu_status, 0},
 	{"i915_drrs_status", i915_drrs_status, 0},
+	{"i915_rps_boost_info", i915_rps_boost_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
@@ -4827,3 +4899,99 @@
 		drm_debugfs_remove_files(info_list, 1, minor);
 	}
 }
+
+struct dpcd_block {
+	/* DPCD dump start address. */
+	unsigned int offset;
+	/* DPCD dump end address, inclusive. If unset, .size will be used. */
+	unsigned int end;
+	/* DPCD dump size. Used if .end is unset. If zero, defaults to 1. */
+	size_t size;
+	/* Only valid for eDP. */
+	bool edp;
+};
+
+static const struct dpcd_block i915_dpcd_debug[] = {
+	{ .offset = DP_DPCD_REV, .size = DP_RECEIVER_CAP_SIZE },
+	{ .offset = DP_PSR_SUPPORT, .end = DP_PSR_CAPS },
+	{ .offset = DP_DOWNSTREAM_PORT_0, .size = 16 },
+	{ .offset = DP_LINK_BW_SET, .end = DP_EDP_CONFIGURATION_SET },
+	{ .offset = DP_SINK_COUNT, .end = DP_ADJUST_REQUEST_LANE2_3 },
+	{ .offset = DP_SET_POWER },
+	{ .offset = DP_EDP_DPCD_REV },
+	{ .offset = DP_EDP_GENERAL_CAP_1, .end = DP_EDP_GENERAL_CAP_3 },
+	{ .offset = DP_EDP_DISPLAY_CONTROL_REGISTER, .end = DP_EDP_BACKLIGHT_FREQ_CAP_MAX_LSB },
+	{ .offset = DP_EDP_DBC_MINIMUM_BRIGHTNESS_SET, .end = DP_EDP_DBC_MAXIMUM_BRIGHTNESS_SET },
+};
+
+static int i915_dpcd_show(struct seq_file *m, void *data)
+{
+	struct drm_connector *connector = m->private;
+	struct intel_dp *intel_dp =
+		enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+	uint8_t buf[16];
+	ssize_t err;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(i915_dpcd_debug); i++) {
+		const struct dpcd_block *b = &i915_dpcd_debug[i];
+		size_t size = b->end ? b->end - b->offset + 1 : (b->size ?: 1);
+
+		if (b->edp &&
+		    connector->connector_type != DRM_MODE_CONNECTOR_eDP)
+			continue;
+
+		/* low tech for now */
+		if (WARN_ON(size > sizeof(buf)))
+			continue;
+
+		err = drm_dp_dpcd_read(&intel_dp->aux, b->offset, buf, size);
+		if (err <= 0) {
+			DRM_ERROR("dpcd read (%zu bytes at %u) failed (%zd)\n",
+				  size, b->offset, err);
+			continue;
+		}
+
+		seq_printf(m, "%04x: %*ph\n", b->offset, (int) size, buf);
+	}
+
+	return 0;
+}
+
+static int i915_dpcd_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, i915_dpcd_show, inode->i_private);
+}
+
+static const struct file_operations i915_dpcd_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_dpcd_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/**
+ * i915_debugfs_connector_add - add i915 specific connector debugfs files
+ * @connector: pointer to a registered drm_connector
+ *
+ * Cleanup will be done by drm_connector_unregister() through a call to
+ * drm_debugfs_connector_remove().
+ *
+ * Returns 0 on success, negative error codes on error.
+ */
+int i915_debugfs_connector_add(struct drm_connector *connector)
+{
+	struct dentry *root = connector->debugfs_entry;
+
+	/* The connector must have been registered beforehand. */
+	if (!root)
+		return -ENODEV;
+
+	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+	    connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+		debugfs_create_file("i915_dpcd", S_IRUGO, root, connector,
+				    &i915_dpcd_fops);
+
+	return 0;
+}
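
A minimal sketch of the intended call site for i915_debugfs_connector_add() (hypothetical wrapper; the real i915 registration path is not part of this hunk). The connector must already be registered so that connector->debugfs_entry is populated.

static int example_register_connector(struct drm_connector *connector)
{
	int ret;

	ret = drm_connector_register(connector);
	if (ret)
		return ret;

	/* Adds i915_dpcd for DP/eDP connectors; removed again by
	 * drm_connector_unregister() via drm_debugfs_connector_remove().
	 */
	return i915_debugfs_connector_add(connector);
}
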
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a9b7770..e44116f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1039,8 +1039,12 @@
 put_bridge:
 	pci_dev_put(dev_priv->bridge_dev);
 free_priv:
-	if (dev_priv->slab)
-		kmem_cache_destroy(dev_priv->slab);
+	if (dev_priv->requests)
+		kmem_cache_destroy(dev_priv->requests);
+	if (dev_priv->vmas)
+		kmem_cache_destroy(dev_priv->vmas);
+	if (dev_priv->objects)
+		kmem_cache_destroy(dev_priv->objects);
 	kfree(dev_priv);
 	return ret;
 }
@@ -1105,7 +1109,6 @@
 
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_cleanup_ringbuffer(dev);
-	i915_gem_batch_pool_fini(&dev_priv->mm.batch_pool);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 	i915_gem_cleanup_stolen(dev);
@@ -1124,8 +1127,12 @@
 	if (dev_priv->regs != NULL)
 		pci_iounmap(dev->pdev, dev_priv->regs);
 
-	if (dev_priv->slab)
-		kmem_cache_destroy(dev_priv->slab);
+	if (dev_priv->requests)
+		kmem_cache_destroy(dev_priv->requests);
+	if (dev_priv->vmas)
+		kmem_cache_destroy(dev_priv->vmas);
+	if (dev_priv->objects)
+		kmem_cache_destroy(dev_priv->objects);
 
 	pci_dev_put(dev_priv->bridge_dev);
 	kfree(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 48434cb6..c1a3cdb5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1207,11 +1207,13 @@
 	int err;
 
 	val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
-	WARN_ON(!!(val & VLV_GFX_CLK_FORCE_ON_BIT) == force_on);
 
 #define COND (I915_READ(VLV_GTLC_SURVIVABILITY_REG) & VLV_GFX_CLK_STATUS_BIT)
 	/* Wait for a previous force-off to settle */
-	if (force_on) {
+	if (force_on && !IS_CHERRYVIEW(dev_priv->dev)) {
+		/* WARN_ON only for Valleyview */
+		WARN_ON(!!(val & VLV_GFX_CLK_FORCE_ON_BIT) == force_on);
+
 		err = wait_for(!COND, 20);
 		if (err) {
 			DRM_ERROR("timeout waiting for GFX clock force-off (%08x)\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 99facbf..3b21249 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -56,7 +56,7 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20150327"
+#define DRIVER_DATE		"20150410"
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -455,6 +455,7 @@
 		u32 semaphore_seqno[I915_NUM_RINGS - 1];
 
 		/* Register state */
+		u32 start;
 		u32 tail;
 		u32 head;
 		u32 ctl;
@@ -881,6 +882,8 @@
 	struct delayed_work work;
 	unsigned busy_frontbuffer_bits;
 	bool link_standby;
+	bool psr2_support;
+	bool aux_frame_sync;
 };
 
 enum intel_pch {
@@ -1033,11 +1036,16 @@
 	u8 rp0_freq;		/* Non-overclocked max frequency. */
 	u32 cz_freq;
 
+	u8 up_threshold; /* Current %busy required to upclock */
+	u8 down_threshold; /* Current %busy required to downclock */
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
 	bool enabled;
 	struct delayed_work delayed_resume_work;
+	struct list_head clients;
+	unsigned boosts;
 
 	/* manual wa residency calculations */
 	struct intel_rps_ei up_ei, down_ei;
@@ -1135,11 +1143,6 @@
 	int which_slice;
 };
 
-struct i915_gem_batch_pool {
-	struct drm_device *dev;
-	struct list_head cache_list;
-};
-
 struct i915_gem_mm {
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
@@ -1153,13 +1156,6 @@
 	 */
 	struct list_head unbound_list;
 
-	/*
-	 * A pool of objects to use as shadow copies of client batch buffers
-	 * when the command parser is enabled. Prevents the client from
-	 * modifying the batch contents after software parsing.
-	 */
-	struct i915_gem_batch_pool batch_pool;
-
 	/** Usable portion of the GTT for GEM */
 	unsigned long stolen_base; /* limited to low memory (32-bit) */
 
@@ -1562,7 +1558,9 @@
 
 struct drm_i915_private {
 	struct drm_device *dev;
-	struct kmem_cache *slab;
+	struct kmem_cache *objects;
+	struct kmem_cache *vmas;
+	struct kmem_cache *requests;
 
 	const struct intel_device_info info;
 
@@ -1574,8 +1572,7 @@
 
 	struct i915_virtual_gpu vgpu;
 
-	struct intel_gmbus gmbus[GMBUS_NUM_PORTS];
-
+	struct intel_gmbus gmbus[GMBUS_NUM_PINS];
 
 	/** gmbus_mutex protects against concurrent usage of the single hw gmbus
 	 * controller on different i2c buses. */
@@ -1814,20 +1811,18 @@
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
-		int (*do_execbuf)(struct drm_device *dev, struct drm_file *file,
-				  struct intel_engine_cs *ring,
-				  struct intel_context *ctx,
-				  struct drm_i915_gem_execbuffer2 *args,
-				  struct list_head *vmas,
-				  struct drm_i915_gem_object *batch_obj,
-				  u64 exec_start, u32 flags);
+		int (*execbuf_submit)(struct drm_device *dev, struct drm_file *file,
+				      struct intel_engine_cs *ring,
+				      struct intel_context *ctx,
+				      struct drm_i915_gem_execbuffer2 *args,
+				      struct list_head *vmas,
+				      struct drm_i915_gem_object *batch_obj,
+				      u64 exec_start, u32 flags);
 		int (*init_rings)(struct drm_device *dev);
 		void (*cleanup_ring)(struct intel_engine_cs *ring);
 		void (*stop_ring)(struct intel_engine_cs *ring);
 	} gt;
 
-	uint32_t request_uniq;
-
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
@@ -1916,7 +1911,7 @@
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
-	struct list_head batch_pool_list;
+	struct list_head batch_pool_link;
 
 	/**
 	 * This is set if the object is on the active lists (has pending
@@ -1968,7 +1963,6 @@
 	 * accurate mappable working set.
 	 */
 	unsigned int fault_mappable:1;
-	unsigned int pin_mappable:1;
 	unsigned int pin_display:1;
 
 	/*
@@ -1985,6 +1979,10 @@
 
 	struct sg_table *pages;
 	int pages_pin_count;
+	struct get_page {
+		struct scatterlist *sg;
+		int last;
+	} get_page;
 
 	/* prime dma-buf support */
 	void *dma_buf_vmapping;
@@ -2045,6 +2043,7 @@
 	struct kref ref;
 
 	/** On Which ring this request was generated */
+	struct drm_i915_private *i915;
 	struct intel_engine_cs *ring;
 
 	/** GEM sequence number associated with this request. */
@@ -2092,8 +2091,6 @@
 	/** process identifier submitting this request */
 	struct pid *pid;
 
-	uint32_t uniq;
-
 	/**
 	 * The ELSP only accepts two elements at a time, so we queue
 	 * context/tail pairs on a given queue (ring->execlist_queue) until the
@@ -2115,6 +2112,8 @@
 
 };
 
+int i915_gem_request_alloc(struct intel_engine_cs *ring,
+			   struct intel_context *ctx);
 void i915_gem_request_free(struct kref *req_ref);
 
 static inline uint32_t
@@ -2142,6 +2141,19 @@
 	kref_put(&req->ref, i915_gem_request_free);
 }
 
+static inline void
+i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
+{
+	struct drm_device *dev;
+
+	if (!req)
+		return;
+
+	dev = req->ring->dev;
+	if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex))
+		mutex_unlock(&dev->struct_mutex);
+}
+
 static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 					   struct drm_i915_gem_request *src)
 {
@@ -2167,12 +2179,13 @@
 	struct {
 		spinlock_t lock;
 		struct list_head request_list;
-		struct delayed_work idle_work;
 	} mm;
 	struct idr context_idr;
 
-	atomic_t rps_wait_boost;
-	struct  intel_engine_cs *bsd_ring;
+	struct list_head rps_boost;
+	struct intel_engine_cs *bsd_ring;
+
+	unsigned rps_boosts;
 };
 
 /*
@@ -2524,6 +2537,13 @@
 				enum forcewake_domains domains);
 void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
 				enum forcewake_domains domains);
+/* Like above but the caller must manage the uncore.lock itself.
+ * Must be used with I915_READ_FW and friends.
+ */
+void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv,
+					enum forcewake_domains domains);
+void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv,
+					enum forcewake_domains domains);
 void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
 static inline bool intel_vgpu_active(struct drm_device *dev)
 {
@@ -2645,15 +2665,32 @@
 				    int *needs_clflush);
 
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
-static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
+
+static inline int __sg_page_count(struct scatterlist *sg)
 {
-	struct sg_page_iter sg_iter;
-
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, n)
-		return sg_page_iter_page(&sg_iter);
-
-	return NULL;
+	return sg->length >> PAGE_SHIFT;
 }
+
+static inline struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
+{
+	if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
+		return NULL;
+
+	if (n < obj->get_page.last) {
+		obj->get_page.sg = obj->pages->sgl;
+		obj->get_page.last = 0;
+	}
+
+	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
+		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
+		if (unlikely(sg_is_chain(obj->get_page.sg)))
+			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
+	}
+
+	return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
+}
+
 static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
 	BUG_ON(obj->pages == NULL);
@@ -2997,8 +3034,10 @@
 int i915_debugfs_init(struct drm_minor *minor);
 void i915_debugfs_cleanup(struct drm_minor *minor);
 #ifdef CONFIG_DEBUG_FS
+int i915_debugfs_connector_add(struct drm_connector *connector);
 void intel_display_crc_init(struct drm_device *dev);
 #else
+static inline int i915_debugfs_connector_add(struct drm_connector *connector) { return 0; }
 static inline void intel_display_crc_init(struct drm_device *dev) {}
 #endif
 
@@ -3025,13 +3064,6 @@
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
-/* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
-			      struct i915_gem_batch_pool *pool);
-void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
-struct drm_i915_gem_object*
-i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
-
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
@@ -3055,13 +3087,11 @@
 /* intel_i2c.c */
 extern int intel_setup_gmbus(struct drm_device *dev);
 extern void intel_teardown_gmbus(struct drm_device *dev);
-static inline bool intel_gmbus_is_port_valid(unsigned port)
-{
-	return (port >= GMBUS_PORT_SSC && port <= GMBUS_PORT_DPD);
-}
+extern bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
+				     unsigned int pin);
 
-extern struct i2c_adapter *intel_gmbus_get_adapter(
-		struct drm_i915_private *dev_priv, unsigned port);
+extern struct i2c_adapter *
+intel_gmbus_get_adapter(struct drm_i915_private *dev_priv, unsigned int pin);
 extern void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed);
 extern void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit);
 static inline bool intel_gmbus_is_forced_bit(struct i2c_adapter *adapter)
@@ -3207,6 +3237,17 @@
 #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
+/* These are untraced mmio-accessors that are only valid to be used inside
+ * critical sections inside IRQ handlers where forcewake is explicitly
+ * controlled.
+ * Think twice, and think again, before using these.
+ * Note: Should only be used between intel_uncore_forcewake_irqlock() and
+ * intel_uncore_forcewake_irqunlock().
+ */
+#define I915_READ_FW(reg__) readl(dev_priv->regs + (reg__))
+#define I915_WRITE_FW(reg__, val__) writel(val__, dev_priv->regs + (reg__))
+#define POSTING_READ_FW(reg__) (void)I915_READ_FW(reg__)
+
 /* "Broadcast RGB" property */
 #define INTEL_BROADCAST_RGB_AUTO 0
 #define INTEL_BROADCAST_RGB_FULL 1
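
A minimal sketch of the pattern the I915_READ_FW()/I915_WRITE_FW() comment describes (hypothetical helper; assumes dev_priv->uncore.lock guards these accessors and uses the *__locked forcewake variants and FORCEWAKE_ALL declared elsewhere in this header):

static u32 example_read_reg_fw(struct drm_i915_private *dev_priv, u32 reg)
{
	unsigned long flags;
	u32 val;

	/* Take the uncore lock and grab forcewake once, then use the
	 * untraced accessors for the whole critical section.
	 */
	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);

	val = I915_READ_FW(reg);

	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);

	return val;
}
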
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d07c0b1..89d9ebe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -378,13 +378,13 @@
 void *i915_gem_object_alloc(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL);
+	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
 }
 
 void i915_gem_object_free(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	kmem_cache_free(dev_priv->slab, obj);
+	kmem_cache_free(dev_priv->objects, obj);
 }
 
 static int
@@ -1181,12 +1181,27 @@
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static bool can_wait_boost(struct drm_i915_file_private *file_priv)
+static int __i915_spin_request(struct drm_i915_gem_request *rq)
 {
-	if (file_priv == NULL)
-		return true;
+	unsigned long timeout;
 
-	return !atomic_xchg(&file_priv->rps_wait_boost, true);
+	if (i915_gem_request_get_ring(rq)->irq_refcount)
+		return -EBUSY;
+
+	timeout = jiffies + 1;
+	while (!need_resched()) {
+		if (i915_gem_request_completed(rq, true))
+			return 0;
+
+		if (time_after_eq(jiffies, timeout))
+			break;
+
+		cpu_relax_lowlatency();
+	}
+	if (i915_gem_request_completed(rq, false))
+		return 0;
+
+	return -EAGAIN;
 }
 
 /**
@@ -1230,20 +1245,23 @@
 	timeout_expire = timeout ?
 		jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
 
-	if (INTEL_INFO(dev)->gen >= 6 && ring->id == RCS && can_wait_boost(file_priv)) {
-		gen6_rps_boost(dev_priv);
-		if (file_priv)
-			mod_delayed_work(dev_priv->wq,
-					 &file_priv->mm.idle_work,
-					 msecs_to_jiffies(100));
-	}
-
-	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
-		return -ENODEV;
+	if (INTEL_INFO(dev)->gen >= 6)
+		gen6_rps_boost(dev_priv, file_priv);
 
 	/* Record current time in case interrupted by signal, or wedged */
 	trace_i915_gem_request_wait_begin(req);
 	before = ktime_get_raw_ns();
+
+	/* Optimistic spin for the next jiffie before touching IRQs */
+	ret = __i915_spin_request(req);
+	if (ret == 0)
+		goto out;
+
+	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
+		ret = -ENODEV;
+		goto out;
+	}
+
 	for (;;) {
 		struct timer_list timer;
 
@@ -1292,14 +1310,15 @@
 			destroy_timer_on_stack(&timer);
 		}
 	}
-	now = ktime_get_raw_ns();
-	trace_i915_gem_request_wait_end(req);
-
 	if (!irq_test_in_progress)
 		ring->irq_put(ring);
 
 	finish_wait(&ring->irq_queue, &wait);
 
+out:
+	now = ktime_get_raw_ns();
+	trace_i915_gem_request_wait_end(req);
+
 	if (timeout) {
 		s64 tres = *timeout - (now - before);
 
@@ -2178,6 +2197,10 @@
 		return ret;
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+
+	obj->get_page.sg = obj->pages->sgl;
+	obj->get_page.last = 0;
+
 	return 0;
 }
 
@@ -2420,7 +2443,6 @@
 
 	i915_queue_hangcheck(ring->dev);
 
-	cancel_delayed_work_sync(&dev_priv->mm.idle_work);
 	queue_delayed_work(dev_priv->wq,
 			   &dev_priv->mm.retire_work,
 			   round_jiffies_up_relative(HZ));
@@ -2515,7 +2537,45 @@
 		i915_gem_context_unreference(ctx);
 	}
 
-	kfree(req);
+	kmem_cache_free(req->i915->requests, req);
+}
+
+int i915_gem_request_alloc(struct intel_engine_cs *ring,
+			   struct intel_context *ctx)
+{
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	struct drm_i915_gem_request *rq;
+	int ret;
+
+	if (ring->outstanding_lazy_request)
+		return 0;
+
+	rq = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+	if (rq == NULL)
+		return -ENOMEM;
+
+	kref_init(&rq->ref);
+	rq->i915 = dev_priv;
+
+	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
+	if (ret) {
+		kfree(rq);
+		return ret;
+	}
+
+	rq->ring = ring;
+
+	if (i915.enable_execlists)
+		ret = intel_logical_ring_alloc_request_extras(rq, ctx);
+	else
+		ret = intel_ring_alloc_request_extras(rq);
+	if (ret) {
+		kfree(rq);
+		return ret;
+	}
+
+	ring->outstanding_lazy_request = rq;
+	return 0;
 }
 
 struct drm_i915_gem_request *
@@ -2577,7 +2637,6 @@
 				struct drm_i915_gem_request,
 				execlist_link);
 		list_del(&submit_req->execlist_link);
-		intel_runtime_pm_put(dev_priv);
 
 		if (submit_req->ctx != ring->default_context)
 			intel_lr_context_unpin(ring, submit_req->ctx);
@@ -2767,8 +2826,25 @@
 {
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), mm.idle_work.work);
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_engine_cs *ring;
+	int i;
 
-	intel_mark_idle(dev_priv->dev);
+	for_each_ring(ring, dev_priv, i)
+		if (!list_empty(&ring->request_list))
+			return;
+
+	intel_mark_idle(dev);
+
+	if (mutex_trylock(&dev->struct_mutex)) {
+		struct intel_engine_cs *ring;
+		int i;
+
+		for_each_ring(ring, dev_priv, i)
+			i915_gem_batch_pool_fini(&ring->batch_pool);
+
+		mutex_unlock(&dev->struct_mutex);
+	}
 }
 
 /**
@@ -2866,9 +2942,7 @@
 	ret = __i915_wait_request(req, reset_counter, true,
 				  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
 				  file->driver_priv);
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_request_unreference(req);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_request_unreference__unlocked(req);
 	return ret;
 
 out:
@@ -3514,17 +3588,6 @@
 	if (ret)
 		goto err_remove_node;
 
-	/*  allocate before insert / bind */
-	if (vma->vm->allocate_va_range) {
-		trace_i915_va_alloc(vma->vm, vma->node.start, vma->node.size,
-				VM_TO_TRACE_NAME(vma->vm));
-		ret = vma->vm->allocate_va_range(vma->vm,
-						vma->node.start,
-						vma->node.size);
-		if (ret)
-			goto err_remove_node;
-	}
-
 	trace_i915_vma_bind(vma, flags);
 	ret = i915_vma_bind(vma, obj->cache_level,
 			    flags & PIN_GLOBAL ? GLOBAL_BIND : 0);
@@ -4071,9 +4134,7 @@
 	if (ret == 0)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_request_unreference(target);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_request_unreference__unlocked(target);
 
 	return ret;
 }
@@ -4194,9 +4255,6 @@
 	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 
 	vma->pin_count++;
-	if (flags & PIN_MAPPABLE)
-		obj->pin_mappable |= true;
-
 	return 0;
 }
 
@@ -4234,8 +4292,7 @@
 	WARN_ON(vma->pin_count == 0);
 	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
 
-	if (--vma->pin_count == 0 && view->type == I915_GGTT_VIEW_NORMAL)
-		obj->pin_mappable = false;
+	--vma->pin_count;
 }
 
 bool
@@ -4374,7 +4431,7 @@
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
-	INIT_LIST_HEAD(&obj->batch_pool_list);
+	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
@@ -4576,7 +4633,7 @@
 
 	list_del(&vma->vma_link);
 
-	kfree(vma);
+	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 }
 
 static void
@@ -4863,12 +4920,12 @@
 	}
 
 	if (!i915.enable_execlists) {
-		dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission;
+		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
 		dev_priv->gt.init_rings = i915_gem_init_rings;
 		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
 		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
 	} else {
-		dev_priv->gt.do_execbuf = intel_execlists_submission;
+		dev_priv->gt.execbuf_submit = intel_execlists_submission;
 		dev_priv->gt.init_rings = intel_logical_rings_init;
 		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
 		dev_priv->gt.stop_ring = intel_logical_ring_stop;
@@ -4950,11 +5007,21 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int i;
 
-	dev_priv->slab =
+	dev_priv->objects =
 		kmem_cache_create("i915_gem_object",
 				  sizeof(struct drm_i915_gem_object), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
+	dev_priv->vmas =
+		kmem_cache_create("i915_gem_vma",
+				  sizeof(struct i915_vma), 0,
+				  SLAB_HWCACHE_ALIGN,
+				  NULL);
+	dev_priv->requests =
+		kmem_cache_create("i915_gem_request",
+				  sizeof(struct drm_i915_gem_request), 0,
+				  SLAB_HWCACHE_ALIGN,
+				  NULL);
 
 	INIT_LIST_HEAD(&dev_priv->vm_list);
 	i915_init_vm(dev_priv, &dev_priv->gtt.base);
@@ -4997,8 +5064,6 @@
 
 	i915_gem_shrinker_init(dev_priv);
 
-	i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool);
-
 	mutex_init(&dev_priv->fb_tracking.lock);
 }
 
@@ -5006,8 +5071,6 @@
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	cancel_delayed_work_sync(&file_priv->mm.idle_work);
-
 	/* Clean up our request list when the client is going away, so that
 	 * later retire_requests won't dereference our soon-to-be-gone
 	 * file_priv.
@@ -5023,15 +5086,12 @@
 		request->file_priv = NULL;
 	}
 	spin_unlock(&file_priv->mm.lock);
-}
 
-static void
-i915_gem_file_idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_file_private *file_priv =
-		container_of(work, typeof(*file_priv), mm.idle_work.work);
-
-	atomic_set(&file_priv->rps_wait_boost, false);
+	if (!list_empty(&file_priv->rps_boost)) {
+		mutex_lock(&to_i915(dev)->rps.hw_lock);
+		list_del(&file_priv->rps_boost);
+		mutex_unlock(&to_i915(dev)->rps.hw_lock);
+	}
 }
 
 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
@@ -5048,11 +5108,10 @@
 	file->driver_priv = file_priv;
 	file_priv->dev_priv = dev->dev_private;
 	file_priv->file = file;
+	INIT_LIST_HEAD(&file_priv->rps_boost);
 
 	spin_lock_init(&file_priv->mm.lock);
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
-	INIT_DELAYED_WORK(&file_priv->mm.idle_work,
-			  i915_gem_file_idle_work_handler);
 
 	ret = i915_gem_context_open(dev, file);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index c690170..7bf2f3f 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -23,6 +23,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_gem_batch_pool.h"
 
 /**
  * DOC: batch pool
@@ -46,8 +47,12 @@
 void i915_gem_batch_pool_init(struct drm_device *dev,
 			      struct i915_gem_batch_pool *pool)
 {
+	int n;
+
 	pool->dev = dev;
-	INIT_LIST_HEAD(&pool->cache_list);
+
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
+		INIT_LIST_HEAD(&pool->cache_list[n]);
 }
 
 /**
@@ -58,33 +63,35 @@
  */
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 {
+	int n;
+
 	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
 
-	while (!list_empty(&pool->cache_list)) {
-		struct drm_i915_gem_object *obj =
-			list_first_entry(&pool->cache_list,
-					 struct drm_i915_gem_object,
-					 batch_pool_list);
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+		while (!list_empty(&pool->cache_list[n])) {
+			struct drm_i915_gem_object *obj =
+				list_first_entry(&pool->cache_list[n],
+						 struct drm_i915_gem_object,
+						 batch_pool_link);
 
-		WARN_ON(obj->active);
-
-		list_del_init(&obj->batch_pool_list);
-		drm_gem_object_unreference(&obj->base);
+			list_del(&obj->batch_pool_link);
+			drm_gem_object_unreference(&obj->base);
+		}
 	}
 }
 
 /**
- * i915_gem_batch_pool_get() - select a buffer from the pool
+ * i915_gem_batch_pool_get() - allocate a buffer from the pool
  * @pool: the batch buffer pool
  * @size: the minimum desired size of the returned buffer
  *
- * Finds or allocates a batch buffer in the pool with at least the requested
- * size. The caller is responsible for any domain, active/inactive, or
- * purgeability management for the returned buffer.
+ * Returns an inactive buffer from @pool with at least @size bytes,
+ * with the pages pinned. The caller must call i915_gem_object_unpin_pages()
+ * on the returned object.
  *
  * Note: Callers must hold the struct_mutex
  *
- * Return: the selected batch buffer object
+ * Return: the buffer object or an error pointer
  */
 struct drm_i915_gem_object *
 i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
@@ -92,46 +99,53 @@
 {
 	struct drm_i915_gem_object *obj = NULL;
 	struct drm_i915_gem_object *tmp, *next;
+	struct list_head *list;
+	int n;
 
 	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
 
-	list_for_each_entry_safe(tmp, next,
-			&pool->cache_list, batch_pool_list) {
+	/* Compute a power-of-two bucket, but throw everything greater than
+	 * 16KiB into the same bucket: i.e. the buckets hold objects of
+	 * (1 page, 2 pages, 4 pages, 8+ pages).
+	 */
+	n = fls(size >> PAGE_SHIFT) - 1;
+	if (n >= ARRAY_SIZE(pool->cache_list))
+		n = ARRAY_SIZE(pool->cache_list) - 1;
+	list = &pool->cache_list[n];
 
+	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
+		/* The batches are strictly LRU ordered */
 		if (tmp->active)
-			continue;
+			break;
 
 		/* While we're looping, do some clean up */
 		if (tmp->madv == __I915_MADV_PURGED) {
-			list_del(&tmp->batch_pool_list);
+			list_del(&tmp->batch_pool_link);
 			drm_gem_object_unreference(&tmp->base);
 			continue;
 		}
 
-		/*
-		 * Select a buffer that is at least as big as needed
-		 * but not 'too much' bigger. A better way to do this
-		 * might be to bucket the pool objects based on size.
-		 */
-		if (tmp->base.size >= size &&
-		    tmp->base.size <= (2 * size)) {
+		if (tmp->base.size >= size) {
 			obj = tmp;
 			break;
 		}
 	}
 
-	if (!obj) {
+	if (obj == NULL) {
+		int ret;
+
 		obj = i915_gem_alloc_object(pool->dev, size);
-		if (!obj)
+		if (obj == NULL)
 			return ERR_PTR(-ENOMEM);
 
-		list_add_tail(&obj->batch_pool_list, &pool->cache_list);
+		ret = i915_gem_object_get_pages(obj);
+		if (ret)
+			return ERR_PTR(ret);
+
+		obj->madv = I915_MADV_DONTNEED;
 	}
-	else
-		/* Keep list in LRU order */
-		list_move_tail(&obj->batch_pool_list, &pool->cache_list);
 
-	obj->madv = I915_MADV_WILLNEED;
-
+	list_move_tail(&obj->batch_pool_link, list);
+	i915_gem_object_pin_pages(obj);
 	return obj;
 }
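
A minimal usage sketch for the revised pool contract (hypothetical caller): the object comes back with its pages pinned, so the caller must unpin them when done. With the bucketing above, a 1-page batch lands in cache_list[0], 2-3 pages in [1], 4-7 pages in [2], and 8 or more pages in [3].

static int example_use_batch_pool(struct i915_gem_batch_pool *pool,
				  u32 batch_len)
{
	struct drm_i915_gem_object *obj;

	/* Returns an ERR_PTR on failure; struct_mutex must be held. */
	obj = i915_gem_batch_pool_get(pool, PAGE_ALIGN(batch_len));
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* ... fill or parse the shadow batch here ... */

	i915_gem_object_unpin_pages(obj);
	return 0;
}
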
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
new file mode 100644
index 0000000..848e907
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_GEM_BATCH_POOL_H
+#define I915_GEM_BATCH_POOL_H
+
+#include "i915_drv.h"
+
+struct i915_gem_batch_pool {
+	struct drm_device *dev;
+	struct list_head cache_list[4];
+};
+
+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+			      struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+
+#endif /* I915_GEM_BATCH_POOL_H */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f3e84c4..e4c57a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -157,7 +157,9 @@
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	obj = i915_gem_alloc_object(dev, size);
+	obj = i915_gem_object_create_stolen(dev, size);
+	if (obj == NULL)
+		obj = i915_gem_alloc_object(dev, size);
 	if (obj == NULL)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3190e79..48ac560 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,7 +37,6 @@
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
-#define  __EXEC_OBJECT_PURGEABLE (1<<27)
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -224,12 +223,7 @@
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 		vma->pin_count--;
 
-	if (entry->flags & __EXEC_OBJECT_PURGEABLE)
-		obj->madv = I915_MADV_DONTNEED;
-
-	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
-			  __EXEC_OBJECT_HAS_PIN |
-			  __EXEC_OBJECT_PURGEABLE);
+	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
 
 static void eb_destroy(struct eb_vmas *eb)
@@ -1142,12 +1136,11 @@
 			  u32 batch_len,
 			  bool is_master)
 {
-	struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
 	struct drm_i915_gem_object *shadow_batch_obj;
 	struct i915_vma *vma;
 	int ret;
 
-	shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
 						   PAGE_ALIGN(batch_len));
 	if (IS_ERR(shadow_batch_obj))
 		return shadow_batch_obj;
@@ -1165,11 +1158,13 @@
 	if (ret)
 		goto err;
 
+	i915_gem_object_unpin_pages(shadow_batch_obj);
+
 	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
 
 	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
 	vma->exec_entry = shadow_exec_entry;
-	vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE | __EXEC_OBJECT_HAS_PIN;
+	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
 	drm_gem_object_reference(&shadow_batch_obj->base);
 	list_add_tail(&vma->exec_list, &eb->vmas);
 
@@ -1178,6 +1173,7 @@
 	return shadow_batch_obj;
 
 err:
+	i915_gem_object_unpin_pages(shadow_batch_obj);
 	if (ret == -EACCES) /* unhandled chained batch */
 		return batch_obj;
 	else
@@ -1601,9 +1597,9 @@
 	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
-	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
-				      &eb->vmas, batch_obj, exec_start,
-				      dispatch_flags);
+	ret = dev_priv->gt.execbuf_submit(dev, file, ring, ctx, args,
+					  &eb->vmas, batch_obj, exec_start,
+					  dispatch_flags);
 
 	/*
 	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f48d845..9041f3d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -320,7 +320,7 @@
 	return 0;
 }
 
-static void unmap_and_free_pt(struct i915_page_table_entry *pt,
+static void unmap_and_free_pt(struct i915_page_table *pt,
 			       struct drm_device *dev)
 {
 	if (WARN_ON(!pt->page))
@@ -332,9 +332,27 @@
 	kfree(pt);
 }
 
-static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev)
+static void gen8_initialize_pt(struct i915_address_space *vm,
+			       struct i915_page_table *pt)
 {
-	struct i915_page_table_entry *pt;
+	gen8_pte_t *pt_vaddr, scratch_pte;
+	int i;
+
+	pt_vaddr = kmap_atomic(pt->page);
+	scratch_pte = gen8_pte_encode(vm->scratch.addr,
+				      I915_CACHE_LLC, true);
+
+	for (i = 0; i < GEN8_PTES; i++)
+		pt_vaddr[i] = scratch_pte;
+
+	if (!HAS_LLC(vm->dev))
+		drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
+	kunmap_atomic(pt_vaddr);
+}
+
+static struct i915_page_table *alloc_pt_single(struct drm_device *dev)
+{
+	struct i915_page_table *pt;
 	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
 		GEN8_PTES : GEN6_PTES;
 	int ret = -ENOMEM;
@@ -383,7 +401,7 @@
  *
  * Return: 0 if allocation succeeded.
  */
-static int alloc_pt_range(struct i915_page_directory_entry *pd, uint16_t pde, size_t count,
+static int alloc_pt_range(struct i915_page_directory *pd, uint16_t pde, size_t count,
 			  struct drm_device *dev)
 {
 	int i, ret;
@@ -393,7 +411,7 @@
 		return -EINVAL;
 
 	for (i = pde; i < pde + count; i++) {
-		struct i915_page_table_entry *pt = alloc_pt_single(dev);
+		struct i915_page_table *pt = alloc_pt_single(dev);
 
 		if (IS_ERR(pt)) {
 			ret = PTR_ERR(pt);
@@ -413,34 +431,55 @@
 	return ret;
 }
 
-static void unmap_and_free_pd(struct i915_page_directory_entry *pd)
+static void unmap_and_free_pd(struct i915_page_directory *pd,
+			      struct drm_device *dev)
 {
 	if (pd->page) {
+		i915_dma_unmap_single(pd, dev);
 		__free_page(pd->page);
+		kfree(pd->used_pdes);
 		kfree(pd);
 	}
 }
 
-static struct i915_page_directory_entry *alloc_pd_single(void)
+static struct i915_page_directory *alloc_pd_single(struct drm_device *dev)
 {
-	struct i915_page_directory_entry *pd;
+	struct i915_page_directory *pd;
+	int ret = -ENOMEM;
 
 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
 	if (!pd)
 		return ERR_PTR(-ENOMEM);
 
-	pd->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!pd->page) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
-	}
+	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
+				sizeof(*pd->used_pdes), GFP_KERNEL);
+	if (!pd->used_pdes)
+		goto free_pd;
+
+	pd->page = alloc_page(GFP_KERNEL);
+	if (!pd->page)
+		goto free_bitmap;
+
+	ret = i915_dma_map_single(pd, dev);
+	if (ret)
+		goto free_page;
 
 	return pd;
+
+free_page:
+	__free_page(pd->page);
+free_bitmap:
+	kfree(pd->used_pdes);
+free_pd:
+	kfree(pd);
+
+	return ERR_PTR(ret);
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
-static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
-			   uint64_t val)
+static int gen8_write_pdp(struct intel_engine_cs *ring,
+			  unsigned entry,
+			  dma_addr_t addr)
 {
 	int ret;
 
@@ -452,10 +491,10 @@
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
-	intel_ring_emit(ring, (u32)(val >> 32));
+	intel_ring_emit(ring, upper_32_bits(addr));
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
-	intel_ring_emit(ring, (u32)(val));
+	intel_ring_emit(ring, lower_32_bits(addr));
 	intel_ring_advance(ring);
 
 	return 0;
@@ -466,12 +505,12 @@
 {
 	int i, ret;
 
-	/* bit of a hack to find the actual last used pd */
-	int used_pd = ppgtt->num_pd_entries / I915_PDES;
-
-	for (i = used_pd - 1; i >= 0; i--) {
-		dma_addr_t addr = ppgtt->pdp.page_directory[i]->daddr;
-		ret = gen8_write_pdp(ring, i, addr);
+	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
+		struct i915_page_directory *pd = ppgtt->pdp.page_directory[i];
+		dma_addr_t pd_daddr = pd ? pd->daddr : ppgtt->scratch_pd->daddr;
+		/* The page directory might be NULL, but we need to clear out
+		 * whatever the previous context might have used. */
+		ret = gen8_write_pdp(ring, i, pd_daddr);
 		if (ret)
 			return ret;
 	}
@@ -497,8 +536,8 @@
 				      I915_CACHE_LLC, use_scratch);
 
 	while (num_entries) {
-		struct i915_page_directory_entry *pd;
-		struct i915_page_table_entry *pt;
+		struct i915_page_directory *pd;
+		struct i915_page_table *pt;
 		struct page *page_table;
 
 		if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
@@ -559,8 +598,8 @@
 			break;
 
 		if (pt_vaddr == NULL) {
-			struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[pdpe];
-			struct i915_page_table_entry *pt = pd->page_table[pde];
+			struct i915_page_directory *pd = ppgtt->pdp.page_directory[pdpe];
+			struct i915_page_table *pt = pd->page_table[pde];
 			struct page *page_table = pt->page;
 
 			pt_vaddr = kmap_atomic(page_table);
@@ -588,14 +627,43 @@
 	}
 }
 
-static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct drm_device *dev)
+static void __gen8_do_map_pt(gen8_pde_t * const pde,
+			     struct i915_page_table *pt,
+			     struct drm_device *dev)
+{
+	gen8_pde_t entry =
+		gen8_pde_encode(dev, pt->daddr, I915_CACHE_LLC);
+	*pde = entry;
+}
+
+static void gen8_initialize_pd(struct i915_address_space *vm,
+			       struct i915_page_directory *pd)
+{
+	struct i915_hw_ppgtt *ppgtt =
+			container_of(vm, struct i915_hw_ppgtt, base);
+	gen8_pde_t *page_directory;
+	struct i915_page_table *pt;
+	int i;
+
+	page_directory = kmap_atomic(pd->page);
+	pt = ppgtt->scratch_pt;
+	for (i = 0; i < I915_PDES; i++)
+		/* Map the PDE to the page table */
+		__gen8_do_map_pt(page_directory + i, pt, vm->dev);
+
+	if (!HAS_LLC(vm->dev))
+		drm_clflush_virt_range(page_directory, PAGE_SIZE);
+	kunmap_atomic(page_directory);
+}
+
+static void gen8_free_page_tables(struct i915_page_directory *pd, struct drm_device *dev)
 {
 	int i;
 
 	if (!pd->page)
 		return;
 
-	for (i = 0; i < I915_PDES; i++) {
+	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
 		if (WARN_ON(!pd->page_table[i]))
 			continue;
 
@@ -608,45 +676,16 @@
 {
 	int i;
 
-	for (i = 0; i < ppgtt->num_pd_pages; i++) {
+	for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) {
 		if (WARN_ON(!ppgtt->pdp.page_directory[i]))
 			continue;
 
 		gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
-		unmap_and_free_pd(ppgtt->pdp.page_directory[i]);
+		unmap_and_free_pd(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
 	}
-}
 
-static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
-{
-	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
-	int i, j;
-
-	for (i = 0; i < ppgtt->num_pd_pages; i++) {
-		/* TODO: In the future we'll support sparse mappings, so this
-		 * will have to change. */
-		if (!ppgtt->pdp.page_directory[i]->daddr)
-			continue;
-
-		pci_unmap_page(hwdev, ppgtt->pdp.page_directory[i]->daddr, PAGE_SIZE,
-			       PCI_DMA_BIDIRECTIONAL);
-
-		for (j = 0; j < I915_PDES; j++) {
-			struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i];
-			struct i915_page_table_entry *pt;
-			dma_addr_t addr;
-
-			if (WARN_ON(!pd->page_table[j]))
-				continue;
-
-			pt = pd->page_table[j];
-			addr = pt->daddr;
-
-			if (addr)
-				pci_unmap_page(hwdev, addr, PAGE_SIZE,
-					       PCI_DMA_BIDIRECTIONAL);
-		}
-	}
+	unmap_and_free_pd(ppgtt->scratch_pd, ppgtt->base.dev);
+	unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -654,230 +693,371 @@
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 
-	gen8_ppgtt_unmap_pages(ppgtt);
 	gen8_ppgtt_free(ppgtt);
 }
 
-static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
+/**
+ * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
+ * @ppgtt:	Master ppgtt structure.
+ * @pd:		Page directory for this address range.
+ * @start:	Starting virtual address to begin allocations.
+ * @length:	Size of the allocations.
+ * @new_pts:	Bitmap set by function with new allocations. Likely used by the
+ *		caller to free on error.
+ *
+ * Allocate the required number of page tables. Extremely similar to
+ * gen8_ppgtt_alloc_page_directories(). The main difference is that here we are limited by
+ * the page directory boundary (instead of the page directory pointer). That
+ * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
+ * possible, and likely that the caller will need to use multiple calls of this
+ * function to achieve the appropriate allocation.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
+				     struct i915_page_directory *pd,
+				     uint64_t start,
+				     uint64_t length,
+				     unsigned long *new_pts)
 {
-	int i, ret;
+	struct drm_device *dev = ppgtt->base.dev;
+	struct i915_page_table *pt;
+	uint64_t temp;
+	uint32_t pde;
 
-	for (i = 0; i < ppgtt->num_pd_pages; i++) {
-		ret = alloc_pt_range(ppgtt->pdp.page_directory[i],
-				     0, I915_PDES, ppgtt->base.dev);
-		if (ret)
+	gen8_for_each_pde(pt, pd, start, length, temp, pde) {
+		/* Don't reallocate page tables */
+		if (pt) {
+			/* Scratch is never allocated this way */
+			WARN_ON(pt == ppgtt->scratch_pt);
+			continue;
+		}
+
+		pt = alloc_pt_single(dev);
+		if (IS_ERR(pt))
 			goto unwind_out;
+
+		gen8_initialize_pt(&ppgtt->base, pt);
+		pd->page_table[pde] = pt;
+		set_bit(pde, new_pts);
 	}
 
 	return 0;
 
 unwind_out:
-	while (i--)
-		gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
+	for_each_set_bit(pde, new_pts, I915_PDES)
+		unmap_and_free_pt(pd->page_table[pde], dev);
 
 	return -ENOMEM;
 }
 
-static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
-						const int max_pdp)
+/**
+ * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
+ * @ppgtt:	Master ppgtt structure.
+ * @pdp:	Page directory pointer for this address range.
+ * @start:	Starting virtual address to begin allocations.
+ * @length:	Size of the allocations.
+ * @new_pds:	Bitmap set by function with new allocations. Likely used by the
+ *		caller to free on error.
+ *
+ * Allocate the required number of page directories starting at the pdpe index of
+ * @start, and ending at the pdpe index @start + @length. This function will skip
+ * over already allocated page directories within the range, and only allocate
+ * new ones, setting the appropriate pointer within the pdp as well as the
+ * correct position in the bitmap @new_pds.
+ *
+ * The function will only allocate the pages within the range for a given page
+ * directory pointer. In other words, if @start + @length straddles a virtually
+ * addressed PDP boundary (512GB for 4k pages), there will be more allocations
+ * required by the caller. This is not currently possible, and the BUG in the
+ * code will prevent it.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
+				     struct i915_page_directory_pointer *pdp,
+				     uint64_t start,
+				     uint64_t length,
+				     unsigned long *new_pds)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct i915_page_directory *pd;
+	uint64_t temp;
+	uint32_t pdpe;
+
+	WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
+
+	/* FIXME: PPGTT container_of won't work for 64b */
+	WARN_ON((start + length) > 0x800000000ULL);
+
+	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+		if (pd)
+			continue;
+
+		pd = alloc_pd_single(dev);
+		if (IS_ERR(pd))
+			goto unwind_out;
+
+		gen8_initialize_pd(&ppgtt->base, pd);
+		pdp->page_directory[pdpe] = pd;
+		set_bit(pdpe, new_pds);
+	}
+
+	return 0;
+
+unwind_out:
+	for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
+		unmap_and_free_pd(pdp->page_directory[pdpe], dev);
+
+	return -ENOMEM;
+}
+
+static void
+free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
 {
 	int i;
 
-	for (i = 0; i < max_pdp; i++) {
-		ppgtt->pdp.page_directory[i] = alloc_pd_single();
-		if (IS_ERR(ppgtt->pdp.page_directory[i]))
-			goto unwind_out;
-	}
-
-	ppgtt->num_pd_pages = max_pdp;
-	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPES);
-
-	return 0;
-
-unwind_out:
-	while (i--)
-		unmap_and_free_pd(ppgtt->pdp.page_directory[i]);
-
-	return -ENOMEM;
+	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
+		kfree(new_pts[i]);
+	kfree(new_pts);
+	kfree(new_pds);
 }
 
-static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
-			    const int max_pdp)
+/* Allocates the page directory bitmap, and the array of page table bitmaps. Both
+ * of these are sized based on the number of PDPEs in the system.
+ */
+static
+int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
+					 unsigned long ***new_pts)
 {
-	int ret;
+	int i;
+	unsigned long *pds;
+	unsigned long **pts;
 
-	ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
-	if (ret)
-		return ret;
+	pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
+	if (!pds)
+		return -ENOMEM;
 
-	ret = gen8_ppgtt_allocate_page_tables(ppgtt);
-	if (ret)
-		goto err_out;
+	pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL);
+	if (!pts) {
+		kfree(pds);
+		return -ENOMEM;
+	}
 
-	ppgtt->num_pd_entries = max_pdp * I915_PDES;
+	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
+		pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
+				 sizeof(unsigned long), GFP_KERNEL);
+		if (!pts[i])
+			goto err_out;
+	}
+
+	*new_pds = pds;
+	*new_pts = pts;
 
 	return 0;
 
 err_out:
-	gen8_ppgtt_free(ppgtt);
+	free_gen8_temp_bitmaps(pds, pts);
+	return -ENOMEM;
+}
+
+static int gen8_alloc_va_range(struct i915_address_space *vm,
+			       uint64_t start,
+			       uint64_t length)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+	unsigned long *new_page_dirs, **new_page_tables;
+	struct i915_page_directory *pd;
+	const uint64_t orig_start = start;
+	const uint64_t orig_length = length;
+	uint64_t temp;
+	uint32_t pdpe;
+	int ret;
+
+#ifndef CONFIG_64BIT
+	/* Disallow 64b address on 32b platforms. Nothing is wrong with doing
+	 * this in hardware, but a lot of the drm code is not prepared to handle
+	 * 64b offset on 32b platforms.
+	 * This will be addressed when 48b PPGTT is added */
+	if (start + length > 0x100000000ULL)
+		return -E2BIG;
+#endif
+
+	/* Wrap is never okay since we can only represent 48b, and we don't
+	 * actually use the other side of the canonical address space.
+	 */
+	if (WARN_ON(start + length < start))
+		return -ERANGE;
+
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
+	if (ret)
+		return ret;
+
+	/* Do the allocations first so we can easily bail out */
+	ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length,
+					new_page_dirs);
+	if (ret) {
+		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+		return ret;
+	}
+
+	/* For every page directory referenced, allocate page tables */
+	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
+		ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
+						new_page_tables[pdpe]);
+		if (ret)
+			goto err_out;
+	}
+
+	start = orig_start;
+	length = orig_length;
+
+	/* Allocations have completed successfully, so set the bitmaps, and do
+	 * the mappings. */
+	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
+		gen8_pde_t *const page_directory = kmap_atomic(pd->page);
+		struct i915_page_table *pt;
+		uint64_t pd_len = gen8_clamp_pd(start, length);
+		uint64_t pd_start = start;
+		uint32_t pde;
+
+		/* Every pd should be allocated, we just did that above. */
+		WARN_ON(!pd);
+
+		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+			/* Same reasoning as pd */
+			WARN_ON(!pt);
+			WARN_ON(!pd_len);
+			WARN_ON(!gen8_pte_count(pd_start, pd_len));
+
+			/* Set our used ptes within the page table */
+			bitmap_set(pt->used_ptes,
+				   gen8_pte_index(pd_start),
+				   gen8_pte_count(pd_start, pd_len));
+
+			/* Our pde is now pointing to the pagetable, pt */
+			set_bit(pde, pd->used_pdes);
+
+			/* Map the PDE to the page table */
+			__gen8_do_map_pt(page_directory + pde, pt, vm->dev);
+
+			/* NB: We haven't yet mapped ptes to pages. At this
+			 * point we're still relying on insert_entries() */
+		}
+
+		if (!HAS_LLC(vm->dev))
+			drm_clflush_virt_range(page_directory, PAGE_SIZE);
+
+		kunmap_atomic(page_directory);
+
+		set_bit(pdpe, ppgtt->pdp.used_pdpes);
+	}
+
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	return 0;
+
+err_out:
+	while (pdpe--) {
+		for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
+			unmap_and_free_pt(ppgtt->pdp.page_directory[pdpe]->page_table[temp], vm->dev);
+	}
+
+	for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
+		unmap_and_free_pd(ppgtt->pdp.page_directory[pdpe], vm->dev);
+
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
 	return ret;
 }
 
-static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
-					     const int pd)
-{
-	dma_addr_t pd_addr;
-	int ret;
-
-	pd_addr = pci_map_page(ppgtt->base.dev->pdev,
-			       ppgtt->pdp.page_directory[pd]->page, 0,
-			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-
-	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
-	if (ret)
-		return ret;
-
-	ppgtt->pdp.page_directory[pd]->daddr = pd_addr;
-
-	return 0;
-}
-
-static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
-					const int pd,
-					const int pt)
-{
-	dma_addr_t pt_addr;
-	struct i915_page_directory_entry *pdir = ppgtt->pdp.page_directory[pd];
-	struct i915_page_table_entry *ptab = pdir->page_table[pt];
-	struct page *p = ptab->page;
-	int ret;
-
-	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
-			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
-	if (ret)
-		return ret;
-
-	ptab->daddr = pt_addr;
-
-	return 0;
-}
-
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
  * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 bytes = 4GB of legacy
  * 32b address space.
  *
- * FIXME: split allocation into smaller pieces. For now we only ever do this
- * once, but with full PPGTT, the multiple contiguous allocations will be bad.
- * TODO: Do something with the size parameter
  */
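The arithmetic above is easy to sanity-check in isolation; a throwaway sketch, with the table geometry restated locally rather than pulled from the driver headers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t pdpes = 4, pdes = 512, ptes = 512, page_size = 4096;
	uint64_t total = pdpes * pdes * ptes * page_size;

	/* 4 * 512 * 512 * 4096 bytes = 4 GiB of legacy 32b address space */
	printf("%llu bytes = %llu GiB\n",
	       (unsigned long long)total, (unsigned long long)(total >> 30));
	return 0;
}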
-static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 {
-	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
-	const int min_pt_pages = I915_PDES * max_pdp;
-	int i, j, ret;
+	ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev);
+	if (IS_ERR(ppgtt->scratch_pt))
+		return PTR_ERR(ppgtt->scratch_pt);
 
-	if (size % (1<<30))
-		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
+	ppgtt->scratch_pd = alloc_pd_single(ppgtt->base.dev);
+	if (IS_ERR(ppgtt->scratch_pd))
+		return PTR_ERR(ppgtt->scratch_pd);
 
-	/* 1. Do all our allocations for page directories and page tables.
-	 * We allocate more than was asked so that we can point the unused parts
-	 * to valid entries that point to scratch page. Dynamic page tables
-	 * will fix this eventually.
-	 */
-	ret = gen8_ppgtt_alloc(ppgtt, GEN8_LEGACY_PDPES);
+	gen8_initialize_pt(&ppgtt->base, ppgtt->scratch_pt);
+	gen8_initialize_pd(&ppgtt->base, ppgtt->scratch_pd);
+
+	ppgtt->base.start = 0;
+	ppgtt->base.total = size;
+	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
+	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
+
+	ppgtt->switch_mm = gen8_mm_switch;
+
+	return 0;
+}
+
+static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint64_t start = 0, size = dev_priv->gtt.base.total;
+	int ret;
+
+	ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total);
 	if (ret)
 		return ret;
 
-	/*
-	 * 2. Create DMA mappings for the page directories and page tables.
-	 */
-	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
-		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
-		if (ret)
-			goto bail;
-
-		for (j = 0; j < I915_PDES; j++) {
-			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
-			if (ret)
-				goto bail;
-		}
+	/* Aliasing PPGTT has to always work and be mapped because of the way we
+	 * use RESTORE_INHIBIT in the context switch. This will be fixed
+	 * eventually. */
+	ret = gen8_alloc_va_range(&ppgtt->base, start, size);
+	if (ret) {
+		unmap_and_free_pd(ppgtt->scratch_pd, ppgtt->base.dev);
+		unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
+		return ret;
 	}
 
-	/*
-	 * 3. Map all the page directory entires to point to the page tables
-	 * we've allocated.
-	 *
-	 * For now, the PPGTT helper functions all require that the PDEs are
-	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
-	 * will never need to touch the PDEs again.
-	 */
-	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
-		struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i];
-		gen8_pde_t *pd_vaddr;
-		pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i]->page);
-		for (j = 0; j < I915_PDES; j++) {
-			struct i915_page_table_entry *pt = pd->page_table[j];
-			dma_addr_t addr = pt->daddr;
-			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
-						      I915_CACHE_LLC);
-		}
-		if (!HAS_LLC(ppgtt->base.dev))
-			drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
-		kunmap_atomic(pd_vaddr);
-	}
-
-	ppgtt->switch_mm = gen8_mm_switch;
+	ppgtt->base.allocate_va_range = NULL;
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
-	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
-	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
-	ppgtt->base.start = 0;
+	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
 
-	/* This is the area that we advertise as usable for the caller */
-	ppgtt->base.total = max_pdp * I915_PDES * GEN8_PTES * PAGE_SIZE;
-
-	/* Set all ptes to a valid scratch page. Also above requested space */
-	ppgtt->base.clear_range(&ppgtt->base, 0,
-				ppgtt->num_pd_pages * GEN8_PTES * PAGE_SIZE,
-				true);
-
-	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
-			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
-	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
-			 ppgtt->num_pd_entries,
-			 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
 	return 0;
+}
 
-bail:
-	gen8_ppgtt_unmap_pages(ppgtt);
-	gen8_ppgtt_free(ppgtt);
-	return ret;
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
+{
+	int ret;
+
+	ret = gen8_ppgtt_init_common(ppgtt, (1ULL << 32));
+	if (ret)
+		return ret;
+
+	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
+	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
+
+	return 0;
 }
 
 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 {
-	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
 	struct i915_address_space *vm = &ppgtt->base;
-	gen6_pte_t __iomem *pd_addr;
+	struct i915_page_table *unused;
 	gen6_pte_t scratch_pte;
 	uint32_t pd_entry;
-	int pte, pde;
+	uint32_t  pte, pde, temp;
+	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
 
 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
 
-	pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
-		ppgtt->pd.pd_offset / sizeof(gen6_pte_t);
-
-	seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
-		   ppgtt->pd.pd_offset,
-		   ppgtt->pd.pd_offset + ppgtt->num_pd_entries);
-	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
+	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
 		u32 expected;
 		gen6_pte_t *pt_vaddr;
 		dma_addr_t pt_addr = ppgtt->pd.page_table[pde]->daddr;
-		pd_entry = readl(pd_addr + pde);
+		pd_entry = readl(ppgtt->pd_addr + pde);
 		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
 
 		if (pd_entry != expected)
@@ -914,8 +1094,8 @@
 }
 
 /* Write pde (index) from the page directory @pd to the page table @pt */
-static void gen6_write_pde(struct i915_page_directory_entry *pd,
-			    const int pde, struct i915_page_table_entry *pt)
+static void gen6_write_pde(struct i915_page_directory *pd,
+			    const int pde, struct i915_page_table *pt)
 {
 	/* Caller needs to make sure the write completes if necessary */
 	struct i915_hw_ppgtt *ppgtt =
@@ -931,10 +1111,10 @@
 /* Write all the page tables found in the ppgtt structure to incrementing page
  * directories. */
 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
-				  struct i915_page_directory_entry *pd,
+				  struct i915_page_directory *pd,
 				  uint32_t start, uint32_t length)
 {
-	struct i915_page_table_entry *pt;
+	struct i915_page_table *pt;
 	uint32_t pde, temp;
 
 	gen6_for_each_pde(pt, pd, start, length, temp, pde)
@@ -1169,7 +1349,7 @@
 }
 
 static void gen6_initialize_pt(struct i915_address_space *vm,
-		struct i915_page_table_entry *pt)
+		struct i915_page_table *pt)
 {
 	gen6_pte_t *pt_vaddr, scratch_pte;
 	int i;
@@ -1195,7 +1375,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt =
 				container_of(vm, struct i915_hw_ppgtt, base);
-	struct i915_page_table_entry *pt;
+	struct i915_page_table *pt;
 	const uint32_t start_save = start, length_save = length;
 	uint32_t pde, temp;
 	int ret;
@@ -1263,7 +1443,7 @@
 
 unwind_out:
 	for_each_set_bit(pde, new_page_tables, I915_PDES) {
-		struct i915_page_table_entry *pt = ppgtt->pd.page_table[pde];
+		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
 
 		ppgtt->pd.page_table[pde] = ppgtt->scratch_pt;
 		unmap_and_free_pt(pt, vm->dev);
@@ -1275,17 +1455,16 @@
 
 static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 {
-	int i;
+	struct i915_page_table *pt;
+	uint32_t pde;
 
-	for (i = 0; i < ppgtt->num_pd_entries; i++) {
-		struct i915_page_table_entry *pt = ppgtt->pd.page_table[i];
-
+	gen6_for_all_pdes(pt, ppgtt, pde) {
 		if (pt != ppgtt->scratch_pt)
-			unmap_and_free_pt(ppgtt->pd.page_table[i], ppgtt->base.dev);
+			unmap_and_free_pt(pt, ppgtt->base.dev);
 	}
 
 	unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
-	unmap_and_free_pd(&ppgtt->pd);
+	unmap_and_free_pd(&ppgtt->pd, ppgtt->base.dev);
 }
 
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -1342,7 +1521,6 @@
 	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
 		DRM_DEBUG("Forced to use aperture for PDEs\n");
 
-	ppgtt->num_pd_entries = I915_PDES;
 	return 0;
 
 err_out:
@@ -1358,7 +1536,7 @@
 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
 				  uint64_t start, uint64_t length)
 {
-	struct i915_page_table_entry *unused;
+	struct i915_page_table *unused;
 	uint32_t pde, temp;
 
 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
@@ -1390,7 +1568,7 @@
 
 	if (aliasing) {
 		/* preallocate all pts */
-		ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries,
+		ret = alloc_pt_range(&ppgtt->pd, 0, I915_PDES,
 				ppgtt->base.dev);
 
 		if (ret) {
@@ -1399,12 +1577,12 @@
 		}
 	}
 
-	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
+	ppgtt->base.allocate_va_range = aliasing ? NULL : gen6_alloc_va_range;
 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.start = 0;
-	ppgtt->base.total = ppgtt->num_pd_entries * GEN6_PTES * PAGE_SIZE;
+	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
 	ppgtt->debug_dump = gen6_dump_ppgtt;
 
 	ppgtt->pd.pd_offset =
@@ -1440,8 +1618,10 @@
 
 	if (INTEL_INFO(dev)->gen < 8)
 		return gen6_ppgtt_init(ppgtt, aliasing);
+	else if (aliasing)
+		return gen8_aliasing_ppgtt_init(ppgtt);
 	else
-		return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
+		return gen8_ppgtt_init(ppgtt);
 }
 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
@@ -2542,7 +2722,8 @@
 
 	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
 		return ERR_PTR(-EINVAL);
-	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+
+	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -2756,8 +2937,22 @@
 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		  u32 flags)
 {
+	int ret;
+
+	if (vma->vm->allocate_va_range) {
+		trace_i915_va_alloc(vma->vm, vma->node.start,
+				    vma->node.size,
+				    VM_TO_TRACE_NAME(vma->vm));
+
+		ret = vma->vm->allocate_va_range(vma->vm,
+						 vma->node.start,
+						 vma->node.size);
+		if (ret)
+			return ret;
+	}
+
 	if (i915_is_ggtt(vma->vm)) {
-		int ret = i915_get_ggtt_vma_pages(vma);
+		ret = i915_get_ggtt_vma_pages(vma);
 
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index fc03c99..29de64d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -206,26 +206,28 @@
 			 u32 flags);
 };
 
-struct i915_page_table_entry {
+struct i915_page_table {
 	struct page *page;
 	dma_addr_t daddr;
 
 	unsigned long *used_ptes;
 };
 
-struct i915_page_directory_entry {
+struct i915_page_directory {
 	struct page *page; /* NULL for GEN6-GEN7 */
 	union {
 		uint32_t pd_offset;
 		dma_addr_t daddr;
 	};
 
-	struct i915_page_table_entry *page_table[I915_PDES]; /* PDEs */
+	unsigned long *used_pdes;
+	struct i915_page_table *page_table[I915_PDES]; /* PDEs */
 };
 
-struct i915_page_directory_pointer_entry {
+struct i915_page_directory_pointer {
 	/* struct page *page; */
-	struct i915_page_directory_entry *page_directory[GEN8_LEGACY_PDPES];
+	DECLARE_BITMAP(used_pdpes, GEN8_LEGACY_PDPES);
+	struct i915_page_directory *page_directory[GEN8_LEGACY_PDPES];
 };
 
 struct i915_address_space {
@@ -314,14 +316,13 @@
 	struct kref ref;
 	struct drm_mm_node node;
 	unsigned long pd_dirty_rings;
-	unsigned num_pd_entries;
-	unsigned num_pd_pages; /* gen8+ */
 	union {
-		struct i915_page_directory_pointer_entry pdp;
-		struct i915_page_directory_entry pd;
+		struct i915_page_directory_pointer pdp;
+		struct i915_page_directory pd;
 	};
 
-	struct i915_page_table_entry *scratch_pt;
+	struct i915_page_table *scratch_pt;
+	struct i915_page_directory *scratch_pd;
 
 	struct drm_i915_file_private *file_priv;
 
@@ -349,6 +350,11 @@
 	     temp = min_t(unsigned, temp, length), \
 	     start += temp, length -= temp)
 
+#define gen6_for_all_pdes(pt, ppgtt, iter)  \
+	for (iter = 0;		\
+	     pt = ppgtt->pd.page_table[iter], iter < I915_PDES;	\
+	     iter++)
+
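The loop condition in the macro above relies on the comma operator: the assignment to pt runs on every test (including the final, failing one) and only the right-hand comparison decides whether the loop continues, so whatever the assignment indexes must still be safe to read at the terminating index. A tiny standalone illustration of that shape (values are arbitrary):

#include <stdio.h>

int main(void)
{
	/* one extra slot so the assignment that runs on the final test (i == 4)
	 * still reads addressable memory, mirroring the macro's shape */
	int table[5] = { 10, 20, 30, 40, -1 };
	int val, i;

	/* assign first, then test the bound - same structure as the macro */
	for (i = 0; val = table[i], i < 4; i++)
		printf("slot %d holds %d\n", i, val);

	return 0;
}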
 static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
 {
 	const uint32_t mask = NUM_PTE(pde_shift) - 1;
@@ -397,6 +403,63 @@
 	return i915_pde_index(addr, GEN6_PDE_SHIFT);
 }
 
+/* Equivalent to the gen6 version: iterates over every pde
+ * from start until start + length. On gen8+ it simply iterates
+ * over every page directory entry in a page directory.
+ */
+#define gen8_for_each_pde(pt, pd, start, length, temp, iter)		\
+	for (iter = gen8_pde_index(start); \
+	     pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES;	\
+	     iter++,				\
+	     temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT) - start,	\
+	     temp = min(temp, length),					\
+	     start += temp, length -= temp)
+
+#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)		\
+	for (iter = gen8_pdpe_index(start);	\
+	     pd = (pdp)->page_directory[iter], length > 0 && iter < GEN8_LEGACY_PDPES;	\
+	     iter++,				\
+	     temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,	\
+	     temp = min(temp, length),					\
+	     start += temp, length -= temp)
+
+/* Clamp length to the next page_directory boundary */
+static inline uint64_t gen8_clamp_pd(uint64_t start, uint64_t length)
+{
+	uint64_t next_pd = ALIGN(start + 1, 1 << GEN8_PDPE_SHIFT);
+
+	if (next_pd > (start + length))
+		return length;
+
+	return next_pd - start;
+}
+
+static inline uint32_t gen8_pte_index(uint64_t address)
+{
+	return i915_pte_index(address, GEN8_PDE_SHIFT);
+}
+
+static inline uint32_t gen8_pde_index(uint64_t address)
+{
+	return i915_pde_index(address, GEN8_PDE_SHIFT);
+}
+
+static inline uint32_t gen8_pdpe_index(uint64_t address)
+{
+	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
+}
+
+static inline uint32_t gen8_pml4e_index(uint64_t address)
+{
+	WARN_ON(1); /* For 64B */
+	return 0;
+}
+
+static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
+{
+	return i915_pte_count(address, length, GEN8_PDE_SHIFT);
+}
+
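To make the index helpers above concrete, here is a standalone sketch of how a gen8 virtual address splits into PDPE/PDE/PTE indices, assuming 4 KiB pages and 512-entry tables (so PTE bits are [20:12], PDE bits [29:21], and the legacy layout selects one of 4 PDPEs with bits [31:30]); the shift constants are restated locally rather than taken from the driver headers:

#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT	12	/* 4 KiB pages (assumed) */
#define EX_PDE_SHIFT	21	/* each PDE maps 512 * 4 KiB = 2 MiB */
#define EX_PDPE_SHIFT	30	/* each PDPE maps 512 * 2 MiB = 1 GiB */

int main(void)
{
	uint64_t addr = 0x12345678ULL;	/* arbitrary example address */

	uint32_t pte  = (addr >> EX_PAGE_SHIFT) & 0x1ff;	/* 512 PTEs per page table */
	uint32_t pde  = (addr >> EX_PDE_SHIFT)  & 0x1ff;	/* 512 PDEs per page directory */
	uint32_t pdpe = (addr >> EX_PDPE_SHIFT) & 0x3;		/* 4 PDPEs in the legacy layout */

	printf("0x%llx -> pdpe %u, pde %u, pte %u\n",
	       (unsigned long long)addr, pdpe, pde, pte);
	return 0;
}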
 int i915_gem_gtt_init(struct drm_device *dev);
 void i915_gem_init_global_gtt(struct drm_device *dev);
 void i915_global_gtt_cleanup(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index f7929e7..f6ecbda 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -184,9 +184,12 @@
 	struct i915_vma *vma;
 	int count = 0;
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link)
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
 		if (drm_mm_node_allocated(&vma->node))
 			count++;
+		if (vma->pin_count)
+			count++;
+	}
 
 	return count;
 }
@@ -210,8 +213,7 @@
 			count += obj->base.size >> PAGE_SHIFT;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (!i915_gem_obj_is_pinned(obj) &&
-		    obj->pages_pin_count == num_vma_bound(obj))
+		if (obj->pages_pin_count == num_vma_bound(obj))
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 1d4e60d..ac22614 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -251,10 +251,11 @@
 		return;
 
 	err_printf(m, "%s command stream:\n", ring_str(ring_idx));
-	err_printf(m, "  HEAD: 0x%08x\n", ring->head);
-	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
-	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
-	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
+	err_printf(m, "  START: 0x%08x\n", ring->start);
+	err_printf(m, "  HEAD:  0x%08x\n", ring->head);
+	err_printf(m, "  TAIL:  0x%08x\n", ring->tail);
+	err_printf(m, "  CTL:   0x%08x\n", ring->ctl);
+	err_printf(m, "  HWS:   0x%08x\n", ring->hws);
 	err_printf(m, "  ACTHD: 0x%08x %08x\n", (u32)(ring->acthd>>32), (u32)ring->acthd);
 	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
 	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
@@ -883,6 +884,7 @@
 	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
 	ering->seqno = ring->get_seqno(ring, false);
 	ering->acthd = intel_ring_get_active_head(ring);
+	ering->start = I915_READ_START(ring);
 	ering->head = I915_READ_HEAD(ring);
 	ering->tail = I915_READ_TAIL(ring);
 	ering->ctl = I915_READ_CTL(ring);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 14ecb4d..46bcbff 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -985,8 +985,7 @@
 	return;
 }
 
-static void notify_ring(struct drm_device *dev,
-			struct intel_engine_cs *ring)
+static void notify_ring(struct intel_engine_cs *ring)
 {
 	if (!intel_ring_initialized(ring))
 		return;
@@ -1049,7 +1048,7 @@
 	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
 		if (!vlv_c0_above(dev_priv,
 				  &dev_priv->rps.down_ei, &now,
-				  VLV_RP_DOWN_EI_THRESHOLD))
+				  dev_priv->rps.down_threshold))
 			events |= GEN6_PM_RP_DOWN_THRESHOLD;
 		dev_priv->rps.down_ei = now;
 	}
@@ -1057,7 +1056,7 @@
 	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
 		if (vlv_c0_above(dev_priv,
 				 &dev_priv->rps.up_ei, &now,
-				 VLV_RP_UP_EI_THRESHOLD))
+				 dev_priv->rps.up_threshold))
 			events |= GEN6_PM_RP_UP_THRESHOLD;
 		dev_priv->rps.up_ei = now;
 	}
@@ -1095,21 +1094,20 @@
 	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
 
 	adj = dev_priv->rps.last_adj;
+	new_delay = dev_priv->rps.cur_freq;
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
 		if (adj > 0)
 			adj *= 2;
-		else {
-			/* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv->dev) ? 2 : 1;
-		}
-		new_delay = dev_priv->rps.cur_freq + adj;
-
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
 		/*
 		 * For better performance, jump directly
 		 * to RPe if we're below it.
 		 */
-		if (new_delay < dev_priv->rps.efficient_freq)
+		if (new_delay < dev_priv->rps.efficient_freq - adj) {
 			new_delay = dev_priv->rps.efficient_freq;
+			adj = 0;
+		}
 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
 		if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
 			new_delay = dev_priv->rps.efficient_freq;
@@ -1119,24 +1117,22 @@
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
-		else {
-			/* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv->dev) ? -2 : -1;
-		}
-		new_delay = dev_priv->rps.cur_freq + adj;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
 	} else { /* unknown event */
-		new_delay = dev_priv->rps.cur_freq;
+		adj = 0;
 	}
 
+	dev_priv->rps.last_adj = adj;
+
 	/* sysfs frequency interfaces may have snuck in while servicing the
 	 * interrupt
 	 */
+	new_delay += adj;
 	new_delay = clamp_t(int, new_delay,
 			    dev_priv->rps.min_freq_softlimit,
 			    dev_priv->rps.max_freq_softlimit);
 
-	dev_priv->rps.last_adj = new_delay - dev_priv->rps.cur_freq;
-
 	intel_set_rps(dev_priv->dev, new_delay);
 
 	mutex_unlock(&dev_priv->rps.hw_lock);
@@ -1251,9 +1247,9 @@
 {
 	if (gt_iir &
 	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-		notify_ring(dev, &dev_priv->ring[RCS]);
+		notify_ring(&dev_priv->ring[RCS]);
 	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		notify_ring(dev, &dev_priv->ring[VCS]);
+		notify_ring(&dev_priv->ring[VCS]);
 }
 
 static void snb_gt_irq_handler(struct drm_device *dev,
@@ -1263,11 +1259,11 @@
 
 	if (gt_iir &
 	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-		notify_ring(dev, &dev_priv->ring[RCS]);
+		notify_ring(&dev_priv->ring[RCS]);
 	if (gt_iir & GT_BSD_USER_INTERRUPT)
-		notify_ring(dev, &dev_priv->ring[VCS]);
+		notify_ring(&dev_priv->ring[VCS]);
 	if (gt_iir & GT_BLT_USER_INTERRUPT)
-		notify_ring(dev, &dev_priv->ring[BCS]);
+		notify_ring(&dev_priv->ring[BCS]);
 
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
@@ -1278,88 +1274,74 @@
 		ivybridge_parity_error_irq_handler(dev, gt_iir);
 }
 
-static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
-				       struct drm_i915_private *dev_priv,
+static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
 				       u32 master_ctl)
 {
-	struct intel_engine_cs *ring;
-	u32 rcs, bcs, vcs;
-	uint32_t tmp = 0;
 	irqreturn_t ret = IRQ_NONE;
 
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		tmp = I915_READ(GEN8_GT_IIR(0));
+		u32 tmp = I915_READ_FW(GEN8_GT_IIR(0));
 		if (tmp) {
-			I915_WRITE(GEN8_GT_IIR(0), tmp);
+			I915_WRITE_FW(GEN8_GT_IIR(0), tmp);
 			ret = IRQ_HANDLED;
 
-			rcs = tmp >> GEN8_RCS_IRQ_SHIFT;
-			ring = &dev_priv->ring[RCS];
-			if (rcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (rcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[RCS]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[RCS]);
 
-			bcs = tmp >> GEN8_BCS_IRQ_SHIFT;
-			ring = &dev_priv->ring[BCS];
-			if (bcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (bcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[BCS]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[BCS]);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT0)!\n");
 	}
 
 	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
-		tmp = I915_READ(GEN8_GT_IIR(1));
+		u32 tmp = I915_READ_FW(GEN8_GT_IIR(1));
 		if (tmp) {
-			I915_WRITE(GEN8_GT_IIR(1), tmp);
+			I915_WRITE_FW(GEN8_GT_IIR(1), tmp);
 			ret = IRQ_HANDLED;
 
-			vcs = tmp >> GEN8_VCS1_IRQ_SHIFT;
-			ring = &dev_priv->ring[VCS];
-			if (vcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[VCS]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[VCS]);
 
-			vcs = tmp >> GEN8_VCS2_IRQ_SHIFT;
-			ring = &dev_priv->ring[VCS2];
-			if (vcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[VCS2]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[VCS2]);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT1)!\n");
 	}
 
+	if (master_ctl & GEN8_GT_VECS_IRQ) {
+		u32 tmp = I915_READ_FW(GEN8_GT_IIR(3));
+		if (tmp) {
+			I915_WRITE_FW(GEN8_GT_IIR(3), tmp);
+			ret = IRQ_HANDLED;
+
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[VECS]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[VECS]);
+		} else
+			DRM_ERROR("The master control interrupt lied (GT3)!\n");
+	}
+
 	if (master_ctl & GEN8_GT_PM_IRQ) {
-		tmp = I915_READ(GEN8_GT_IIR(2));
+		u32 tmp = I915_READ_FW(GEN8_GT_IIR(2));
 		if (tmp & dev_priv->pm_rps_events) {
-			I915_WRITE(GEN8_GT_IIR(2),
-				   tmp & dev_priv->pm_rps_events);
+			I915_WRITE_FW(GEN8_GT_IIR(2),
+				      tmp & dev_priv->pm_rps_events);
 			ret = IRQ_HANDLED;
 			gen6_rps_irq_handler(dev_priv, tmp);
 		} else
 			DRM_ERROR("The master control interrupt lied (PM)!\n");
 	}
 
-	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		tmp = I915_READ(GEN8_GT_IIR(3));
-		if (tmp) {
-			I915_WRITE(GEN8_GT_IIR(3), tmp);
-			ret = IRQ_HANDLED;
-
-			vcs = tmp >> GEN8_VECS_IRQ_SHIFT;
-			ring = &dev_priv->ring[VECS];
-			if (vcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
-		} else
-			DRM_ERROR("The master control interrupt lied (GT3)!\n");
-	}
-
 	return ret;
 }
 
@@ -1654,7 +1636,7 @@
 
 	if (HAS_VEBOX(dev_priv->dev)) {
 		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
+			notify_ring(&dev_priv->ring[VECS]);
 
 		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
 			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
@@ -1848,7 +1830,7 @@
 			I915_WRITE(VLV_IIR, iir);
 		}
 
-		gen8_gt_irq_handler(dev, dev_priv, master_ctl);
+		gen8_gt_irq_handler(dev_priv, master_ctl);
 
 		/* Call regardless, as some status bits might not be
 		 * signalled in iir */
@@ -2181,17 +2163,16 @@
 		aux_mask |=  GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
 			GEN9_AUX_CHANNEL_D;
 
-	master_ctl = I915_READ(GEN8_MASTER_IRQ);
+	master_ctl = I915_READ_FW(GEN8_MASTER_IRQ);
 	master_ctl &= ~GEN8_MASTER_IRQ_CONTROL;
 	if (!master_ctl)
 		return IRQ_NONE;
 
-	I915_WRITE(GEN8_MASTER_IRQ, 0);
-	POSTING_READ(GEN8_MASTER_IRQ);
+	I915_WRITE_FW(GEN8_MASTER_IRQ, 0);
 
 	/* Find, clear, then process each source of interrupt */
 
-	ret = gen8_gt_irq_handler(dev, dev_priv, master_ctl);
+	ret = gen8_gt_irq_handler(dev_priv, master_ctl);
 
 	if (master_ctl & GEN8_DE_MISC_IRQ) {
 		tmp = I915_READ(GEN8_DE_MISC_IIR);
@@ -2284,8 +2265,8 @@
 
 	}
 
-	I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
-	POSTING_READ(GEN8_MASTER_IRQ);
+	I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
+	POSTING_READ_FW(GEN8_MASTER_IRQ);
 
 	return ret;
 }
@@ -3696,7 +3677,7 @@
 		new_iir = I915_READ16(IIR); /* Flush posted writes */
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(dev, &dev_priv->ring[RCS]);
+			notify_ring(&dev_priv->ring[RCS]);
 
 		for_each_pipe(dev_priv, pipe) {
 			int plane = pipe;
@@ -3887,7 +3868,7 @@
 		new_iir = I915_READ(IIR); /* Flush posted writes */
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(dev, &dev_priv->ring[RCS]);
+			notify_ring(&dev_priv->ring[RCS]);
 
 		for_each_pipe(dev_priv, pipe) {
 			int plane = pipe;
@@ -4114,9 +4095,9 @@
 		new_iir = I915_READ(IIR); /* Flush posted writes */
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(dev, &dev_priv->ring[RCS]);
+			notify_ring(&dev_priv->ring[RCS]);
 		if (iir & I915_BSD_USER_INTERRUPT)
-			notify_ring(dev, &dev_priv->ring[VCS]);
+			notify_ring(&dev_priv->ring[VCS]);
 
 		for_each_pipe(dev_priv, pipe) {
 			if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS &&
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 21e69ca..87d9c0f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -671,8 +671,6 @@
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
 #define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
-#define VLV_RP_UP_EI_THRESHOLD			90
-#define VLV_RP_DOWN_EI_THRESHOLD		70
 
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
@@ -1787,16 +1785,16 @@
 #define   GMBUS_RATE_400KHZ	(2<<8) /* reserved on Pineview */
 #define   GMBUS_RATE_1MHZ	(3<<8) /* reserved on Pineview */
 #define   GMBUS_HOLD_EXT	(1<<7) /* 300ns hold time, rsvd on Pineview */
-#define   GMBUS_PORT_DISABLED	0
-#define   GMBUS_PORT_SSC	1
-#define   GMBUS_PORT_VGADDC	2
-#define   GMBUS_PORT_PANEL	3
-#define   GMBUS_PORT_DPD_CHV	3 /* HDMID_CHV */
-#define   GMBUS_PORT_DPC	4 /* HDMIC */
-#define   GMBUS_PORT_DPB	5 /* SDVO, HDMIB */
-#define   GMBUS_PORT_DPD	6 /* HDMID */
-#define   GMBUS_PORT_RESERVED	7 /* 7 reserved */
-#define   GMBUS_NUM_PORTS	(GMBUS_PORT_DPD - GMBUS_PORT_SSC + 1)
+#define   GMBUS_PIN_DISABLED	0
+#define   GMBUS_PIN_SSC		1
+#define   GMBUS_PIN_VGADDC	2
+#define   GMBUS_PIN_PANEL	3
+#define   GMBUS_PIN_DPD_CHV	3 /* HDMID_CHV */
+#define   GMBUS_PIN_DPC		4 /* HDMIC */
+#define   GMBUS_PIN_DPB		5 /* SDVO, HDMIB */
+#define   GMBUS_PIN_DPD		6 /* HDMID */
+#define   GMBUS_PIN_RESERVED	7 /* 7 reserved */
+#define   GMBUS_NUM_PINS	7 /* including 0 */
 #define GMBUS1			0x5104 /* command/status */
 #define   GMBUS_SW_CLR_INT	(1<<31)
 #define   GMBUS_SW_RDY		(1<<30)
@@ -2747,6 +2745,20 @@
 #define   EDP_PSR_DEBUG_MASK_MEMUP	(1<<26)
 #define   EDP_PSR_DEBUG_MASK_HPD	(1<<25)
 
+#define EDP_PSR2_CTL			0x6f900
+#define   EDP_PSR2_ENABLE		(1<<31)
+#define   EDP_SU_TRACK_ENABLE		(1<<30)
+#define   EDP_MAX_SU_DISABLE_TIME(t)	((t)<<20)
+#define   EDP_MAX_SU_DISABLE_TIME_MASK	(0x1f<<20)
+#define   EDP_PSR2_TP2_TIME_500		(0<<8)
+#define   EDP_PSR2_TP2_TIME_100		(1<<8)
+#define   EDP_PSR2_TP2_TIME_2500	(2<<8)
+#define   EDP_PSR2_TP2_TIME_50		(3<<8)
+#define   EDP_PSR2_TP2_TIME_MASK	(3<<8)
+#define   EDP_PSR2_FRAME_BEFORE_SU_SHIFT 4
+#define   EDP_PSR2_FRAME_BEFORE_SU_MASK	(0xf<<4)
+#define   EDP_PSR2_IDLE_MASK		0xf
+
 /* VGA port control */
 #define ADPA			0x61100
 #define PCH_ADPA                0xe1100
@@ -4853,7 +4865,9 @@
 #define   PLANE_CTL_ALPHA_HW_PREMULTIPLY	(  3 << 4)
 #define   PLANE_CTL_ROTATE_MASK			0x3
 #define   PLANE_CTL_ROTATE_0			0x0
+#define   PLANE_CTL_ROTATE_90			0x1
 #define   PLANE_CTL_ROTATE_180			0x2
+#define   PLANE_CTL_ROTATE_270			0x3
 #define _PLANE_STRIDE_1_A			0x70188
 #define _PLANE_STRIDE_2_A			0x70288
 #define _PLANE_STRIDE_3_A			0x70388
@@ -5095,6 +5109,121 @@
 #define PS_WIN_SZ(pipe)		_PIPE(pipe, _PSA_WIN_SZ, _PSB_WIN_SZ)
 #define PS_WIN_POS(pipe)	_PIPE(pipe, _PSA_WIN_POS, _PSB_WIN_POS)
 
+/*
+ * Skylake scalers
+ */
+#define _PS_1A_CTRL      0x68180
+#define _PS_2A_CTRL      0x68280
+#define _PS_1B_CTRL      0x68980
+#define _PS_2B_CTRL      0x68A80
+#define _PS_1C_CTRL      0x69180
+#define PS_SCALER_EN        (1 << 31)
+#define PS_SCALER_MODE_MASK (3 << 28)
+#define PS_SCALER_MODE_DYN  (0 << 28)
+#define PS_SCALER_MODE_HQ  (1 << 28)
+#define PS_PLANE_SEL_MASK  (7 << 25)
+#define PS_PLANE_SEL(plane) ((plane + 1) << 25)
+#define PS_FILTER_MASK         (3 << 23)
+#define PS_FILTER_MEDIUM       (0 << 23)
+#define PS_FILTER_EDGE_ENHANCE (2 << 23)
+#define PS_FILTER_BILINEAR     (3 << 23)
+#define PS_VERT3TAP            (1 << 21)
+#define PS_VERT_INT_INVERT_FIELD1 (0 << 20)
+#define PS_VERT_INT_INVERT_FIELD0 (1 << 20)
+#define PS_PWRUP_PROGRESS         (1 << 17)
+#define PS_V_FILTER_BYPASS        (1 << 8)
+#define PS_VADAPT_EN              (1 << 7)
+#define PS_VADAPT_MODE_MASK        (3 << 5)
+#define PS_VADAPT_MODE_LEAST_ADAPT (0 << 5)
+#define PS_VADAPT_MODE_MOD_ADAPT   (1 << 5)
+#define PS_VADAPT_MODE_MOST_ADAPT  (3 << 5)
+
+#define _PS_PWR_GATE_1A     0x68160
+#define _PS_PWR_GATE_2A     0x68260
+#define _PS_PWR_GATE_1B     0x68960
+#define _PS_PWR_GATE_2B     0x68A60
+#define _PS_PWR_GATE_1C     0x69160
+#define PS_PWR_GATE_DIS_OVERRIDE       (1 << 31)
+#define PS_PWR_GATE_SETTLING_TIME_32   (0 << 3)
+#define PS_PWR_GATE_SETTLING_TIME_64   (1 << 3)
+#define PS_PWR_GATE_SETTLING_TIME_96   (2 << 3)
+#define PS_PWR_GATE_SETTLING_TIME_128  (3 << 3)
+#define PS_PWR_GATE_SLPEN_8             0
+#define PS_PWR_GATE_SLPEN_16            1
+#define PS_PWR_GATE_SLPEN_24            2
+#define PS_PWR_GATE_SLPEN_32            3
+
+#define _PS_WIN_POS_1A      0x68170
+#define _PS_WIN_POS_2A      0x68270
+#define _PS_WIN_POS_1B      0x68970
+#define _PS_WIN_POS_2B      0x68A70
+#define _PS_WIN_POS_1C      0x69170
+
+#define _PS_WIN_SZ_1A       0x68174
+#define _PS_WIN_SZ_2A       0x68274
+#define _PS_WIN_SZ_1B       0x68974
+#define _PS_WIN_SZ_2B       0x68A74
+#define _PS_WIN_SZ_1C       0x69174
+
+#define _PS_VSCALE_1A       0x68184
+#define _PS_VSCALE_2A       0x68284
+#define _PS_VSCALE_1B       0x68984
+#define _PS_VSCALE_2B       0x68A84
+#define _PS_VSCALE_1C       0x69184
+
+#define _PS_HSCALE_1A       0x68190
+#define _PS_HSCALE_2A       0x68290
+#define _PS_HSCALE_1B       0x68990
+#define _PS_HSCALE_2B       0x68A90
+#define _PS_HSCALE_1C       0x69190
+
+#define _PS_VPHASE_1A       0x68188
+#define _PS_VPHASE_2A       0x68288
+#define _PS_VPHASE_1B       0x68988
+#define _PS_VPHASE_2B       0x68A88
+#define _PS_VPHASE_1C       0x69188
+
+#define _PS_HPHASE_1A       0x68194
+#define _PS_HPHASE_2A       0x68294
+#define _PS_HPHASE_1B       0x68994
+#define _PS_HPHASE_2B       0x68A94
+#define _PS_HPHASE_1C       0x69194
+
+#define _PS_ECC_STAT_1A     0x681D0
+#define _PS_ECC_STAT_2A     0x682D0
+#define _PS_ECC_STAT_1B     0x689D0
+#define _PS_ECC_STAT_2B     0x68AD0
+#define _PS_ECC_STAT_1C     0x691D0
+
+#define _ID(id, a, b) ((a) + (id)*((b)-(a)))
+#define SKL_PS_CTRL(pipe, id) _PIPE(pipe,        \
+			_ID(id, _PS_1A_CTRL, _PS_2A_CTRL),       \
+			_ID(id, _PS_1B_CTRL, _PS_2B_CTRL))
+#define SKL_PS_PWR_GATE(pipe, id) _PIPE(pipe,    \
+			_ID(id, _PS_PWR_GATE_1A, _PS_PWR_GATE_2A), \
+			_ID(id, _PS_PWR_GATE_1B, _PS_PWR_GATE_2B))
+#define SKL_PS_WIN_POS(pipe, id) _PIPE(pipe,     \
+			_ID(id, _PS_WIN_POS_1A, _PS_WIN_POS_2A), \
+			_ID(id, _PS_WIN_POS_1B, _PS_WIN_POS_2B))
+#define SKL_PS_WIN_SZ(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_WIN_SZ_1A, _PS_WIN_SZ_2A),   \
+			_ID(id, _PS_WIN_SZ_1B, _PS_WIN_SZ_2B))
+#define SKL_PS_VSCALE(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_VSCALE_1A, _PS_VSCALE_2A),   \
+			_ID(id, _PS_VSCALE_1B, _PS_VSCALE_2B))
+#define SKL_PS_HSCALE(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_HSCALE_1A, _PS_HSCALE_2A),   \
+			_ID(id, _PS_HSCALE_1B, _PS_HSCALE_2B))
+#define SKL_PS_VPHASE(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_VPHASE_1A, _PS_VPHASE_2A),   \
+			_ID(id, _PS_VPHASE_1B, _PS_VPHASE_2B))
+#define SKL_PS_HPHASE(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_HPHASE_1A, _PS_HPHASE_2A),   \
+			_ID(id, _PS_HPHASE_1B, _PS_HPHASE_2B))
+#define SKL_PS_ECC_STAT(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_ECC_STAT_1A, _PS_ECC_STAT_2A),   \
+			_ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B))
+
 /* legacy palette */
 #define _LGC_PALETTE_A           0x4a000
 #define _LGC_PALETTE_B           0x4a800
@@ -5324,6 +5453,9 @@
 #define GEN7_L3SQCREG1				0xB010
 #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
 
+#define GEN8_L3SQCREG1				0xB100
+#define  BDW_WA_L3SQCREG1_DEFAULT		0x784000
+
 #define GEN7_L3CNTLREG1				0xB01C
 #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47FF8C
 #define  GEN7_L3AGDIS				(1<<19)
@@ -6091,6 +6223,7 @@
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
 
 #define GEN6_UCGCTL2				0x9404
+# define GEN6_VFUNIT_CLOCK_GATE_DISABLE			(1 << 31)
 # define GEN7_VDSUNIT_CLOCK_GATE_DISABLE		(1 << 30)
 # define GEN7_TDLUNIT_CLOCK_GATE_DISABLE		(1 << 22)
 # define GEN6_RCZUNIT_CLOCK_GATE_DISABLE		(1 << 13)
@@ -6189,6 +6322,8 @@
 #define GEN9_MEDIA_PG_IDLE_HYSTERESIS		0xA0C4
 #define GEN9_RENDER_PG_IDLE_HYSTERESIS		0xA0C8
 #define GEN9_PG_ENABLE				0xA210
+#define GEN9_RENDER_PG_ENABLE			(1<<0)
+#define GEN9_MEDIA_PG_ENABLE			(1<<1)
 
 #define VLV_CHICKEN_3				(VLV_DISPLAY_BASE + 0x7040C)
 #define  PIXEL_OVERLAP_CNT_MASK			(3 << 30)
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index b3070a4..2aa140e 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -221,7 +221,7 @@
 
 DECLARE_EVENT_CLASS(i915_page_table_entry_update,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table_entry *pt, u32 first, u32 count, u32 bits),
+		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
 	TP_ARGS(vm, pde, pt, first, count, bits),
 
 	TP_STRUCT__entry(
@@ -251,7 +251,7 @@
 
 DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table_entry *pt, u32 first, u32 count, u32 bits),
+		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
 	TP_ARGS(vm, pde, pt, first, count, bits)
 );
 
@@ -505,7 +505,6 @@
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
-			     __field(u32, uniq)
 			     __field(u32, seqno)
 			     ),
 
@@ -514,13 +513,11 @@
 						i915_gem_request_get_ring(req);
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->uniq = req ? req->uniq : 0;
 			   __entry->seqno = i915_gem_request_get_seqno(req);
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, uniq=%u, seqno=%u",
-		      __entry->dev, __entry->ring, __entry->uniq,
-		      __entry->seqno)
+	    TP_printk("dev=%u, ring=%u, seqno=%u",
+		      __entry->dev, __entry->ring, __entry->seqno)
 );
 
 DEFINE_EVENT(i915_gem_request, i915_gem_request_add,
@@ -565,7 +562,6 @@
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
-			     __field(u32, uniq)
 			     __field(u32, seqno)
 			     __field(bool, blocking)
 			     ),
@@ -581,14 +577,13 @@
 						i915_gem_request_get_ring(req);
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->uniq = req ? req->uniq : 0;
 			   __entry->seqno = i915_gem_request_get_seqno(req);
 			   __entry->blocking =
 				     mutex_is_locked(&ring->dev->struct_mutex);
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, uniq=%u, seqno=%u, blocking=%s",
-		      __entry->dev, __entry->ring, __entry->uniq,
+	    TP_printk("dev=%u, ring=%u, seqno=%u, blocking=%s",
+		      __entry->dev, __entry->ring,
 		      __entry->seqno, __entry->blocking ?  "yes (NB)" : "no")
 );
 
@@ -597,33 +592,6 @@
 	    TP_ARGS(req)
 );
 
-DECLARE_EVENT_CLASS(i915_ring,
-	    TP_PROTO(struct intel_engine_cs *ring),
-	    TP_ARGS(ring),
-
-	    TP_STRUCT__entry(
-			     __field(u32, dev)
-			     __field(u32, ring)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->dev = ring->dev->primary->index;
-			   __entry->ring = ring->id;
-			   ),
-
-	    TP_printk("dev=%u, ring=%u", __entry->dev, __entry->ring)
-);
-
-DEFINE_EVENT(i915_ring, i915_ring_wait_begin,
-	    TP_PROTO(struct intel_engine_cs *ring),
-	    TP_ARGS(ring)
-);
-
-DEFINE_EVENT(i915_ring, i915_ring_wait_end,
-	    TP_PROTO(struct intel_engine_cs *ring),
-	    TP_ARGS(ring)
-);
-
 TRACE_EVENT(i915_flip_request,
 	    TP_PROTO(int plane, struct drm_i915_gem_object *obj),
 
diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
index 3903b90..3c4b7cd 100644
--- a/drivers/gpu/drm/i915/intel_atomic.c
+++ b/drivers/gpu/drm/i915/intel_atomic.c
@@ -48,6 +48,8 @@
 	int ncrtcs = dev->mode_config.num_crtc;
 	int nconnectors = dev->mode_config.num_connector;
 	enum pipe nuclear_pipe = INVALID_PIPE;
+	struct intel_crtc *nuclear_crtc = NULL;
+	struct intel_crtc_state *crtc_state = NULL;
 	int ret;
 	int i;
 	bool not_nuclear = false;
@@ -76,8 +78,14 @@
 	state->allow_modeset = false;
 	for (i = 0; i < ncrtcs; i++) {
 		struct intel_crtc *crtc = to_intel_crtc(state->crtcs[i]);
+		if (crtc)
+			memset(&crtc->atomic, 0, sizeof(crtc->atomic));
 		if (crtc && crtc->pipe != nuclear_pipe)
 			not_nuclear = true;
+		if (crtc && crtc->pipe == nuclear_pipe) {
+			nuclear_crtc = crtc;
+			crtc_state = to_intel_crtc_state(state->crtc_states[i]);
+		}
 	}
 	for (i = 0; i < nconnectors; i++)
 		if (state->connectors[i] != NULL)
@@ -92,6 +100,11 @@
 	if (ret)
 		return ret;
 
+	/* FIXME: move to crtc atomic check function once it is ready */
+	ret = intel_atomic_setup_scalers(dev, nuclear_crtc, crtc_state);
+	if (ret)
+		return ret;
+
 	return ret;
 }
 
@@ -155,6 +168,18 @@
 		swap(state->plane_states[i], plane->state);
 		plane->state->state = NULL;
 	}
+
+	/* swap crtc_state */
+	for (i = 0; i < dev->mode_config.num_crtc; i++) {
+		struct drm_crtc *crtc = state->crtcs[i];
+		if (!crtc) {
+			continue;
+		}
+
+		to_intel_crtc(crtc)->config->scaler_state =
+			to_intel_crtc_state(state->crtc_states[i])->scaler_state;
+	}
+
 	drm_atomic_helper_commit_planes(dev, state);
 	drm_atomic_helper_wait_for_vblanks(dev, state);
 	drm_atomic_helper_cleanup_planes(dev, state);
@@ -241,3 +266,151 @@
 {
 	drm_atomic_helper_crtc_destroy_state(crtc, state);
 }
+
+/**
+ * intel_atomic_setup_scalers() - setup scalers for crtc per staged requests
+ * @dev: DRM device
+ * @intel_crtc: intel crtc
+ * @crtc_state: incoming crtc_state to validate and set up scalers
+ *
+ * This function sets up scalers based on staged scaling requests for
+ * a crtc and its planes. It is called from the crtc-level check path. If the
+ * request is supportable, it attaches scalers to the requested planes and crtc.
+ *
+ * This function takes into account the current scaler(s) in use by any planes
+ * that are not part of this atomic state.
+ *
+ * Returns:
+ *         0 - scalers were set up successfully
+ *         error code - otherwise
+ */
+int intel_atomic_setup_scalers(struct drm_device *dev,
+	struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state)
+{
+	struct drm_plane *plane = NULL;
+	struct intel_plane *intel_plane;
+	struct intel_plane_state *plane_state = NULL;
+	struct intel_crtc_scaler_state *scaler_state;
+	struct drm_atomic_state *drm_state;
+	int num_scalers_need;
+	int i, j;
+
+	if (INTEL_INFO(dev)->gen < 9 || !intel_crtc || !crtc_state)
+		return 0;
+
+	scaler_state = &crtc_state->scaler_state;
+	drm_state = crtc_state->base.state;
+
+	num_scalers_need = hweight32(scaler_state->scaler_users);
+	DRM_DEBUG_KMS("crtc_state = %p need = %d avail = %d scaler_users = 0x%x\n",
+		crtc_state, num_scalers_need, intel_crtc->num_scalers,
+		scaler_state->scaler_users);
+
+	/*
+	 * High level flow:
+	 * - staged scaler requests are already in scaler_state->scaler_users
+	 * - check whether staged scaling requests can be supported
+	 * - add planes using scalers that aren't in current transaction
+	 * - assign scalers to requested users
+	 * - as part of plane commit, scalers will be committed
+	 *   (i.e., either attached or detached) to respective planes in hw
+	 * - as part of crtc_commit, scaler will be either attached or detached
+	 *   to crtc in hw
+	 */
+
+	/* fail if required scalers > available scalers */
+	if (num_scalers_need > intel_crtc->num_scalers) {
+		DRM_DEBUG_KMS("Too many scaling requests %d > %d\n",
+			num_scalers_need, intel_crtc->num_scalers);
+		return -EINVAL;
+	}
+
+	/* walk through scaler_users bits and start assigning scalers */
+	for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) {
+		int *scaler_id;
+
+		/* skip if scaler not required */
+		if (!(scaler_state->scaler_users & (1 << i)))
+			continue;
+
+		if (i == SKL_CRTC_INDEX) {
+			/* panel fitter case: assign as a crtc scaler */
+			scaler_id = &scaler_state->scaler_id;
+		} else {
+			if (!drm_state)
+				continue;
+
+			/* plane scaler case: assign as a plane scaler */
+			/* find the plane that set the bit as scaler_user */
+			plane = drm_state->planes[i];
+
+			/*
+			 * to enable/disable hq mode, add planes that are using scaler
+			 * into this transaction
+			 */
+			if (!plane) {
+				struct drm_plane_state *state;
+				plane = drm_plane_from_index(dev, i);
+				state = drm_atomic_get_plane_state(drm_state, plane);
+				if (IS_ERR(state)) {
+					DRM_DEBUG_KMS("Failed to add [PLANE:%d] to drm_state\n",
+						plane->base.id);
+					return PTR_ERR(state);
+				}
+			}
+
+			intel_plane = to_intel_plane(plane);
+
+			/* plane on different crtc cannot be a scaler user of this crtc */
+			if (WARN_ON(intel_plane->pipe != intel_crtc->pipe)) {
+				continue;
+			}
+
+			plane_state = to_intel_plane_state(drm_state->plane_states[i]);
+			scaler_id = &plane_state->scaler_id;
+		}
+
+		if (*scaler_id < 0) {
+			/* find a free scaler */
+			for (j = 0; j < intel_crtc->num_scalers; j++) {
+				if (!scaler_state->scalers[j].in_use) {
+					scaler_state->scalers[j].in_use = 1;
+					*scaler_id = scaler_state->scalers[j].id;
+					DRM_DEBUG_KMS("Attached scaler id %u.%u to %s:%d\n",
+						intel_crtc->pipe,
+						i == SKL_CRTC_INDEX ? scaler_state->scaler_id :
+							plane_state->scaler_id,
+						i == SKL_CRTC_INDEX ? "CRTC" : "PLANE",
+						i == SKL_CRTC_INDEX ?  intel_crtc->base.base.id :
+						plane->base.id);
+					break;
+				}
+			}
+		}
+
+		if (WARN_ON(*scaler_id < 0)) {
+			DRM_DEBUG_KMS("Cannot find scaler for %s:%d\n",
+				i == SKL_CRTC_INDEX ? "CRTC" : "PLANE",
+				i == SKL_CRTC_INDEX ? intel_crtc->base.base.id : plane->base.id);
+			continue;
+		}
+
+		/* set scaler mode */
+		if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) {
+			/*
+			 * when only 1 scaler is in use on either pipe A or B,
+			 * scaler 0 operates in high quality (HQ) mode.
+			 * In this case use scaler 0 to take advantage of HQ mode
+			 */
+			*scaler_id = 0;
+			scaler_state->scalers[0].in_use = 1;
+			scaler_state->scalers[0].mode = PS_SCALER_MODE_HQ;
+			scaler_state->scalers[1].in_use = 0;
+		} else {
+			scaler_state->scalers[*scaler_id].mode = PS_SCALER_MODE_DYN;
+		}
+	}
+
+	return 0;
+}
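For reference, a minimal sketch of the scaler_users bookkeeping the function above relies on: one bit per plane index, a separate bit for the crtc/panel-fitter scaler, and the number of scalers needed obtained by a population count (hweight32 in the kernel; __builtin_popcount stands in here). The crtc bit position used below is an illustrative assumption, not the driver's SKL_CRTC_INDEX value:

#include <stdio.h>

#define EX_CRTC_INDEX 31	/* assumed crtc bit; kept clear of plane indices */

static unsigned int scaler_users;

static void request_plane_scaler(int plane_index)
{
	scaler_users |= 1u << plane_index;	/* plane 'plane_index' needs a scaler */
}

static void request_crtc_scaler(void)
{
	scaler_users |= 1u << EX_CRTC_INDEX;	/* pipe scaler (panel fitting) needed */
}

int main(void)
{
	request_plane_scaler(0);
	request_crtc_scaler();

	printf("scaler_users = 0x%x, scalers needed = %d\n",
	       scaler_users, __builtin_popcount(scaler_users));
	return 0;
}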
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index 976b891..a27ee8c 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -162,6 +162,30 @@
 			(1 << drm_plane_index(plane));
 	}
 
+	if (state->fb && intel_rotation_90_or_270(state->rotation)) {
+		if (!(state->fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
+			state->fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)) {
+			DRM_DEBUG_KMS("Y/Yf tiling required for 90/270!\n");
+			return -EINVAL;
+		}
+
+		/*
+		 * 90/270 is not allowed with RGB64 16:16:16:16,
+		 * RGB 16-bit 5:6:5, and Indexed 8-bit.
+		 * TBD: Add RGB64 case once it's added to the supported format list.
+		 */
+		switch (state->fb->pixel_format) {
+		case DRM_FORMAT_C8:
+		case DRM_FORMAT_RGB565:
+			DRM_DEBUG_KMS("Unsupported pixel format %s for 90/270!\n",
+					drm_get_format_name(state->fb->pixel_format));
+			return -EINVAL;
+
+		default:
+			break;
+		}
+	}
+
 	return intel_plane->check_plane(plane, intel_state);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 2396cc7..0d5b1ce 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -485,7 +485,8 @@
 		return -ENODEV;
 
 	intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
-	ret = intel_ddi_get_cdclk_freq(dev_priv);
+	ret = dev_priv->display.get_display_clock_speed(dev_priv->dev);
+
 	intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index c684085..c08368c 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -438,7 +438,7 @@
 		if (block_size >= sizeof(*general)) {
 			int bus_pin = general->crt_ddc_gmbus_pin;
 			DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin);
-			if (intel_gmbus_is_port_valid(bus_pin))
+			if (intel_gmbus_is_valid_pin(dev_priv, bus_pin))
 				dev_priv->vbt.crt_ddc_pin = bus_pin;
 		} else {
 			DRM_DEBUG_KMS("BDB_GD too small (%d). Invalid.\n",
@@ -447,6 +447,12 @@
 	}
 }
 
+static union child_device_config *
+child_device_ptr(struct bdb_general_definitions *p_defs, int i)
+{
+	return (void *) &p_defs->devices[i * p_defs->child_dev_size];
+}
+
 static void
 parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
 			  struct bdb_header *bdb)
@@ -476,10 +482,10 @@
 	block_size = get_blocksize(p_defs);
 	/* get the number of child device */
 	child_device_num = (block_size - sizeof(*p_defs)) /
-				sizeof(*p_child);
+		p_defs->child_dev_size;
 	count = 0;
 	for (i = 0; i < child_device_num; i++) {
-		p_child = &(p_defs->devices[i]);
+		p_child = child_device_ptr(p_defs, i);
 		if (!p_child->old.device_type) {
 			/* skip the device block if device type is invalid */
 			continue;
@@ -1067,25 +1073,19 @@
 		DRM_DEBUG_KMS("No general definition block is found, no devices defined.\n");
 		return;
 	}
-	/* judge whether the size of child device meets the requirements.
-	 * If the child device size obtained from general definition block
-	 * is different with sizeof(struct child_device_config), skip the
-	 * parsing of sdvo device info
-	 */
-	if (p_defs->child_dev_size != sizeof(*p_child)) {
-		/* different child dev size . Ignore it */
-		DRM_DEBUG_KMS("different child size is found. Invalid.\n");
+	if (p_defs->child_dev_size < sizeof(*p_child)) {
+		DRM_ERROR("General definiton block child device size is too small.\n");
 		return;
 	}
 	/* get the block size of general definitions */
 	block_size = get_blocksize(p_defs);
 	/* get the number of child device */
 	child_device_num = (block_size - sizeof(*p_defs)) /
-				sizeof(*p_child);
+				p_defs->child_dev_size;
 	count = 0;
 	/* get the number of child device that is present */
 	for (i = 0; i < child_device_num; i++) {
-		p_child = &(p_defs->devices[i]);
+		p_child = child_device_ptr(p_defs, i);
 		if (!p_child->common.device_type) {
 			/* skip the device block if device type is invalid */
 			continue;
@@ -1105,7 +1105,7 @@
 	dev_priv->vbt.child_dev_num = count;
 	count = 0;
 	for (i = 0; i < child_device_num; i++) {
-		p_child = &(p_defs->devices[i]);
+		p_child = child_device_ptr(p_defs, i);
 		if (!p_child->common.device_type) {
 			/* skip the device block if device type is invalid */
 			continue;
@@ -1133,7 +1133,7 @@
 	struct drm_device *dev = dev_priv->dev;
 	enum port port;
 
-	dev_priv->vbt.crt_ddc_pin = GMBUS_PORT_VGADDC;
+	dev_priv->vbt.crt_ddc_pin = GMBUS_PIN_VGADDC;
 
 	/* Default to having backlight */
 	dev_priv->vbt.backlight.present = true;
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index 6afd5be..af0b476 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -277,9 +277,9 @@
 	 * And the device num is related with the size of general definition
 	 * block. It is obtained by using the following formula:
 	 * number = (block_size - sizeof(bdb_general_definitions))/
-	 *	     sizeof(child_device_config);
+	 *	     defs->child_dev_size;
 	 */
-	union child_device_config devices[0];
+	uint8_t devices[0];
 } __packed;
 
 /* Mask for DRRS / Panel Channel / SSC / BLT control bits extraction */
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 6095a99..93bb515 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -747,7 +747,7 @@
 		goto out;
 
 	/* Try to probe digital port for output in DVI-I -> VGA mode. */
-	i2c = intel_gmbus_get_adapter(dev_priv, GMBUS_PORT_DPB);
+	i2c = intel_gmbus_get_adapter(dev_priv, GMBUS_PIN_DPB);
 	ret = intel_crt_ddc_get_modes(connector, i2c);
 
 out:
@@ -851,7 +851,7 @@
 	if (!crt)
 		return;
 
-	intel_connector = kzalloc(sizeof(*intel_connector), GFP_KERNEL);
+	intel_connector = intel_connector_alloc();
 	if (!intel_connector) {
 		kfree(crt);
 		return;
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 1c9d439..5b50484 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1689,105 +1689,6 @@
 	}
 }
 
-static int skl_get_cdclk_freq(struct drm_i915_private *dev_priv)
-{
-	uint32_t lcpll1 = I915_READ(LCPLL1_CTL);
-	uint32_t cdctl = I915_READ(CDCLK_CTL);
-	uint32_t linkrate;
-
-	if (!(lcpll1 & LCPLL_PLL_ENABLE)) {
-		WARN(1, "LCPLL1 not enabled\n");
-		return 24000; /* 24MHz is the cd freq with NSSC ref */
-	}
-
-	if ((cdctl & CDCLK_FREQ_SEL_MASK) == CDCLK_FREQ_540)
-		return 540000;
-
-	linkrate = (I915_READ(DPLL_CTRL1) &
-		    DPLL_CRTL1_LINK_RATE_MASK(SKL_DPLL0)) >> 1;
-
-	if (linkrate == DPLL_CRTL1_LINK_RATE_2160 ||
-	    linkrate == DPLL_CRTL1_LINK_RATE_1080) {
-		/* vco 8640 */
-		switch (cdctl & CDCLK_FREQ_SEL_MASK) {
-		case CDCLK_FREQ_450_432:
-			return 432000;
-		case CDCLK_FREQ_337_308:
-			return 308570;
-		case CDCLK_FREQ_675_617:
-			return 617140;
-		default:
-			WARN(1, "Unknown cd freq selection\n");
-		}
-	} else {
-		/* vco 8100 */
-		switch (cdctl & CDCLK_FREQ_SEL_MASK) {
-		case CDCLK_FREQ_450_432:
-			return 450000;
-		case CDCLK_FREQ_337_308:
-			return 337500;
-		case CDCLK_FREQ_675_617:
-			return 675000;
-		default:
-			WARN(1, "Unknown cd freq selection\n");
-		}
-	}
-
-	/* error case, do as if DPLL0 isn't enabled */
-	return 24000;
-}
-
-static int bdw_get_cdclk_freq(struct drm_i915_private *dev_priv)
-{
-	uint32_t lcpll = I915_READ(LCPLL_CTL);
-	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
-
-	if (lcpll & LCPLL_CD_SOURCE_FCLK)
-		return 800000;
-	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
-		return 450000;
-	else if (freq == LCPLL_CLK_FREQ_450)
-		return 450000;
-	else if (freq == LCPLL_CLK_FREQ_54O_BDW)
-		return 540000;
-	else if (freq == LCPLL_CLK_FREQ_337_5_BDW)
-		return 337500;
-	else
-		return 675000;
-}
-
-static int hsw_get_cdclk_freq(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-	uint32_t lcpll = I915_READ(LCPLL_CTL);
-	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
-
-	if (lcpll & LCPLL_CD_SOURCE_FCLK)
-		return 800000;
-	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
-		return 450000;
-	else if (freq == LCPLL_CLK_FREQ_450)
-		return 450000;
-	else if (IS_HSW_ULT(dev))
-		return 337500;
-	else
-		return 540000;
-}
-
-int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-
-	if (IS_SKYLAKE(dev))
-		return skl_get_cdclk_freq(dev_priv);
-
-	if (IS_BROADWELL(dev))
-		return bdw_get_cdclk_freq(dev_priv);
-
-	/* Haswell */
-	return hsw_get_cdclk_freq(dev_priv);
-}
-
 static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv,
 			       struct intel_shared_dpll *pll)
 {
@@ -1974,7 +1875,7 @@
 		hsw_shared_dplls_init(dev_priv);
 
 	DRM_DEBUG_KMS("CDCLK running at %dKHz\n",
-		      intel_ddi_get_cdclk_freq(dev_priv));
+		      dev_priv->display.get_display_clock_speed(dev));
 
 	if (IS_SKYLAKE(dev)) {
 		if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE))
@@ -2200,7 +2101,7 @@
 	struct intel_connector *connector;
 	enum port port = intel_dig_port->port;
 
-	connector = kzalloc(sizeof(*connector), GFP_KERNEL);
+	connector = intel_connector_alloc();
 	if (!connector)
 		return NULL;
 
@@ -2219,7 +2120,7 @@
 	struct intel_connector *connector;
 	enum port port = intel_dig_port->port;
 
-	connector = kzalloc(sizeof(*connector), GFP_KERNEL);
+	connector = intel_connector_alloc();
 	if (!connector)
 		return NULL;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d35f406..9f72d9a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -103,6 +103,8 @@
 			    const struct intel_crtc_state *pipe_config);
 static void intel_begin_crtc_commit(struct drm_crtc *crtc);
 static void intel_finish_crtc_commit(struct drm_crtc *crtc);
+static void skl_init_scalers(struct drm_device *dev, struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state);
 
 static struct intel_encoder *intel_find_encoder(struct intel_connector *connector, int pipe)
 {
@@ -2338,13 +2340,6 @@
 	info->pitch = fb->pitches[0];
 	info->fb_modifier = fb->modifier[0];
 
-	if (!(info->fb_modifier == I915_FORMAT_MOD_Y_TILED ||
-	      info->fb_modifier == I915_FORMAT_MOD_Yf_TILED)) {
-		DRM_DEBUG_KMS(
-			      "Y or Yf tiling is needed for 90/270 rotation!\n");
-		return -EINVAL;
-	}
-
 	return 0;
 }
 
@@ -2936,6 +2931,35 @@
 	return i915_gem_obj_ggtt_offset_view(obj, view);
 }
 
+/*
+ * This function detaches (i.e. unbinds) unused scalers in hardware
+ */
+void skl_detach_scalers(struct intel_crtc *intel_crtc)
+{
+	struct drm_device *dev;
+	struct drm_i915_private *dev_priv;
+	struct intel_crtc_scaler_state *scaler_state;
+	int i;
+
+	if (!intel_crtc || !intel_crtc->config)
+		return;
+
+	dev = intel_crtc->base.dev;
+	dev_priv = dev->dev_private;
+	scaler_state = &intel_crtc->config->scaler_state;
+
+	/* loop through and disable scalers that aren't in use */
+	for (i = 0; i < intel_crtc->num_scalers; i++) {
+		if (!scaler_state->scalers[i].in_use) {
+			I915_WRITE(SKL_PS_CTRL(intel_crtc->pipe, i), 0);
+			I915_WRITE(SKL_PS_WIN_POS(intel_crtc->pipe, i), 0);
+			I915_WRITE(SKL_PS_WIN_SZ(intel_crtc->pipe, i), 0);
+			DRM_DEBUG_KMS("CRTC:%d Disabled scaler id %u.%u\n",
+				intel_crtc->base.base.id, intel_crtc->pipe, i);
+		}
+	}
+}
+
 static void skylake_update_primary_plane(struct drm_crtc *crtc,
 					 struct drm_framebuffer *fb,
 					 int x, int y)
@@ -2945,8 +2969,12 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_i915_gem_object *obj;
 	int pipe = intel_crtc->pipe;
-	u32 plane_ctl, stride_div;
+	u32 plane_ctl, stride_div, stride;
+	u32 tile_height, plane_offset, plane_size;
+	unsigned int rotation;
+	int x_offset, y_offset;
 	unsigned long surf_addr;
+	struct drm_plane *plane;
 
 	if (!intel_crtc->primary_enabled) {
 		I915_WRITE(PLANE_CTL(pipe, 0), 0);
@@ -3007,21 +3035,51 @@
 	}
 
 	plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE;
-	if (crtc->primary->state->rotation == BIT(DRM_ROTATE_180))
+
+	plane = crtc->primary;
+	rotation = plane->state->rotation;
+	switch (rotation) {
+	case BIT(DRM_ROTATE_90):
+		plane_ctl |= PLANE_CTL_ROTATE_90;
+		break;
+
+	case BIT(DRM_ROTATE_180):
 		plane_ctl |= PLANE_CTL_ROTATE_180;
+		break;
+
+	case BIT(DRM_ROTATE_270):
+		plane_ctl |= PLANE_CTL_ROTATE_270;
+		break;
+	}
 
 	obj = intel_fb_obj(fb);
 	stride_div = intel_fb_stride_alignment(dev, fb->modifier[0],
 					       fb->pixel_format);
-	surf_addr = intel_plane_obj_offset(to_intel_plane(crtc->primary), obj);
+	surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj);
+
+	if (intel_rotation_90_or_270(rotation)) {
+		/* stride = Surface height in tiles */
+		tile_height = intel_tile_height(dev, fb->bits_per_pixel,
+						fb->modifier[0]);
+		stride = DIV_ROUND_UP(fb->height, tile_height);
+		x_offset = stride * tile_height - y - (plane->state->src_h >> 16);
+		y_offset = x;
+		plane_size = ((plane->state->src_w >> 16) - 1) << 16 |
+					((plane->state->src_h >> 16) - 1);
+	} else {
+		stride = fb->pitches[0] / stride_div;
+		x_offset = x;
+		y_offset = y;
+		plane_size = ((plane->state->src_h >> 16) - 1) << 16 |
+			((plane->state->src_w >> 16) - 1);
+	}
+	plane_offset = y_offset << 16 | x_offset;
 
 	I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
 	I915_WRITE(PLANE_POS(pipe, 0), 0);
-	I915_WRITE(PLANE_OFFSET(pipe, 0), (y << 16) | x);
-	I915_WRITE(PLANE_SIZE(pipe, 0),
-		   (intel_crtc->config->pipe_src_h - 1) << 16 |
-		   (intel_crtc->config->pipe_src_w - 1));
-	I915_WRITE(PLANE_STRIDE(pipe, 0), fb->pitches[0] / stride_div);
+	I915_WRITE(PLANE_OFFSET(pipe, 0), plane_offset);
+	I915_WRITE(PLANE_SIZE(pipe, 0), plane_size);
+	I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
 	I915_WRITE(PLANE_SURF(pipe, 0), surf_addr);
 
 	POSTING_READ(PLANE_SURF(pipe, 0));
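Editorial aside: a hedged worked example of the 90/270-degree branch above, assuming a hypothetical 1920x1080 XRGB8888 Y-tiled framebuffer scanned out at x = y = 0 with src_w = 1920 and src_h = 1080, and assuming intel_tile_height() reports 32 rows for that modifier and depth:

/*
 *   stride       = DIV_ROUND_UP(1080, 32)        = 34  (surface height in tiles)
 *   x_offset     = 34 * 32 - 0 - 1080            = 8
 *   y_offset     = x                             = 0
 *   plane_size   = (1920 - 1) << 16 | (1080 - 1)
 *   plane_offset = 0 << 16 | 8                   = 8
 *
 * i.e. PLANE_STRIDE switches from pitch/stride_div to surface height in
 * tiles, and the x/y offsets swap roles for the rotated scanout.
 */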
@@ -4251,16 +4309,175 @@
 	}
 }
 
-static void skylake_pfit_enable(struct intel_crtc *crtc)
+/**
+ * skl_update_scaler_users - Stages update to crtc's scaler state
+ * @intel_crtc: crtc
+ * @crtc_state: crtc_state
+ * @intel_plane: plane (NULL indicates the crtc is requesting the update)
+ * @plane_state: plane's state
+ * @force_detach: request unconditional detachment of scaler
+ *
+ * This function updates the scaler state for the requested plane or crtc.
+ * To request a scaler usage update for a plane, the caller passes the plane
+ * pointer; to request one for the crtc itself, the caller passes NULL as the
+ * plane pointer.
+ *
+ * Return
+ *     0 - scaler_users updated successfully
+ *    error - requested scaling cannot be supported or other error condition
+ */
+int
+skl_update_scaler_users(
+	struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state,
+	struct intel_plane *intel_plane, struct intel_plane_state *plane_state,
+	int force_detach)
+{
+	int need_scaling;
+	int idx;
+	int src_w, src_h, dst_w, dst_h;
+	int *scaler_id;
+	struct drm_framebuffer *fb;
+	struct intel_crtc_scaler_state *scaler_state;
+
+	if (!intel_crtc || !crtc_state)
+		return 0;
+
+	scaler_state = &crtc_state->scaler_state;
+
+	idx = intel_plane ? drm_plane_index(&intel_plane->base) : SKL_CRTC_INDEX;
+	fb = intel_plane ? plane_state->base.fb : NULL;
+
+	if (intel_plane) {
+		src_w = drm_rect_width(&plane_state->src) >> 16;
+		src_h = drm_rect_height(&plane_state->src) >> 16;
+		dst_w = drm_rect_width(&plane_state->dst);
+		dst_h = drm_rect_height(&plane_state->dst);
+		scaler_id = &plane_state->scaler_id;
+	} else {
+		struct drm_display_mode *adjusted_mode =
+			&crtc_state->base.adjusted_mode;
+		src_w = crtc_state->pipe_src_w;
+		src_h = crtc_state->pipe_src_h;
+		dst_w = adjusted_mode->hdisplay;
+		dst_h = adjusted_mode->vdisplay;
+		scaler_id = &scaler_state->scaler_id;
+	}
+	need_scaling = (src_w != dst_w || src_h != dst_h);
+
+	/*
+	 * If the plane is being disabled, the scaler is no longer required,
+	 * or a forced detach was requested:
+	 *  - free the scaler bound to this plane/crtc
+	 *  - in order to do this, update crtc_state->scaler_state.scaler_users
+	 *
+	 * Only the scaler state in crtc_state is freed here, so that the
+	 * scaler can be assigned to another user. The actual register
+	 * update that frees the scaler is done in plane/panel-fit programming.
+	 * For that reason crtc/plane_state->scaler_id isn't reset here.
+	 */
+	if (force_detach || !need_scaling || (intel_plane &&
+		(!fb || !plane_state->visible))) {
+		if (*scaler_id >= 0) {
+			scaler_state->scaler_users &= ~(1 << idx);
+			scaler_state->scalers[*scaler_id].in_use = 0;
+
+			DRM_DEBUG_KMS("Staged freeing scaler id %d.%d from %s:%d "
+				"crtc_state = %p scaler_users = 0x%x\n",
+				intel_crtc->pipe, *scaler_id, intel_plane ? "PLANE" : "CRTC",
+				intel_plane ? intel_plane->base.base.id :
+				intel_crtc->base.base.id, crtc_state,
+				scaler_state->scaler_users);
+			*scaler_id = -1;
+		}
+		return 0;
+	}
+
+	/* range checks */
+	if (src_w < SKL_MIN_SRC_W || src_h < SKL_MIN_SRC_H ||
+		dst_w < SKL_MIN_DST_W || dst_h < SKL_MIN_DST_H ||
+
+		src_w > SKL_MAX_SRC_W || src_h > SKL_MAX_SRC_H ||
+		dst_w > SKL_MAX_DST_W || dst_h > SKL_MAX_DST_H) {
+		DRM_DEBUG_KMS("%s:%d scaler_user index %u.%u: src %ux%u dst %ux%u "
+			"size is out of scaler range\n",
+			intel_plane ? "PLANE" : "CRTC",
+			intel_plane ? intel_plane->base.base.id : intel_crtc->base.base.id,
+			intel_crtc->pipe, idx, src_w, src_h, dst_w, dst_h);
+		return -EINVAL;
+	}
+
+	/* check colorkey */
+	if (intel_plane && intel_plane->ckey.flags != I915_SET_COLORKEY_NONE) {
+		DRM_DEBUG_KMS("PLANE:%d scaling with color key not allowed",
+			intel_plane->base.base.id);
+		return -EINVAL;
+	}
+
+	/* Check src format */
+	if (intel_plane) {
+		switch (fb->pixel_format) {
+		case DRM_FORMAT_RGB565:
+		case DRM_FORMAT_XBGR8888:
+		case DRM_FORMAT_XRGB8888:
+		case DRM_FORMAT_ABGR8888:
+		case DRM_FORMAT_ARGB8888:
+		case DRM_FORMAT_XRGB2101010:
+		case DRM_FORMAT_ARGB2101010:
+		case DRM_FORMAT_XBGR2101010:
+		case DRM_FORMAT_ABGR2101010:
+		case DRM_FORMAT_YUYV:
+		case DRM_FORMAT_YVYU:
+		case DRM_FORMAT_UYVY:
+		case DRM_FORMAT_VYUY:
+			break;
+		default:
+			DRM_DEBUG_KMS("PLANE:%d FB:%d unsupported scaling format 0x%x\n",
+				intel_plane->base.base.id, fb->base.id, fb->pixel_format);
+			return -EINVAL;
+		}
+	}
+
+	/* mark this plane as a scaler user in crtc_state */
+	scaler_state->scaler_users |= (1 << idx);
+	DRM_DEBUG_KMS("%s:%d staged scaling request for %ux%u->%ux%u "
+		"crtc_state = %p scaler_users = 0x%x\n",
+		intel_plane ? "PLANE" : "CRTC",
+		intel_plane ? intel_plane->base.base.id : intel_crtc->base.base.id,
+		src_w, src_h, dst_w, dst_h, crtc_state, scaler_state->scaler_users);
+	return 0;
+}
+
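Editorial aside: a hedged usage sketch of the helper above; the call sites shown are illustrative, the real callers visible in this patch being skylake_pfit_update() below and the eDP compute_config path in intel_dp.c:

/* A plane stages a scaler request: its drm_plane_index() bit is used. */
ret = skl_update_scaler_users(intel_crtc, crtc_state,
			      intel_plane, plane_state, 0);

/* The crtc (panel fitting) stages one: plane is NULL, bit SKL_CRTC_INDEX is used. */
ret = skl_update_scaler_users(intel_crtc, crtc_state, NULL, NULL, 0);

/* Forced detach, e.g. on disable: a non-zero last argument frees the scaler. */
ret = skl_update_scaler_users(intel_crtc, crtc_state, NULL, NULL, 1);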
+static void skylake_pfit_update(struct intel_crtc *crtc, int enable)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = crtc->pipe;
+	struct intel_crtc_scaler_state *scaler_state =
+		&crtc->config->scaler_state;
+
+	DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config);
+
+	/* To update pfit, first update scaler state */
+	skl_update_scaler_users(crtc, crtc->config, NULL, NULL, !enable);
+	intel_atomic_setup_scalers(crtc->base.dev, crtc, crtc->config);
+	skl_detach_scalers(crtc);
+	if (!enable)
+		return;
 
 	if (crtc->config->pch_pfit.enabled) {
-		I915_WRITE(PS_CTL(pipe), PS_ENABLE);
-		I915_WRITE(PS_WIN_POS(pipe), crtc->config->pch_pfit.pos);
-		I915_WRITE(PS_WIN_SZ(pipe), crtc->config->pch_pfit.size);
+		int id;
+
+		if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) {
+			DRM_ERROR("Requesting pfit without getting a scaler first\n");
+			return;
+		}
+
+		id = scaler_state->scaler_id;
+		I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
+			PS_FILTER_MEDIUM | scaler_state->scalers[id].mode);
+		I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos);
+		I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size);
+
+		DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id);
 	}
 }
 
@@ -4665,7 +4882,7 @@
 	intel_ddi_enable_pipe_clock(intel_crtc);
 
 	if (IS_SKYLAKE(dev))
-		skylake_pfit_enable(intel_crtc);
+		skylake_pfit_update(intel_crtc, 1);
 	else
 		ironlake_pfit_enable(intel_crtc);
 
@@ -4701,21 +4918,6 @@
 	intel_crtc_enable_planes(crtc);
 }
 
-static void skylake_pfit_disable(struct intel_crtc *crtc)
-{
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int pipe = crtc->pipe;
-
-	/* To avoid upsetting the power well on haswell only disable the pfit if
-	 * it's in use. The hw state code will make sure we get this right. */
-	if (crtc->config->pch_pfit.enabled) {
-		I915_WRITE(PS_CTL(pipe), 0);
-		I915_WRITE(PS_WIN_POS(pipe), 0);
-		I915_WRITE(PS_WIN_SZ(pipe), 0);
-	}
-}
-
 static void ironlake_pfit_disable(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -4828,7 +5030,7 @@
 	intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder);
 
 	if (IS_SKYLAKE(dev))
-		skylake_pfit_disable(intel_crtc);
+		skylake_pfit_update(intel_crtc, 0);
 	else
 		ironlake_pfit_disable(intel_crtc);
 
@@ -5160,36 +5362,48 @@
 }
 
 /* compute the max pixel clock for new configuration */
-static int intel_mode_max_pixclk(struct drm_i915_private *dev_priv)
+static int intel_mode_max_pixclk(struct drm_atomic_state *state)
 {
-	struct drm_device *dev = dev_priv->dev;
+	struct drm_device *dev = state->dev;
 	struct intel_crtc *intel_crtc;
+	struct intel_crtc_state *crtc_state;
 	int max_pixclk = 0;
 
 	for_each_intel_crtc(dev, intel_crtc) {
-		if (intel_crtc->new_enabled)
-			max_pixclk = max(max_pixclk,
-					 intel_crtc->new_config->base.adjusted_mode.crtc_clock);
+		crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
+
+		if (!crtc_state->base.enable)
+			continue;
+
+		max_pixclk = max(max_pixclk,
+				 crtc_state->base.adjusted_mode.crtc_clock);
 	}
 
 	return max_pixclk;
 }
 
-static void valleyview_modeset_global_pipes(struct drm_device *dev,
+static int valleyview_modeset_global_pipes(struct drm_atomic_state *state,
 					    unsigned *prepare_pipes)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(state->dev);
 	struct intel_crtc *intel_crtc;
-	int max_pixclk = intel_mode_max_pixclk(dev_priv);
+	int max_pixclk = intel_mode_max_pixclk(state);
+
+	if (max_pixclk < 0)
+		return max_pixclk;
 
 	if (valleyview_calc_cdclk(dev_priv, max_pixclk) ==
 	    dev_priv->vlv_cdclk_freq)
-		return;
+		return 0;
 
 	/* disable/enable all currently active pipes while we change cdclk */
-	for_each_intel_crtc(dev, intel_crtc)
+	for_each_intel_crtc(state->dev, intel_crtc)
 		if (intel_crtc->base.state->enable)
 			*prepare_pipes |= (1 << intel_crtc->pipe);
+
+	return 0;
 }
 
 static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv)
@@ -5232,8 +5446,18 @@
 {
 	struct drm_device *dev = state->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int max_pixclk = intel_mode_max_pixclk(dev_priv);
-	int req_cdclk = valleyview_calc_cdclk(dev_priv, max_pixclk);
+	int max_pixclk = intel_mode_max_pixclk(state);
+	int req_cdclk;
+
+	/* The only reason this can fail is if we fail to add the crtc_state
+	 * to the atomic state. But that can't happen since the call to
+	 * intel_mode_max_pixclk() in valleyview_modeset_global_pipes() (which
+	 * can't have failed otherwise the mode set would be aborted) added all
+	 * the states already. */
+	if (WARN_ON(max_pixclk < 0))
+		return;
+
+	req_cdclk = valleyview_calc_cdclk(dev_priv, max_pixclk);
 
 	if (req_cdclk != dev_priv->vlv_cdclk_freq) {
 		/*
@@ -5554,7 +5778,7 @@
 	dev_priv->display.crtc_disable(crtc);
 	dev_priv->display.off(crtc);
 
-	crtc->primary->funcs->disable_plane(crtc->primary);
+	drm_plane_helper_disable(crtc->primary);
 
 	/* Update computed state. */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
@@ -5636,6 +5860,34 @@
 	}
 }
 
+int intel_connector_init(struct intel_connector *connector)
+{
+	struct drm_connector_state *connector_state;
+
+	connector_state = kzalloc(sizeof *connector_state, GFP_KERNEL);
+	if (!connector_state)
+		return -ENOMEM;
+
+	connector->base.state = connector_state;
+	return 0;
+}
+
+struct intel_connector *intel_connector_alloc(void)
+{
+	struct intel_connector *connector;
+
+	connector = kzalloc(sizeof *connector, GFP_KERNEL);
+	if (!connector)
+		return NULL;
+
+	if (intel_connector_init(connector) < 0) {
+		kfree(connector);
+		return NULL;
+	}
+
+	return connector;
+}
+
 /* Even simpler default implementation, if there's really no special case to
  * consider. */
 void intel_connector_dpms(struct drm_connector *connector, int mode)
@@ -5667,65 +5919,80 @@
 	return encoder->get_hw_state(encoder, &pipe);
 }
 
-static int pipe_required_fdi_lanes(struct drm_device *dev, enum pipe pipe)
+static int pipe_required_fdi_lanes(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc =
-		to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe));
-
-	if (crtc->base.state->enable &&
-	    crtc->config->has_pch_encoder)
-		return crtc->config->fdi_lanes;
+	if (crtc_state->base.enable && crtc_state->has_pch_encoder)
+		return crtc_state->fdi_lanes;
 
 	return 0;
 }
 
-static bool ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
+static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
 				     struct intel_crtc_state *pipe_config)
 {
+	struct drm_atomic_state *state = pipe_config->base.state;
+	struct intel_crtc *other_crtc;
+	struct intel_crtc_state *other_crtc_state;
+
 	DRM_DEBUG_KMS("checking fdi config on pipe %c, lanes %i\n",
 		      pipe_name(pipe), pipe_config->fdi_lanes);
 	if (pipe_config->fdi_lanes > 4) {
 		DRM_DEBUG_KMS("invalid fdi lane config on pipe %c: %i lanes\n",
 			      pipe_name(pipe), pipe_config->fdi_lanes);
-		return false;
+		return -EINVAL;
 	}
 
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
 		if (pipe_config->fdi_lanes > 2) {
 			DRM_DEBUG_KMS("only 2 lanes on haswell, required: %i lanes\n",
 				      pipe_config->fdi_lanes);
-			return false;
+			return -EINVAL;
 		} else {
-			return true;
+			return 0;
 		}
 	}
 
 	if (INTEL_INFO(dev)->num_pipes == 2)
-		return true;
+		return 0;
 
 	/* Ivybridge 3 pipe is really complicated */
 	switch (pipe) {
 	case PIPE_A:
-		return true;
+		return 0;
 	case PIPE_B:
-		if (pipe_config->fdi_lanes > 2 &&
-		    pipe_required_fdi_lanes(dev, PIPE_C) > 0) {
+		if (pipe_config->fdi_lanes <= 2)
+			return 0;
+
+		other_crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, PIPE_C));
+		other_crtc_state =
+			intel_atomic_get_crtc_state(state, other_crtc);
+		if (IS_ERR(other_crtc_state))
+			return PTR_ERR(other_crtc_state);
+
+		if (pipe_required_fdi_lanes(other_crtc_state) > 0) {
 			DRM_DEBUG_KMS("invalid shared fdi lane config on pipe %c: %i lanes\n",
 				      pipe_name(pipe), pipe_config->fdi_lanes);
-			return false;
+			return -EINVAL;
 		}
-		return true;
+		return 0;
 	case PIPE_C:
 		if (pipe_config->fdi_lanes > 2) {
 			DRM_DEBUG_KMS("only 2 lanes on pipe %c: required %i lanes\n",
 				      pipe_name(pipe), pipe_config->fdi_lanes);
-			return false;
+			return -EINVAL;
 		}
-		if (pipe_required_fdi_lanes(dev, PIPE_B) > 2) {
+
+		other_crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, PIPE_B));
+		other_crtc_state =
+			intel_atomic_get_crtc_state(state, other_crtc);
+		if (IS_ERR(other_crtc_state))
+			return PTR_ERR(other_crtc_state);
+
+		if (pipe_required_fdi_lanes(other_crtc_state) > 2) {
 			DRM_DEBUG_KMS("fdi link B uses too many lanes to enable link C\n");
-			return false;
+			return -EINVAL;
 		}
-		return true;
+		return 0;
 	default:
 		BUG();
 	}
@@ -5737,8 +6004,8 @@
 {
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
-	int lane, link_bw, fdi_dotclock;
-	bool setup_ok, needs_recompute = false;
+	int lane, link_bw, fdi_dotclock, ret;
+	bool needs_recompute = false;
 
 retry:
 	/* FDI is a binary signal running at ~2.7GHz, encoding
@@ -5760,9 +6027,9 @@
 	intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock,
 			       link_bw, &pipe_config->fdi_m_n);
 
-	setup_ok = ironlake_check_fdi_lanes(intel_crtc->base.dev,
-					    intel_crtc->pipe, pipe_config);
-	if (!setup_ok && pipe_config->pipe_bpp > 6*3) {
+	ret = ironlake_check_fdi_lanes(intel_crtc->base.dev,
+				       intel_crtc->pipe, pipe_config);
+	if (ret == -EINVAL && pipe_config->pipe_bpp > 6*3) {
 		pipe_config->pipe_bpp -= 2*3;
 		DRM_DEBUG_KMS("fdi link bw constraint, reducing pipe bpp to %i\n",
 			      pipe_config->pipe_bpp);
@@ -5775,7 +6042,7 @@
 	if (needs_recompute)
 		return RETRY;
 
-	return setup_ok ? 0 : -EINVAL;
+	return ret;
 }
 
 static void hsw_compute_ips_config(struct intel_crtc *crtc,
@@ -5792,6 +6059,7 @@
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	int ret;
 
 	/* FIXME should check pixel clock limits on all platforms */
 	if (INTEL_INFO(dev)->gen < 4) {
@@ -5846,7 +6114,101 @@
 	if (pipe_config->has_pch_encoder)
 		return ironlake_fdi_compute_config(crtc, pipe_config);
 
-	return 0;
+	/* FIXME: remove the call below once atomic modeset is in place and all
+	 * crtc-related checks are called from the atomic_crtc_check function */
+	ret = 0;
+	DRM_DEBUG_KMS("intel_crtc = %p drm_state (pipe_config->base.state) = %p\n",
+		crtc, pipe_config->base.state);
+	ret = intel_atomic_setup_scalers(dev, crtc, pipe_config);
+
+	return ret;
+}
+
+static int skylake_get_display_clock_speed(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	uint32_t lcpll1 = I915_READ(LCPLL1_CTL);
+	uint32_t cdctl = I915_READ(CDCLK_CTL);
+	uint32_t linkrate;
+
+	if (!(lcpll1 & LCPLL_PLL_ENABLE)) {
+		WARN(1, "LCPLL1 not enabled\n");
+		return 24000; /* 24MHz is the cd freq with NSSC ref */
+	}
+
+	if ((cdctl & CDCLK_FREQ_SEL_MASK) == CDCLK_FREQ_540)
+		return 540000;
+
+	linkrate = (I915_READ(DPLL_CTRL1) &
+		    DPLL_CRTL1_LINK_RATE_MASK(SKL_DPLL0)) >> 1;
+
+	if (linkrate == DPLL_CRTL1_LINK_RATE_2160 ||
+	    linkrate == DPLL_CRTL1_LINK_RATE_1080) {
+		/* vco 8640 */
+		switch (cdctl & CDCLK_FREQ_SEL_MASK) {
+		case CDCLK_FREQ_450_432:
+			return 432000;
+		case CDCLK_FREQ_337_308:
+			return 308570;
+		case CDCLK_FREQ_675_617:
+			return 617140;
+		default:
+			WARN(1, "Unknown cd freq selection\n");
+		}
+	} else {
+		/* vco 8100 */
+		switch (cdctl & CDCLK_FREQ_SEL_MASK) {
+		case CDCLK_FREQ_450_432:
+			return 450000;
+		case CDCLK_FREQ_337_308:
+			return 337500;
+		case CDCLK_FREQ_675_617:
+			return 675000;
+		default:
+			WARN(1, "Unknown cd freq selection\n");
+		}
+	}
+
+	/* error case, do as if DPLL0 isn't enabled */
+	return 24000;
+}
+
+static int broadwell_get_display_clock_speed(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t lcpll = I915_READ(LCPLL_CTL);
+	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
+
+	if (lcpll & LCPLL_CD_SOURCE_FCLK)
+		return 800000;
+	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
+		return 450000;
+	else if (freq == LCPLL_CLK_FREQ_450)
+		return 450000;
+	else if (freq == LCPLL_CLK_FREQ_54O_BDW)
+		return 540000;
+	else if (freq == LCPLL_CLK_FREQ_337_5_BDW)
+		return 337500;
+	else
+		return 675000;
+}
+
+static int haswell_get_display_clock_speed(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t lcpll = I915_READ(LCPLL_CTL);
+	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
+
+	if (lcpll & LCPLL_CD_SOURCE_FCLK)
+		return 800000;
+	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
+		return 450000;
+	else if (freq == LCPLL_CLK_FREQ_450)
+		return 450000;
+	else if (IS_HSW_ULT(dev))
+		return 337500;
+	else
+		return 540000;
 }
 
 static int valleyview_get_display_clock_speed(struct drm_device *dev)
@@ -5871,6 +6233,11 @@
 	return DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, divider + 1);
 }
 
+static int ilk_get_display_clock_speed(struct drm_device *dev)
+{
+	return 450000;
+}
+
 static int i945_get_display_clock_speed(struct drm_device *dev)
 {
 	return 400000;
@@ -5878,7 +6245,7 @@
 
 static int i915_get_display_clock_speed(struct drm_device *dev)
 {
-	return 333000;
+	return 333333;
 }
 
 static int i9xx_misc_get_display_clock_speed(struct drm_device *dev)
@@ -5894,19 +6261,19 @@
 
 	switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
 	case GC_DISPLAY_CLOCK_267_MHZ_PNV:
-		return 267000;
+		return 266667;
 	case GC_DISPLAY_CLOCK_333_MHZ_PNV:
-		return 333000;
+		return 333333;
 	case GC_DISPLAY_CLOCK_444_MHZ_PNV:
-		return 444000;
+		return 444444;
 	case GC_DISPLAY_CLOCK_200_MHZ_PNV:
 		return 200000;
 	default:
 		DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc);
 	case GC_DISPLAY_CLOCK_133_MHZ_PNV:
-		return 133000;
+		return 133333;
 	case GC_DISPLAY_CLOCK_167_MHZ_PNV:
-		return 167000;
+		return 166667;
 	}
 }
 
@@ -5917,11 +6284,11 @@
 	pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
 
 	if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
-		return 133000;
+		return 133333;
 	else {
 		switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
 		case GC_DISPLAY_CLOCK_333_MHZ:
-			return 333000;
+			return 333333;
 		default:
 		case GC_DISPLAY_CLOCK_190_200_MHZ:
 			return 190000;
@@ -5931,7 +6298,7 @@
 
 static int i865_get_display_clock_speed(struct drm_device *dev)
 {
-	return 266000;
+	return 266667;
 }
 
 static int i855_get_display_clock_speed(struct drm_device *dev)
@@ -5947,7 +6314,7 @@
 	case GC_CLOCK_166_250:
 		return 250000;
 	case GC_CLOCK_100_133:
-		return 133000;
+		return 133333;
 	}
 
 	/* Shouldn't happen */
@@ -5956,7 +6323,7 @@
 
 static int i830_get_display_clock_speed(struct drm_device *dev)
 {
-	return 133000;
+	return 133333;
 }
 
 static void
@@ -7952,14 +8319,28 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t tmp;
+	struct intel_crtc_scaler_state *scaler_state = &pipe_config->scaler_state;
+	uint32_t ps_ctrl = 0;
+	int id = -1;
+	int i;
 
-	tmp = I915_READ(PS_CTL(crtc->pipe));
+	/* find scaler attached to this pipe */
+	for (i = 0; i < crtc->num_scalers; i++) {
+		ps_ctrl = I915_READ(SKL_PS_CTRL(crtc->pipe, i));
+		if (ps_ctrl & PS_SCALER_EN && !(ps_ctrl & PS_PLANE_SEL_MASK)) {
+			id = i;
+			pipe_config->pch_pfit.enabled = true;
+			pipe_config->pch_pfit.pos = I915_READ(SKL_PS_WIN_POS(crtc->pipe, i));
+			pipe_config->pch_pfit.size = I915_READ(SKL_PS_WIN_SZ(crtc->pipe, i));
+			break;
+		}
+	}
 
-	if (tmp & PS_ENABLE) {
-		pipe_config->pch_pfit.enabled = true;
-		pipe_config->pch_pfit.pos = I915_READ(PS_WIN_POS(crtc->pipe));
-		pipe_config->pch_pfit.size = I915_READ(PS_WIN_SZ(crtc->pipe));
+	scaler_state->scaler_id = id;
+	if (id >= 0) {
+		scaler_state->scaler_users |= (1 << SKL_CRTC_INDEX);
+	} else {
+		scaler_state->scaler_users &= ~(1 << SKL_CRTC_INDEX);
 	}
 }
 
@@ -8593,12 +8974,19 @@
 
 	intel_get_pipe_timings(crtc, pipe_config);
 
+	if (INTEL_INFO(dev)->gen >= 9) {
+		skl_init_scalers(dev, crtc, pipe_config);
+	}
+
 	pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
 	if (intel_display_power_is_enabled(dev_priv, pfit_domain)) {
 		if (IS_SKYLAKE(dev))
 			skylake_get_pfit_config(crtc, pipe_config);
 		else
 			ironlake_get_pfit_config(crtc, pipe_config);
+	} else {
+		pipe_config->scaler_state.scaler_id = -1;
+		pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
 	}
 
 	if (IS_HASWELL(dev))
@@ -9042,7 +9430,6 @@
 
 	intel_crtc = to_intel_crtc(crtc);
 	intel_crtc->new_enabled = true;
-	intel_crtc->new_config = intel_crtc->config;
 	old->dpms_mode = connector->dpms;
 	old->load_detect_temp = true;
 	old->release_fb = NULL;
@@ -9098,10 +9485,6 @@
 
  fail:
 	intel_crtc->new_enabled = crtc->state->enable;
-	if (intel_crtc->new_enabled)
-		intel_crtc->new_config = intel_crtc->config;
-	else
-		intel_crtc->new_config = NULL;
 fail_unlock:
 	if (state) {
 		drm_atomic_state_free(state);
@@ -9147,7 +9530,6 @@
 		to_intel_connector(connector)->new_encoder = NULL;
 		intel_encoder->new_crtc = NULL;
 		intel_crtc->new_enabled = false;
-		intel_crtc->new_config = NULL;
 
 		connector_state->best_encoder = NULL;
 		connector_state->crtc = NULL;
@@ -10057,6 +10439,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_unpin_work *work;
 
 	WARN_ON(!in_interrupt());
 
@@ -10064,12 +10447,16 @@
 		return;
 
 	spin_lock(&dev->event_lock);
-	if (intel_crtc->unpin_work && __intel_pageflip_stall_check(dev, crtc)) {
+	work = intel_crtc->unpin_work;
+	if (work != NULL && __intel_pageflip_stall_check(dev, crtc)) {
 		WARN_ONCE(1, "Kicking stuck page flip: queued at %d, now %d\n",
-			 intel_crtc->unpin_work->flip_queued_vblank,
-			 drm_vblank_count(dev, pipe));
+			 work->flip_queued_vblank, drm_vblank_count(dev, pipe));
 		page_flip_completed(intel_crtc);
+		work = NULL;
 	}
+	if (work != NULL &&
+	    drm_vblank_count(dev, pipe) - work->flip_queued_vblank > 1)
+		intel_queue_rps_boost_for_request(dev, work->flip_queued_req);
 	spin_unlock(&dev->event_lock);
 }
 
@@ -10087,6 +10474,7 @@
 	enum pipe pipe = intel_crtc->pipe;
 	struct intel_unpin_work *work;
 	struct intel_engine_cs *ring;
+	bool mmio_flip;
 	int ret;
 
 	/*
@@ -10184,15 +10572,23 @@
 		ring = &dev_priv->ring[RCS];
 	}
 
+	mmio_flip = use_mmio_flip(ring, obj);
+
+	/* When using CS flips, we want to emit semaphores between rings.
+	 * However, when using mmio flips we will create a task to do the
+	 * synchronisation, so all we want here is to pin the framebuffer
+	 * into the display plane and skip any waits.
+	 */
 	ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
-					 crtc->primary->state, ring);
+					 crtc->primary->state,
+					 mmio_flip ? i915_gem_request_get_ring(obj->last_read_req) : ring);
 	if (ret)
 		goto cleanup_pending;
 
 	work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary), obj)
 						  + intel_crtc->dspaddr_offset;
 
-	if (use_mmio_flip(ring, obj)) {
+	if (mmio_flip) {
 		ret = intel_queue_mmio_flip(dev, crtc, fb, obj, ring,
 					    page_flip_flags);
 		if (ret)
@@ -10287,11 +10683,6 @@
 
 	for_each_intel_crtc(dev, crtc) {
 		crtc->new_enabled = crtc->base.state->enable;
-
-		if (crtc->new_enabled)
-			crtc->new_config = crtc->config;
-		else
-			crtc->new_config = NULL;
 	}
 }
 
@@ -10449,8 +10840,14 @@
 				   struct intel_crtc_state *pipe_config,
 				   const char *context)
 {
-	DRM_DEBUG_KMS("[CRTC:%d]%s config for pipe %c\n", crtc->base.base.id,
-		      context, pipe_name(crtc->pipe));
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_plane *plane;
+	struct intel_plane *intel_plane;
+	struct intel_plane_state *state;
+	struct drm_framebuffer *fb;
+
+	DRM_DEBUG_KMS("[CRTC:%d]%s config %p for pipe %c\n", crtc->base.base.id,
+		      context, pipe_config, pipe_name(crtc->pipe));
 
 	DRM_DEBUG_KMS("cpu_transcoder: %c\n", transcoder_name(pipe_config->cpu_transcoder));
 	DRM_DEBUG_KMS("pipe bpp: %i, dithering: %i\n",
@@ -10487,6 +10884,9 @@
 	DRM_DEBUG_KMS("port clock: %d\n", pipe_config->port_clock);
 	DRM_DEBUG_KMS("pipe src size: %dx%d\n",
 		      pipe_config->pipe_src_w, pipe_config->pipe_src_h);
+	DRM_DEBUG_KMS("num_scalers: %d\n", crtc->num_scalers);
+	DRM_DEBUG_KMS("scaler_users: 0x%x\n", pipe_config->scaler_state.scaler_users);
+	DRM_DEBUG_KMS("scaler id: %d\n", pipe_config->scaler_state.scaler_id);
 	DRM_DEBUG_KMS("gmch pfit: control: 0x%08x, ratios: 0x%08x, lvds border: 0x%08x\n",
 		      pipe_config->gmch_pfit.control,
 		      pipe_config->gmch_pfit.pgm_ratios,
@@ -10497,6 +10897,40 @@
 		      pipe_config->pch_pfit.enabled ? "enabled" : "disabled");
 	DRM_DEBUG_KMS("ips: %i\n", pipe_config->ips_enabled);
 	DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide);
+
+	DRM_DEBUG_KMS("planes on this crtc\n");
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+		intel_plane = to_intel_plane(plane);
+		if (intel_plane->pipe != crtc->pipe)
+			continue;
+
+		state = to_intel_plane_state(plane->state);
+		fb = state->base.fb;
+		if (!fb) {
+			DRM_DEBUG_KMS("%s PLANE:%d plane: %u.%u idx: %d "
+				"disabled, scaler_id = %d\n",
+				plane->type == DRM_PLANE_TYPE_CURSOR ? "CURSOR" : "STANDARD",
+				plane->base.id, intel_plane->pipe,
+				(crtc->base.primary == plane) ? 0 : intel_plane->plane + 1,
+				drm_plane_index(plane), state->scaler_id);
+			continue;
+		}
+
+		DRM_DEBUG_KMS("%s PLANE:%d plane: %u.%u idx: %d enabled",
+			plane->type == DRM_PLANE_TYPE_CURSOR ? "CURSOR" : "STANDARD",
+			plane->base.id, intel_plane->pipe,
+			crtc->base.primary == plane ? 0 : intel_plane->plane + 1,
+			drm_plane_index(plane));
+		DRM_DEBUG_KMS("\tFB:%d, fb = %ux%u format = 0x%x",
+			fb->base.id, fb->width, fb->height, fb->pixel_format);
+		DRM_DEBUG_KMS("\tscaler:%d src (%u, %u) %ux%u dst (%u, %u) %ux%u\n",
+			state->scaler_id,
+			state->src.x1 >> 16, state->src.y1 >> 16,
+			drm_rect_width(&state->src) >> 16,
+			drm_rect_height(&state->src) >> 16,
+			state->dst.x1, state->dst.y1,
+			drm_rect_width(&state->dst), drm_rect_height(&state->dst));
+	}
 }
 
 static bool encoders_cloneable(const struct intel_encoder *a,
@@ -10507,16 +10941,24 @@
 			  b->cloneable & (1 << a->type));
 }
 
-static bool check_single_encoder_cloning(struct intel_crtc *crtc,
+static bool check_single_encoder_cloning(struct drm_atomic_state *state,
+					 struct intel_crtc *crtc,
 					 struct intel_encoder *encoder)
 {
-	struct drm_device *dev = crtc->base.dev;
 	struct intel_encoder *source_encoder;
+	struct drm_connector_state *connector_state;
+	int i;
 
-	for_each_intel_encoder(dev, source_encoder) {
-		if (source_encoder->new_crtc != crtc)
+	for (i = 0; i < state->num_connector; i++) {
+		if (!state->connectors[i])
 			continue;
 
+		connector_state = state->connector_states[i];
+		if (connector_state->crtc != &crtc->base)
+			continue;
+
+		source_encoder =
+			to_intel_encoder(connector_state->best_encoder);
 		if (!encoders_cloneable(encoder, source_encoder))
 			return false;
 	}
@@ -10524,39 +10966,53 @@
 	return true;
 }
 
-static bool check_encoder_cloning(struct intel_crtc *crtc)
+static bool check_encoder_cloning(struct drm_atomic_state *state,
+				  struct intel_crtc *crtc)
 {
-	struct drm_device *dev = crtc->base.dev;
 	struct intel_encoder *encoder;
+	struct drm_connector_state *connector_state;
+	int i;
 
-	for_each_intel_encoder(dev, encoder) {
-		if (encoder->new_crtc != crtc)
+	for (i = 0; i < state->num_connector; i++) {
+		if (!state->connectors[i])
 			continue;
 
-		if (!check_single_encoder_cloning(crtc, encoder))
+		connector_state = state->connector_states[i];
+		if (connector_state->crtc != &crtc->base)
+			continue;
+
+		encoder = to_intel_encoder(connector_state->best_encoder);
+		if (!check_single_encoder_cloning(state, crtc, encoder))
 			return false;
 	}
 
 	return true;
 }
 
-static bool check_digital_port_conflicts(struct drm_device *dev)
+static bool check_digital_port_conflicts(struct drm_atomic_state *state)
 {
-	struct intel_connector *connector;
+	struct drm_device *dev = state->dev;
+	struct intel_encoder *encoder;
+	struct drm_connector_state *connector_state;
 	unsigned int used_ports = 0;
+	int i;
 
 	/*
 	 * Walk the connector list instead of the encoder
 	 * list to detect the problem on ddi platforms
 	 * where there's just one encoder per digital port.
 	 */
-	for_each_intel_connector(dev, connector) {
-		struct intel_encoder *encoder = connector->new_encoder;
-
-		if (!encoder)
+	for (i = 0; i < state->num_connector; i++) {
+		if (!state->connectors[i])
 			continue;
 
-		WARN_ON(!encoder->new_crtc);
+		connector_state = state->connector_states[i];
+		if (!connector_state->best_encoder)
+			continue;
+
+		encoder = to_intel_encoder(connector_state->best_encoder);
+
+		WARN_ON(!connector_state->crtc);
 
 		switch (encoder->type) {
 			unsigned int port_mask;
@@ -10585,11 +11041,14 @@
 clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
 {
 	struct drm_crtc_state tmp_state;
+	struct intel_crtc_scaler_state scaler_state;
 
-	/* Clear only the intel specific part of the crtc state */
+	/* Clear only the intel specific part of the crtc state excluding scalers */
 	tmp_state = crtc_state->base;
+	scaler_state = crtc_state->scaler_state;
 	memset(crtc_state, 0, sizeof *crtc_state);
 	crtc_state->base = tmp_state;
+	crtc_state->scaler_state = scaler_state;
 }
 
 static struct intel_crtc_state *
@@ -10598,7 +11057,6 @@
 			  struct drm_display_mode *mode,
 			  struct drm_atomic_state *state)
 {
-	struct drm_device *dev = crtc->dev;
 	struct intel_encoder *encoder;
 	struct intel_connector *connector;
 	struct drm_connector_state *connector_state;
@@ -10607,12 +11065,12 @@
 	int i;
 	bool retry = true;
 
-	if (!check_encoder_cloning(to_intel_crtc(crtc))) {
+	if (!check_encoder_cloning(state, to_intel_crtc(crtc))) {
 		DRM_DEBUG_KMS("rejecting invalid cloning configuration\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (!check_digital_port_conflicts(dev)) {
+	if (!check_digital_port_conflicts(state)) {
 		DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n");
 		return ERR_PTR(-EINVAL);
 	}
@@ -10852,9 +11310,6 @@
 	/* Double check state. */
 	for_each_intel_crtc(dev, intel_crtc) {
 		WARN_ON(intel_crtc->base.state->enable != intel_crtc_in_use(&intel_crtc->base));
-		WARN_ON(intel_crtc->new_config &&
-			intel_crtc->new_config != intel_crtc->config);
-		WARN_ON(intel_crtc->base.state->enable != !!intel_crtc->new_config);
 	}
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
@@ -11059,6 +11514,8 @@
 		PIPE_CONF_CHECK_I(pch_pfit.size);
 	}
 
+	PIPE_CONF_CHECK_I(scaler_state.scaler_id);
+
 	/* BDW+ don't expose a synchronous way to read the state */
 	if (IS_HASWELL(dev))
 		PIPE_CONF_CHECK_I(ips_enabled);
@@ -11443,6 +11900,8 @@
 		if (IS_ERR(pipe_config))
 			return pipe_config;
 
+		pipe_config->base.enable = true;
+
 		intel_dump_pipe_config(to_intel_crtc(crtc), pipe_config,
 				       "[modeset]");
 	}
@@ -11450,10 +11909,11 @@
 	return intel_atomic_get_crtc_state(state, to_intel_crtc(crtc));
 }
 
-static int __intel_set_mode_setup_plls(struct drm_device *dev,
+static int __intel_set_mode_setup_plls(struct drm_atomic_state *state,
 				       unsigned modeset_pipes,
 				       unsigned disable_pipes)
 {
+	struct drm_device *dev = state->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	unsigned clear_pipes = modeset_pipes | disable_pipes;
 	struct intel_crtc *intel_crtc;
@@ -11467,9 +11927,15 @@
 		goto done;
 
 	for_each_intel_crtc_masked(dev, modeset_pipes, intel_crtc) {
-		struct intel_crtc_state *state = intel_crtc->new_config;
+		struct intel_crtc_state *crtc_state =
+			intel_atomic_get_crtc_state(state, intel_crtc);
+
+		/* Modeset pipes should have a new state by now */
+		if (WARN_ON(IS_ERR(crtc_state)))
+			continue;
+
 		ret = dev_priv->display.crtc_compute_clock(intel_crtc,
-							   state);
+							   crtc_state);
 		if (ret) {
 			intel_shared_dpll_abort_config(dev_priv);
 			goto done;
@@ -11491,6 +11957,7 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_display_mode *saved_mode;
+	struct drm_atomic_state *state = pipe_config->base.state;
 	struct intel_crtc_state *crtc_state_copy = NULL;
 	struct intel_crtc *intel_crtc;
 	int ret = 0;
@@ -11507,9 +11974,6 @@
 
 	*saved_mode = crtc->mode;
 
-	if (modeset_pipes)
-		to_intel_crtc(crtc)->new_config = pipe_config;
-
 	/*
 	 * See if the config requires any additional preparation, e.g.
 	 * to adjust global state with pipes off.  We need to do this
@@ -11518,13 +11982,15 @@
 	 * adjusted_mode bits in the crtc directly.
 	 */
 	if (IS_VALLEYVIEW(dev)) {
-		valleyview_modeset_global_pipes(dev, &prepare_pipes);
+		ret = valleyview_modeset_global_pipes(state, &prepare_pipes);
+		if (ret)
+			goto done;
 
 		/* may have added more to prepare_pipes than we should */
 		prepare_pipes &= ~disable_pipes;
 	}
 
-	ret = __intel_set_mode_setup_plls(dev, modeset_pipes, disable_pipes);
+	ret = __intel_set_mode_setup_plls(state, modeset_pipes, disable_pipes);
 	if (ret)
 		goto done;
 
@@ -11562,21 +12028,18 @@
 	 * update the output configuration. */
 	intel_modeset_update_state(dev, prepare_pipes);
 
-	modeset_update_crtc_power_domains(pipe_config->base.state);
+	modeset_update_crtc_power_domains(state);
 
-	/* Set up the DPLL and any encoders state that needs to adjust or depend
-	 * on the DPLL.
-	 */
 	for_each_intel_crtc_masked(dev, modeset_pipes, intel_crtc) {
 		struct drm_plane *primary = intel_crtc->base.primary;
 		int vdisplay, hdisplay;
 
 		drm_crtc_get_hv_timing(mode, &hdisplay, &vdisplay);
-		ret = primary->funcs->update_plane(primary, &intel_crtc->base,
-						   fb, 0, 0,
-						   hdisplay, vdisplay,
-						   x << 16, y << 16,
-						   hdisplay << 16, vdisplay << 16);
+		ret = drm_plane_helper_update(primary, &intel_crtc->base,
+					      fb, 0, 0,
+					      hdisplay, vdisplay,
+					      x << 16, y << 16,
+					      hdisplay << 16, vdisplay << 16);
 	}
 
 	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
@@ -11600,9 +12063,6 @@
 		       sizeof *crtc_state_copy);
 		intel_crtc->config = crtc_state_copy;
 		intel_crtc->base.state = &crtc_state_copy->base;
-
-		if (modeset_pipes)
-			intel_crtc->new_config = intel_crtc->config;
 	} else {
 		kfree(crtc_state_copy);
 	}
@@ -11781,11 +12241,6 @@
 	count = 0;
 	for_each_intel_crtc(dev, crtc) {
 		crtc->new_enabled = config->save_crtc_enabled[count++];
-
-		if (crtc->new_enabled)
-			crtc->new_config = crtc->config;
-		else
-			crtc->new_config = NULL;
 	}
 
 	count = 0;
@@ -11993,6 +12448,7 @@
 				connector->encoder = connector->new_encoder;
 		} else {
 			connector_state->crtc = NULL;
+			connector_state->best_encoder = NULL;
 		}
 	}
 	for_each_intel_crtc(dev, crtc) {
@@ -12011,11 +12467,6 @@
 				      crtc->new_enabled ? "en" : "dis");
 			config->mode_changed = true;
 		}
-
-		if (crtc->new_enabled)
-			crtc->new_config = crtc->config;
-		else
-			crtc->new_config = NULL;
 	}
 
 	return 0;
@@ -12042,7 +12493,6 @@
 	}
 
 	crtc->new_enabled = false;
-	crtc->new_config = NULL;
 }
 
 static int intel_crtc_set_config(struct drm_mode_set *set)
@@ -12140,10 +12590,10 @@
 		int vdisplay, hdisplay;
 
 		drm_crtc_get_hv_timing(set->mode, &hdisplay, &vdisplay);
-		ret = primary->funcs->update_plane(primary, set->crtc, set->fb,
-						   0, 0, hdisplay, vdisplay,
-						   set->x << 16, set->y << 16,
-						   hdisplay << 16, vdisplay << 16);
+		ret = drm_plane_helper_update(primary, set->crtc, set->fb,
+					      0, 0, hdisplay, vdisplay,
+					      set->x << 16, set->y << 16,
+					      hdisplay << 16, vdisplay << 16);
 
 		/*
 		 * We need to make sure the primary plane is re-enabled if it
@@ -12428,16 +12878,21 @@
 	struct drm_rect *dest = &state->dst;
 	struct drm_rect *src = &state->src;
 	const struct drm_rect *clip = &state->clip;
+	bool can_position = false;
 	int ret;
 
 	crtc = crtc ? crtc : plane->crtc;
 	intel_crtc = to_intel_crtc(crtc);
 
+	if (INTEL_INFO(dev)->gen >= 9)
+		can_position = true;
+
 	ret = drm_plane_helper_check_update(plane, crtc, fb,
 					    src, dest, clip,
 					    DRM_PLANE_HELPER_NO_SCALING,
 					    DRM_PLANE_HELPER_NO_SCALING,
-					    false, true, &state->visible);
+					    can_position, true,
+					    &state->visible);
 	if (ret)
 		return ret;
 
@@ -12627,8 +13082,8 @@
 }
 
 const struct drm_plane_funcs intel_plane_funcs = {
-	.update_plane = drm_plane_helper_update,
-	.disable_plane = drm_plane_helper_disable,
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = intel_plane_destroy,
 	.set_property = drm_atomic_helper_plane_set_property,
 	.atomic_get_property = intel_plane_atomic_get_property,
@@ -12659,10 +13114,12 @@
 
 	primary->can_scale = false;
 	primary->max_downscale = 1;
+	state->scaler_id = -1;
 	primary->pipe = pipe;
 	primary->plane = pipe;
 	primary->check_plane = intel_check_primary_plane;
 	primary->commit_plane = intel_commit_primary_plane;
+	primary->ckey.flags = I915_SET_COLORKEY_NONE;
 	if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4)
 		primary->plane = !pipe;
 
@@ -12679,23 +13136,32 @@
 				 intel_primary_formats, num_formats,
 				 DRM_PLANE_TYPE_PRIMARY);
 
-	if (INTEL_INFO(dev)->gen >= 4) {
-		if (!dev->mode_config.rotation_property)
-			dev->mode_config.rotation_property =
-				drm_mode_create_rotation_property(dev,
-							BIT(DRM_ROTATE_0) |
-							BIT(DRM_ROTATE_180));
-		if (dev->mode_config.rotation_property)
-			drm_object_attach_property(&primary->base.base,
-				dev->mode_config.rotation_property,
-				state->base.rotation);
-	}
+	if (INTEL_INFO(dev)->gen >= 4)
+		intel_create_rotation_property(dev, primary);
 
 	drm_plane_helper_add(&primary->base, &intel_plane_helper_funcs);
 
 	return &primary->base;
 }
 
+void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane)
+{
+	if (!dev->mode_config.rotation_property) {
+		unsigned long flags = BIT(DRM_ROTATE_0) |
+			BIT(DRM_ROTATE_180);
+
+		if (INTEL_INFO(dev)->gen >= 9)
+			flags |= BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270);
+
+		dev->mode_config.rotation_property =
+			drm_mode_create_rotation_property(dev, flags);
+	}
+	if (dev->mode_config.rotation_property)
+		drm_object_attach_property(&plane->base.base,
+				dev->mode_config.rotation_property,
+				plane->base.state->rotation);
+}
+
 static int
 intel_check_cursor_plane(struct drm_plane *plane,
 			 struct intel_plane_state *state)
@@ -12813,6 +13279,7 @@
 	cursor->max_downscale = 1;
 	cursor->pipe = pipe;
 	cursor->plane = pipe;
+	state->scaler_id = -1;
 	cursor->check_plane = intel_check_cursor_plane;
 	cursor->commit_plane = intel_commit_cursor_plane;
 
@@ -12839,6 +13306,24 @@
 	return &cursor->base;
 }
 
+static void skl_init_scalers(struct drm_device *dev, struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state)
+{
+	int i;
+	struct intel_scaler *intel_scaler;
+	struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state;
+
+	for (i = 0; i < intel_crtc->num_scalers; i++) {
+		intel_scaler = &scaler_state->scalers[i];
+		intel_scaler->in_use = 0;
+		intel_scaler->id = i;
+
+		intel_scaler->mode = PS_SCALER_MODE_DYN;
+	}
+
+	scaler_state->scaler_id = -1;
+}
+
 static void intel_crtc_init(struct drm_device *dev, int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -12858,6 +13343,16 @@
 	intel_crtc_set_state(intel_crtc, crtc_state);
 	crtc_state->base.crtc = &intel_crtc->base;
 
+	/* initialize shared scalers */
+	if (INTEL_INFO(dev)->gen >= 9) {
+		if (pipe == PIPE_C)
+			intel_crtc->num_scalers = 1;
+		else
+			intel_crtc->num_scalers = SKL_NUM_SCALERS;
+
+		skl_init_scalers(dev, intel_crtc, crtc_state);
+	}
+
 	primary = intel_primary_plane_create(dev, pipe);
 	if (!primary)
 		goto fail;
@@ -13003,7 +13498,6 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_encoder *encoder;
-	struct drm_connector *connector;
 	bool dpd_is_edp = false;
 
 	intel_lvds_init(dev);
@@ -13148,39 +13642,6 @@
 	if (SUPPORTS_TV(dev))
 		intel_tv_init(dev);
 
-	/*
-	 * FIXME:  We don't have full atomic support yet, but we want to be
-	 * able to enable/test plane updates via the atomic interface in the
-	 * meantime.  However as soon as we flip DRIVER_ATOMIC on, the DRM core
-	 * will take some atomic codepaths to lookup properties during
-	 * drmModeGetConnector() that unconditionally dereference
-	 * connector->state.
-	 *
-	 * We create a dummy connector state here for each connector to ensure
-	 * the DRM core doesn't try to dereference a NULL connector->state.
-	 * The actual connector properties will never be updated or contain
-	 * useful information, but since we're doing this specifically for
-	 * testing/debug of the plane operations (and only when a specific
-	 * kernel module option is given), that shouldn't really matter.
-	 *
-	 * We are also relying on these states to convert the legacy mode set
-	 * to use a drm_atomic_state struct. The states are kept consistent
-	 * with actual state, so that it is safe to rely on that instead of
-	 * the staged config.
-	 *
-	 * Once atomic support for crtc's + connectors lands, this loop should
-	 * be removed since we'll be setting up real connector state, which
-	 * will contain Intel-specific properties.
-	 */
-	list_for_each_entry(connector,
-			    &dev->mode_config.connector_list,
-			    head) {
-		if (!WARN_ON(connector->state)) {
-			connector->state = kzalloc(sizeof(*connector->state),
-						   GFP_KERNEL);
-		}
-	}
-
 	intel_psr_init(dev);
 
 	for_each_intel_encoder(dev, encoder) {
@@ -13489,10 +13950,23 @@
 	}
 
 	/* Returns the core display clock speed */
-	if (IS_VALLEYVIEW(dev))
+	if (IS_SKYLAKE(dev))
+		dev_priv->display.get_display_clock_speed =
+			skylake_get_display_clock_speed;
+	else if (IS_BROADWELL(dev))
+		dev_priv->display.get_display_clock_speed =
+			broadwell_get_display_clock_speed;
+	else if (IS_HASWELL(dev))
+		dev_priv->display.get_display_clock_speed =
+			haswell_get_display_clock_speed;
+	else if (IS_VALLEYVIEW(dev))
 		dev_priv->display.get_display_clock_speed =
 			valleyview_get_display_clock_speed;
-	else if (IS_I945G(dev) || (IS_G33(dev) && !IS_PINEVIEW_M(dev)))
+	else if (IS_GEN5(dev))
+		dev_priv->display.get_display_clock_speed =
+			ilk_get_display_clock_speed;
+	else if (IS_I945G(dev) || IS_BROADWATER(dev) ||
+		 IS_GEN6(dev) || IS_IVYBRIDGE(dev) || (IS_G33(dev) && !IS_PINEVIEW_M(dev)))
 		dev_priv->display.get_display_clock_speed =
 			i945_get_display_clock_speed;
 	else if (IS_I915G(dev))
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index b70e635..3ea68e1 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -696,15 +696,13 @@
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (index)
 		return 0;
 
 	if (intel_dig_port->port == PORT_A) {
-		if (IS_GEN6(dev) || IS_GEN7(dev))
-			return 200; /* SNB & IVB eDP input clock at 400Mhz */
-		else
-			return 225; /* eDP input clock at 450Mhz */
+		return DIV_ROUND_UP(dev_priv->display.get_display_clock_speed(dev), 2000);
 	} else {
 		return DIV_ROUND_UP(intel_pch_rawclk(dev), 2);
 	}
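Editorial cross-check on the divider change above: assuming get_display_clock_speed() reports 400000 kHz on SNB/IVB (which now route to i945_get_display_clock_speed()) and, for example, 450000 kHz on a Haswell part, DIV_ROUND_UP(400000, 2000) = 200 and DIV_ROUND_UP(450000, 2000) = 225, matching the hard-coded values removed here.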
@@ -719,7 +717,7 @@
 	if (intel_dig_port->port == PORT_A) {
 		if (index)
 			return 0;
-		return DIV_ROUND_CLOSEST(intel_ddi_get_cdclk_freq(dev_priv), 2000);
+		return DIV_ROUND_CLOSEST(dev_priv->display.get_display_clock_speed(dev), 2000);
 	} else if (dev_priv->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE) {
 		/* Workaround for non-ULT HSW */
 		switch (index) {
@@ -1353,6 +1351,14 @@
 	if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) {
 		intel_fixed_panel_mode(intel_connector->panel.fixed_mode,
 				       adjusted_mode);
+
+		if (INTEL_INFO(dev)->gen >= 9) {
+			int ret;
+			ret = skl_update_scaler_users(intel_crtc, pipe_config, NULL, NULL, 0);
+			if (ret)
+				return ret;
+		}
+
 		if (!HAS_PCH_SPLIT(dev))
 			intel_gmch_panel_fitting(intel_crtc, pipe_config,
 						 intel_connector->panel.fitting_mode);
@@ -3787,6 +3793,21 @@
 			dev_priv->psr.sink_support = true;
 			DRM_DEBUG_KMS("Detected EDP PSR Panel.\n");
 		}
+
+		if (INTEL_INFO(dev)->gen >= 9 &&
+			(intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) {
+			uint8_t frame_sync_cap;
+
+			dev_priv->psr.sink_support = true;
+			intel_dp_dpcd_read_wake(&intel_dp->aux,
+					DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
+					&frame_sync_cap, 1);
+			dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false;
+			/* PSR2 needs frame sync as well */
+			dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
+			DRM_DEBUG_KMS("PSR2 %s on sink\n",
+				dev_priv->psr.psr2_support ? "supported" : "not supported");
+		}
 	}
 
 	/* Training Pattern 3 support, both source and sink */
@@ -5571,6 +5592,8 @@
 		I915_WRITE(PEG_BAND_GAP_DATA, (temp & ~0xf) | 0xd);
 	}
 
+	i915_debugfs_connector_add(connector);
+
 	return true;
 }
 
@@ -5587,7 +5610,7 @@
 	if (!intel_dig_port)
 		return;
 
-	intel_connector = kzalloc(sizeof(*intel_connector), GFP_KERNEL);
+	intel_connector = intel_connector_alloc();
 	if (!intel_connector) {
 		kfree(intel_dig_port);
 		return;
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 5329c85..7335089 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -150,14 +150,14 @@
 	enum port port = intel_dig_port->port;
 	int ret;
 	uint32_t temp;
-	struct intel_connector *found = NULL, *intel_connector;
+	struct intel_connector *found = NULL, *connector;
 	int slots;
 	struct drm_crtc *crtc = encoder->base.crtc;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-	for_each_intel_connector(dev, intel_connector) {
-		if (intel_connector->new_encoder == encoder) {
-			found = intel_connector;
+	for_each_intel_connector(dev, connector) {
+		if (connector->base.state->best_encoder == &encoder->base) {
+			found = connector;
 			break;
 		}
 	}
@@ -415,7 +415,7 @@
 	struct drm_connector *connector;
 	int i;
 
-	intel_connector = kzalloc(sizeof(*intel_connector), GFP_KERNEL);
+	intel_connector = intel_connector_alloc();
 	if (!intel_connector)
 		return NULL;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 6036e3b..6a2ee0c 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -256,6 +256,26 @@
 	 * enable/disable the primary plane
 	 */
 	bool hides_primary;
+
+	/*
+	 * scaler_id
+	 *    = -1 : not using a scaler
+	 *    >=  0 : using a scaler
+	 *
+	 * plane requiring a scaler:
+	 *   - During check_plane, its bit is set in
+	 *     crtc_state->scaler_state.scaler_users by calling the helper
+	 *     skl_update_scaler_users().
+	 *   - scaler_id indicates the scaler it got assigned.
+	 *
+	 * plane not requiring a scaler:
+	 *   - this can happen when scaling is no longer required or the plane
+	 *     was simply disabled.
+	 *   - During check_plane, the corresponding bit is cleared in
+	 *     crtc_state->scaler_state.scaler_users by calling the helper
+	 *     skl_update_scaler_users().
+	 */
+	int scaler_id;
 };
 
 struct intel_initial_plane_config {
@@ -265,6 +285,49 @@
 	u32 base;
 };
 
+#define SKL_MIN_SRC_W 8
+#define SKL_MAX_SRC_W 4096
+#define SKL_MIN_SRC_H 8
+#define SKL_MAX_SRC_H 2304
+#define SKL_MIN_DST_W 8
+#define SKL_MAX_DST_W 4096
+#define SKL_MIN_DST_H 8
+#define SKL_MAX_DST_H 2304
+
+struct intel_scaler {
+	int id;
+	int in_use;
+	uint32_t mode;
+};
+
+struct intel_crtc_scaler_state {
+#define SKL_NUM_SCALERS 2
+	struct intel_scaler scalers[SKL_NUM_SCALERS];
+
+	/*
+	 * scaler_users: keeps track of users requesting scalers on this crtc.
+	 *
+	 *     If a bit is set, a user is using a scaler.
+	 *     Here user can be a plane or crtc as defined below:
+	 *       bits 0-30 - plane (bit position is index from drm_plane_index)
+	 *       bit 31    - crtc
+	 *
+	 * Instead of creating a new index to cover planes and crtc, we use the
+	 * existing drm_plane_index for planes (which stays well below 31) and
+	 * bit 31 for the crtc. This is enough to cover all our platforms.
+	 *
+	 * intel_atomic_setup_scalers will assign the available scalers to the
+	 * users requesting them. It fails gracefully if the requests exceed
+	 * availability.
+	 */
+#define SKL_CRTC_INDEX 31
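+	/* e.g. the plane with drm_plane_index 2 uses bit 2, while the crtc's
+	 * panel fitter uses bit SKL_CRTC_INDEX. */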
+	unsigned scaler_users;
+
+	/* scaler used by crtc for panel fitting purpose */
+	int scaler_id;
+};
+
 struct intel_crtc_state {
 	struct drm_crtc_state base;
 
@@ -391,6 +454,8 @@
 
 	bool dp_encoder_is_mst;
 	int pbn;
+
+	struct intel_crtc_scaler_state scaler_state;
 };
 
 struct intel_pipe_wm {
@@ -471,7 +536,6 @@
 
 	struct intel_initial_plane_config plane_config;
 	struct intel_crtc_state *config;
-	struct intel_crtc_state *new_config;
 	bool new_enabled;
 
 	/* reset counter value when the last flip was submitted */
@@ -493,6 +557,9 @@
 	struct intel_mmio_flip mmio_flip;
 
 	struct intel_crtc_atomic_commit atomic;
+
+	/* scalers available on this crtc */
+	int num_scalers;
 };
 
 struct intel_plane_wm_parameters {
@@ -855,7 +922,6 @@
 void intel_ddi_init(struct drm_device *dev, enum port port);
 enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder);
 bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe);
-int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv);
 void intel_ddi_pll_init(struct drm_device *dev);
 void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc);
 void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
@@ -930,6 +996,8 @@
 void intel_crtc_control(struct drm_crtc *crtc, bool enable);
 void intel_crtc_update_dpms(struct drm_crtc *crtc);
 void intel_encoder_destroy(struct drm_encoder *encoder);
+int intel_connector_init(struct intel_connector *);
+struct intel_connector *intel_connector_alloc(void);
 void intel_connector_dpms(struct drm_connector *, int mode);
 bool intel_connector_get_hw_state(struct intel_connector *connector);
 void intel_modeset_check_state(struct drm_device *dev);
@@ -998,6 +1066,12 @@
 	return rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270));
 }
 
+unsigned int
+intel_tile_height(struct drm_device *dev, uint32_t bits_per_pixel,
+		  uint64_t fb_modifier);
+void intel_create_rotation_property(struct drm_device *dev,
+					struct intel_plane *plane);
+
 bool intel_wm_need_update(struct drm_plane *plane,
 			  struct drm_plane_state *state);
 
@@ -1054,6 +1128,10 @@
 				 struct intel_crtc_state *pipe_config);
 void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc);
 void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file);
+void skl_detach_scalers(struct intel_crtc *intel_crtc);
+int skl_update_scaler_users(struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state, struct intel_plane *intel_plane,
+	struct intel_plane_state *plane_state, int force_detach);
 
 unsigned long intel_plane_obj_offset(struct intel_plane *intel_plane,
 				     struct drm_i915_gem_object *obj);
@@ -1264,7 +1342,10 @@
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct drm_i915_private *dev_priv);
+void gen6_rps_boost(struct drm_i915_private *dev_priv,
+		    struct drm_i915_file_private *file_priv);
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+				       struct drm_i915_gem_request *rq);
 void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
@@ -1315,6 +1396,9 @@
 
 	return to_intel_crtc_state(crtc_state);
 }
+int intel_atomic_setup_scalers(struct drm_device *dev,
+	struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state);
 
 /* intel_atomic_plane.c */
 struct intel_plane_state *intel_create_plane_state(struct drm_plane *plane);
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 572251e..5196642 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -1007,7 +1007,7 @@
 	if (!intel_dsi)
 		return;
 
-	intel_connector = kzalloc(sizeof(*intel_connector), GFP_KERNEL);
+	intel_connector = intel_connector_alloc();
 	if (!intel_connector) {
 		kfree(intel_dsi);
 		return;
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 4ccd6c3..7c9f852 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -80,7 +80,7 @@
 		.name = "ch7017",
 		.dvo_reg = DVOC,
 		.slave_addr = 0x75,
-		.gpio = GMBUS_PORT_DPB,
+		.gpio = GMBUS_PIN_DPB,
 		.dev_ops = &ch7017_ops,
 	},
 	{
@@ -364,7 +364,7 @@
 	 * that's not the case.
 	 */
 	intel_ddc_get_modes(connector,
-			    intel_gmbus_get_adapter(dev_priv, GMBUS_PORT_DPC));
+			    intel_gmbus_get_adapter(dev_priv, GMBUS_PIN_DPC));
 	if (!list_empty(&connector->probed_modes))
 		return 1;
 
@@ -469,7 +469,7 @@
 	if (!intel_dvo)
 		return;
 
-	intel_connector = kzalloc(sizeof(*intel_connector), GFP_KERNEL);
+	intel_connector = intel_connector_alloc();
 	if (!intel_connector) {
 		kfree(intel_dvo);
 		return;
@@ -495,17 +495,19 @@
 		struct i2c_adapter *i2c;
 		int gpio;
 		bool dvoinit;
+		enum pipe pipe;
+		uint32_t dpll[2];
 
 		/* Allow the I2C driver info to specify the GPIO to be used in
 		 * special cases, but otherwise default to what's defined
 		 * in the spec.
 		 */
-		if (intel_gmbus_is_port_valid(dvo->gpio))
+		if (intel_gmbus_is_valid_pin(dev_priv, dvo->gpio))
 			gpio = dvo->gpio;
 		else if (dvo->type == INTEL_DVO_CHIP_LVDS)
-			gpio = GMBUS_PORT_SSC;
+			gpio = GMBUS_PIN_SSC;
 		else
-			gpio = GMBUS_PORT_DPB;
+			gpio = GMBUS_PIN_DPB;
 
 		/* Set up the I2C bus necessary for the chip we're probing.
 		 * It appears that everything is on GPIOE except for panels
@@ -520,8 +522,23 @@
 		 */
 		intel_gmbus_force_bit(i2c, true);
 
+		/* ns2501 requires the DVO 2x clock before it will
+		 * respond to i2c accesses, so make sure we have
+		 * the clock enabled before we attempt to
+		 * initialize the device.
+		 */
+		for_each_pipe(dev_priv, pipe) {
+			dpll[pipe] = I915_READ(DPLL(pipe));
+			I915_WRITE(DPLL(pipe), dpll[pipe] | DPLL_DVO_2X_MODE);
+		}
+
 		dvoinit = dvo->dev_ops->init(&intel_dvo->dev, i2c);
 
+		/* restore the DVO 2x clock state to original */
+		for_each_pipe(dev_priv, pipe) {
+			I915_WRITE(DPLL(pipe), dpll[pipe]);
+		}
+
 		intel_gmbus_force_bit(i2c, false);
 
 		if (!dvoinit)
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index cacbafd..02252d9 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1681,18 +1681,18 @@
 
 	switch (port) {
 	case PORT_B:
-		intel_hdmi->ddc_bus = GMBUS_PORT_DPB;
+		intel_hdmi->ddc_bus = GMBUS_PIN_DPB;
 		intel_encoder->hpd_pin = HPD_PORT_B;
 		break;
 	case PORT_C:
-		intel_hdmi->ddc_bus = GMBUS_PORT_DPC;
+		intel_hdmi->ddc_bus = GMBUS_PIN_DPC;
 		intel_encoder->hpd_pin = HPD_PORT_C;
 		break;
 	case PORT_D:
 		if (IS_CHERRYVIEW(dev))
-			intel_hdmi->ddc_bus = GMBUS_PORT_DPD_CHV;
+			intel_hdmi->ddc_bus = GMBUS_PIN_DPD_CHV;
 		else
-			intel_hdmi->ddc_bus = GMBUS_PORT_DPD;
+			intel_hdmi->ddc_bus = GMBUS_PIN_DPD;
 		intel_encoder->hpd_pin = HPD_PORT_D;
 		break;
 	case PORT_A:
@@ -1755,7 +1755,7 @@
 	if (!intel_dig_port)
 		return;
 
-	intel_connector = kzalloc(sizeof(*intel_connector), GFP_KERNEL);
+	intel_connector = intel_connector_alloc();
 	if (!intel_connector) {
 		kfree(intel_dig_port);
 		return;
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index b31088a..ec9cc8c 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -34,20 +34,27 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
-struct gmbus_port {
+struct gmbus_pin {
 	const char *name;
 	int reg;
 };
 
-static const struct gmbus_port gmbus_ports[] = {
-	{ "ssc", GPIOB },
-	{ "vga", GPIOA },
-	{ "panel", GPIOC },
-	{ "dpc", GPIOD },
-	{ "dpb", GPIOE },
-	{ "dpd", GPIOF },
+/* Map gmbus pin pairs to names and registers. */
+static const struct gmbus_pin gmbus_pins[] = {
+	[GMBUS_PIN_SSC] = { "ssc", GPIOB },
+	[GMBUS_PIN_VGADDC] = { "vga", GPIOA },
+	[GMBUS_PIN_PANEL] = { "panel", GPIOC },
+	[GMBUS_PIN_DPC] = { "dpc", GPIOD },
+	[GMBUS_PIN_DPB] = { "dpb", GPIOE },
+	[GMBUS_PIN_DPD] = { "dpd", GPIOF },
 };
 
+bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
+			      unsigned int pin)
+{
+	return pin < ARRAY_SIZE(gmbus_pins) && gmbus_pins[pin].reg;
+}
+
 /* Intel GPIO access functions */
 
 #define I2C_RISEFALL_TIME 10
@@ -182,15 +189,14 @@
 }
 
 static void
-intel_gpio_setup(struct intel_gmbus *bus, u32 pin)
+intel_gpio_setup(struct intel_gmbus *bus, unsigned int pin)
 {
 	struct drm_i915_private *dev_priv = bus->dev_priv;
 	struct i2c_algo_bit_data *algo;
 
 	algo = &bus->bit_algo;
 
-	/* -1 to map pin pair to gmbus index */
-	bus->gpio_reg = dev_priv->gpio_mmio_base + gmbus_ports[pin - 1].reg;
+	bus->gpio_reg = dev_priv->gpio_mmio_base + gmbus_pins[pin].reg;
 
 	bus->adapter.algo_data = algo;
 	algo->setsda = set_data;
@@ -517,7 +523,9 @@
 int intel_setup_gmbus(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret, i;
+	struct intel_gmbus *bus;
+	unsigned int pin;
+	int ret;
 
 	if (HAS_PCH_NOP(dev))
 		return 0;
@@ -531,16 +539,18 @@
 	mutex_init(&dev_priv->gmbus_mutex);
 	init_waitqueue_head(&dev_priv->gmbus_wait_queue);
 
-	for (i = 0; i < GMBUS_NUM_PORTS; i++) {
-		struct intel_gmbus *bus = &dev_priv->gmbus[i];
-		u32 port = i + 1; /* +1 to map gmbus index to pin pair */
+	for (pin = 0; pin < ARRAY_SIZE(dev_priv->gmbus); pin++) {
+		if (!intel_gmbus_is_valid_pin(dev_priv, pin))
+			continue;
+
+		bus = &dev_priv->gmbus[pin];
 
 		bus->adapter.owner = THIS_MODULE;
 		bus->adapter.class = I2C_CLASS_DDC;
 		snprintf(bus->adapter.name,
 			 sizeof(bus->adapter.name),
 			 "i915 gmbus %s",
-			 gmbus_ports[i].name);
+			 gmbus_pins[pin].name);
 
 		bus->adapter.dev.parent = &dev->pdev->dev;
 		bus->dev_priv = dev_priv;
@@ -548,13 +558,13 @@
 		bus->adapter.algo = &gmbus_algorithm;
 
 		/* By default use a conservative clock rate */
-		bus->reg0 = port | GMBUS_RATE_100KHZ;
+		bus->reg0 = pin | GMBUS_RATE_100KHZ;
 
 		/* gmbus seems to be broken on i830 */
 		if (IS_I830(dev))
 			bus->force_bit = 1;
 
-		intel_gpio_setup(bus, port);
+		intel_gpio_setup(bus, pin);
 
 		ret = i2c_add_adapter(&bus->adapter);
 		if (ret)
@@ -566,20 +576,23 @@
 	return 0;
 
 err:
-	while (--i) {
-		struct intel_gmbus *bus = &dev_priv->gmbus[i];
+	while (--pin) {
+		if (!intel_gmbus_is_valid_pin(dev_priv, pin))
+			continue;
+
+		bus = &dev_priv->gmbus[pin];
 		i2c_del_adapter(&bus->adapter);
 	}
 	return ret;
 }
 
 struct i2c_adapter *intel_gmbus_get_adapter(struct drm_i915_private *dev_priv,
-					    unsigned port)
+					    unsigned int pin)
 {
-	WARN_ON(!intel_gmbus_is_port_valid(port));
-	/* -1 to map pin pair to gmbus index */
-	return (intel_gmbus_is_port_valid(port)) ?
-		&dev_priv->gmbus[port - 1].adapter : NULL;
+	if (WARN_ON(!intel_gmbus_is_valid_pin(dev_priv, pin)))
+		return NULL;
+
+	return &dev_priv->gmbus[pin].adapter;
 }
 
 void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed)
@@ -602,10 +615,14 @@
 void intel_teardown_gmbus(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int i;
+	struct intel_gmbus *bus;
+	unsigned int pin;
 
-	for (i = 0; i < GMBUS_NUM_PORTS; i++) {
-		struct intel_gmbus *bus = &dev_priv->gmbus[i];
+	for (pin = 0; pin < ARRAY_SIZE(dev_priv->gmbus); pin++) {
+		if (!intel_gmbus_is_valid_pin(dev_priv, pin))
+			continue;
+
+		bus = &dev_priv->gmbus[pin];
 		i2c_del_adapter(&bus->adapter);
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 71aeeb3..a798d75 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -188,6 +188,15 @@
 #define GEN8_CTX_FORCE_RESTORE (1<<2)
 #define GEN8_CTX_L3LLC_COHERENT (1<<5)
 #define GEN8_CTX_PRIVILEGE (1<<8)
+
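+/*
+ * Write PDP n's page-directory address (upper and lower dwords) into the
+ * context image, falling back to the scratch PD when that PDP has not been
+ * allocated.
+ */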
+#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
+	const u64 _addr = test_bit(n, ppgtt->pdp.used_pdpes) ? \
+		ppgtt->pdp.page_directory[n]->daddr : \
+		ppgtt->scratch_pd->daddr; \
+	reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
+	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
+}
+
 enum {
 	ADVANCED_CONTEXT = 0,
 	LEGACY_CONTEXT,
@@ -265,7 +274,8 @@
 
 	desc = GEN8_CTX_VALID;
 	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
-	desc |= GEN8_CTX_L3LLC_COHERENT;
+	if (IS_GEN8(ctx_obj->base.dev))
+		desc |= GEN8_CTX_L3LLC_COHERENT;
 	desc |= GEN8_CTX_PRIVILEGE;
 	desc |= lrca;
 	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
@@ -305,21 +315,24 @@
 	desc[3] = (u32)(temp >> 32);
 	desc[2] = (u32)temp;
 
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	I915_WRITE(RING_ELSP(ring), desc[1]);
-	I915_WRITE(RING_ELSP(ring), desc[0]);
-	I915_WRITE(RING_ELSP(ring), desc[3]);
+	spin_lock(&dev_priv->uncore.lock);
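+	/* uncore.lock is held, so use the __locked forcewake helpers and raw
+	 * _FW register accessors to avoid re-taking the lock on each write. */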
+	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+	I915_WRITE_FW(RING_ELSP(ring), desc[1]);
+	I915_WRITE_FW(RING_ELSP(ring), desc[0]);
+	I915_WRITE_FW(RING_ELSP(ring), desc[3]);
 
 	/* The context is automatically loaded after the following */
-	I915_WRITE(RING_ELSP(ring), desc[2]);
+	I915_WRITE_FW(RING_ELSP(ring), desc[2]);
 
 	/* ELSP is a wo register, so use another nearby reg for posting instead */
-	POSTING_READ(RING_EXECLIST_STATUS(ring));
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	POSTING_READ_FW(RING_EXECLIST_STATUS(ring));
+	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+	spin_unlock(&dev_priv->uncore.lock);
 }
 
 static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
 				    struct drm_i915_gem_object *ring_obj,
+				    struct i915_hw_ppgtt *ppgtt,
 				    u32 tail)
 {
 	struct page *page;
@@ -331,6 +344,16 @@
 	reg_state[CTX_RING_TAIL+1] = tail;
 	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
+	/* True PPGTT with dynamic page allocation: update PDP registers and
+	 * point the unallocated PDPs to the scratch page
+	 */
+	if (ppgtt) {
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
+	}
+
 	kunmap_atomic(reg_state);
 
 	return 0;
@@ -349,7 +372,7 @@
 	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
 	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
 
-	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
+	execlists_update_context(ctx_obj0, ringbuf0->obj, to0->ppgtt, tail0);
 
 	if (to1) {
 		ringbuf1 = to1->engine[ring->id].ringbuf;
@@ -358,7 +381,7 @@
 		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
 		WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
 
-		execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
+		execlists_update_context(ctx_obj1, ringbuf1->obj, to1->ppgtt, tail1);
 	}
 
 	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
@@ -500,8 +523,6 @@
 				   struct drm_i915_gem_request *request)
 {
 	struct drm_i915_gem_request *cursor;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	unsigned long flags;
 	int num_elements = 0;
 
 	if (to != ring->default_context)
@@ -518,7 +539,6 @@
 		request->ring = ring;
 		request->ctx = to;
 		kref_init(&request->ref);
-		request->uniq = dev_priv->request_uniq++;
 		i915_gem_context_reference(request->ctx);
 	} else {
 		i915_gem_request_reference(request);
@@ -526,9 +546,7 @@
 	}
 	request->tail = tail;
 
-	intel_runtime_pm_get(dev_priv);
-
-	spin_lock_irqsave(&ring->execlist_lock, flags);
+	spin_lock_irq(&ring->execlist_lock);
 
 	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
 		if (++num_elements > 2)
@@ -554,7 +572,7 @@
 	if (num_elements == 0)
 		execlists_context_unqueue(ring);
 
-	spin_unlock_irqrestore(&ring->execlist_lock, flags);
+	spin_unlock_irq(&ring->execlist_lock);
 
 	return 0;
 }
@@ -611,6 +629,173 @@
 	return logical_ring_invalidate_all_caches(ringbuf, ctx);
 }
 
+int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request,
+					    struct intel_context *ctx)
+{
+	int ret;
+
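+	/* Pin the context unless it is the ring's default context. */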
+	if (ctx != request->ring->default_context) {
+		ret = intel_lr_context_pin(request->ring, ctx);
+		if (ret)
+			return ret;
+	}
+
+	request->ringbuf = ctx->engine[request->ring->id].ringbuf;
+	request->ctx     = ctx;
+	i915_gem_context_reference(request->ctx);
+
+	return 0;
+}
+
+static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
+				       struct intel_context *ctx,
+				       int bytes)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_i915_gem_request *request;
+	int ret, new_space;
+
+	if (intel_ring_space(ringbuf) >= bytes)
+		return 0;
+
+	list_for_each_entry(request, &ring->request_list, list) {
+		/*
+		 * The request queue is per-engine, so can contain requests
+		 * from multiple ringbuffers. Here, we must ignore any that
+		 * aren't from the ringbuffer we're considering.
+		 */
+		struct intel_context *ctx = request->ctx;
+		if (ctx->engine[ring->id].ringbuf != ringbuf)
+			continue;
+
+		/* Would completion of this request free enough space? */
+		new_space = __intel_ring_space(request->postfix, ringbuf->tail,
+				       ringbuf->size);
+		if (new_space >= bytes)
+			break;
+	}
+
+	if (WARN_ON(&request->list == &ring->request_list))
+		return -ENOSPC;
+
+	ret = i915_wait_request(request);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests_ring(ring);
+
+	WARN_ON(intel_ring_space(ringbuf) < new_space);
+
+	return intel_ring_space(ringbuf) >= bytes ? 0 : -ENOSPC;
+}
+
+/*
+ * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
+ * @ringbuf: Logical Ringbuffer to advance.
+ *
+ * The tail is updated in our logical ringbuffer struct, not in the actual context. What
+ * really happens during submission is that the context and current tail will be placed
+ * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
+ * point, the tail *inside* the context is updated and the ELSP written to.
+ */
+static void
+intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
+				      struct intel_context *ctx,
+				      struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+
+	intel_logical_ring_advance(ringbuf);
+
+	if (intel_ring_stopped(ring))
+		return;
+
+	execlists_context_queue(ring, ctx, ringbuf->tail, request);
+}
+
+static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf,
+				    struct intel_context *ctx)
+{
+	uint32_t __iomem *virt;
+	int rem = ringbuf->size - ringbuf->tail;
+
+	if (ringbuf->space < rem) {
+		int ret = logical_ring_wait_for_space(ringbuf, ctx, rem);
+
+		if (ret)
+			return ret;
+	}
+
+	virt = ringbuf->virtual_start + ringbuf->tail;
+	rem /= 4;
+	while (rem--)
+		iowrite32(MI_NOOP, virt++);
+
+	ringbuf->tail = 0;
+	intel_ring_update_space(ringbuf);
+
+	return 0;
+}
+
+static int logical_ring_prepare(struct intel_ringbuffer *ringbuf,
+				struct intel_context *ctx, int bytes)
+{
+	int ret;
+
+	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
+		ret = logical_ring_wrap_buffer(ringbuf, ctx);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	if (unlikely(ringbuf->space < bytes)) {
+		ret = logical_ring_wait_for_space(ringbuf, ctx, bytes);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
+ *
+ * @ringbuf: Logical ringbuffer.
+ * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
+ *
+ * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
+ * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
+ * and also preallocates a request (every workload submission is still mediated through
+ * requests, same as it did with legacy ringbuffer submission).
+ *
+ * Return: non-zero if the ringbuffer is not ready to be written to.
+ */
+static int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
+				    struct intel_context *ctx, int num_dwords)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible);
+	if (ret)
+		return ret;
+
+	ret = logical_ring_prepare(ringbuf, ctx, num_dwords * sizeof(uint32_t));
+	if (ret)
+		return ret;
+
+	/* Preallocate the olr before touching the ring */
+	ret = i915_gem_request_alloc(ring, ctx);
+	if (ret)
+		return ret;
+
+	ringbuf->space -= num_dwords * sizeof(uint32_t);
+	return 0;
+}
+
 /**
  * execlists_submission() - submit a batchbuffer for execution, Execlists style
  * @dev: DRM device.
@@ -722,8 +907,6 @@
 void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *req, *tmp;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	unsigned long flags;
 	struct list_head retired_list;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -731,9 +914,9 @@
 		return;
 
 	INIT_LIST_HEAD(&retired_list);
-	spin_lock_irqsave(&ring->execlist_lock, flags);
+	spin_lock_irq(&ring->execlist_lock);
 	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
-	spin_unlock_irqrestore(&ring->execlist_lock, flags);
+	spin_unlock_irq(&ring->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
 		struct intel_context *ctx = req->ctx;
@@ -742,7 +925,6 @@
 
 		if (ctx_obj && (ctx != ring->default_context))
 			intel_lr_context_unpin(ring, ctx);
-		intel_runtime_pm_put(dev_priv);
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
 	}
@@ -787,30 +969,6 @@
 	return 0;
 }
 
-/*
- * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
- * @ringbuf: Logical Ringbuffer to advance.
- *
- * The tail is updated in our logical ringbuffer struct, not in the actual context. What
- * really happens during submission is that the context and current tail will be placed
- * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
- * point, the tail *inside* the context is updated and the ELSP written to.
- */
-static void
-intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
-				      struct intel_context *ctx,
-				      struct drm_i915_gem_request *request)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-
-	intel_logical_ring_advance(ringbuf);
-
-	if (intel_ring_stopped(ring))
-		return;
-
-	execlists_context_queue(ring, ctx, ringbuf->tail, request);
-}
-
 static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		struct intel_context *ctx)
 {
@@ -855,219 +1013,6 @@
 	}
 }
 
-static int logical_ring_alloc_request(struct intel_engine_cs *ring,
-				      struct intel_context *ctx)
-{
-	struct drm_i915_gem_request *request;
-	struct drm_i915_private *dev_private = ring->dev->dev_private;
-	int ret;
-
-	if (ring->outstanding_lazy_request)
-		return 0;
-
-	request = kzalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL)
-		return -ENOMEM;
-
-	if (ctx != ring->default_context) {
-		ret = intel_lr_context_pin(ring, ctx);
-		if (ret) {
-			kfree(request);
-			return ret;
-		}
-	}
-
-	kref_init(&request->ref);
-	request->ring = ring;
-	request->uniq = dev_private->request_uniq++;
-
-	ret = i915_gem_get_seqno(ring->dev, &request->seqno);
-	if (ret) {
-		intel_lr_context_unpin(ring, ctx);
-		kfree(request);
-		return ret;
-	}
-
-	request->ctx = ctx;
-	i915_gem_context_reference(request->ctx);
-	request->ringbuf = ctx->engine[ring->id].ringbuf;
-
-	ring->outstanding_lazy_request = request;
-	return 0;
-}
-
-static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
-				     int bytes)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_i915_gem_request *request;
-	int ret;
-
-	if (intel_ring_space(ringbuf) >= bytes)
-		return 0;
-
-	list_for_each_entry(request, &ring->request_list, list) {
-		/*
-		 * The request queue is per-engine, so can contain requests
-		 * from multiple ringbuffers. Here, we must ignore any that
-		 * aren't from the ringbuffer we're considering.
-		 */
-		struct intel_context *ctx = request->ctx;
-		if (ctx->engine[ring->id].ringbuf != ringbuf)
-			continue;
-
-		/* Would completion of this request free enough space? */
-		if (__intel_ring_space(request->tail, ringbuf->tail,
-				       ringbuf->size) >= bytes) {
-			break;
-		}
-	}
-
-	if (&request->list == &ring->request_list)
-		return -ENOSPC;
-
-	ret = i915_wait_request(request);
-	if (ret)
-		return ret;
-
-	i915_gem_retire_requests_ring(ring);
-
-	return intel_ring_space(ringbuf) >= bytes ? 0 : -ENOSPC;
-}
-
-static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
-				       struct intel_context *ctx,
-				       int bytes)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long end;
-	int ret;
-
-	ret = logical_ring_wait_request(ringbuf, bytes);
-	if (ret != -ENOSPC)
-		return ret;
-
-	/* Force the context submission in case we have been skipping it */
-	intel_logical_ring_advance_and_submit(ringbuf, ctx, NULL);
-
-	/* With GEM the hangcheck timer should kick us out of the loop,
-	 * leaving it early runs the risk of corrupting GEM state (due
-	 * to running on almost untested codepaths). But on resume
-	 * timers don't work yet, so prevent a complete hang in that
-	 * case by choosing an insanely large timeout. */
-	end = jiffies + 60 * HZ;
-
-	ret = 0;
-	do {
-		if (intel_ring_space(ringbuf) >= bytes)
-			break;
-
-		msleep(1);
-
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
-		if (time_after(jiffies, end)) {
-			ret = -EBUSY;
-			break;
-		}
-	} while (1);
-
-	return ret;
-}
-
-static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf,
-				    struct intel_context *ctx)
-{
-	uint32_t __iomem *virt;
-	int rem = ringbuf->size - ringbuf->tail;
-
-	if (ringbuf->space < rem) {
-		int ret = logical_ring_wait_for_space(ringbuf, ctx, rem);
-
-		if (ret)
-			return ret;
-	}
-
-	virt = ringbuf->virtual_start + ringbuf->tail;
-	rem /= 4;
-	while (rem--)
-		iowrite32(MI_NOOP, virt++);
-
-	ringbuf->tail = 0;
-	intel_ring_update_space(ringbuf);
-
-	return 0;
-}
-
-static int logical_ring_prepare(struct intel_ringbuffer *ringbuf,
-				struct intel_context *ctx, int bytes)
-{
-	int ret;
-
-	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
-		ret = logical_ring_wrap_buffer(ringbuf, ctx);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = logical_ring_wait_for_space(ringbuf, ctx, bytes);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	return 0;
-}
-
-/**
- * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
- *
- * @ringbuf: Logical ringbuffer.
- * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
- *
- * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
- * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
- * and also preallocates a request (every workload submission is still mediated through
- * requests, same as it did with legacy ringbuffer submission).
- *
- * Return: non-zero if the ringbuffer is not ready to be written to.
- */
-int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
-			     struct intel_context *ctx, int num_dwords)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-				   dev_priv->mm.interruptible);
-	if (ret)
-		return ret;
-
-	ret = logical_ring_prepare(ringbuf, ctx, num_dwords * sizeof(uint32_t));
-	if (ret)
-		return ret;
-
-	/* Preallocate the olr before touching the ring */
-	ret = logical_ring_alloc_request(ring, ctx);
-	if (ret)
-		return ret;
-
-	ringbuf->space -= num_dwords * sizeof(uint32_t);
-	return 0;
-}
-
 static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
 					       struct intel_context *ctx)
 {
@@ -1421,6 +1366,7 @@
 		ring->cleanup(ring);
 
 	i915_cmd_parser_fini_ring(ring);
+	i915_gem_batch_pool_fini(&ring->batch_pool);
 
 	if (ring->status_page.obj) {
 		kunmap(sg_page(ring->status_page.obj->pages->sgl));
@@ -1438,6 +1384,7 @@
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
+	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->execlist_queue);
@@ -1790,14 +1737,14 @@
 	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
 	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
 	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
-	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3]->daddr);
-	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3]->daddr);
-	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2]->daddr);
-	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2]->daddr);
-	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1]->daddr);
-	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1]->daddr);
-	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0]->daddr);
-	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0]->daddr);
+
+	/* With dynamic page allocation, PDPs may not be allocated at this
+	 * point. Point the unallocated PDPs to the scratch page.
+	 */
+	ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
+	ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
+	ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
+	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
@@ -1914,7 +1861,7 @@
 
 	context_size = round_up(get_lr_context_size(ring), 4096);
 
-	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
+	ctx_obj = i915_gem_alloc_object(dev, context_size);
 	if (IS_ERR(ctx_obj)) {
 		ret = PTR_ERR(ctx_obj);
 		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index adb731e4..04d3a6d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -36,6 +36,8 @@
 #define RING_CONTEXT_STATUS_PTR(ring)	((ring)->mmio_base+0x3a0)
 
 /* Logical Rings */
+int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request,
+					    struct intel_context *ctx);
 void intel_logical_ring_stop(struct intel_engine_cs *ring);
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_init(struct drm_device *dev);
@@ -63,9 +65,6 @@
 	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
 	ringbuf->tail += 4;
 }
-int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
-			     struct intel_context *ctx,
-			     int num_dwords);
 
 /* Logical Ring Contexts */
 void intel_lr_context_free(struct intel_context *ctx);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 06d2da3..9a9df0f 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -781,7 +781,7 @@
 		    child->device_type != DEVICE_TYPE_LFP)
 			continue;
 
-		if (intel_gmbus_is_port_valid(child->i2c_pin))
+		if (intel_gmbus_is_valid_pin(dev_priv, child->i2c_pin))
 			*i2c_pin = child->i2c_pin;
 
 		/* However, we cannot trust the BIOS writers to populate
@@ -921,7 +921,7 @@
 	if (dmi_check_system(intel_no_lvds))
 		return;
 
-	pin = GMBUS_PORT_PANEL;
+	pin = GMBUS_PIN_PANEL;
 	if (!lvds_is_present_in_vbt(dev, &pin)) {
 		DRM_DEBUG_KMS("LVDS is not present in VBT\n");
 		return;
@@ -946,6 +946,12 @@
 		return;
 	}
 
+	if (intel_connector_init(&lvds_connector->base) < 0) {
+		kfree(lvds_connector);
+		kfree(lvds_encoder);
+		return;
+	}
+
 	lvds_encoder->attached_connector = lvds_connector;
 
 	intel_encoder = &lvds_encoder->base;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index dd92122..5fd2d5a 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -172,10 +172,11 @@
 	struct intel_crtc *crtc;
 	struct drm_i915_gem_object *vid_bo;
 	struct drm_i915_gem_object *old_vid_bo;
-	int active;
-	int pfit_active;
+	bool active;
+	bool pfit_active;
 	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
-	u32 color_key;
+	u32 color_key:24;
+	u32 color_key_enabled:1;
 	u32 brightness, contrast, saturation;
 	u32 old_xscale, old_yscale;
 	/* register access */
@@ -216,7 +217,7 @@
 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	BUG_ON(overlay->last_flip_req);
+	WARN_ON(overlay->last_flip_req);
 	i915_gem_request_assign(&overlay->last_flip_req,
 					     ring->outstanding_lazy_request);
 	ret = i915_add_request(ring);
@@ -241,15 +242,15 @@
 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	BUG_ON(overlay->active);
-	overlay->active = 1;
-
+	WARN_ON(overlay->active);
 	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
 
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
 
+	overlay->active = true;
+
 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
@@ -270,7 +271,7 @@
 	u32 tmp;
 	int ret;
 
-	BUG_ON(!overlay->active);
+	WARN_ON(!overlay->active);
 
 	if (load_polyphase_filter)
 		flip_addr |= OFC_UPDATE;
@@ -309,7 +310,8 @@
 	struct drm_i915_gem_object *obj = overlay->vid_bo;
 
 	/* never have the overlay hw on without showing a frame */
-	BUG_ON(!overlay->vid_bo);
+	if (WARN_ON(!obj))
+		return;
 
 	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
@@ -317,7 +319,7 @@
 
 	overlay->crtc->overlay = NULL;
 	overlay->crtc = NULL;
-	overlay->active = 0;
+	overlay->active = false;
 }
 
 /* overlay needs to be disabled in OCMD reg */
@@ -329,7 +331,7 @@
 	u32 flip_addr = overlay->flip_addr;
 	int ret;
 
-	BUG_ON(!overlay->active);
+	WARN_ON(!overlay->active);
 
 	/* According to intel docs the overlay hw may hang (when switching
 	 * off) without loading the filter coeffs. It is however unclear whether
@@ -629,31 +631,36 @@
 			    struct overlay_registers __iomem *regs)
 {
 	u32 key = overlay->color_key;
+	u32 flags;
+
+	flags = 0;
+	if (overlay->color_key_enabled)
+		flags |= DST_KEY_ENABLE;
 
 	switch (overlay->crtc->base.primary->fb->bits_per_pixel) {
 	case 8:
-		iowrite32(0, &regs->DCLRKV);
-		iowrite32(CLK_RGB8I_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
+		key = 0;
+		flags |= CLK_RGB8I_MASK;
 		break;
 
 	case 16:
 		if (overlay->crtc->base.primary->fb->depth == 15) {
-			iowrite32(RGB15_TO_COLORKEY(key), &regs->DCLRKV);
-			iowrite32(CLK_RGB15_MASK | DST_KEY_ENABLE,
-				  &regs->DCLRKM);
+			key = RGB15_TO_COLORKEY(key);
+			flags |= CLK_RGB15_MASK;
 		} else {
-			iowrite32(RGB16_TO_COLORKEY(key), &regs->DCLRKV);
-			iowrite32(CLK_RGB16_MASK | DST_KEY_ENABLE,
-				  &regs->DCLRKM);
+			key = RGB16_TO_COLORKEY(key);
+			flags |= CLK_RGB16_MASK;
 		}
 		break;
 
 	case 24:
 	case 32:
-		iowrite32(key, &regs->DCLRKV);
-		iowrite32(CLK_RGB24_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
+		flags |= CLK_RGB24_MASK;
 		break;
 	}
+
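+	/* program the key value and the (possibly disabled) key mask in one go */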
+	iowrite32(key, &regs->DCLRKV);
+	iowrite32(flags, &regs->DCLRKM);
 }
 
 static u32 overlay_cmd_reg(struct put_image_params *params)
@@ -712,9 +719,8 @@
 	u32 swidth, swidthsw, sheight, ostride;
 	enum pipe pipe = overlay->crtc->pipe;
 
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
-	BUG_ON(!overlay);
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	ret = intel_overlay_release_old_vid(overlay);
 	if (ret != 0)
@@ -824,8 +830,8 @@
 	struct drm_device *dev = overlay->dev;
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	ret = intel_overlay_recover_from_interrupt(overlay);
 	if (ret != 0)
@@ -1131,10 +1137,10 @@
 		/* line too wide, i.e. one-line-mode */
 		if (mode->hdisplay > 1024 &&
 		    intel_panel_fitter_pipe(dev) == crtc->pipe) {
-			overlay->pfit_active = 1;
+			overlay->pfit_active = true;
 			update_pfit_vscale_ratio(overlay);
 		} else
-			overlay->pfit_active = 0;
+			overlay->pfit_active = false;
 	}
 
 	ret = check_overlay_dst(overlay, put_image_rec);
@@ -1329,6 +1335,7 @@
 			I915_WRITE(OGAMC5, attrs->gamma5);
 		}
 	}
+	overlay->color_key_enabled = (attrs->flags & I915_OVERLAY_DISABLE_DEST_COLORKEY) == 0;
 
 	ret = 0;
 out_unlock:
@@ -1392,6 +1399,7 @@
 
 	/* init all values */
 	overlay->color_key = 0x0101fe;
+	overlay->color_key_enabled = true;
 	overlay->brightness = -19;
 	overlay->contrast = 75;
 	overlay->saturation = 146;
@@ -1432,7 +1440,7 @@
 	/* The bo's should be free'd by the generic code already.
 	 * Furthermore modesetting teardown happens beforehand so the
 	 * hardware should be off already */
-	BUG_ON(dev_priv->overlay->active);
+	WARN_ON(dev_priv->overlay->active);
 
 	drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base);
 	kfree(dev_priv->overlay);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4724638..a80bfd5 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -67,7 +67,7 @@
 
 	gen9_init_clock_gating(dev);
 
-	if (INTEL_REVID(dev) == SKL_REVID_A0) {
+	if (INTEL_REVID(dev) <= SKL_REVID_B0) {
 		/*
 		 * WaDisableSDEUnitClockGating:skl
 		 * WaSetGAPSunitClckGateDisable:skl
@@ -75,6 +75,10 @@
 		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 			   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |
 			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+		/* WaDisableVFUnitClockGating:skl */
+		I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) |
+			   GEN6_VFUNIT_CLOCK_GATE_DISABLE);
 	}
 
 	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
@@ -1812,7 +1816,7 @@
 	linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
 				     mode->crtc_clock);
 	ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
-					 intel_ddi_get_cdclk_freq(dev_priv));
+					 dev_priv->display.get_display_clock_speed(dev_priv->dev));
 
 	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
 	       PIPE_WM_LINETIME_TIME(linetime);
@@ -3202,7 +3206,7 @@
 {
 	struct drm_device *dev = dev_priv->dev;
 	struct skl_ddb_allocation *cur_ddb, *new_ddb;
-	bool reallocated[I915_MAX_PIPES] = {false, false, false};
+	bool reallocated[I915_MAX_PIPES] = {};
 	struct intel_crtc *crtc;
 	enum pipe pipe;
 
@@ -3954,6 +3958,8 @@
 		    GEN6_RP_DOWN_IDLE_AVG);
 
 	dev_priv->rps.power = new_power;
+	dev_priv->rps.up_threshold = threshold_up;
+	dev_priv->rps.down_threshold = threshold_down;
 	dev_priv->rps.last_adj = 0;
 }
 
@@ -4025,8 +4031,11 @@
 		      "Odd GPU freq value\n"))
 		val &= ~1;
 
-	if (val != dev_priv->rps.cur_freq)
+	if (val != dev_priv->rps.cur_freq) {
 		vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+		if (!IS_CHERRYVIEW(dev_priv))
+			gen6_set_rps_thresholds(dev_priv, val);
+	}
 
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
@@ -4075,6 +4084,7 @@
 				& GENFREQSTATUS) == 0, 100))
 		DRM_ERROR("timed out waiting for Punit\n");
 
+	gen6_set_rps_thresholds(dev_priv, val);
 	vlv_force_gfx_clock(dev_priv, false);
 
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
@@ -4105,10 +4115,14 @@
 		dev_priv->rps.last_adj = 0;
 		I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 	}
+
+	while (!list_empty(&dev_priv->rps.clients))
+		list_del_init(dev_priv->rps.clients.next);
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
-void gen6_rps_boost(struct drm_i915_private *dev_priv)
+void gen6_rps_boost(struct drm_i915_private *dev_priv,
+		    struct drm_i915_file_private *file_priv)
 {
 	u32 val;
 
@@ -4116,9 +4130,16 @@
 	val = dev_priv->rps.max_freq_softlimit;
 	if (dev_priv->rps.enabled &&
 	    dev_priv->mm.busy &&
-	    dev_priv->rps.cur_freq < val) {
+	    dev_priv->rps.cur_freq < val &&
+	    (file_priv == NULL || list_empty(&file_priv->rps_boost))) {
 		intel_set_rps(dev_priv->dev, val);
 		dev_priv->rps.last_adj = 0;
+
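+		/* A client stays on rps.clients until gen6_rps_idle() clears the
+		 * list, so each file contributes at most one boost per busy
+		 * period. */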
+		if (file_priv != NULL) {
+			list_add(&file_priv->rps_boost, &dev_priv->rps.clients);
+			file_priv->rps_boosts++;
+		} else
+			dev_priv->rps.boosts++;
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
@@ -4349,8 +4370,13 @@
 				   GEN6_RC_CTL_EI_MODE(1) |
 				   rc6_mask);
 
-	/* 3b: Enable Coarse Power Gating only when RC6 is enabled */
-	I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 3 : 0);
+	/*
+	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
+	 * WaDisableRenderPowerGating:skl,bxt - Render PG needs to be disabled with RC6.
+	 */
+	I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
+			GEN9_MEDIA_PG_ENABLE : 0);
+
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
@@ -6791,6 +6817,41 @@
 		return val / GT_FREQUENCY_MULTIPLIER;
 }
 
+struct request_boost {
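+	/* worker plus the request it will boost on behalf of */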
+	struct work_struct work;
+	struct drm_i915_gem_request *rq;
+};
+
+static void __intel_rps_boost_work(struct work_struct *work)
+{
+	struct request_boost *boost = container_of(work, struct request_boost, work);
+
+	if (!i915_gem_request_completed(boost->rq, true))
+		gen6_rps_boost(to_i915(boost->rq->ring->dev), NULL);
+
+	i915_gem_request_unreference__unlocked(boost->rq);
+	kfree(boost);
+}
+
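+/* Queue a worker that boosts the GPU frequency if @rq is still incomplete by
+ * the time the worker runs. */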
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+				       struct drm_i915_gem_request *rq)
+{
+	struct request_boost *boost;
+
+	if (rq == NULL || INTEL_INFO(dev)->gen < 6)
+		return;
+
+	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
+	if (boost == NULL)
+		return;
+
+	i915_gem_request_reference(rq);
+	boost->rq = rq;
+
+	INIT_WORK(&boost->work, __intel_rps_boost_work);
+	queue_work(to_i915(dev)->wq, &boost->work);
+}
+
 void intel_pm_setup(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -6799,6 +6860,7 @@
 
 	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
 			  intel_gen6_powersave_work);
+	INIT_LIST_HEAD(&dev_priv->rps.clients);
 
 	dev_priv->pm.suspended = false;
 }
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index a8f9348..27608ce 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -117,6 +117,19 @@
 	I915_WRITE(VLV_VSCSDP(pipe), val);
 }
 
+static void skl_psr_setup_su_vsc(struct intel_dp *intel_dp)
+{
+	struct edp_vsc_psr psr_vsc;
+
+	/* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */
+	memset(&psr_vsc, 0, sizeof(psr_vsc));
+	psr_vsc.sdp_header.HB0 = 0;
+	psr_vsc.sdp_header.HB1 = 0x7;
+	psr_vsc.sdp_header.HB2 = 0x3;
+	psr_vsc.sdp_header.HB3 = 0xb;
+	intel_psr_write_vsc(intel_dp, &psr_vsc);
+}
+
 static void hsw_psr_setup_vsc(struct intel_dp *intel_dp)
 {
 	struct edp_vsc_psr psr_vsc;
@@ -133,7 +146,7 @@
 static void vlv_psr_enable_sink(struct intel_dp *intel_dp)
 {
 	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
-			   DP_PSR_ENABLE);
+			   DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE);
 }
 
 static void hsw_psr_enable_sink(struct intel_dp *intel_dp)
@@ -165,6 +178,12 @@
 		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
 				   DP_PSR_ENABLE & ~DP_PSR_MAIN_LINK_ACTIVE);
 
+	/* Enable AUX frame sync at sink */
+	if (dev_priv->psr.aux_frame_sync)
+		drm_dp_dpcd_writeb(&intel_dp->aux,
+				DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF,
+				DP_AUX_FRAME_SYNC_ENABLE);
+
 	aux_data_reg = (INTEL_INFO(dev)->gen >= 9) ?
 				DPA_AUX_CH_DATA1 : EDP_PSR_AUX_DATA1(dev);
 	aux_ctl_reg = (INTEL_INFO(dev)->gen >= 9) ?
@@ -183,8 +202,10 @@
 		val |= DP_AUX_CH_CTL_TIME_OUT_1600us;
 		val &= ~DP_AUX_CH_CTL_MESSAGE_SIZE_MASK;
 		val |= (sizeof(aux_msg) << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT);
-		/* Use hardcoded data values for PSR */
+		/* Use hardcoded data values for PSR, frame sync and GTC */
 		val &= ~DP_AUX_CH_CTL_PSR_DATA_AUX_REG_SKL;
+		val &= ~DP_AUX_CH_CTL_FS_DATA_AUX_REG_SKL;
+		val &= ~DP_AUX_CH_CTL_GTC_DATA_AUX_REG_SKL;
 		I915_WRITE(aux_ctl_reg, val);
 	} else {
 		I915_WRITE(aux_ctl_reg,
@@ -232,6 +253,7 @@
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+
 	uint32_t max_sleep_time = 0x1f;
 	/* Lately it was identified that depending on panel idle frame count
 	 * calculated at HW can be off by 1. So let's use what came
@@ -255,6 +277,10 @@
 		   max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT |
 		   idle_frames << EDP_PSR_IDLE_FRAME_SHIFT |
 		   EDP_PSR_ENABLE);
+
+	if (dev_priv->psr.psr2_support)
+		I915_WRITE(EDP_PSR2_CTL, EDP_PSR2_ENABLE |
+				EDP_SU_TRACK_ENABLE | EDP_PSR2_TP2_TIME_100);
 }
 
 static bool intel_psr_match_conditions(struct intel_dp *intel_dp)
@@ -332,6 +358,7 @@
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
 
 	if (!HAS_PSR(dev)) {
 		DRM_DEBUG_KMS("PSR not supported on this platform\n");
@@ -364,6 +391,15 @@
 	if (HAS_DDI(dev)) {
 		hsw_psr_setup_vsc(intel_dp);
 
+		if (dev_priv->psr.psr2_support) {
+			/* PSR2 is restricted to work with panel resolutions up to 3200x2000 */
+			if (crtc->config->pipe_src_w > 3200 ||
+				crtc->config->pipe_src_h > 2000)
+				dev_priv->psr.psr2_support = false;
+			else
+				skl_psr_setup_su_vsc(intel_dp);
+		}
+
 		/* Avoid continuous PSR exit by masking memup and hpd */
 		I915_WRITE(EDP_PSR_DEBUG_CTL(dev), EDP_PSR_DEBUG_MASK_MEMUP |
 			   EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 000f608..de8c074 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -853,6 +853,9 @@
 			    GEN6_WIZ_HASHING_MASK,
 			    GEN6_WIZ_HASHING_16x4);
 
+	/* WaProgramL3SqcReg1Default:bdw */
+	WA_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+
 	return 0;
 }
 
@@ -1989,6 +1992,7 @@
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
+	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	ringbuf->size = 32 * PAGE_SIZE;
 	ringbuf->ring = ring;
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
@@ -2067,28 +2071,29 @@
 	cleanup_status_page(ring);
 
 	i915_cmd_parser_fini_ring(ring);
+	i915_gem_batch_pool_fini(&ring->batch_pool);
 
 	kfree(ringbuf);
 	ring->buffer = NULL;
 }
 
-static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
+static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
 	struct drm_i915_gem_request *request;
-	int ret;
+	int ret, new_space;
 
 	if (intel_ring_space(ringbuf) >= n)
 		return 0;
 
 	list_for_each_entry(request, &ring->request_list, list) {
-		if (__intel_ring_space(request->postfix, ringbuf->tail,
-				       ringbuf->size) >= n) {
+		new_space = __intel_ring_space(request->postfix, ringbuf->tail,
+				       ringbuf->size);
+		if (new_space >= n)
 			break;
-		}
 	}
 
-	if (&request->list == &ring->request_list)
+	if (WARN_ON(&request->list == &ring->request_list))
 		return -ENOSPC;
 
 	ret = i915_wait_request(request);
@@ -2097,61 +2102,11 @@
 
 	i915_gem_retire_requests_ring(ring);
 
+	WARN_ON(intel_ring_space(ringbuf) < new_space);
+
 	return 0;
 }
 
-static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
-{
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ringbuffer *ringbuf = ring->buffer;
-	unsigned long end;
-	int ret;
-
-	ret = intel_ring_wait_request(ring, n);
-	if (ret != -ENOSPC)
-		return ret;
-
-	/* force the tail write in case we have been skipping them */
-	__intel_ring_advance(ring);
-
-	/* With GEM the hangcheck timer should kick us out of the loop,
-	 * leaving it early runs the risk of corrupting GEM state (due
-	 * to running on almost untested codepaths). But on resume
-	 * timers don't work yet, so prevent a complete hang in that
-	 * case by choosing an insanely large timeout. */
-	end = jiffies + 60 * HZ;
-
-	ret = 0;
-	trace_i915_ring_wait_begin(ring);
-	do {
-		if (intel_ring_space(ringbuf) >= n)
-			break;
-		ringbuf->head = I915_READ_HEAD(ring);
-		if (intel_ring_space(ringbuf) >= n)
-			break;
-
-		msleep(1);
-
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
-		if (time_after(jiffies, end)) {
-			ret = -EBUSY;
-			break;
-		}
-	} while (1);
-	trace_i915_ring_wait_end(ring);
-	return ret;
-}
-
 static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 {
 	uint32_t __iomem *virt;
@@ -2198,32 +2153,9 @@
 	return i915_wait_request(req);
 }
 
-static int
-intel_ring_alloc_request(struct intel_engine_cs *ring)
+int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
-	int ret;
-	struct drm_i915_gem_request *request;
-	struct drm_i915_private *dev_private = ring->dev->dev_private;
-
-	if (ring->outstanding_lazy_request)
-		return 0;
-
-	request = kzalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL)
-		return -ENOMEM;
-
-	kref_init(&request->ref);
-	request->ring = ring;
-	request->ringbuf = ring->buffer;
-	request->uniq = dev_private->request_uniq++;
-
-	ret = i915_gem_get_seqno(ring->dev, &request->seqno);
-	if (ret) {
-		kfree(request);
-		return ret;
-	}
-
-	ring->outstanding_lazy_request = request;
+	request->ringbuf = request->ring->buffer;
 	return 0;
 }
 
@@ -2264,7 +2196,7 @@
 		return ret;
 
 	/* Preallocate the olr before touching the ring */
-	ret = intel_ring_alloc_request(ring);
+	ret = i915_gem_request_alloc(ring, ring->default_context);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c761fe0..39f6dfc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -2,6 +2,7 @@
 #define _INTEL_RINGBUFFER_H_
 
 #include <linux/hashtable.h>
+#include "i915_gem_batch_pool.h"
 
 #define I915_CMD_HASH_ORDER 9
 
@@ -133,6 +134,13 @@
 	struct		drm_device *dev;
 	struct intel_ringbuffer *buffer;
 
+	/*
+	 * A pool of objects to use as shadow copies of client batch buffers
+	 * when the command parser is enabled. Prevents the client from
+	 * modifying the batch contents after software parsing.
+	 */
+	struct i915_gem_batch_pool batch_pool;
+
 	struct intel_hw_status_page status_page;
 
 	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
@@ -390,6 +398,8 @@
 void intel_stop_ring_buffer(struct intel_engine_cs *ring);
 void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);
 
+int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
+
 int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
 int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
 static inline void intel_ring_emit(struct intel_engine_cs *ring,
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index f5b7e1e..10cd332 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -2291,10 +2291,11 @@
 	else
 		mapping = &dev_priv->sdvo_mappings[1];
 
-	if (mapping->initialized && intel_gmbus_is_port_valid(mapping->i2c_pin))
+	if (mapping->initialized &&
+	    intel_gmbus_is_valid_pin(dev_priv, mapping->i2c_pin))
 		pin = mapping->i2c_pin;
 	else
-		pin = GMBUS_PORT_DPB;
+		pin = GMBUS_PIN_DPB;
 
 	sdvo->i2c = intel_gmbus_get_adapter(dev_priv, pin);
 
@@ -2426,6 +2427,22 @@
 	}
 }
 
+static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void)
+{
+	struct intel_sdvo_connector *sdvo_connector;
+
+	sdvo_connector = kzalloc(sizeof(*sdvo_connector), GFP_KERNEL);
+	if (!sdvo_connector)
+		return NULL;
+
+	if (intel_connector_init(&sdvo_connector->base) < 0) {
+		kfree(sdvo_connector);
+		return NULL;
+	}
+
+	return sdvo_connector;
+}
+
 static bool
 intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
 {
@@ -2437,7 +2454,7 @@
 
 	DRM_DEBUG_KMS("initialising DVI device %d\n", device);
 
-	intel_sdvo_connector = kzalloc(sizeof(*intel_sdvo_connector), GFP_KERNEL);
+	intel_sdvo_connector = intel_sdvo_connector_alloc();
 	if (!intel_sdvo_connector)
 		return false;
 
@@ -2491,7 +2508,7 @@
 
 	DRM_DEBUG_KMS("initialising TV type %d\n", type);
 
-	intel_sdvo_connector = kzalloc(sizeof(*intel_sdvo_connector), GFP_KERNEL);
+	intel_sdvo_connector = intel_sdvo_connector_alloc();
 	if (!intel_sdvo_connector)
 		return false;
 
@@ -2570,7 +2587,7 @@
 
 	DRM_DEBUG_KMS("initialising LVDS device %d\n", device);
 
-	intel_sdvo_connector = kzalloc(sizeof(*intel_sdvo_connector), GFP_KERNEL);
+	intel_sdvo_connector = intel_sdvo_connector_alloc();
 	if (!intel_sdvo_connector)
 		return false;
 
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index a4c0a04..e3d41c0 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -190,10 +190,13 @@
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	const int pipe = intel_plane->pipe;
 	const int plane = intel_plane->plane + 1;
-	u32 plane_ctl, stride_div;
+	u32 plane_ctl, stride_div, stride;
 	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
 	const struct drm_intel_sprite_colorkey *key = &intel_plane->ckey;
 	unsigned long surf_addr;
+	u32 tile_height, plane_offset, plane_size;
+	unsigned int rotation;
+	int x_offset, y_offset;
 
 	plane_ctl = PLANE_CTL_ENABLE |
 		PLANE_CTL_PIPE_CSC_ENABLE;
@@ -254,8 +257,20 @@
 		MISSING_CASE(fb->modifier[0]);
 	}
 
-	if (drm_plane->state->rotation == BIT(DRM_ROTATE_180))
+	rotation = drm_plane->state->rotation;
+	switch (rotation) {
+	case BIT(DRM_ROTATE_90):
+		plane_ctl |= PLANE_CTL_ROTATE_90;
+		break;
+
+	case BIT(DRM_ROTATE_180):
 		plane_ctl |= PLANE_CTL_ROTATE_180;
+		break;
+
+	case BIT(DRM_ROTATE_270):
+		plane_ctl |= PLANE_CTL_ROTATE_270;
+		break;
+	}
 
 	intel_update_sprite_watermarks(drm_plane, crtc, src_w, src_h,
 				       pixel_size, true,
@@ -283,10 +298,26 @@
 
 	surf_addr = intel_plane_obj_offset(intel_plane, obj);
 
-	I915_WRITE(PLANE_OFFSET(pipe, plane), (y << 16) | x);
-	I915_WRITE(PLANE_STRIDE(pipe, plane), fb->pitches[0] / stride_div);
+	if (intel_rotation_90_or_270(rotation)) {
+		/* stride: Surface height in tiles */
+		tile_height = intel_tile_height(dev, fb->bits_per_pixel,
+							fb->modifier[0]);
+		stride = DIV_ROUND_UP(fb->height, tile_height);
+		plane_size = (src_w << 16) | src_h;
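+		/* in the rotated view x and y swap roles; x counts from the
+		 * opposite edge of the surface */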
+		x_offset = stride * tile_height - y - (src_h + 1);
+		y_offset = x;
+	} else {
+		stride = fb->pitches[0] / stride_div;
+		plane_size = (src_h << 16) | src_w;
+		x_offset = x;
+		y_offset = y;
+	}
+	plane_offset = y_offset << 16 | x_offset;
+
+	I915_WRITE(PLANE_OFFSET(pipe, plane), plane_offset);
+	I915_WRITE(PLANE_STRIDE(pipe, plane), stride);
 	I915_WRITE(PLANE_POS(pipe, plane), (crtc_y << 16) | crtc_x);
-	I915_WRITE(PLANE_SIZE(pipe, plane), (crtc_h << 16) | crtc_w);
+	I915_WRITE(PLANE_SIZE(pipe, plane), plane_size);
 	I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl);
 	I915_WRITE(PLANE_SURF(pipe, plane), surf_addr);
 	POSTING_READ(PLANE_SURF(pipe, plane));
@@ -1006,10 +1037,10 @@
 	}
 
 	if (state->visible) {
-		src->x1 = src_x;
-		src->x2 = src_x + src_w;
-		src->y1 = src_y;
-		src->y2 = src_y + src_h;
+		src->x1 = src_x << 16;
+		src->x2 = (src_x + src_w) << 16;
+		src->y1 = src_y << 16;
+		src->y2 = (src_y + src_h) << 16;
 	}
 
 	dst->x1 = crtc_x;
@@ -1081,10 +1112,10 @@
 			crtc_y = state->dst.y1;
 			crtc_w = drm_rect_width(&state->dst);
 			crtc_h = drm_rect_height(&state->dst);
-			src_x = state->src.x1;
-			src_y = state->src.y1;
-			src_w = drm_rect_width(&state->src);
-			src_h = drm_rect_height(&state->src);
+			src_x = state->src.x1 >> 16;
+			src_y = state->src.y1 >> 16;
+			src_w = drm_rect_width(&state->src) >> 16;
+			src_h = drm_rect_height(&state->src) >> 16;
 			intel_plane->update_plane(plane, crtc, fb,
 						  crtc_x, crtc_y, crtc_w, crtc_h,
 						  src_x, src_y, src_w, src_h);
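
The two hunks above switch the sprite's src rectangle to the 16.16 fixed-point convention used by drm_plane state: the check path shifts whole-pixel values up, the commit path shifts them back down before programming the hardware. A minimal standalone sketch of that round trip (the values are made up):

	/* Illustrative sketch only: the 16.16 fixed-point src-rect round trip. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int src_x = 5, src_w = 640;	/* whole pixels */

		/* check path: store the rectangle in 16.16 fixed point */
		unsigned int x1 = src_x << 16;
		unsigned int x2 = (src_x + src_w) << 16;

		/* commit path: convert back to whole pixels */
		printf("src_x=%u src_w=%u\n", x1 >> 16, (x2 - x1) >> 16);
		return 0;
	}
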
@@ -1139,11 +1170,11 @@
 	if (!plane->crtc || !plane->state->fb)
 		return 0;
 
-	return plane->funcs->update_plane(plane, plane->crtc, plane->state->fb,
-				  plane->state->crtc_x, plane->state->crtc_y,
-				  plane->state->crtc_w, plane->state->crtc_h,
-				  plane->state->src_x, plane->state->src_y,
-				  plane->state->src_w, plane->state->src_h);
+	return drm_plane_helper_update(plane, plane->crtc, plane->state->fb,
+				       plane->state->crtc_x, plane->state->crtc_y,
+				       plane->state->crtc_w, plane->state->crtc_h,
+				       plane->state->src_x, plane->state->src_y,
+				       plane->state->src_w, plane->state->src_h);
 }
 
 static uint32_t ilk_plane_formats[] = {
@@ -1263,6 +1294,7 @@
 		intel_plane->max_downscale = 1;
 		intel_plane->update_plane = skl_update_plane;
 		intel_plane->disable_plane = skl_disable_plane;
+		state->scaler_id = -1;
 
 		plane_formats = skl_plane_formats;
 		num_plane_formats = ARRAY_SIZE(skl_plane_formats);
@@ -1276,6 +1308,7 @@
 	intel_plane->plane = plane;
 	intel_plane->check_plane = intel_check_sprite_plane;
 	intel_plane->commit_plane = intel_commit_sprite_plane;
+	intel_plane->ckey.flags = I915_SET_COLORKEY_NONE;
 	possible_crtcs = (1 << pipe);
 	ret = drm_universal_plane_init(dev, &intel_plane->base, possible_crtcs,
 				       &intel_plane_funcs,
@@ -1286,16 +1319,7 @@
 		goto out;
 	}
 
-	if (!dev->mode_config.rotation_property)
-		dev->mode_config.rotation_property =
-			drm_mode_create_rotation_property(dev,
-							  BIT(DRM_ROTATE_0) |
-							  BIT(DRM_ROTATE_180));
-
-	if (dev->mode_config.rotation_property)
-		drm_object_attach_property(&intel_plane->base.base,
-					   dev->mode_config.rotation_property,
-					   state->base.rotation);
+	intel_create_rotation_property(dev, intel_plane);
 
 	drm_plane_helper_add(&intel_plane->base, &intel_plane_helper_funcs);
 
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index bc1d9d7..8b9d325 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1621,7 +1621,7 @@
 		return;
 	}
 
-	intel_connector = kzalloc(sizeof(*intel_connector), GFP_KERNEL);
+	intel_connector = intel_connector_alloc();
 	if (!intel_connector) {
 		kfree(intel_tv);
 		return;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index ab5cc94..d96d15f 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -375,6 +375,26 @@
 	intel_disable_gt_powersave(dev);
 }
 
+static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
+					 enum forcewake_domains fw_domains)
+{
+	struct intel_uncore_forcewake_domain *domain;
+	enum forcewake_domain_id id;
+
+	if (!dev_priv->uncore.funcs.force_wake_get)
+		return;
+
+	fw_domains &= dev_priv->uncore.fw_domains;
+
+	for_each_fw_domain_mask(domain, fw_domains, dev_priv, id) {
+		if (domain->wake_count++)
+			fw_domains &= ~(1 << id);
+	}
+
+	if (fw_domains)
+		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
+}
+
 /**
  * intel_uncore_forcewake_get - grab forcewake domain references
  * @dev_priv: i915 device instance
@@ -392,27 +412,57 @@
 				enum forcewake_domains fw_domains)
 {
 	unsigned long irqflags;
-	struct intel_uncore_forcewake_domain *domain;
-	enum forcewake_domain_id id;
 
 	if (!dev_priv->uncore.funcs.force_wake_get)
 		return;
 
 	WARN_ON(dev_priv->pm.suspended);
 
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	__intel_uncore_forcewake_get(dev_priv, fw_domains);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+}
+
+/**
+ * intel_uncore_forcewake_get__locked - grab forcewake domain references
+ * @dev_priv: i915 device instance
+ * @fw_domains: forcewake domains to get reference on
+ *
+ * See intel_uncore_forcewake_get(). This variant places the onus
+ * on the caller to explicitly handle the dev_priv->uncore.lock spinlock.
+ */
+void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv,
+					enum forcewake_domains fw_domains)
+{
+	assert_spin_locked(&dev_priv->uncore.lock);
+
+	if (!dev_priv->uncore.funcs.force_wake_get)
+		return;
+
+	__intel_uncore_forcewake_get(dev_priv, fw_domains);
+}
+
+static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
+					 enum forcewake_domains fw_domains)
+{
+	struct intel_uncore_forcewake_domain *domain;
+	enum forcewake_domain_id id;
+
+	if (!dev_priv->uncore.funcs.force_wake_put)
+		return;
+
 	fw_domains &= dev_priv->uncore.fw_domains;
 
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
 	for_each_fw_domain_mask(domain, fw_domains, dev_priv, id) {
-		if (domain->wake_count++)
-			fw_domains &= ~(1 << id);
+		if (WARN_ON(domain->wake_count == 0))
+			continue;
+
+		if (--domain->wake_count)
+			continue;
+
+		domain->wake_count++;
+		fw_domain_arm_timer(domain);
 	}
-
-	if (fw_domains)
-		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
-
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
 /**
@@ -427,30 +477,34 @@
 				enum forcewake_domains fw_domains)
 {
 	unsigned long irqflags;
-	struct intel_uncore_forcewake_domain *domain;
-	enum forcewake_domain_id id;
 
 	if (!dev_priv->uncore.funcs.force_wake_put)
 		return;
 
-	fw_domains &= dev_priv->uncore.fw_domains;
-
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
-	for_each_fw_domain_mask(domain, fw_domains, dev_priv, id) {
-		if (WARN_ON(domain->wake_count == 0))
-			continue;
-
-		if (--domain->wake_count)
-			continue;
-
-		domain->wake_count++;
-		fw_domain_arm_timer(domain);
-	}
-
+	__intel_uncore_forcewake_put(dev_priv, fw_domains);
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+/**
+ * intel_uncore_forcewake_put__locked - release forcewake domain references
+ * @dev_priv: i915 device instance
+ * @fw_domains: forcewake domains to put references on
+ *
+ * See intel_uncore_forcewake_put(). This variant places the onus
+ * on the caller to explicitly handle the dev_priv->uncore.lock spinlock.
+ */
+void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv,
+					enum forcewake_domains fw_domains)
+{
+	assert_spin_locked(&dev_priv->uncore.lock);
+
+	if (!dev_priv->uncore.funcs.force_wake_put)
+		return;
+
+	__intel_uncore_forcewake_put(dev_priv, fw_domains);
+}
+
 void assert_forcewakes_inactive(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore_forcewake_domain *domain;
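
With the __locked variants factored out above, a caller that already needs dev_priv->uncore.lock can take the lock once around a whole batch of register accesses instead of re-acquiring it per forcewake reference. A hypothetical caller (the function name and the elided register sequence are illustrative, not part of this patch) would look roughly like:

	/* Hypothetical caller: hold uncore.lock once around a batch of accesses. */
	static void example_batched_mmio(struct drm_i915_private *dev_priv)
	{
		unsigned long irqflags;

		spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);

		/* Grab the render forcewake reference without re-taking the lock. */
		intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_RENDER);

		/* ... several raw register reads/writes under one lock/wake ... */

		intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_RENDER);
		spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
	}
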
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 2e80ad1..150b93c 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -1263,6 +1263,7 @@
 			  bool is_primary);
 extern void drm_plane_cleanup(struct drm_plane *plane);
 extern unsigned int drm_plane_index(struct drm_plane *plane);
+extern struct drm_plane *drm_plane_from_index(struct drm_device *dev, int idx);
 extern void drm_plane_force_disable(struct drm_plane *plane);
 extern int drm_plane_check_pixel_format(const struct drm_plane *plane,
 					u32 format);
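
A user of the lookup helper declared above typically holds only a plane index (for example one previously obtained from drm_plane_index()) and wants the plane pointer back; a hypothetical sketch (the wrapper name is made up):

	/* Hypothetical wrapper: recover a plane from a stored index. */
	static struct drm_plane *plane_for_index(struct drm_device *dev, int idx)
	{
		struct drm_plane *plane = drm_plane_from_index(dev, idx);

		if (!plane)
			DRM_DEBUG_KMS("no registered plane at index %d\n", idx);

		return plane;
	}
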
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 551b673..4851d66 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -996,6 +996,7 @@
 /* flags */
 #define I915_OVERLAY_UPDATE_ATTRS	(1<<0)
 #define I915_OVERLAY_UPDATE_GAMMA	(1<<1)
+#define I915_OVERLAY_DISABLE_DEST_COLORKEY	(1<<2)
 struct drm_intel_overlay_attrs {
 	__u32 flags;
 	__u32 color_key;