Merge tag 'drm-intel-next-2017-12-01' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

[airlied: fix conflict in intel_dsi.c]

drm-intel-next-2017-12-01:

- Init clock gate fix (Ville)
- Execlists event handling corrections (Chris, Michel)
- Improvements to GPU cache invalidation and context switching (Chris)
- More perf OA changes (Lionel)
- More selftest improvements and fixes (Chris, Matthew)
- Clean-up of module parameters (Chris)
- Clean-up around legacy ringbuffer submission and HW semaphores on old platforms (Chris)
- More Cannonlake stabilization effort (David, James)
- Display planes clean-up and improvements (Ville)
- New PMU interface for perf queries (a hedged usage sketch appears just before the diff)... (Tvrtko)
- ... and other subsequent PMU changes and fixes (Tvrtko, Chris)
- Remove success dmesg noise from rotation (Chris)
- New DMC for Kabylake (Anusha)
- Fixes around atomic commits (Daniel)
- GuC updates and fixes (Sagar, Michal, Chris)
- A couple of gmbus/i2c fixes (Ville)
- Use exponential backoff for all our wait_for() (Chris); a sketch of the pattern follows this list
- Fixes for i915/fbdev (Chris)
- Backlight fixes (Arnd)
- Shrinker updates (Chris)
- Make hotplug enabling more robust (Chris)
- Disable transparent huge pages (THP) when a needed workaround is missing (Joonas)
- New GuC images for SKL, KBL, BXT (Sagar)
- Add HW Workaround for Geminilake performance (Valtteri)
- Fixes for PPS timings (Imre)
- More IPS fixes (Maarten)
- Many DisplayPort fixes for gen2-gen4 (Ville)
- Retry the GPU reset, making recovery from hangs more robust (Chris)
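
(A minimal illustration of the exponential-backoff polling pattern referenced
above; this is not the driver's actual wait_for() macro, just the general
poll-sleep-double shape, with illustrative names and limits.)

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>

/*
 * Illustrative only: poll COND until it becomes true or TIMEOUT_MS expires,
 * doubling the sleep between polls up to a 1ms cap so that fast conditions
 * return quickly while slow ones do not hammer the hardware.
 */
#define poll_with_backoff(COND, TIMEOUT_MS) ({				\
	unsigned long timeout__ = jiffies + msecs_to_jiffies(TIMEOUT_MS); \
	unsigned long wait__ = 10;	/* first sleep: ~10us */	\
	int ret__ = -ETIMEDOUT;						\
									\
	for (;;) {							\
		if (COND) {						\
			ret__ = 0;					\
			break;						\
		}							\
		if (time_after(jiffies, timeout__))			\
			break;						\
		usleep_range(wait__, wait__ * 2);			\
		wait__ = min(wait__ << 1, 1000UL);	/* back off, cap */ \
	}								\
	ret__;								\
})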

* tag 'drm-intel-next-2017-12-01' of git://anongit.freedesktop.org/drm/drm-intel: (101 commits)
  drm/i915: Update DRIVER_DATE to 20171201
  drm/i915/cnl: Mask previous DDI - PLL mapping
  drm/i915: Remove unsafe i915.enable_rc6
  drm/i915: Sleep and retry a GPU reset if at first we don't succeed
  drm/i915: Interlaced DP output doesn't work on VLV/CHV
  drm/i915: Pass crtc state to intel_pipe_{enable,disable}()
  drm/i915: Wait for pipe to start on i830 as well
  drm/i915: Fix vblank timestamp/frame counter jumps on gen2
  drm/i915: Fix deadlock in i830_disable_pipe()
  drm/i915: Fix has_audio readout for DDI A
  drm/i915: Don't add the "force audio" property to DP connectors that don't support audio
  drm/i915: Disable DP audio for g4x
  drm/i915/selftests: Wake the device before executing requests on the GPU
  drm/i915: Set fake_vma.size as well as fake_vma.node.size for capture
  drm/i915: Tidy up signed/unsigned comparison
  drm/i915: Enable IPS with only sprite plane visible too, v4.
  drm/i915: Make ips_enabled a property depending on whether IPS is enabled, v3.
  drm/i915: Avoid PPS HW/SW state mismatch due to rounding
  drm/i915: Skip switch-to-kernel-context on suspend when wedged
  drm/i915/glk: Apply WaProgramL3SqcReg1DefaultForPerf for GLK too
  ...
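
A hedged userspace sketch relating to the new i915 PMU noted above (not part
of this series): how a counter exposed by a dynamic perf PMU can be opened and
read. The sysfs "type" file and perf_event_open(2) are the generic kernel
interfaces; the event config value is a caller-supplied placeholder because
the i915 event encodings are not shown in this diff.

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_pmu_counter(uint64_t config)
{
	struct perf_event_attr attr;
	FILE *f;
	int type;

	/* The perf core exports each registered PMU's dynamic type id here. */
	f = fopen("/sys/bus/event_source/devices/i915/type", "r");
	if (!f)
		return -1;
	if (fscanf(f, "%d", &type) != 1) {
		fclose(f);
		return -1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;	/* the dynamically assigned i915 PMU type */
	attr.config = config;	/* event encoding, assumed known by the caller */

	/* System-wide event on CPU 0: uncore-style PMUs are not per-task. */
	return syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
}

int main(void)
{
	uint64_t count;
	int fd = open_pmu_counter(0 /* placeholder config */);

	if (fd < 0)
		return 1;
	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("counter: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}
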
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 9e53edbc..fa36491 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -18,6 +18,7 @@
 config DRM_I915_DEBUG
         bool "Enable additional driver debugging"
         depends on DRM_I915
+        select DEBUG_FS
         select PREEMPT_COUNT
         select I2C_CHARDEV
         select DRM_DP_AUX_CHARDEV
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 49b9535..091aef2 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -46,6 +46,7 @@
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
+i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 0672178..dac12c2 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -294,8 +294,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 		 * write.
 		 */
 		if (mmio->in_context &&
-				((ctx_ctrl & inhibit_mask) != inhibit_mask) &&
-				i915_modparams.enable_execlists)
+		    (ctx_ctrl & inhibit_mask) != inhibit_mask)
 			continue;
 
 		if (mmio->mask)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index b11629b..ccb5ba0 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -26,6 +26,7 @@
  */
 
 #include "i915_drv.h"
+#include "intel_ringbuffer.h"
 
 /**
  * DOC: batch buffer command parser
@@ -940,7 +941,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
 		return;
 	}
 
-	engine->needs_cmd_parser = true;
+	engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;
 }
 
 /**
@@ -952,7 +953,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
  */
 void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
 {
-	if (!engine->needs_cmd_parser)
+	if (!intel_engine_needs_cmd_parser(engine))
 		return;
 
 	fini_hash_table(engine);
@@ -1350,7 +1351,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
 
 	/* If the command parser is not enabled, report 0 - unsupported */
 	for_each_engine(engine, dev_priv, id) {
-		if (engine->needs_cmd_parser) {
+		if (intel_engine_needs_cmd_parser(engine)) {
 			active = true;
 			break;
 		}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index df3852c..2829447 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1151,13 +1151,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 		rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
 		rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
 		rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
-		if (INTEL_GEN(dev_priv) >= 9)
-			cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
-		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-			cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
-		else
-			cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
-		cagf = intel_gpu_freq(dev_priv, cagf);
+		cagf = intel_gpu_freq(dev_priv,
+				      intel_get_cagf(dev_priv, rpstat));
 
 		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
@@ -1989,75 +1984,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 	return 0;
 }
 
-static void i915_dump_lrc_obj(struct seq_file *m,
-			      struct i915_gem_context *ctx,
-			      struct intel_engine_cs *engine)
-{
-	struct i915_vma *vma = ctx->engine[engine->id].state;
-	struct page *page;
-	int j;
-
-	seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id);
-
-	if (!vma) {
-		seq_puts(m, "\tFake context\n");
-		return;
-	}
-
-	if (vma->flags & I915_VMA_GLOBAL_BIND)
-		seq_printf(m, "\tBound in GGTT at 0x%08x\n",
-			   i915_ggtt_offset(vma));
-
-	if (i915_gem_object_pin_pages(vma->obj)) {
-		seq_puts(m, "\tFailed to get pages for context object\n\n");
-		return;
-	}
-
-	page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN);
-	if (page) {
-		u32 *reg_state = kmap_atomic(page);
-
-		for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
-			seq_printf(m,
-				   "\t[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n",
-				   j * 4,
-				   reg_state[j], reg_state[j + 1],
-				   reg_state[j + 2], reg_state[j + 3]);
-		}
-		kunmap_atomic(reg_state);
-	}
-
-	i915_gem_object_unpin_pages(vma->obj);
-	seq_putc(m, '\n');
-}
-
-static int i915_dump_lrc(struct seq_file *m, void *unused)
-{
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
-	enum intel_engine_id id;
-	int ret;
-
-	if (!i915_modparams.enable_execlists) {
-		seq_printf(m, "Logical Ring Contexts are disabled\n");
-		return 0;
-	}
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link)
-		for_each_engine(engine, dev_priv, id)
-			i915_dump_lrc_obj(m, ctx, engine);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	return 0;
-}
-
 static const char *swizzle_string(unsigned swizzle)
 {
 	switch (swizzle) {
@@ -3304,69 +3230,6 @@ static int i915_shrinker_info(struct seq_file *m, void *unused)
 	return 0;
 }
 
-static int i915_semaphore_status(struct seq_file *m, void *unused)
-{
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct intel_engine_cs *engine;
-	int num_rings = INTEL_INFO(dev_priv)->num_rings;
-	enum intel_engine_id id;
-	int j, ret;
-
-	if (!i915_modparams.semaphores) {
-		seq_puts(m, "Semaphores are disabled\n");
-		return 0;
-	}
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-	intel_runtime_pm_get(dev_priv);
-
-	if (IS_BROADWELL(dev_priv)) {
-		struct page *page;
-		uint64_t *seqno;
-
-		page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0);
-
-		seqno = (uint64_t *)kmap_atomic(page);
-		for_each_engine(engine, dev_priv, id) {
-			uint64_t offset;
-
-			seq_printf(m, "%s\n", engine->name);
-
-			seq_puts(m, "  Last signal:");
-			for (j = 0; j < num_rings; j++) {
-				offset = id * I915_NUM_ENGINES + j;
-				seq_printf(m, "0x%08llx (0x%02llx) ",
-					   seqno[offset], offset * 8);
-			}
-			seq_putc(m, '\n');
-
-			seq_puts(m, "  Last wait:  ");
-			for (j = 0; j < num_rings; j++) {
-				offset = id + (j * I915_NUM_ENGINES);
-				seq_printf(m, "0x%08llx (0x%02llx) ",
-					   seqno[offset], offset * 8);
-			}
-			seq_putc(m, '\n');
-
-		}
-		kunmap_atomic(seqno);
-	} else {
-		seq_puts(m, "  Last signal:");
-		for_each_engine(engine, dev_priv, id)
-			for (j = 0; j < num_rings; j++)
-				seq_printf(m, "0x%08x\n",
-					   I915_READ(engine->semaphore.mbox.signal[j]));
-		seq_putc(m, '\n');
-	}
-
-	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
-	return 0;
-}
-
 static int i915_shared_dplls_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -4833,7 +4696,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_vbt", i915_vbt, 0},
 	{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
 	{"i915_context_status", i915_context_status, 0},
-	{"i915_dump_lrc", i915_dump_lrc, 0},
 	{"i915_forcewake_domains", i915_forcewake_domains, 0},
 	{"i915_swizzle_info", i915_swizzle_info, 0},
 	{"i915_ppgtt_info", i915_ppgtt_info, 0},
@@ -4847,7 +4709,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_display_info", i915_display_info, 0},
 	{"i915_engine_info", i915_engine_info, 0},
 	{"i915_shrinker_info", i915_shrinker_info, 0},
-	{"i915_semaphore_status", i915_semaphore_status, 0},
 	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
 	{"i915_dp_mst_info", i915_dp_mst_info, 0},
 	{"i915_wa_registers", i915_wa_registers, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5170a8e..7faf20a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -48,6 +48,7 @@
 
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "i915_pmu.h"
 #include "i915_vgpu.h"
 #include "intel_drv.h"
 #include "intel_uc.h"
@@ -321,7 +322,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = USES_PPGTT(dev_priv);
 		break;
 	case I915_PARAM_HAS_SEMAPHORES:
-		value = i915_modparams.semaphores;
+		value = HAS_LEGACY_SEMAPHORES(dev_priv);
 		break;
 	case I915_PARAM_HAS_SECURE_BATCHES:
 		value = capable(CAP_SYS_ADMIN);
@@ -371,9 +372,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) {
 			value |= I915_SCHEDULER_CAP_ENABLED;
 			value |= I915_SCHEDULER_CAP_PRIORITY;
-
-			if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
-			    i915_modparams.enable_execlists)
+			if (HAS_LOGICAL_RING_PREEMPTION(dev_priv))
 				value |= I915_SCHEDULER_CAP_PREEMPTION;
 		}
 		break;
@@ -694,8 +693,6 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	/* Only enable hotplug handling once the fbdev is fully set up. */
 	intel_hpd_init(dev_priv);
 
-	drm_kms_helper_poll_init(dev);
-
 	return 0;
 
 cleanup_gem:
@@ -936,8 +933,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 
 	intel_detect_preproduction_hw(dev_priv);
 
-	i915_perf_init(dev_priv);
-
 	return 0;
 
 err_irq:
@@ -954,7 +949,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
  */
 static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 {
-	i915_perf_fini(dev_priv);
 	i915_gem_load_cleanup(dev_priv);
 	intel_irq_fini(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
@@ -1057,10 +1051,6 @@ static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv)
 
 static void intel_sanitize_options(struct drm_i915_private *dev_priv)
 {
-	i915_modparams.enable_execlists =
-		intel_sanitize_enable_execlists(dev_priv,
-						i915_modparams.enable_execlists);
-
 	/*
 	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
 	 * user's requested state against the hardware/driver capabilities.  We
@@ -1072,11 +1062,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
 					    i915_modparams.enable_ppgtt);
 	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915_modparams.enable_ppgtt);
 
-	i915_modparams.semaphores =
-		intel_sanitize_semaphores(dev_priv, i915_modparams.semaphores);
-	DRM_DEBUG_DRIVER("use GPU semaphores? %s\n",
-			 yesno(i915_modparams.semaphores));
-
 	intel_uc_sanitize_options(dev_priv);
 
 	intel_gvt_sanitize_options(dev_priv);
@@ -1101,6 +1086,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 
 	intel_sanitize_options(dev_priv);
 
+	i915_perf_init(dev_priv);
+
 	ret = i915_ggtt_probe_hw(dev_priv);
 	if (ret)
 		return ret;
@@ -1206,6 +1193,8 @@ static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv)
 {
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 
+	i915_perf_fini(dev_priv);
+
 	if (pdev->msi_enabled)
 		pci_disable_msi(pdev);
 
@@ -1224,7 +1213,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
 
-	i915_gem_shrinker_init(dev_priv);
+	i915_gem_shrinker_register(dev_priv);
+	i915_pmu_register(dev_priv);
 
 	/*
 	 * Notify a valid surface after modesetting,
@@ -1263,6 +1253,13 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
 	 * cannot run before the connectors are registered.
 	 */
 	intel_fbdev_initial_config_async(dev);
+
+	/*
+	 * We need to coordinate the hotplugs with the asynchronous fbdev
+	 * configuration, for which we use the fbdev->async_cookie.
+	 */
+	if (INTEL_INFO(dev_priv)->num_pipes)
+		drm_kms_helper_poll_init(dev);
 }
 
 /**
@@ -1274,17 +1271,25 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 	intel_fbdev_unregister(dev_priv);
 	intel_audio_deinit(dev_priv);
 
+	/*
+	 * After flushing the fbdev (incl. a late async config which will
+	 * have delayed queuing of a hotplug event), then flush the hotplug
+	 * events.
+	 */
+	drm_kms_helper_poll_fini(&dev_priv->drm);
+
 	intel_gpu_ips_teardown();
 	acpi_video_unregister();
 	intel_opregion_unregister(dev_priv);
 
 	i915_perf_unregister(dev_priv);
+	i915_pmu_unregister(dev_priv);
 
 	i915_teardown_sysfs(dev_priv);
 	i915_guc_log_unregister(dev_priv);
 	drm_dev_unregister(&dev_priv->drm);
 
-	i915_gem_shrinker_cleanup(dev_priv);
+	i915_gem_shrinker_unregister(dev_priv);
 }
 
 /**
@@ -1872,7 +1877,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
 {
 	struct i915_gpu_error *error = &i915->gpu_error;
 	int ret;
+	int i;
 
+	might_sleep();
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
 
@@ -1895,12 +1902,20 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
 		goto error;
 	}
 
-	ret = intel_gpu_reset(i915, ALL_ENGINES);
+	if (!intel_has_gpu_reset(i915)) {
+		DRM_DEBUG_DRIVER("GPU reset disabled\n");
+		goto error;
+	}
+
+	for (i = 0; i < 3; i++) {
+		ret = intel_gpu_reset(i915, ALL_ENGINES);
+		if (ret == 0)
+			break;
+
+		msleep(100);
+	}
 	if (ret) {
-		if (ret != -ENODEV)
-			DRM_ERROR("Failed to reset chip: %i\n", ret);
-		else
-			DRM_DEBUG_DRIVER("GPU reset disabled\n");
+		dev_err(i915->drm.dev, "Failed to reset chip\n");
 		goto error;
 	}
 
@@ -2512,7 +2527,7 @@ static int intel_runtime_suspend(struct device *kdev)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
 
-	if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && intel_rc6_enabled())))
+	if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv))))
 		return -ENODEV;
 
 	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv)))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 36bb492..594fd14 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/hash.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <linux/perf_event.h>
 #include <linux/pm_qos.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
@@ -79,8 +80,8 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20171117"
-#define DRIVER_TIMESTAMP	1510958822
+#define DRIVER_DATE		"20171201"
+#define DRIVER_TIMESTAMP	1512176839
 
 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
  * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -304,9 +305,9 @@ static inline bool transcoder_is_dsi(enum transcoder transcoder)
 
 /*
  * Global legacy plane identifier. Valid only for primary/sprite
- * planes on pre-g4x, and only for primary planes on g4x+.
+ * planes on pre-g4x, and only for primary planes on g4x-bdw.
  */
-enum plane {
+enum i9xx_plane_id {
 	PLANE_A,
 	PLANE_B,
 	PLANE_C,
@@ -560,13 +561,13 @@ struct i915_hotplug {
 	for_each_power_well_rev(__dev_priv, __power_well)		        \
 		for_each_if ((__power_well)->domains & (__domain_mask))
 
-#define for_each_intel_plane_in_state(__state, plane, plane_state, __i) \
+#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \
 	for ((__i) = 0; \
 	     (__i) < (__state)->base.dev->mode_config.num_total_plane && \
 		     ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \
-		      (plane_state) = to_intel_plane_state((__state)->base.planes[__i].state), 1); \
+		      (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \
 	     (__i)++) \
-		for_each_if (plane_state)
+		for_each_if (plane)
 
 #define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \
 	for ((__i) = 0; \
@@ -576,7 +577,6 @@ struct i915_hotplug {
 	     (__i)++) \
 		for_each_if (crtc)
 
-
 #define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \
 	for ((__i) = 0; \
 	     (__i) < (__state)->base.dev->mode_config.num_total_plane && \
@@ -698,7 +698,8 @@ struct drm_i915_display_funcs {
 			  struct intel_cdclk_state *cdclk_state);
 	void (*set_cdclk)(struct drm_i915_private *dev_priv,
 			  const struct intel_cdclk_state *cdclk_state);
-	int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane);
+	int (*get_fifo_size)(struct drm_i915_private *dev_priv,
+			     enum i9xx_plane_id i9xx_plane);
 	int (*compute_pipe_wm)(struct intel_crtc_state *cstate);
 	int (*compute_intermediate_wm)(struct drm_device *dev,
 				       struct intel_crtc *intel_crtc,
@@ -942,7 +943,6 @@ struct i915_gpu_state {
 	u64 fence[I915_MAX_NUM_FENCES];
 	struct intel_overlay_error_state *overlay;
 	struct intel_display_error_state *display;
-	struct drm_i915_error_object *semaphore;
 
 	struct drm_i915_error_engine {
 		int engine_id;
@@ -1009,6 +1009,7 @@ struct i915_gpu_state {
 		long user_bo_count;
 
 		struct drm_i915_error_object *wa_ctx;
+		struct drm_i915_error_object *default_state;
 
 		struct drm_i915_error_request {
 			long jiffies;
@@ -1145,7 +1146,7 @@ struct intel_fbc {
 
 		struct {
 			enum pipe pipe;
-			enum plane plane;
+			enum i9xx_plane_id i9xx_plane;
 			unsigned int fence_y_offset;
 		} crtc;
 
@@ -2291,7 +2292,8 @@ struct drm_i915_private {
 	struct i915_gem_context *kernel_context;
 	/* Context only to be used for injecting preemption commands */
 	struct i915_gem_context *preempt_context;
-	struct i915_vma *semaphore;
+	struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
+					    [MAX_ENGINE_INSTANCE + 1];
 
 	struct drm_dma_handle *status_page_dmah;
 	struct resource mch_res;
@@ -2619,7 +2621,6 @@ struct drm_i915_private {
 
 			bool periodic;
 			int period_exponent;
-			int timestamp_frequency;
 
 			struct i915_oa_config test_config;
 
@@ -2764,6 +2765,8 @@ struct drm_i915_private {
 		int	irq;
 	} lpe_audio;
 
+	struct i915_pmu pmu;
+
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
@@ -3142,6 +3145,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_BLT(dev_priv)	HAS_ENGINE(dev_priv, BCS)
 #define HAS_VEBOX(dev_priv)	HAS_ENGINE(dev_priv, VECS)
 
+#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN7(dev_priv)
+
 #define HAS_LLC(dev_priv)	((dev_priv)->info.has_llc)
 #define HAS_SNOOP(dev_priv)	((dev_priv)->info.has_snoop)
 #define HAS_EDRAM(dev_priv)	(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
@@ -3154,6 +3159,9 @@ intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_contexts)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+
+#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
+
 #define USES_PPGTT(dev_priv)		(i915_modparams.enable_ppgtt)
 #define USES_FULL_PPGTT(dev_priv)	(i915_modparams.enable_ppgtt >= 2)
 #define USES_FULL_48BIT_PPGTT(dev_priv)	(i915_modparams.enable_ppgtt == 3)
@@ -3205,8 +3213,10 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_DDI(dev_priv)		 ((dev_priv)->info.has_ddi)
 #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg)
 #define HAS_PSR(dev_priv)		 ((dev_priv)->info.has_psr)
+
 #define HAS_RC6(dev_priv)		 ((dev_priv)->info.has_rc6)
 #define HAS_RC6p(dev_priv)		 ((dev_priv)->info.has_rc6p)
+#define HAS_RC6pp(dev_priv)		 (false) /* HW was never validated */
 
 #define HAS_CSR(dev_priv)	((dev_priv)->info.has_csr)
 
@@ -3302,8 +3312,6 @@ intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 				int enable_ppgtt);
 
-bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value);
-
 /* i915_drv.c */
 void __printf(3, 4)
 __i915_printk(struct drm_i915_private *dev_priv, const char *level,
@@ -3905,7 +3913,7 @@ i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
 				phys_addr_t size);
 
 /* i915_gem_shrinker.c */
-unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
+unsigned long i915_gem_shrink(struct drm_i915_private *i915,
 			      unsigned long target,
 			      unsigned long *nr_scanned,
 			      unsigned flags);
@@ -3914,9 +3922,9 @@ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
 #define I915_SHRINK_BOUND 0x4
 #define I915_SHRINK_ACTIVE 0x8
 #define I915_SHRINK_VMAPS 0x10
-unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
-void i915_gem_shrinker_init(struct drm_i915_private *dev_priv);
-void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv);
+unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
+void i915_gem_shrinker_register(struct drm_i915_private *i915);
+void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
 
 
 /* i915_gem_tiling.c */
@@ -4223,9 +4231,17 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder,
 
 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
-u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
+u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
 			   const i915_reg_t reg);
 
+u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1);
+
+static inline u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
+					 const i915_reg_t reg)
+{
+	return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000);
+}
+
 #define I915_READ8(reg)		dev_priv->uncore.funcs.mmio_readb(dev_priv, (reg), true)
 #define I915_WRITE8(reg, val)	dev_priv->uncore.funcs.mmio_writeb(dev_priv, (reg), (val), true)
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 61ba321..e083f24 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3371,7 +3371,9 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	synchronize_irq(dev_priv->drm.irq);
 
 	intel_engines_park(dev_priv);
-	i915_gem_timelines_mark_idle(dev_priv);
+	i915_gem_timelines_park(dev_priv);
+
+	i915_pmu_gt_parked(dev_priv);
 
 	GEM_BUG_ON(!dev_priv->gt.awake);
 	dev_priv->gt.awake = false;
@@ -4772,17 +4774,19 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	ret = i915_gem_switch_to_kernel_context(dev_priv);
-	if (ret)
-		goto err_unlock;
+	if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
+		ret = i915_gem_switch_to_kernel_context(dev_priv);
+		if (ret)
+			goto err_unlock;
 
-	ret = i915_gem_wait_for_idle(dev_priv,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED);
-	if (ret && ret != -EIO)
-		goto err_unlock;
+		ret = i915_gem_wait_for_idle(dev_priv,
+					     I915_WAIT_INTERRUPTIBLE |
+					     I915_WAIT_LOCKED);
+		if (ret && ret != -EIO)
+			goto err_unlock;
 
-	assert_kernel_context_is_current(dev_priv);
+		assert_kernel_context_is_current(dev_priv);
+	}
 	i915_gem_contexts_lost(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 
@@ -4997,25 +5001,6 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 	return ret;
 }
 
-bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
-{
-	if (INTEL_INFO(dev_priv)->gen < 6)
-		return false;
-
-	/* TODO: make semaphores and Execlists play nicely together */
-	if (i915_modparams.enable_execlists)
-		return false;
-
-	if (value >= 0)
-		return value;
-
-	/* Enable semaphores on SNB when IO remapping is off */
-	if (IS_GEN6(dev_priv) && intel_vtd_active())
-		return false;
-
-	return true;
-}
-
 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 {
 	struct i915_gem_context *ctx;
@@ -5045,7 +5030,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			goto out_ctx;
 		}
 
-		err = i915_switch_context(rq);
+		err = 0;
 		if (engine->init_context)
 			err = engine->init_context(rq);
 
@@ -5134,8 +5119,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 {
 	int ret;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	/*
 	 * We need to fallback to 4K pages since gvt gtt handling doesn't
 	 * support huge page entries - we will need to check either hypervisor
@@ -5147,26 +5130,27 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
 
-	if (!i915_modparams.enable_execlists) {
-		dev_priv->gt.resume = intel_legacy_submission_resume;
-		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
-	} else {
+	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
 		dev_priv->gt.resume = intel_lr_context_resume;
 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
+	} else {
+		dev_priv->gt.resume = intel_legacy_submission_resume;
+		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
 	}
 
+	ret = i915_gem_init_userptr(dev_priv);
+	if (ret)
+		return ret;
+
 	/* This is just a security blanket to placate dragons.
 	 * On some systems, we very sporadically observe that the first TLBs
 	 * used by the CS may be stale, despite us poking the TLB reset. If
 	 * we hold the forcewake during initialisation these problems
 	 * just magically go away.
 	 */
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
-	ret = i915_gem_init_userptr(dev_priv);
-	if (ret)
-		goto out_unlock;
-
 	ret = i915_gem_init_ggtt(dev_priv);
 	if (ret)
 		goto out_unlock;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 2db0406..ce3139e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -460,14 +460,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
 	init_llist_head(&dev_priv->contexts.free_list);
 
-	if (intel_vgpu_active(dev_priv) &&
-	    HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
-		if (!i915_modparams.enable_execlists) {
-			DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
-			return -EINVAL;
-		}
-	}
-
 	/* Using the simple ida interface, the max is limited by sizeof(int) */
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
 	ida_init(&dev_priv->contexts.hw_ida);
@@ -515,6 +507,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
 
 	for_each_engine(engine, dev_priv, id) {
 		engine->legacy_active_context = NULL;
+		engine->legacy_active_ppgtt = NULL;
 
 		if (!engine->last_retired_context)
 			continue;
@@ -574,300 +567,6 @@ void i915_gem_context_close(struct drm_file *file)
 	idr_destroy(&file_priv->context_idr);
 }
 
-static inline int
-mi_set_context(struct drm_i915_gem_request *req, u32 flags)
-{
-	struct drm_i915_private *dev_priv = req->i915;
-	struct intel_engine_cs *engine = req->engine;
-	enum intel_engine_id id;
-	const int num_rings =
-		/* Use an extended w/a on gen7 if signalling from other rings */
-		(i915_modparams.semaphores && INTEL_GEN(dev_priv) == 7) ?
-		INTEL_INFO(dev_priv)->num_rings - 1 :
-		0;
-	int len;
-	u32 *cs;
-
-	flags |= MI_MM_SPACE_GTT;
-	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
-		/* These flags are for resource streamer on HSW+ */
-		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
-	else
-		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
-
-	len = 4;
-	if (INTEL_GEN(dev_priv) >= 7)
-		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
-
-	cs = intel_ring_begin(req, len);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-	if (INTEL_GEN(dev_priv) >= 7) {
-		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-		if (num_rings) {
-			struct intel_engine_cs *signaller;
-
-			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
-			for_each_engine(signaller, dev_priv, id) {
-				if (signaller == engine)
-					continue;
-
-				*cs++ = i915_mmio_reg_offset(
-					   RING_PSMI_CTL(signaller->mmio_base));
-				*cs++ = _MASKED_BIT_ENABLE(
-						GEN6_PSMI_SLEEP_MSG_DISABLE);
-			}
-		}
-	}
-
-	*cs++ = MI_NOOP;
-	*cs++ = MI_SET_CONTEXT;
-	*cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags;
-	/*
-	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
-	 * WaMiSetContext_Hang:snb,ivb,vlv
-	 */
-	*cs++ = MI_NOOP;
-
-	if (INTEL_GEN(dev_priv) >= 7) {
-		if (num_rings) {
-			struct intel_engine_cs *signaller;
-			i915_reg_t last_reg = {}; /* keep gcc quiet */
-
-			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
-			for_each_engine(signaller, dev_priv, id) {
-				if (signaller == engine)
-					continue;
-
-				last_reg = RING_PSMI_CTL(signaller->mmio_base);
-				*cs++ = i915_mmio_reg_offset(last_reg);
-				*cs++ = _MASKED_BIT_DISABLE(
-						GEN6_PSMI_SLEEP_MSG_DISABLE);
-			}
-
-			/* Insert a delay before the next switch! */
-			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-			*cs++ = i915_mmio_reg_offset(last_reg);
-			*cs++ = i915_ggtt_offset(engine->scratch);
-			*cs++ = MI_NOOP;
-		}
-		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	}
-
-	intel_ring_advance(req, cs);
-
-	return 0;
-}
-
-static int remap_l3(struct drm_i915_gem_request *req, int slice)
-{
-	u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice];
-	int i;
-
-	if (!remap_info)
-		return 0;
-
-	cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/*
-	 * Note: We do not worry about the concurrent register cacheline hang
-	 * here because no other code should access these registers other than
-	 * at initialization time.
-	 */
-	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
-	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
-		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
-		*cs++ = remap_info[i];
-	}
-	*cs++ = MI_NOOP;
-	intel_ring_advance(req, cs);
-
-	return 0;
-}
-
-static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
-				   struct intel_engine_cs *engine,
-				   struct i915_gem_context *to)
-{
-	if (to->remap_slice)
-		return false;
-
-	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
-		return false;
-
-	return to == engine->legacy_active_context;
-}
-
-static bool
-needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine)
-{
-	struct i915_gem_context *from = engine->legacy_active_context;
-
-	if (!ppgtt)
-		return false;
-
-	/* Always load the ppgtt on first use */
-	if (!from)
-		return true;
-
-	/* Same context without new entries, skip */
-	if ((!from->ppgtt || from->ppgtt == ppgtt) &&
-	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
-		return false;
-
-	if (engine->id != RCS)
-		return true;
-
-	if (INTEL_GEN(engine->i915) < 8)
-		return true;
-
-	return false;
-}
-
-static bool
-needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
-		   struct i915_gem_context *to,
-		   u32 hw_flags)
-{
-	if (!ppgtt)
-		return false;
-
-	if (!IS_GEN8(to->i915))
-		return false;
-
-	if (hw_flags & MI_RESTORE_INHIBIT)
-		return true;
-
-	return false;
-}
-
-static int do_rcs_switch(struct drm_i915_gem_request *req)
-{
-	struct i915_gem_context *to = req->ctx;
-	struct intel_engine_cs *engine = req->engine;
-	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
-	struct i915_gem_context *from = engine->legacy_active_context;
-	u32 hw_flags;
-	int ret, i;
-
-	GEM_BUG_ON(engine->id != RCS);
-
-	if (skip_rcs_switch(ppgtt, engine, to))
-		return 0;
-
-	if (needs_pd_load_pre(ppgtt, engine)) {
-		/* Older GENs and non render rings still want the load first,
-		 * "PP_DCLV followed by PP_DIR_BASE register through Load
-		 * Register Immediate commands in Ring Buffer before submitting
-		 * a context."*/
-		trace_switch_mm(engine, to);
-		ret = ppgtt->switch_mm(ppgtt, req);
-		if (ret)
-			return ret;
-	}
-
-	if (i915_gem_context_is_kernel(to))
-		/*
-		 * The kernel context(s) is treated as pure scratch and is not
-		 * expected to retain any state (as we sacrifice it during
-		 * suspend and on resume it may be corrupted). This is ok,
-		 * as nothing actually executes using the kernel context; it
-		 * is purely used for flushing user contexts.
-		 */
-		hw_flags = MI_RESTORE_INHIBIT;
-	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
-		hw_flags = MI_FORCE_RESTORE;
-	else
-		hw_flags = 0;
-
-	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
-		ret = mi_set_context(req, hw_flags);
-		if (ret)
-			return ret;
-
-		engine->legacy_active_context = to;
-	}
-
-	/* GEN8 does *not* require an explicit reload if the PDPs have been
-	 * setup, and we do not wish to move them.
-	 */
-	if (needs_pd_load_post(ppgtt, to, hw_flags)) {
-		trace_switch_mm(engine, to);
-		ret = ppgtt->switch_mm(ppgtt, req);
-		/* The hardware context switch is emitted, but we haven't
-		 * actually changed the state - so it's probably safe to bail
-		 * here. Still, let the user know something dangerous has
-		 * happened.
-		 */
-		if (ret)
-			return ret;
-	}
-
-	if (ppgtt)
-		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
-
-	for (i = 0; i < MAX_L3_SLICES; i++) {
-		if (!(to->remap_slice & (1<<i)))
-			continue;
-
-		ret = remap_l3(req, i);
-		if (ret)
-			return ret;
-
-		to->remap_slice &= ~(1<<i);
-	}
-
-	return 0;
-}
-
-/**
- * i915_switch_context() - perform a GPU context switch.
- * @req: request for which we'll execute the context switch
- *
- * The context life cycle is simple. The context refcount is incremented and
- * decremented by 1 and create and destroy. If the context is in use by the GPU,
- * it will have a refcount > 1. This allows us to destroy the context abstract
- * object while letting the normal object tracking destroy the backing BO.
- *
- * This function should not be used in execlists mode.  Instead the context is
- * switched by writing to the ELSP and requests keep a reference to their
- * context.
- */
-int i915_switch_context(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *engine = req->engine;
-
-	lockdep_assert_held(&req->i915->drm.struct_mutex);
-	if (i915_modparams.enable_execlists)
-		return 0;
-
-	if (!req->ctx->engine[engine->id].state) {
-		struct i915_gem_context *to = req->ctx;
-		struct i915_hw_ppgtt *ppgtt =
-			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
-
-		if (needs_pd_load_pre(ppgtt, engine)) {
-			int ret;
-
-			trace_switch_mm(engine, to);
-			ret = ppgtt->switch_mm(ppgtt, req);
-			if (ret)
-				return ret;
-
-			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
-		}
-
-		engine->legacy_active_context = to;
-		return 0;
-	}
-
-	return do_rcs_switch(req);
-}
-
 static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine)
 {
 	struct i915_gem_timeline *timeline;
@@ -899,7 +598,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 
 	for_each_engine(engine, dev_priv, id) {
 		struct drm_i915_gem_request *req;
-		int ret;
 
 		if (engine_has_idle_kernel_context(engine))
 			continue;
@@ -922,10 +620,14 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 								 GFP_KERNEL);
 		}
 
-		ret = i915_switch_context(req);
-		i915_add_request(req);
-		if (ret)
-			return ret;
+		/*
+		 * Force a flush after the switch to ensure that all rendering
+		 * and operations prior to switching to the kernel context hits
+		 * memory. This should be guaranteed by the previous request,
+		 * but an extra layer of paranoia before we declare the system
+		 * idle (on suspend etc) is advisable!
+		 */
+		__i915_add_request(req, true);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 53ccb27..70ccd63 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -271,7 +271,7 @@ static inline u64 gen8_noncanonical_addr(u64 address)
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
-	return eb->engine->needs_cmd_parser && eb->batch_len;
+	return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
 }
 
 static int eb_create(struct i915_execbuffer *eb)
@@ -1111,14 +1111,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
-	err = eb->engine->emit_flush(rq, EMIT_INVALIDATE);
-	if (err)
-		goto err_request;
-
-	err = i915_switch_context(rq);
-	if (err)
-		goto err_request;
-
 	err = eb->engine->emit_bb_start(rq,
 					batch->node.start, PAGE_SIZE,
 					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
@@ -1818,8 +1810,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	/* Unconditionally flush any chipset caches (for streaming writes). */
 	i915_gem_chipset_flush(eb->i915);
 
-	/* Unconditionally invalidate GPU caches and TLBs. */
-	return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE);
+	return 0;
 }
 
 static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
@@ -1965,10 +1956,6 @@ static int eb_submit(struct i915_execbuffer *eb)
 	if (err)
 		return err;
 
-	err = i915_switch_context(eb->request);
-	if (err)
-		return err;
-
 	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
 		err = i915_reset_gen7_sol_offsets(eb->request);
 		if (err)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 64e8ae1..f3c35e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -178,7 +178,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 		return 0;
 	}
 
-	if (INTEL_GEN(dev_priv) >= 8 && i915_modparams.enable_execlists) {
+	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
 		if (has_full_48bit_ppgtt)
 			return 3;
 
@@ -2162,7 +2162,7 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
 	/* In the case of execlists, PPGTT is enabled by the context descriptor
 	 * and the PDPs are contained within the context itself.  We don't
 	 * need to do anything here. */
-	if (i915_modparams.enable_execlists)
+	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
 		return 0;
 
 	if (!USES_PPGTT(dev_priv))
@@ -3737,9 +3737,6 @@ intel_rotate_pages(struct intel_rotation_info *rot_info,
 				  rot_info->plane[i].stride, st, sg);
 	}
 
-	DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
-		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
-
 	kvfree(page_addr_list);
 
 	return st;
@@ -3749,8 +3746,8 @@ intel_rotate_pages(struct intel_rotation_info *rot_info,
 err_st_alloc:
 	kvfree(page_addr_list);
 
-	DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
-		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
+	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+			 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
 
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index c2723a0..f7fc0df 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -208,10 +208,6 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *rq)
 	if (err)
 		goto err_unpin;
 
-	err = engine->emit_flush(rq, EMIT_INVALIDATE);
-	if (err)
-		goto err_unpin;
-
 	err = engine->emit_bb_start(rq,
 				    so.batch_offset, so.batch_size,
 				    I915_DISPATCH_SECURE);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e0d6221..a90bdd2 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -258,6 +258,7 @@ static void mark_busy(struct drm_i915_private *i915)
 	i915_update_gfx_val(i915);
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_busy(i915);
+	i915_pmu_gt_unparked(i915);
 
 	intel_engines_unpark(i915);
 
@@ -624,6 +625,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		goto err_unpin;
 
+	ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST);
+	if (ret)
+		goto err_unreserve;
+
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
@@ -703,22 +708,30 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
 	GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz);
 
-	ret = engine->request_alloc(req);
-	if (ret)
-		goto err_ctx;
-
-	/* Record the position of the start of the request so that
+	/*
+	 * Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
 	req->head = req->ring->emit;
 
+	/* Unconditionally invalidate GPU caches and TLBs. */
+	ret = engine->emit_flush(req, EMIT_INVALIDATE);
+	if (ret)
+		goto err_unwind;
+
+	ret = engine->request_alloc(req);
+	if (ret)
+		goto err_unwind;
+
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
 	return req;
 
-err_ctx:
+err_unwind:
+	req->ring->emit = req->head;
+
 	/* Make sure we didn't add ourselves to external state before freeing */
 	GEM_BUG_ON(!list_empty(&req->active_list));
 	GEM_BUG_ON(!list_empty(&req->priotree.signalers_list));
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 3770e33..9029ed0 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -35,9 +35,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 
-static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
+static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
 {
-	switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) {
+	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
 	case MUTEX_TRYLOCK_RECURSIVE:
 		*unlock = false;
 		return true;
@@ -47,7 +47,7 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
 		preempt_disable();
 		do {
 			cpu_relax();
-			if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+			if (mutex_trylock(&i915->drm.struct_mutex)) {
 				*unlock = true;
 				break;
 			}
@@ -63,12 +63,12 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
 	BUG();
 }
 
-static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock)
+static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
 {
 	if (!unlock)
 		return;
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static bool swap_available(void)
@@ -118,7 +118,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 
 /**
  * i915_gem_shrink - Shrink buffer object caches
- * @dev_priv: i915 device
+ * @i915: i915 device
  * @target: amount of memory to make available, in pages
  * @nr_scanned: optional output for number of pages scanned (incremental)
  * @flags: control flags for selecting cache types
@@ -142,7 +142,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
  * The number of pages of backing storage actually released.
  */
 unsigned long
-i915_gem_shrink(struct drm_i915_private *dev_priv,
+i915_gem_shrink(struct drm_i915_private *i915,
 		unsigned long target,
 		unsigned long *nr_scanned,
 		unsigned flags)
@@ -151,15 +151,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 		struct list_head *list;
 		unsigned int bit;
 	} phases[] = {
-		{ &dev_priv->mm.unbound_list, I915_SHRINK_UNBOUND },
-		{ &dev_priv->mm.bound_list, I915_SHRINK_BOUND },
+		{ &i915->mm.unbound_list, I915_SHRINK_UNBOUND },
+		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
 		{ NULL, 0 },
 	}, *phase;
 	unsigned long count = 0;
 	unsigned long scanned = 0;
 	bool unlock;
 
-	if (!shrinker_lock(dev_priv, &unlock))
+	if (!shrinker_lock(i915, &unlock))
 		return 0;
 
 	/*
@@ -172,10 +172,10 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 	 * we will free as much as we can and hope to get a second chance.
 	 */
 	if (flags & I915_SHRINK_ACTIVE)
-		i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
+		i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
 
-	trace_i915_gem_shrink(dev_priv, target, flags);
-	i915_gem_retire_requests(dev_priv);
+	trace_i915_gem_shrink(i915, target, flags);
+	i915_gem_retire_requests(i915);
 
 	/*
 	 * Unbinding of objects will require HW access; Let us not wake the
@@ -183,7 +183,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 	 * we will force the wake during oom-notifier.
 	 */
 	if ((flags & I915_SHRINK_BOUND) &&
-	    !intel_runtime_pm_get_if_in_use(dev_priv))
+	    !intel_runtime_pm_get_if_in_use(i915))
 		flags &= ~I915_SHRINK_BOUND;
 
 	/*
@@ -221,7 +221,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 		 * to be able to shrink their pages, so they remain on
 		 * the unbound/bound list until actually freed.
 		 */
-		spin_lock(&dev_priv->mm.obj_lock);
+		spin_lock(&i915->mm.obj_lock);
 		while (count < target &&
 		       (obj = list_first_entry_or_null(phase->list,
 						       typeof(*obj),
@@ -244,7 +244,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			if (!can_release_pages(obj))
 				continue;
 
-			spin_unlock(&dev_priv->mm.obj_lock);
+			spin_unlock(&i915->mm.obj_lock);
 
 			if (unsafe_drop_pages(obj)) {
 				/* May arrive from get_pages on another bo */
@@ -258,18 +258,18 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			}
 			scanned += obj->base.size >> PAGE_SHIFT;
 
-			spin_lock(&dev_priv->mm.obj_lock);
+			spin_lock(&i915->mm.obj_lock);
 		}
 		list_splice_tail(&still_in_list, phase->list);
-		spin_unlock(&dev_priv->mm.obj_lock);
+		spin_unlock(&i915->mm.obj_lock);
 	}
 
 	if (flags & I915_SHRINK_BOUND)
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put(i915);
 
-	i915_gem_retire_requests(dev_priv);
+	i915_gem_retire_requests(i915);
 
-	shrinker_unlock(dev_priv, unlock);
+	shrinker_unlock(i915, unlock);
 
 	if (nr_scanned)
 		*nr_scanned += scanned;
@@ -278,7 +278,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 
 /**
  * i915_gem_shrink_all - Shrink buffer object caches completely
- * @dev_priv: i915 device
+ * @i915: i915 device
  *
  * This is a simple wraper around i915_gem_shrink() to aggressively shrink all
  * caches completely. It also first waits for and retires all outstanding
@@ -290,16 +290,16 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
  * Returns:
  * The number of pages of backing storage actually released.
  */
-unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
+unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
 {
 	unsigned long freed;
 
-	intel_runtime_pm_get(dev_priv);
-	freed = i915_gem_shrink(dev_priv, -1UL, NULL,
+	intel_runtime_pm_get(i915);
+	freed = i915_gem_shrink(i915, -1UL, NULL,
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
 				I915_SHRINK_ACTIVE);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(i915);
 
 	return freed;
 }
@@ -347,53 +347,53 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 static unsigned long
 i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 {
-	struct drm_i915_private *dev_priv =
+	struct drm_i915_private *i915 =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
 	unsigned long freed;
 	bool unlock;
 
 	sc->nr_scanned = 0;
 
-	if (!shrinker_lock(dev_priv, &unlock))
+	if (!shrinker_lock(i915, &unlock))
 		return SHRINK_STOP;
 
-	freed = i915_gem_shrink(dev_priv,
+	freed = i915_gem_shrink(i915,
 				sc->nr_to_scan,
 				&sc->nr_scanned,
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
 				I915_SHRINK_PURGEABLE);
 	if (freed < sc->nr_to_scan)
-		freed += i915_gem_shrink(dev_priv,
+		freed += i915_gem_shrink(i915,
 					 sc->nr_to_scan - sc->nr_scanned,
 					 &sc->nr_scanned,
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
 	if (freed < sc->nr_to_scan && current_is_kswapd()) {
-		intel_runtime_pm_get(dev_priv);
-		freed += i915_gem_shrink(dev_priv,
+		intel_runtime_pm_get(i915);
+		freed += i915_gem_shrink(i915,
 					 sc->nr_to_scan - sc->nr_scanned,
 					 &sc->nr_scanned,
 					 I915_SHRINK_ACTIVE |
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put(i915);
 	}
 
-	shrinker_unlock(dev_priv, unlock);
+	shrinker_unlock(i915, unlock);
 
 	return sc->nr_scanned ? freed : SHRINK_STOP;
 }
 
 static bool
-shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock,
+shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
 			      int timeout_ms)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
 
 	do {
-		if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
-		    shrinker_lock(dev_priv, unlock))
+		if (i915_gem_wait_for_idle(i915, 0) == 0 &&
+		    shrinker_lock(i915, unlock))
 			break;
 
 		schedule_timeout_killable(1);
@@ -412,32 +412,32 @@ shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock,
 static int
 i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 {
-	struct drm_i915_private *dev_priv =
+	struct drm_i915_private *i915 =
 		container_of(nb, struct drm_i915_private, mm.oom_notifier);
 	struct drm_i915_gem_object *obj;
 	unsigned long unevictable, bound, unbound, freed_pages;
 
-	freed_pages = i915_gem_shrink_all(dev_priv);
+	freed_pages = i915_gem_shrink_all(i915);
 
 	/* Because we may be allocating inside our own driver, we cannot
 	 * assert that there are no objects with pinned pages that are not
 	 * being pointed to by hardware.
 	 */
 	unbound = bound = unevictable = 0;
-	spin_lock(&dev_priv->mm.obj_lock);
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) {
+	spin_lock(&i915->mm.obj_lock);
+	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) {
 		if (!can_release_pages(obj))
 			unevictable += obj->base.size >> PAGE_SHIFT;
 		else
 			unbound += obj->base.size >> PAGE_SHIFT;
 	}
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
+	list_for_each_entry(obj, &i915->mm.bound_list, mm.link) {
 		if (!can_release_pages(obj))
 			unevictable += obj->base.size >> PAGE_SHIFT;
 		else
 			bound += obj->base.size >> PAGE_SHIFT;
 	}
-	spin_unlock(&dev_priv->mm.obj_lock);
+	spin_unlock(&i915->mm.obj_lock);
 
 	if (freed_pages || unbound || bound)
 		pr_info("Purging GPU memory, %lu pages freed, "
@@ -455,74 +455,74 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 static int
 i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
 {
-	struct drm_i915_private *dev_priv =
+	struct drm_i915_private *i915 =
 		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
 	bool unlock;
 	int ret;
 
-	if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
+	if (!shrinker_lock_uninterruptible(i915, &unlock, 5000))
 		return NOTIFY_DONE;
 
 	/* Force everything onto the inactive lists */
-	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
+	ret = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
 	if (ret)
 		goto out;
 
-	intel_runtime_pm_get(dev_priv);
-	freed_pages += i915_gem_shrink(dev_priv, -1UL, NULL,
+	intel_runtime_pm_get(i915);
+	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
 				       I915_SHRINK_BOUND |
 				       I915_SHRINK_UNBOUND |
 				       I915_SHRINK_ACTIVE |
 				       I915_SHRINK_VMAPS);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(i915);
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
 	list_for_each_entry_safe(vma, next,
-				 &dev_priv->ggtt.base.inactive_list, vm_link) {
+				 &i915->ggtt.base.inactive_list, vm_link) {
 		unsigned long count = vma->node.size >> PAGE_SHIFT;
 		if (vma->iomap && i915_vma_unbind(vma) == 0)
 			freed_pages += count;
 	}
 
 out:
-	shrinker_unlock(dev_priv, unlock);
+	shrinker_unlock(i915, unlock);
 
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
 }
 
 /**
- * i915_gem_shrinker_init - Initialize i915 shrinker
- * @dev_priv: i915 device
+ * i915_gem_shrinker_register - Register the i915 shrinker
+ * @i915: i915 device
  *
  * This function registers and sets up the i915 shrinker and OOM handler.
  */
-void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
+void i915_gem_shrinker_register(struct drm_i915_private *i915)
 {
-	dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
-	dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
-	dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
-	dev_priv->mm.shrinker.batch = 4096;
-	WARN_ON(register_shrinker(&dev_priv->mm.shrinker));
+	i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
+	i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
+	i915->mm.shrinker.seeks = DEFAULT_SEEKS;
+	i915->mm.shrinker.batch = 4096;
+	WARN_ON(register_shrinker(&i915->mm.shrinker));
 
-	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
-	WARN_ON(register_oom_notifier(&dev_priv->mm.oom_notifier));
+	i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
+	WARN_ON(register_oom_notifier(&i915->mm.oom_notifier));
 
-	dev_priv->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
-	WARN_ON(register_vmap_purge_notifier(&dev_priv->mm.vmap_notifier));
+	i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
+	WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier));
 }
 
 /**
- * i915_gem_shrinker_cleanup - Clean up i915 shrinker
- * @dev_priv: i915 device
+ * i915_gem_shrinker_unregister - Unregisters the i915 shrinker
+ * @i915: i915 device
  *
  * This function unregisters the i915 shrinker and OOM handler.
  */
-void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv)
+void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
 {
-	WARN_ON(unregister_vmap_purge_notifier(&dev_priv->mm.vmap_notifier));
-	WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier));
-	unregister_shrinker(&dev_priv->mm.shrinker);
+	WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
+	WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
+	unregister_shrinker(&i915->mm.shrinker);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index c597ce2..c01905d 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -107,8 +107,8 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915)
 }
 
 /**
- * i915_gem_timelines_mark_idle -- called when the driver idles
- * @i915 - the drm_i915_private device
+ * i915_gem_timelines_park - called when the driver idles
+ * @i915: the drm_i915_private device
  *
  * When the driver is completely idle, we know that all of our sync points
  * have been signaled and our tracking is then entirely redundant. Any request
@@ -116,7 +116,7 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915)
  * the fence is signaled and therefore we will not even look them up in the
  * sync point map.
  */
-void i915_gem_timelines_mark_idle(struct drm_i915_private *i915)
+void i915_gem_timelines_park(struct drm_i915_private *i915)
 {
 	struct i915_gem_timeline *timeline;
 	int i;
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index bfb5eb9..b5a2240 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -93,7 +93,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
 			   struct i915_gem_timeline *tl,
 			   const char *name);
 int i915_gem_timeline_init__global(struct drm_i915_private *i915);
-void i915_gem_timelines_mark_idle(struct drm_i915_private *i915);
+void i915_gem_timelines_park(struct drm_i915_private *i915);
 void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
 
 static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7481c8e..48418fb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -791,9 +791,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
 		print_error_obj(m, dev_priv->engine[i],
 				"WA batchbuffer", ee->wa_batchbuffer);
-	}
 
-	print_error_obj(m, NULL, "Semaphores", error->semaphore);
+		print_error_obj(m, dev_priv->engine[i],
+				"NULL context", ee->default_state);
+	}
 
 	if (error->overlay)
 		intel_overlay_print_error_state(m, error->overlay);
@@ -903,8 +904,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
 			kfree(ee->waiters);
 	}
 
-	i915_error_object_free(error->semaphore);
-
 	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
 		kfree(error->active_bo[i]);
 	kfree(error->pinned_bo);
@@ -1116,34 +1115,6 @@ gen8_engine_sync_index(struct intel_engine_cs *engine,
 	return idx;
 }
 
-static void gen8_record_semaphore_state(struct i915_gpu_state *error,
-					struct intel_engine_cs *engine,
-					struct drm_i915_error_engine *ee)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct intel_engine_cs *to;
-	enum intel_engine_id id;
-
-	if (!error->semaphore)
-		return;
-
-	for_each_engine(to, dev_priv, id) {
-		int idx;
-		u16 signal_offset;
-		u32 *tmp;
-
-		if (engine == to)
-			continue;
-
-		signal_offset =
-			(GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
-		tmp = error->semaphore->pages[0];
-		idx = gen8_engine_sync_index(engine, to);
-
-		ee->semaphore_mboxes[idx] = tmp[signal_offset];
-	}
-}
-
 static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
 					struct drm_i915_error_engine *ee)
 {
@@ -1218,7 +1189,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	if (INTEL_GEN(dev_priv) >= 6) {
 		ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
 		if (INTEL_GEN(dev_priv) >= 8) {
-			gen8_record_semaphore_state(error, engine, ee);
 			ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
 		} else {
 			gen6_record_semaphore_state(engine, ee);
@@ -1447,15 +1417,30 @@ static void request_record_user_bo(struct drm_i915_gem_request *request,
 	ee->user_bo_count = count;
 }
 
+static struct drm_i915_error_object *
+capture_object(struct drm_i915_private *dev_priv,
+	       struct drm_i915_gem_object *obj)
+{
+	if (obj && i915_gem_object_has_pages(obj)) {
+		struct i915_vma fake = {
+			.node = { .start = U64_MAX, .size = obj->base.size },
+			.size = obj->base.size,
+			.pages = obj->mm.pages,
+			.obj = obj,
+		};
+
+		return i915_error_object_create(dev_priv, &fake);
+	} else {
+		return NULL;
+	}
+}
+
 static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 				  struct i915_gpu_state *error)
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	int i;
 
-	error->semaphore =
-		i915_error_object_create(dev_priv, dev_priv->semaphore);
-
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
 		struct intel_engine_cs *engine = dev_priv->engine[i];
 		struct drm_i915_error_engine *ee = &error->engine[i];
@@ -1521,6 +1506,9 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 
 		ee->wa_ctx =
 			i915_error_object_create(dev_priv, engine->wa_ctx.vma);
+
+		ee->default_state =
+			capture_object(dev_priv, engine->default_state);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4fb183a..7cac07d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3068,7 +3068,7 @@ static void vlv_display_irq_reset(struct drm_i915_private *dev_priv)
 	i9xx_pipestat_irq_reset(dev_priv);
 
 	GEN3_IRQ_RESET(VLV_);
-	dev_priv->irq_mask = ~0;
+	dev_priv->irq_mask = ~0u;
 }
 
 static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
@@ -3093,7 +3093,7 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 		enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT |
 			I915_LPE_PIPE_C_INTERRUPT;
 
-	WARN_ON(dev_priv->irq_mask != ~0);
+	WARN_ON(dev_priv->irq_mask != ~0u);
 
 	dev_priv->irq_mask = ~enable_mask;
 
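The ~0 to ~0u changes above are type hygiene rather than a behaviour change: irq_mask is a u32, so the unsigned literal keeps the literal's type in line with the field it is compared against. A standalone sketch (not part of the patch) showing that, with the usual 32-bit int ABI, both spellings compare identically:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t irq_mask = ~0u;	/* "everything masked" sentinel */

	/*
	 * ~0 has type int (value -1); the usual arithmetic conversions turn
	 * it into 0xffffffff before the comparison, so both forms agree,
	 * but ~0u avoids mixing a signed literal with the unsigned field.
	 */
	printf("irq_mask != ~0  -> %d\n", irq_mask != ~0);
	printf("irq_mask != ~0u -> %d\n", irq_mask != ~0u);

	return 0;
}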
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index b4faeb6..7bc5386 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -46,17 +46,6 @@ i915_param_named_unsafe(panel_ignore_lid, int, 0600,
 	"Override lid status (0=autodetect, 1=autodetect disabled [default], "
 	"-1=force lid closed, -2=force lid open)");
 
-i915_param_named_unsafe(semaphores, int, 0400,
-	"Use semaphores for inter-ring sync "
-	"(default: -1 (use per-chip defaults))");
-
-i915_param_named_unsafe(enable_rc6, int, 0400,
-	"Enable power-saving render C-state 6. "
-	"Different stages can be selected via bitmask values "
-	"(0 = disable; 1 = enable rc6; 2 = enable deep rc6; 4 = enable deepest rc6). "
-	"For example, 3 would enable rc6 and deep rc6, and 7 would enable everything. "
-	"default: -1 (use per-chip default)");
-
 i915_param_named_unsafe(enable_dc, int, 0400,
 	"Enable power-saving display C-states. "
 	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)");
@@ -99,10 +88,6 @@ i915_param_named_unsafe(enable_ppgtt, int, 0400,
 	"Override PPGTT usage. "
 	"(-1=auto [default], 0=disabled, 1=aliasing, 2=full, 3=full with extended address space)");
 
-i915_param_named_unsafe(enable_execlists, int, 0400,
-	"Override execlists usage. "
-	"(-1=auto [default], 0=disabled, 1=enabled)");
-
 i915_param_named_unsafe(enable_psr, int, 0600,
 	"Enable PSR "
 	"(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index c729226..c48c88b 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -31,15 +31,12 @@
 	param(char *, vbt_firmware, NULL) \
 	param(int, modeset, -1) \
 	param(int, panel_ignore_lid, 1) \
-	param(int, semaphores, -1) \
 	param(int, lvds_channel_mode, 0) \
 	param(int, panel_use_ssc, -1) \
 	param(int, vbt_sdvo_panel_type, -1) \
-	param(int, enable_rc6, -1) \
 	param(int, enable_dc, -1) \
 	param(int, enable_fbc, -1) \
 	param(int, enable_ppgtt, -1) \
-	param(int, enable_execlists, -1) \
 	param(int, enable_psr, -1) \
 	param(int, disable_power_well, -1) \
 	param(int, enable_ips, 1) \
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 6458c30..fa67d3d 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -209,6 +209,8 @@ static const struct intel_device_info intel_gm45_info __initconst = {
 	.has_hotplug = 1, \
 	.ring_mask = RENDER_RING | BSD_RING, \
 	.has_snoop = true, \
+	/* ilk does support rc6, but we do not implement [power] contexts */ \
+	.has_rc6 = 0, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	CURSOR_OFFSETS
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 00be015..f0cfdec 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1216,9 +1216,9 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
-	if (i915_modparams.enable_execlists)
+	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
 		dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
-	else {
+	} else {
 		struct intel_engine_cs *engine = dev_priv->engine[RCS];
 		struct intel_ring *ring;
 		int ret;
@@ -1262,7 +1262,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
-	if (i915_modparams.enable_execlists) {
+	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
 		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
 	} else {
 		struct intel_engine_cs *engine = dev_priv->engine[RCS];
@@ -1726,10 +1726,9 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
 							 GFP_KERNEL);
 	}
 
-	ret = i915_switch_context(req);
 	i915_add_request(req);
 
-	return ret;
+	return 0;
 }
 
 /*
@@ -2691,8 +2690,8 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 
 static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
 {
-	return div_u64(1000000000ULL * (2ULL << exponent),
-		       dev_priv->perf.oa.timestamp_frequency);
+	return div64_u64(1000000000ULL * (2ULL << exponent),
+			 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz);
 }
 
 /**
@@ -3007,7 +3006,7 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
-		if (flex_eu_regs[i].reg == addr)
+		if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
 			return true;
 	}
 	return false;
@@ -3015,38 +3014,47 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
 
 static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr)
 {
-	return (addr >= OASTARTTRIG1.reg && addr <= OASTARTTRIG8.reg) ||
-		(addr >= OAREPORTTRIG1.reg && addr <= OAREPORTTRIG8.reg) ||
-		(addr >= OACEC0_0.reg && addr <= OACEC7_1.reg);
+	return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
+		addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
+		(addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
+		 addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
+		(addr >= i915_mmio_reg_offset(OACEC0_0) &&
+		 addr <= i915_mmio_reg_offset(OACEC7_1));
 }
 
 static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
 {
-	return addr == HALF_SLICE_CHICKEN2.reg ||
-		(addr >= MICRO_BP0_0.reg && addr <= NOA_WRITE.reg) ||
-		(addr >= OA_PERFCNT1_LO.reg && addr <= OA_PERFCNT2_HI.reg) ||
-		(addr >= OA_PERFMATRIX_LO.reg && addr <= OA_PERFMATRIX_HI.reg);
+	return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
+		(addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
+		 addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
+		(addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
+		 addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
+		(addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
+		 addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
 }
 
 static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
 {
 	return gen7_is_valid_mux_addr(dev_priv, addr) ||
-		addr == WAIT_FOR_RC6_EXIT.reg ||
-		(addr >= RPM_CONFIG0.reg && addr <= NOA_CONFIG(8).reg);
+		addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
+		(addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
+		 addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
 }
 
 static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
 {
 	return gen8_is_valid_mux_addr(dev_priv, addr) ||
-		(addr >= OA_PERFCNT3_LO.reg && addr <= OA_PERFCNT4_HI.reg);
+		(addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
+		 addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
 }
 
 static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
 {
 	return gen7_is_valid_mux_addr(dev_priv, addr) ||
 		(addr >= 0x25100 && addr <= 0x2FF90) ||
-		(addr >= HSW_MBVID2_NOA0.reg && addr <= HSW_MBVID2_NOA9.reg) ||
-		addr == HSW_MBVID2_MISR0.reg;
+		(addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
+		 addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
+		addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
 }
 
 static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
@@ -3061,14 +3069,14 @@ static uint32_t mask_reg_value(u32 reg, u32 val)
 	 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
 	 * programmed by userspace doesn't change this.
 	 */
-	if (HALF_SLICE_CHICKEN2.reg == reg)
+	if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
 		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
 
 	/* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
 	 * indicated by its name and a bunch of selection fields used by OA
 	 * configs.
 	 */
-	if (WAIT_FOR_RC6_EXIT.reg == reg)
+	if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg)
 		val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);
 
 	return val;
@@ -3415,8 +3423,6 @@ static struct ctl_table dev_root[] = {
  */
 void i915_perf_init(struct drm_i915_private *dev_priv)
 {
-	dev_priv->perf.oa.timestamp_frequency = 0;
-
 	if (IS_HASWELL(dev_priv)) {
 		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
 			gen7_is_valid_b_counter_addr;
@@ -3432,10 +3438,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 		dev_priv->perf.oa.ops.oa_hw_tail_read =
 			gen7_oa_hw_tail_read;
 
-		dev_priv->perf.oa.timestamp_frequency = 12500000;
-
 		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
-	} else if (i915_modparams.enable_execlists) {
+	} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
 		/* Note: that although we could theoretically also support the
 		 * legacy ringbuffer mode on BDW (and earlier iterations of
 		 * this driver, before upstreaming did this) it didn't seem
@@ -3477,23 +3481,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 
 				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
 			}
-
-			switch (dev_priv->info.platform) {
-			case INTEL_BROADWELL:
-				dev_priv->perf.oa.timestamp_frequency = 12500000;
-				break;
-			case INTEL_BROXTON:
-			case INTEL_GEMINILAKE:
-				dev_priv->perf.oa.timestamp_frequency = 19200000;
-				break;
-			case INTEL_SKYLAKE:
-			case INTEL_KABYLAKE:
-			case INTEL_COFFEELAKE:
-				dev_priv->perf.oa.timestamp_frequency = 12000000;
-				break;
-			default:
-				break;
-			}
 		} else if (IS_GEN10(dev_priv)) {
 			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
 				gen7_is_valid_b_counter_addr;
@@ -3509,15 +3496,10 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
 
 			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
-
-			/* Default frequency, although we need to read it from
-			 * the register as it might vary between parts.
-			 */
-			dev_priv->perf.oa.timestamp_frequency = 12000000;
 		}
 	}
 
-	if (dev_priv->perf.oa.timestamp_frequency) {
+	if (dev_priv->perf.oa.ops.enable_metric_set) {
 		hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
 				CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
@@ -3527,8 +3509,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 		mutex_init(&dev_priv->perf.lock);
 		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
 
-		oa_sample_rate_hard_limit =
-			dev_priv->perf.oa.timestamp_frequency / 2;
+		oa_sample_rate_hard_limit = 1000 *
+			(INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
 		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
 
 		mutex_init(&dev_priv->perf.metrics_lock);
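For reference, the reworked oa_exponent_to_ns() above derives the OA report period from the command streamer timestamp frequency now kept in the device info (in kHz) instead of a per-platform frequency table. A minimal standalone sketch of the same arithmetic, assuming a 12 MHz (12000 kHz) timestamp clock purely for illustration; the kernel uses div64_u64() where this sketch uses plain division:

#include <stdint.h>
#include <stdio.h>

/*
 * Period between OA reports: (2 << exponent) timestamp ticks converted to
 * nanoseconds; the extra factor of 1000 turns kHz into Hz.
 */
static uint64_t oa_exponent_to_ns(uint64_t cs_timestamp_frequency_khz,
				  int exponent)
{
	return 1000000000ULL * (2ULL << exponent) /
	       (1000ULL * cs_timestamp_frequency_khz);
}

int main(void)
{
	int exponent;

	for (exponent = 0; exponent <= 5; exponent++)
		printf("OA exponent %d -> period %llu ns\n", exponent,
		       (unsigned long long)oa_exponent_to_ns(12000, exponent));

	return 0;
}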
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
new file mode 100644
index 0000000..55a8a1e
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -0,0 +1,865 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/perf_event.h>
+#include <linux/pm_runtime.h>
+
+#include "i915_drv.h"
+#include "i915_pmu.h"
+#include "intel_ringbuffer.h"
+
+/* Frequency for the sampling timer for events which need it. */
+#define FREQUENCY 200
+#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
+
+#define ENGINE_SAMPLE_MASK \
+	(BIT(I915_SAMPLE_BUSY) | \
+	 BIT(I915_SAMPLE_WAIT) | \
+	 BIT(I915_SAMPLE_SEMA))
+
+#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
+
+static cpumask_t i915_pmu_cpumask;
+
+static u8 engine_config_sample(u64 config)
+{
+	return config & I915_PMU_SAMPLE_MASK;
+}
+
+static u8 engine_event_sample(struct perf_event *event)
+{
+	return engine_config_sample(event->attr.config);
+}
+
+static u8 engine_event_class(struct perf_event *event)
+{
+	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
+}
+
+static u8 engine_event_instance(struct perf_event *event)
+{
+	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
+}
+
+static bool is_engine_config(u64 config)
+{
+	return config < __I915_PMU_OTHER(0);
+}
+
+static unsigned int config_enabled_bit(u64 config)
+{
+	if (is_engine_config(config))
+		return engine_config_sample(config);
+	else
+		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
+}
+
+static u64 config_enabled_mask(u64 config)
+{
+	return BIT_ULL(config_enabled_bit(config));
+}
+
+static bool is_engine_event(struct perf_event *event)
+{
+	return is_engine_config(event->attr.config);
+}
+
+static unsigned int event_enabled_bit(struct perf_event *event)
+{
+	return config_enabled_bit(event->attr.config);
+}
+
+static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
+{
+	u64 enable;
+
+	/*
+	 * Only some counters need the sampling timer.
+	 *
+	 * We start with a bitmask of all currently enabled events.
+	 */
+	enable = i915->pmu.enable;
+
+	/*
+	 * Mask out all the ones which do not need the timer, or in
+	 * other words keep all the ones that could need the timer.
+	 */
+	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
+		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
+		  ENGINE_SAMPLE_MASK;
+
+	/*
+	 * When the GPU is idle per-engine counters do not need to be
+	 * running so clear those bits out.
+	 */
+	if (!gpu_active)
+		enable &= ~ENGINE_SAMPLE_MASK;
+	/*
+	 * Also, when software busyness tracking is available we do not
+	 * need the timer for the I915_SAMPLE_BUSY counter.
+	 *
+	 * Use RCS as proxy for all engines.
+	 */
+	else if (intel_engine_supports_stats(i915->engine[RCS]))
+		enable &= ~BIT(I915_SAMPLE_BUSY);
+
+	/*
+	 * If some bits remain it means we need the sampling timer running.
+	 */
+	return enable;
+}
+
+void i915_pmu_gt_parked(struct drm_i915_private *i915)
+{
+	if (!i915->pmu.base.event_init)
+		return;
+
+	spin_lock_irq(&i915->pmu.lock);
+	/*
+	 * Signal sampling timer to stop if only engine events are enabled and
+	 * GPU went idle.
+	 */
+	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
+	spin_unlock_irq(&i915->pmu.lock);
+}
+
+static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
+{
+	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
+		i915->pmu.timer_enabled = true;
+		hrtimer_start_range_ns(&i915->pmu.timer,
+				       ns_to_ktime(PERIOD), 0,
+				       HRTIMER_MODE_REL_PINNED);
+	}
+}
+
+void i915_pmu_gt_unparked(struct drm_i915_private *i915)
+{
+	if (!i915->pmu.base.event_init)
+		return;
+
+	spin_lock_irq(&i915->pmu.lock);
+	/*
+	 * Re-enable sampling timer when GPU goes active.
+	 */
+	__i915_pmu_maybe_start_timer(i915);
+	spin_unlock_irq(&i915->pmu.lock);
+}
+
+static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
+{
+	if (!fw)
+		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+
+	return true;
+}
+
+static void
+update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
+{
+	sample->cur += mul_u32_u32(val, unit);
+}
+
+static void engines_sample(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	bool fw = false;
+
+	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
+		return;
+
+	if (!dev_priv->gt.awake)
+		return;
+
+	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+		return;
+
+	for_each_engine(engine, dev_priv, id) {
+		u32 current_seqno = intel_engine_get_seqno(engine);
+		u32 last_seqno = intel_engine_last_submit(engine);
+		u32 val;
+
+		val = !i915_seqno_passed(current_seqno, last_seqno);
+
+		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
+			      PERIOD, val);
+
+		if (val && (engine->pmu.enable &
+		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
+			fw = grab_forcewake(dev_priv, fw);
+
+			val = I915_READ_FW(RING_CTL(engine->mmio_base));
+		} else {
+			val = 0;
+		}
+
+		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
+			      PERIOD, !!(val & RING_WAIT));
+
+		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
+			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+	}
+
+	if (fw)
+		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+
+	intel_runtime_pm_put(dev_priv);
+}
+
+static void frequency_sample(struct drm_i915_private *dev_priv)
+{
+	if (dev_priv->pmu.enable &
+	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
+		u32 val;
+
+		val = dev_priv->gt_pm.rps.cur_freq;
+		if (dev_priv->gt.awake &&
+		    intel_runtime_pm_get_if_in_use(dev_priv)) {
+			val = intel_get_cagf(dev_priv,
+					     I915_READ_NOTRACE(GEN6_RPSTAT1));
+			intel_runtime_pm_put(dev_priv);
+		}
+
+		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
+			      1, intel_gpu_freq(dev_priv, val));
+	}
+
+	if (dev_priv->pmu.enable &
+	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
+		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
+			      intel_gpu_freq(dev_priv,
+					     dev_priv->gt_pm.rps.cur_freq));
+	}
+}
+
+static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
+{
+	struct drm_i915_private *i915 =
+		container_of(hrtimer, struct drm_i915_private, pmu.timer);
+
+	if (!READ_ONCE(i915->pmu.timer_enabled))
+		return HRTIMER_NORESTART;
+
+	engines_sample(i915);
+	frequency_sample(i915);
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
+	return HRTIMER_RESTART;
+}
+
+static u64 count_interrupts(struct drm_i915_private *i915)
+{
+	/* open-coded kstat_irqs() */
+	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
+	u64 sum = 0;
+	int cpu;
+
+	if (!desc || !desc->kstat_irqs)
+		return 0;
+
+	for_each_possible_cpu(cpu)
+		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
+
+	return sum;
+}
+
+static void i915_pmu_event_destroy(struct perf_event *event)
+{
+	WARN_ON(event->parent);
+}
+
+static int engine_event_init(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+
+	if (!intel_engine_lookup_user(i915, engine_event_class(event),
+				      engine_event_instance(event)))
+		return -ENODEV;
+
+	switch (engine_event_sample(event)) {
+	case I915_SAMPLE_BUSY:
+	case I915_SAMPLE_WAIT:
+		break;
+	case I915_SAMPLE_SEMA:
+		if (INTEL_GEN(i915) < 6)
+			return -ENODEV;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static int i915_pmu_event_init(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	int ret;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* unsupported modes and filters */
+	if (event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* only allow running on one cpu at a time */
+	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
+		return -EINVAL;
+
+	if (is_engine_event(event)) {
+		ret = engine_event_init(event);
+	} else {
+		ret = 0;
+		switch (event->attr.config) {
+		case I915_PMU_ACTUAL_FREQUENCY:
+			if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+				 /* Requires a mutex for sampling! */
+				ret = -ENODEV;
+		case I915_PMU_REQUESTED_FREQUENCY:
+			if (INTEL_GEN(i915) < 6)
+				ret = -ENODEV;
+			break;
+		case I915_PMU_INTERRUPTS:
+			break;
+		case I915_PMU_RC6_RESIDENCY:
+			if (!HAS_RC6(i915))
+				ret = -ENODEV;
+			break;
+		default:
+			ret = -ENOENT;
+			break;
+		}
+	}
+	if (ret)
+		return ret;
+
+	if (!event->parent)
+		event->destroy = i915_pmu_event_destroy;
+
+	return 0;
+}
+
+static u64 __i915_pmu_event_read(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	u64 val = 0;
+
+	if (is_engine_event(event)) {
+		u8 sample = engine_event_sample(event);
+		struct intel_engine_cs *engine;
+
+		engine = intel_engine_lookup_user(i915,
+						  engine_event_class(event),
+						  engine_event_instance(event));
+
+		if (WARN_ON_ONCE(!engine)) {
+			/* Do nothing */
+		} else if (sample == I915_SAMPLE_BUSY &&
+			   engine->pmu.busy_stats) {
+			val = ktime_to_ns(intel_engine_get_busy_time(engine));
+		} else {
+			val = engine->pmu.sample[sample].cur;
+		}
+	} else {
+		switch (event->attr.config) {
+		case I915_PMU_ACTUAL_FREQUENCY:
+			val =
+			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
+				   FREQUENCY);
+			break;
+		case I915_PMU_REQUESTED_FREQUENCY:
+			val =
+			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
+				   FREQUENCY);
+			break;
+		case I915_PMU_INTERRUPTS:
+			val = count_interrupts(i915);
+			break;
+		case I915_PMU_RC6_RESIDENCY:
+			intel_runtime_pm_get(i915);
+			val = intel_rc6_residency_ns(i915,
+						     IS_VALLEYVIEW(i915) ?
+						     VLV_GT_RENDER_RC6 :
+						     GEN6_GT_GFX_RC6);
+			if (HAS_RC6p(i915))
+				val += intel_rc6_residency_ns(i915,
+							      GEN6_GT_GFX_RC6p);
+			if (HAS_RC6pp(i915))
+				val += intel_rc6_residency_ns(i915,
+							      GEN6_GT_GFX_RC6pp);
+			intel_runtime_pm_put(i915);
+			break;
+		}
+	}
+
+	return val;
+}
+
+static void i915_pmu_event_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev, new;
+
+again:
+	prev = local64_read(&hwc->prev_count);
+	new = __i915_pmu_event_read(event);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
+		goto again;
+
+	local64_add(new - prev, &event->count);
+}
+
+static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
+{
+	return intel_engine_supports_stats(engine) &&
+	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
+}
+
+static void i915_pmu_enable(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	unsigned int bit = event_enabled_bit(event);
+	unsigned long flags;
+
+	spin_lock_irqsave(&i915->pmu.lock, flags);
+
+	/*
+	 * Update the bitmask of enabled events and increment
+	 * the event reference counter.
+	 */
+	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
+	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
+	i915->pmu.enable |= BIT_ULL(bit);
+	i915->pmu.enable_count[bit]++;
+
+	/*
+	 * Start the sampling timer if needed and not already enabled.
+	 */
+	__i915_pmu_maybe_start_timer(i915);
+
+	/*
+	 * For per-engine events the bitmask and reference counting
+	 * is stored per engine.
+	 */
+	if (is_engine_event(event)) {
+		u8 sample = engine_event_sample(event);
+		struct intel_engine_cs *engine;
+
+		engine = intel_engine_lookup_user(i915,
+						  engine_event_class(event),
+						  engine_event_instance(event));
+		GEM_BUG_ON(!engine);
+		engine->pmu.enable |= BIT(sample);
+
+		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
+		if (engine->pmu.enable_count[sample]++ == 0) {
+			/*
+			 * Enable engine busy stats tracking if needed or
+			 * alternatively cancel the scheduled disable.
+			 *
+			 * If the delayed disable was pending, cancel it and
+			 * in this case do not enable since it already is.
+			 */
+			if (engine_needs_busy_stats(engine) &&
+			    !engine->pmu.busy_stats) {
+				engine->pmu.busy_stats = true;
+				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+					intel_enable_engine_stats(engine);
+			}
+		}
+	}
+
+	/*
+	 * Store the current counter value so we can report the correct delta
+	 * for all listeners. Even when the event was already enabled and has
+	 * an existing non-zero value.
+	 */
+	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
+
+	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+}
+
+static void __disable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
+
+	intel_disable_engine_stats(engine);
+}
+
+static void i915_pmu_disable(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	unsigned int bit = event_enabled_bit(event);
+	unsigned long flags;
+
+	spin_lock_irqsave(&i915->pmu.lock, flags);
+
+	if (is_engine_event(event)) {
+		u8 sample = engine_event_sample(event);
+		struct intel_engine_cs *engine;
+
+		engine = intel_engine_lookup_user(i915,
+						  engine_event_class(event),
+						  engine_event_instance(event));
+		GEM_BUG_ON(!engine);
+		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
+		/*
+		 * Decrement the reference count and clear the enabled
+		 * bitmask when the last listener on an event goes away.
+		 */
+		if (--engine->pmu.enable_count[sample] == 0) {
+			engine->pmu.enable &= ~BIT(sample);
+			if (!engine_needs_busy_stats(engine) &&
+			    engine->pmu.busy_stats) {
+				engine->pmu.busy_stats = false;
+				/*
+				 * We request a delayed disable to handle the
+				 * rapid on/off cycles on events, which can
+				 * happen when tools like perf stat start, in a
+				 * nicer way.
+				 *
+				 * In addition, this also helps with busy stats
+				 * accuracy with background CPU offline/online
+				 * migration events.
+				 */
+				queue_delayed_work(system_wq,
+						   &engine->pmu.disable_busy_stats,
+						   round_jiffies_up_relative(HZ));
+			}
+		}
+	}
+
+	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
+	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
+	/*
+	 * Decrement the reference count and clear the enabled
+	 * bitmask when the last listener on an event goes away.
+	 */
+	if (--i915->pmu.enable_count[bit] == 0) {
+		i915->pmu.enable &= ~BIT_ULL(bit);
+		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+	}
+
+	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+}
+
+static void i915_pmu_event_start(struct perf_event *event, int flags)
+{
+	i915_pmu_enable(event);
+	event->hw.state = 0;
+}
+
+static void i915_pmu_event_stop(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_UPDATE)
+		i915_pmu_event_read(event);
+	i915_pmu_disable(event);
+	event->hw.state = PERF_HES_STOPPED;
+}
+
+static int i915_pmu_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		i915_pmu_event_start(event, flags);
+
+	return 0;
+}
+
+static void i915_pmu_event_del(struct perf_event *event, int flags)
+{
+	i915_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int i915_pmu_event_event_idx(struct perf_event *event)
+{
+	return 0;
+}
+
+struct i915_str_attribute {
+	struct device_attribute attr;
+	const char *str;
+};
+
+static ssize_t i915_pmu_format_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct i915_str_attribute *eattr;
+
+	eattr = container_of(attr, struct i915_str_attribute, attr);
+	return sprintf(buf, "%s\n", eattr->str);
+}
+
+#define I915_PMU_FORMAT_ATTR(_name, _config) \
+	(&((struct i915_str_attribute[]) { \
+		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
+		  .str = _config, } \
+	})[0].attr.attr)
+
+static struct attribute *i915_pmu_format_attrs[] = {
+	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
+	NULL,
+};
+
+static const struct attribute_group i915_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = i915_pmu_format_attrs,
+};
+
+struct i915_ext_attribute {
+	struct device_attribute attr;
+	unsigned long val;
+};
+
+static ssize_t i915_pmu_event_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct i915_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct i915_ext_attribute, attr);
+	return sprintf(buf, "config=0x%lx\n", eattr->val);
+}
+
+#define I915_EVENT_ATTR(_name, _config) \
+	(&((struct i915_ext_attribute[]) { \
+		{ .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \
+		  .val = _config, } \
+	})[0].attr.attr)
+
+#define I915_EVENT_STR(_name, _str) \
+	(&((struct perf_pmu_events_attr[]) { \
+		{ .attr	     = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
+		  .id	     = 0, \
+		  .event_str = _str, } \
+	})[0].attr.attr)
+
+#define I915_EVENT(_name, _config, _unit) \
+	I915_EVENT_ATTR(_name, _config), \
+	I915_EVENT_STR(_name.unit, _unit)
+
+#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \
+	I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \
+	I915_EVENT_STR(_name.unit, "ns")
+
+#define I915_ENGINE_EVENTS(_name, _class, _instance) \
+	I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
+	I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
+	I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT)
+
+static struct attribute *i915_pmu_events_attrs[] = {
+	I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0),
+	I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0),
+	I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0),
+	I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1),
+	I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+
+	I915_EVENT(actual-frequency,    I915_PMU_ACTUAL_FREQUENCY,    "MHz"),
+	I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"),
+
+	I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS),
+
+	I915_EVENT(rc6-residency,   I915_PMU_RC6_RESIDENCY,   "ns"),
+
+	NULL,
+};
+
+static const struct attribute_group i915_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = i915_pmu_events_attrs,
+};
+
+static ssize_t
+i915_pmu_get_attr_cpumask(struct device *dev,
+			  struct device_attribute *attr,
+			  char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
+}
+
+static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
+
+static struct attribute *i915_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group i915_pmu_cpumask_attr_group = {
+	.attrs = i915_cpumask_attrs,
+};
+
+static const struct attribute_group *i915_pmu_attr_groups[] = {
+	&i915_pmu_format_attr_group,
+	&i915_pmu_events_attr_group,
+	&i915_pmu_cpumask_attr_group,
+	NULL
+};
+
+static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+
+	GEM_BUG_ON(!pmu->base.event_init);
+
+	/* Select the first online CPU as a designated reader. */
+	if (!cpumask_weight(&i915_pmu_cpumask))
+		cpumask_set_cpu(cpu, &i915_pmu_cpumask);
+
+	return 0;
+}
+
+static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+	unsigned int target;
+
+	GEM_BUG_ON(!pmu->base.event_init);
+
+	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
+		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+		/* Migrate events if there is a valid target */
+		if (target < nr_cpu_ids) {
+			cpumask_set_cpu(target, &i915_pmu_cpumask);
+			perf_pmu_migrate_context(&pmu->base, cpu, target);
+		}
+	}
+
+	return 0;
+}
+
+static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
+
+static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
+{
+	enum cpuhp_state slot;
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/x86/intel/i915:online",
+				      i915_pmu_cpu_online,
+				      i915_pmu_cpu_offline);
+	if (ret < 0)
+		return ret;
+
+	slot = ret;
+	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
+	if (ret) {
+		cpuhp_remove_multi_state(slot);
+		return ret;
+	}
+
+	cpuhp_slot = slot;
+	return 0;
+}
+
+static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
+{
+	WARN_ON(cpuhp_slot == CPUHP_INVALID);
+	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
+	cpuhp_remove_multi_state(cpuhp_slot);
+}
+
+void i915_pmu_register(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int ret;
+
+	if (INTEL_GEN(i915) <= 2) {
+		DRM_INFO("PMU not supported for this GPU.");
+		return;
+	}
+
+	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
+	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
+	i915->pmu.base.event_init	= i915_pmu_event_init;
+	i915->pmu.base.add		= i915_pmu_event_add;
+	i915->pmu.base.del		= i915_pmu_event_del;
+	i915->pmu.base.start		= i915_pmu_event_start;
+	i915->pmu.base.stop		= i915_pmu_event_stop;
+	i915->pmu.base.read		= i915_pmu_event_read;
+	i915->pmu.base.event_idx	= i915_pmu_event_event_idx;
+
+	spin_lock_init(&i915->pmu.lock);
+	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	i915->pmu.timer.function = i915_sample;
+
+	for_each_engine(engine, i915, id)
+		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
+				  __disable_busy_stats);
+
+	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
+	if (ret)
+		goto err;
+
+	ret = i915_pmu_register_cpuhp_state(i915);
+	if (ret)
+		goto err_unreg;
+
+	return;
+
+err_unreg:
+	perf_pmu_unregister(&i915->pmu.base);
+err:
+	i915->pmu.base.event_init = NULL;
+	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
+}
+
+void i915_pmu_unregister(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (!i915->pmu.base.event_init)
+		return;
+
+	WARN_ON(i915->pmu.enable);
+
+	hrtimer_cancel(&i915->pmu.timer);
+
+	for_each_engine(engine, i915, id) {
+		GEM_BUG_ON(engine->pmu.busy_stats);
+		flush_delayed_work(&engine->pmu.disable_busy_stats);
+	}
+
+	i915_pmu_unregister_cpuhp_state(i915);
+
+	perf_pmu_unregister(&i915->pmu.base);
+	i915->pmu.base.event_init = NULL;
+}
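To make the new interface concrete, here is a hedged userspace sketch (not part of the patch) that opens one of the counters exposed by this file through perf_event_open(2). The sysfs paths follow from the "i915" PMU name and the "events" attribute group registered above; the system-wide, counting-only usage and the cpu/pid constraints mirror i915_pmu_event_init(). CPU0 is assumed to be the designated reader CPU, and error handling is kept minimal:

#include <linux/perf_event.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

static int read_sysfs(const char *path, const char *fmt,
		      unsigned long long *val)
{
	FILE *f = fopen(path, "r");
	int ret;

	if (!f)
		return -1;
	ret = fscanf(f, fmt, val) == 1 ? 0 : -1;
	fclose(f);
	return ret;
}

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long type, config;
	uint64_t count;
	int fd;

	/* Dynamic PMU type id assigned when perf_pmu_register() ran. */
	if (read_sysfs("/sys/bus/event_source/devices/i915/type",
		       "%llu", &type))
		return 1;

	/* Event encoding exported by the "events" attribute group. */
	if (read_sysfs("/sys/bus/event_source/devices/i915/events/interrupts",
		       "config=0x%llx", &config))
		return 1;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = config;

	/*
	 * Counting mode only (no sample_period), system-wide (pid == -1),
	 * bound to one CPU from the PMU cpumask. Needs sufficient
	 * privileges depending on perf_event_paranoid.
	 */
	fd = perf_event_open(&attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("i915 interrupts over ~1s: %" PRIu64 "\n", count);

	close(fd);
	return 0;
}

Engine counters (for instance events/rcs0-busy, reported in nanoseconds of busy time) are opened the same way; only the config value read from sysfs differs.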
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
new file mode 100644
index 0000000..40c154d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#ifndef __I915_PMU_H__
+#define __I915_PMU_H__
+
+enum {
+	__I915_SAMPLE_FREQ_ACT = 0,
+	__I915_SAMPLE_FREQ_REQ,
+	__I915_NUM_PMU_SAMPLERS
+};
+
+/**
+ * How many different events we track in the global PMU mask.
+ *
+ * It is also used to size the array of event reference counters.
+ */
+#define I915_PMU_MASK_BITS \
+	((1 << I915_PMU_SAMPLE_BITS) + \
+	 (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0)))
+
+struct i915_pmu_sample {
+	u64 cur;
+};
+
+struct i915_pmu {
+	/**
+	 * @node: List node for CPU hotplug handling.
+	 */
+	struct hlist_node node;
+	/**
+	 * @base: PMU base.
+	 */
+	struct pmu base;
+	/**
+	 * @lock: Lock protecting enable mask and ref count handling.
+	 */
+	spinlock_t lock;
+	/**
+	 * @timer: Timer for internal i915 PMU sampling.
+	 */
+	struct hrtimer timer;
+	/**
+	 * @enable: Bitmask of all currently enabled events.
+	 *
+	 * Bits are derived from uAPI event numbers in a way that low 16 bits
+	 * correspond to engine event _sample_ _type_ (I915_SAMPLE_BUSY is
+	 * bit 0), and higher bits correspond to other events (for instance
+	 * I915_PMU_ACTUAL_FREQUENCY is bit 16 etc).
+	 *
+	 * In other words, low 16 bits are not per engine but per engine
+	 * sampler type, while the upper bits are directly mapped to other
+	 * event types.
+	 */
+	u64 enable;
+	/**
+	 * @enable_count: Reference counts for the enabled events.
+	 *
+	 * Array indices are mapped in the same way as bits in the @enable field
+	 * and they are used to control sampling on/off when multiple clients
+	 * are using the PMU API.
+	 */
+	unsigned int enable_count[I915_PMU_MASK_BITS];
+	/**
+	 * @timer_enabled: Should the internal sampling timer be running.
+	 */
+	bool timer_enabled;
+	/**
+	 * @sample: Current (raw) counter values for sampling events.
+	 *
+	 * These counters are updated from the i915 PMU sampling timer.
+	 *
+	 * Only global counters are held here, while the per-engine ones are in
+	 * struct intel_engine_cs.
+	 */
+	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
+};
+
+#ifdef CONFIG_PERF_EVENTS
+void i915_pmu_register(struct drm_i915_private *i915);
+void i915_pmu_unregister(struct drm_i915_private *i915);
+void i915_pmu_gt_parked(struct drm_i915_private *i915);
+void i915_pmu_gt_unparked(struct drm_i915_private *i915);
+#else
+static inline void i915_pmu_register(struct drm_i915_private *i915) {}
+static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
+static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {}
+static inline void i915_pmu_gt_unparked(struct drm_i915_private *i915) {}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 96c80fa..09bf043 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -186,6 +186,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define VIDEO_ENHANCEMENT_CLASS	2
 #define COPY_ENGINE_CLASS	3
 #define OTHER_CLASS		4
+#define MAX_ENGINE_CLASS	4
+
+#define MAX_ENGINE_INSTANCE    1
 
 /* PCI config space */
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 791759f..c74a20b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -42,14 +42,30 @@ static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
 static u32 calc_residency(struct drm_i915_private *dev_priv,
 			  i915_reg_t reg)
 {
-	return DIV_ROUND_CLOSEST_ULL(intel_rc6_residency_us(dev_priv, reg),
-				     1000);
+	u64 res;
+
+	intel_runtime_pm_get(dev_priv);
+	res = intel_rc6_residency_us(dev_priv, reg);
+	intel_runtime_pm_put(dev_priv);
+
+	return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
 
 static ssize_t
 show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%x\n", intel_rc6_enabled());
+	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
+	unsigned int mask;
+
+	mask = 0;
+	if (HAS_RC6(dev_priv))
+		mask |= BIT(0);
+	if (HAS_RC6p(dev_priv))
+		mask |= BIT(1);
+	if (HAS_RC6pp(dev_priv))
+		mask |= BIT(2);
+
+	return snprintf(buf, PAGE_SIZE, "%x\n", mask);
 }
 
 static ssize_t
@@ -252,14 +268,9 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 		freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 		ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff);
 	} else {
-		u32 rpstat = I915_READ(GEN6_RPSTAT1);
-		if (INTEL_GEN(dev_priv) >= 9)
-			ret = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
-		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-			ret = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
-		else
-			ret = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
-		ret = intel_gpu_freq(dev_priv, ret);
+		ret = intel_gpu_freq(dev_priv,
+				     intel_get_cagf(dev_priv,
+						    I915_READ(GEN6_RPSTAT1)));
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
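With the enable_rc6 module parameter gone, the reworked show_rc6_mask() above reports the RC6 states the hardware supports as a bitmask: bit 0 for RC6, bit 1 for deep RC6 (RC6p), bit 2 for deepest RC6 (RC6pp). A small sketch of how userspace might decode it; the sysfs path is the usual location of the i915 RC6 attributes and is an assumption here (it differs on systems with more than one card):

#include <stdio.h>

int main(void)
{
	unsigned int mask = 0;
	FILE *f = fopen("/sys/class/drm/card0/power/rc6_enable", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%x", &mask) != 1)
		mask = 0;
	fclose(f);

	printf("RC6: %s, RC6p: %s, RC6pp: %s\n",
	       mask & 1 ? "supported" : "no",
	       mask & 2 ? "supported" : "no",
	       mask & 4 ? "supported" : "no");

	return 0;
}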
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index e8884c2..9c5ceb9 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1896,7 +1896,7 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 	min_cdclk = intel_pixel_rate_to_cdclk(dev_priv, crtc_state->pixel_rate);
 
 	/* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
-	if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
+	if (IS_BROADWELL(dev_priv) && hsw_crtc_state_ips_capable(crtc_state))
 		min_cdclk = DIV_ROUND_UP(min_cdclk * 100, 95);
 
 	/* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz,
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 77d8b3d..07e4f7b 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -40,9 +40,9 @@
 #define I915_CSR_CNL "i915/cnl_dmc_ver1_06.bin"
 #define CNL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 6)
 
-#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin"
+#define I915_CSR_KBL "i915/kbl_dmc_ver1_04.bin"
 MODULE_FIRMWARE(I915_CSR_KBL);
-#define KBL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 1)
+#define KBL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 4)
 
 #define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin"
 MODULE_FIRMWARE(I915_CSR_SKL);
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index eff3b51..369f780 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2098,6 +2098,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
 	if (IS_CANNONLAKE(dev_priv)) {
 		/* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
 		val = I915_READ(DPCLKA_CFGCR0);
+		val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port);
 		val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->id, port);
 		I915_WRITE(DPCLKA_CFGCR0, val);
 
@@ -2513,17 +2514,17 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
 	udelay(600);
 }
 
-bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
-				 struct intel_crtc *intel_crtc)
+static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
+				       enum transcoder cpu_transcoder)
 {
-	u32 temp;
+	if (cpu_transcoder == TRANSCODER_EDP)
+		return false;
 
-	if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
-		temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
-		if (temp & AUDIO_OUTPUT_ENABLE(intel_crtc->pipe))
-			return true;
-	}
-	return false;
+	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO))
+		return false;
+
+	return I915_READ(HSW_AUD_PIN_ELD_CP_VLD) &
+		AUDIO_OUTPUT_ENABLE(cpu_transcoder);
 }
 
 void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv,
@@ -2616,7 +2617,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 	}
 
 	pipe_config->has_audio =
-		intel_ddi_is_audio_enabled(dev_priv, intel_crtc);
+		intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder);
 
 	if (encoder->type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.bpp &&
 	    pipe_config->pipe_bpp > dev_priv->vbt.edp.bpp) {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2007c69..1f7e312 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -489,7 +489,7 @@ static const struct intel_limit intel_limits_bxt = {
 };
 
 static bool
-needs_modeset(struct drm_crtc_state *state)
+needs_modeset(const struct drm_crtc_state *state)
 {
 	return drm_atomic_crtc_needs_modeset(state);
 }
@@ -998,7 +998,8 @@ enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv,
 	return crtc->config->cpu_transcoder;
 }
 
-static bool pipe_dsl_stopped(struct drm_i915_private *dev_priv, enum pipe pipe)
+static bool pipe_scanline_is_moving(struct drm_i915_private *dev_priv,
+				    enum pipe pipe)
 {
 	i915_reg_t reg = PIPEDSL(pipe);
 	u32 line1, line2;
@@ -1013,32 +1014,38 @@ static bool pipe_dsl_stopped(struct drm_i915_private *dev_priv, enum pipe pipe)
 	msleep(5);
 	line2 = I915_READ(reg) & line_mask;
 
-	return line1 == line2;
+	return line1 != line2;
 }
 
-/*
- * intel_wait_for_pipe_off - wait for pipe to turn off
- * @crtc: crtc whose pipe to wait for
- *
- * After disabling a pipe, we can't wait for vblank in the usual way,
- * spinning on the vblank interrupt status bit, since we won't actually
- * see an interrupt when the pipe is disabled.
- *
- * On Gen4 and above:
- *   wait for the pipe register state bit to turn off
- *
- * Otherwise:
- *   wait for the display line value to settle (it usually
- *   ends up stopping at the start of the next frame).
- *
- */
-static void intel_wait_for_pipe_off(struct intel_crtc *crtc)
+static void wait_for_pipe_scanline_moving(struct intel_crtc *crtc, bool state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	enum transcoder cpu_transcoder = crtc->config->cpu_transcoder;
 	enum pipe pipe = crtc->pipe;
 
+	/* Wait for the display line to settle/start moving */
+	if (wait_for(pipe_scanline_is_moving(dev_priv, pipe) == state, 100))
+		DRM_ERROR("pipe %c scanline %s wait timed out\n",
+			  pipe_name(pipe), onoff(state));
+}
+
+static void intel_wait_for_pipe_scanline_stopped(struct intel_crtc *crtc)
+{
+	wait_for_pipe_scanline_moving(crtc, false);
+}
+
+static void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc)
+{
+	wait_for_pipe_scanline_moving(crtc, true);
+}
+
+static void
+intel_wait_for_pipe_off(const struct intel_crtc_state *old_crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
 	if (INTEL_GEN(dev_priv) >= 4) {
+		enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder;
 		i915_reg_t reg = PIPECONF(cpu_transcoder);
 
 		/* Wait for the Pipe State to go off */
@@ -1047,9 +1054,7 @@ static void intel_wait_for_pipe_off(struct intel_crtc *crtc)
 					    100))
 			WARN(1, "pipe_off wait timed out\n");
 	} else {
-		/* Wait for the display line to settle */
-		if (wait_for(pipe_dsl_stopped(dev_priv, pipe), 100))
-			WARN(1, "pipe_off wait timed out\n");
+		intel_wait_for_pipe_scanline_stopped(crtc);
 	}
 }
 
@@ -1190,23 +1195,6 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe)
 	     pipe_name(pipe));
 }
 
-static void assert_cursor(struct drm_i915_private *dev_priv,
-			  enum pipe pipe, bool state)
-{
-	bool cur_state;
-
-	if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
-		cur_state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
-	else
-		cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
-
-	I915_STATE_WARN(cur_state != state,
-	     "cursor on pipe %c assertion failure (expected %s, current %s)\n",
-			pipe_name(pipe), onoff(state), onoff(cur_state));
-}
-#define assert_cursor_enabled(d, p) assert_cursor(d, p, true)
-#define assert_cursor_disabled(d, p) assert_cursor(d, p, false)
-
 void assert_pipe(struct drm_i915_private *dev_priv,
 		 enum pipe pipe, bool state)
 {
@@ -1234,77 +1222,25 @@ void assert_pipe(struct drm_i915_private *dev_priv,
 			pipe_name(pipe), onoff(state), onoff(cur_state));
 }
 
-static void assert_plane(struct drm_i915_private *dev_priv,
-			 enum plane plane, bool state)
+static void assert_plane(struct intel_plane *plane, bool state)
 {
-	u32 val;
-	bool cur_state;
+	bool cur_state = plane->get_hw_state(plane);
 
-	val = I915_READ(DSPCNTR(plane));
-	cur_state = !!(val & DISPLAY_PLANE_ENABLE);
 	I915_STATE_WARN(cur_state != state,
-	     "plane %c assertion failure (expected %s, current %s)\n",
-			plane_name(plane), onoff(state), onoff(cur_state));
+			"%s assertion failure (expected %s, current %s)\n",
+			plane->base.name, onoff(state), onoff(cur_state));
 }
 
-#define assert_plane_enabled(d, p) assert_plane(d, p, true)
-#define assert_plane_disabled(d, p) assert_plane(d, p, false)
+#define assert_plane_enabled(p) assert_plane(p, true)
+#define assert_plane_disabled(p) assert_plane(p, false)
 
-static void assert_planes_disabled(struct drm_i915_private *dev_priv,
-				   enum pipe pipe)
+static void assert_planes_disabled(struct intel_crtc *crtc)
 {
-	int i;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_plane *plane;
 
-	/* Primary planes are fixed to pipes on gen4+ */
-	if (INTEL_GEN(dev_priv) >= 4) {
-		u32 val = I915_READ(DSPCNTR(pipe));
-		I915_STATE_WARN(val & DISPLAY_PLANE_ENABLE,
-		     "plane %c assertion failure, should be disabled but not\n",
-		     plane_name(pipe));
-		return;
-	}
-
-	/* Need to check both planes against the pipe */
-	for_each_pipe(dev_priv, i) {
-		u32 val = I915_READ(DSPCNTR(i));
-		enum pipe cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >>
-			DISPPLANE_SEL_PIPE_SHIFT;
-		I915_STATE_WARN((val & DISPLAY_PLANE_ENABLE) && pipe == cur_pipe,
-		     "plane %c assertion failure, should be off on pipe %c but is still active\n",
-		     plane_name(i), pipe_name(pipe));
-	}
-}
-
-static void assert_sprites_disabled(struct drm_i915_private *dev_priv,
-				    enum pipe pipe)
-{
-	int sprite;
-
-	if (INTEL_GEN(dev_priv) >= 9) {
-		for_each_sprite(dev_priv, pipe, sprite) {
-			u32 val = I915_READ(PLANE_CTL(pipe, sprite));
-			I915_STATE_WARN(val & PLANE_CTL_ENABLE,
-			     "plane %d assertion failure, should be off on pipe %c but is still active\n",
-			     sprite, pipe_name(pipe));
-		}
-	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		for_each_sprite(dev_priv, pipe, sprite) {
-			u32 val = I915_READ(SPCNTR(pipe, PLANE_SPRITE0 + sprite));
-			I915_STATE_WARN(val & SP_ENABLE,
-			     "sprite %c assertion failure, should be off on pipe %c but is still active\n",
-			     sprite_name(pipe, sprite), pipe_name(pipe));
-		}
-	} else if (INTEL_GEN(dev_priv) >= 7) {
-		u32 val = I915_READ(SPRCTL(pipe));
-		I915_STATE_WARN(val & SPRITE_ENABLE,
-		     "sprite %c assertion failure, should be off on pipe %c but is still active\n",
-		     plane_name(pipe), pipe_name(pipe));
-	} else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
-		u32 val = I915_READ(DVSCNTR(pipe));
-		I915_STATE_WARN(val & DVS_ENABLE,
-		     "sprite %c assertion failure, should be off on pipe %c but is still active\n",
-		     plane_name(pipe), pipe_name(pipe));
-	}
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane)
+		assert_plane_disabled(plane);
 }
 
 static void assert_vblank_disabled(struct drm_crtc *crtc)
@@ -1878,27 +1814,18 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc)
 		return crtc->pipe;
 }
 
-/**
- * intel_enable_pipe - enable a pipe, asserting requirements
- * @crtc: crtc responsible for the pipe
- *
- * Enable @crtc's pipe, making sure that various hardware specific requirements
- * are met, if applicable, e.g. PLL enabled, LVDS pairs enabled, etc.
- */
-static void intel_enable_pipe(struct intel_crtc *crtc)
+static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state)
 {
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder;
 	enum pipe pipe = crtc->pipe;
-	enum transcoder cpu_transcoder = crtc->config->cpu_transcoder;
 	i915_reg_t reg;
 	u32 val;
 
 	DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe));
 
-	assert_planes_disabled(dev_priv, pipe);
-	assert_cursor_disabled(dev_priv, pipe);
-	assert_sprites_disabled(dev_priv, pipe);
+	assert_planes_disabled(crtc);
 
 	/*
 	 * A pipe without a PLL won't actually be able to drive bits from
@@ -1906,12 +1833,12 @@ static void intel_enable_pipe(struct intel_crtc *crtc)
 	 * need the check.
 	 */
 	if (HAS_GMCH_DISPLAY(dev_priv)) {
-		if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI))
+		if (intel_crtc_has_type(new_crtc_state, INTEL_OUTPUT_DSI))
 			assert_dsi_pll_enabled(dev_priv);
 		else
 			assert_pll_enabled(dev_priv, pipe);
 	} else {
-		if (crtc->config->has_pch_encoder) {
+		if (new_crtc_state->has_pch_encoder) {
 			/* if driving the PCH, we need FDI enabled */
 			assert_fdi_rx_pll_enabled(dev_priv,
 						  intel_crtc_pch_transcoder(crtc));
@@ -1933,31 +1860,21 @@ static void intel_enable_pipe(struct intel_crtc *crtc)
 	POSTING_READ(reg);
 
 	/*
-	 * Until the pipe starts DSL will read as 0, which would cause
-	 * an apparent vblank timestamp jump, which messes up also the
-	 * frame count when it's derived from the timestamps. So let's
-	 * wait for the pipe to start properly before we call
-	 * drm_crtc_vblank_on()
+	 * Until the pipe starts, PIPEDSL reads will return a stale value,
+	 * which causes an apparent vblank timestamp jump when PIPEDSL
+	 * resets to its proper value. That also messes up the frame count
+	 * when it's derived from the timestamps. So let's wait for the
+	 * pipe to start properly before we call drm_crtc_vblank_on()
 	 */
-	if (dev->max_vblank_count == 0 &&
-	    wait_for(intel_get_crtc_scanline(crtc) != crtc->scanline_offset, 50))
-		DRM_ERROR("pipe %c didn't start\n", pipe_name(pipe));
+	if (dev_priv->drm.max_vblank_count == 0)
+		intel_wait_for_pipe_scanline_moving(crtc);
 }
 
-/**
- * intel_disable_pipe - disable a pipe, asserting requirements
- * @crtc: crtc whose pipes is to be disabled
- *
- * Disable the pipe of @crtc, making sure that various hardware
- * specific requirements are met, if applicable, e.g. plane
- * disabled, panel fitter off, etc.
- *
- * Will wait until the pipe has shut down before returning.
- */
-static void intel_disable_pipe(struct intel_crtc *crtc)
+static void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state)
 {
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	enum transcoder cpu_transcoder = crtc->config->cpu_transcoder;
+	enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder;
 	enum pipe pipe = crtc->pipe;
 	i915_reg_t reg;
 	u32 val;
@@ -1968,9 +1885,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc)
 	 * Make sure planes won't keep trying to pump pixels to us,
 	 * or we might hang the display.
 	 */
-	assert_planes_disabled(dev_priv, pipe);
-	assert_cursor_disabled(dev_priv, pipe);
-	assert_sprites_disabled(dev_priv, pipe);
+	assert_planes_disabled(crtc);
 
 	reg = PIPECONF(cpu_transcoder);
 	val = I915_READ(reg);
@@ -1981,7 +1896,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc)
 	 * Double wide has implications for planes
 	 * so best keep it disabled when not needed.
 	 */
-	if (crtc->config->double_wide)
+	if (old_crtc_state->double_wide)
 		val &= ~PIPECONF_DOUBLE_WIDE;
 
 	/* Don't disable pipe or pipe PLLs if needed */
@@ -1990,7 +1905,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc)
 
 	I915_WRITE(reg, val);
 	if ((val & PIPECONF_ENABLE) == 0)
-		intel_wait_for_pipe_off(crtc);
+		intel_wait_for_pipe_off(old_crtc_state);
 }
 
 static unsigned int intel_tile_size(const struct drm_i915_private *dev_priv)
@@ -2799,6 +2714,23 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state,
 		      crtc_state->active_planes);
 }
 
+static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
+					 struct intel_plane *plane)
+{
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct intel_plane_state *plane_state =
+		to_intel_plane_state(plane->base.state);
+
+	intel_set_plane_visible(crtc_state, plane_state, false);
+
+	if (plane->id == PLANE_PRIMARY)
+		intel_pre_disable_primary_noatomic(&crtc->base);
+
+	trace_intel_disable_plane(&plane->base, crtc);
+	plane->disable_plane(plane, crtc);
+}
+
 static void
 intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 			     struct intel_initial_plane_config *plane_config)
@@ -2856,12 +2788,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	 * simplest solution is to just disable the primary plane now and
 	 * pretend the BIOS never had it enabled.
 	 */
-	intel_set_plane_visible(to_intel_crtc_state(crtc_state),
-				to_intel_plane_state(plane_state),
-				false);
-	intel_pre_disable_primary_noatomic(&intel_crtc->base);
-	trace_intel_disable_plane(primary, intel_crtc);
-	intel_plane->disable_plane(intel_plane, intel_crtc);
+	intel_plane_disable_noatomic(intel_crtc, intel_plane);
 
 	return;
 
@@ -3281,16 +3208,16 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
 	return 0;
 }
 
-static void i9xx_update_primary_plane(struct intel_plane *primary,
-				      const struct intel_crtc_state *crtc_state,
-				      const struct intel_plane_state *plane_state)
+static void i9xx_update_plane(struct intel_plane *plane,
+			      const struct intel_crtc_state *crtc_state,
+			      const struct intel_plane_state *plane_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	const struct drm_framebuffer *fb = plane_state->base.fb;
-	enum plane plane = primary->plane;
+	enum i9xx_plane_id i9xx_plane = plane->i9xx_plane;
 	u32 linear_offset;
 	u32 dspcntr = plane_state->ctl;
-	i915_reg_t reg = DSPCNTR(plane);
+	i915_reg_t reg = DSPCNTR(i9xx_plane);
 	int x = plane_state->main.x;
 	int y = plane_state->main.y;
 	unsigned long irqflags;
@@ -3309,34 +3236,34 @@ static void i9xx_update_primary_plane(struct intel_plane *primary,
 		/* pipesrc and dspsize control the size that is scaled from,
 		 * which should always be the user's requested size.
 		 */
-		I915_WRITE_FW(DSPSIZE(plane),
+		I915_WRITE_FW(DSPSIZE(i9xx_plane),
 			      ((crtc_state->pipe_src_h - 1) << 16) |
 			      (crtc_state->pipe_src_w - 1));
-		I915_WRITE_FW(DSPPOS(plane), 0);
-	} else if (IS_CHERRYVIEW(dev_priv) && plane == PLANE_B) {
-		I915_WRITE_FW(PRIMSIZE(plane),
+		I915_WRITE_FW(DSPPOS(i9xx_plane), 0);
+	} else if (IS_CHERRYVIEW(dev_priv) && i9xx_plane == PLANE_B) {
+		I915_WRITE_FW(PRIMSIZE(i9xx_plane),
 			      ((crtc_state->pipe_src_h - 1) << 16) |
 			      (crtc_state->pipe_src_w - 1));
-		I915_WRITE_FW(PRIMPOS(plane), 0);
-		I915_WRITE_FW(PRIMCNSTALPHA(plane), 0);
+		I915_WRITE_FW(PRIMPOS(i9xx_plane), 0);
+		I915_WRITE_FW(PRIMCNSTALPHA(i9xx_plane), 0);
 	}
 
 	I915_WRITE_FW(reg, dspcntr);
 
-	I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]);
+	I915_WRITE_FW(DSPSTRIDE(i9xx_plane), fb->pitches[0]);
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
-		I915_WRITE_FW(DSPSURF(plane),
+		I915_WRITE_FW(DSPSURF(i9xx_plane),
 			      intel_plane_ggtt_offset(plane_state) +
 			      dspaddr_offset);
-		I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x);
+		I915_WRITE_FW(DSPOFFSET(i9xx_plane), (y << 16) | x);
 	} else if (INTEL_GEN(dev_priv) >= 4) {
-		I915_WRITE_FW(DSPSURF(plane),
+		I915_WRITE_FW(DSPSURF(i9xx_plane),
 			      intel_plane_ggtt_offset(plane_state) +
 			      dspaddr_offset);
-		I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x);
-		I915_WRITE_FW(DSPLINOFF(plane), linear_offset);
+		I915_WRITE_FW(DSPTILEOFF(i9xx_plane), (y << 16) | x);
+		I915_WRITE_FW(DSPLINOFF(i9xx_plane), linear_offset);
 	} else {
-		I915_WRITE_FW(DSPADDR(plane),
+		I915_WRITE_FW(DSPADDR(i9xx_plane),
 			      intel_plane_ggtt_offset(plane_state) +
 			      dspaddr_offset);
 	}
@@ -3345,25 +3272,49 @@ static void i9xx_update_primary_plane(struct intel_plane *primary,
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-static void i9xx_disable_primary_plane(struct intel_plane *primary,
-				       struct intel_crtc *crtc)
+static void i9xx_disable_plane(struct intel_plane *plane,
+			       struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
-	enum plane plane = primary->plane;
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum i9xx_plane_id i9xx_plane = plane->i9xx_plane;
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
-	I915_WRITE_FW(DSPCNTR(plane), 0);
-	if (INTEL_INFO(dev_priv)->gen >= 4)
-		I915_WRITE_FW(DSPSURF(plane), 0);
+	I915_WRITE_FW(DSPCNTR(i9xx_plane), 0);
+	if (INTEL_GEN(dev_priv) >= 4)
+		I915_WRITE_FW(DSPSURF(i9xx_plane), 0);
 	else
-		I915_WRITE_FW(DSPADDR(plane), 0);
-	POSTING_READ_FW(DSPCNTR(plane));
+		I915_WRITE_FW(DSPADDR(i9xx_plane), 0);
+	POSTING_READ_FW(DSPCNTR(i9xx_plane));
 
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+static bool i9xx_plane_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum i9xx_plane_id i9xx_plane = plane->i9xx_plane;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	/*
+	 * Not 100% correct for planes that can move between pipes,
+	 * but that's only the case for gen2-4 which don't have any
+	 * display power wells.
+	 */
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(DSPCNTR(i9xx_plane)) & DISPLAY_PLANE_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static u32
 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane)
 {
@@ -4870,7 +4821,7 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state)
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	if (!crtc->config->ips_enabled)
+	if (!crtc_state->ips_enabled)
 		return;
 
 	/*
@@ -4878,8 +4829,8 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state)
 	 * This function is called from post_plane_update, which is run after
 	 * a vblank wait.
 	 */
+	WARN_ON(!(crtc_state->active_planes & ~BIT(PLANE_CURSOR)));
 
-	assert_plane_enabled(dev_priv, crtc->plane);
 	if (IS_BROADWELL(dev_priv)) {
 		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL,
@@ -4913,7 +4864,6 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state)
 	if (!crtc_state->ips_enabled)
 		return;
 
-	assert_plane_enabled(dev_priv, crtc->plane);
 	if (IS_BROADWELL(dev_priv)) {
 		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0));
@@ -4967,14 +4917,6 @@ intel_post_enable_primary(struct drm_crtc *crtc,
 	int pipe = intel_crtc->pipe;
 
 	/*
-	 * FIXME IPS should be fine as long as one plane is
-	 * enabled, but in practice it seems to have problems
-	 * when going from primary only to sprite only and vice
-	 * versa.
-	 */
-	hsw_enable_ips(new_crtc_state);
-
-	/*
 	 * Gen2 reports pipe underruns whenever all planes are disabled.
 	 * So don't enable underrun reporting before at least some planes
 	 * are enabled.
@@ -4989,34 +4931,6 @@ intel_post_enable_primary(struct drm_crtc *crtc,
 	intel_check_pch_fifo_underruns(dev_priv);
 }
 
-/* FIXME move all this to pre_plane_update() with proper state tracking */
-static void
-intel_pre_disable_primary(struct drm_crtc *crtc,
-			  const struct intel_crtc_state *old_crtc_state)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-
-	/*
-	 * Gen2 reports pipe underruns whenever all planes are disabled.
-	 * So diasble underrun reporting before all the planes get disabled.
-	 * FIXME: Need to fix the logic to work when we turn off all planes
-	 * but leave the pipe running.
-	 */
-	if (IS_GEN2(dev_priv))
-		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
-
-	/*
-	 * FIXME IPS should be fine as long as one plane is
-	 * enabled, but in practice it seems to have problems
-	 * when going from primary only to sprite only and vice
-	 * versa.
-	 */
-	hsw_disable_ips(old_crtc_state);
-}
-
 /* FIXME get rid of this and use pre_plane_update */
 static void
 intel_pre_disable_primary_noatomic(struct drm_crtc *crtc)
@@ -5026,7 +4940,14 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc)
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
 
-	intel_pre_disable_primary(crtc, to_intel_crtc_state(crtc->state));
+	/*
+	 * Gen2 reports pipe underruns whenever all planes are disabled.
+	 * So disable underrun reporting before all the planes get disabled.
+	 */
+	if (IS_GEN2(dev_priv))
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
+
+	hsw_disable_ips(to_intel_crtc_state(crtc->state));
 
 	/*
 	 * Vblank time updates from the shadow to live plane control register
@@ -5042,6 +4963,38 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc)
 		intel_wait_for_vblank(dev_priv, pipe);
 }
 
+static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_state,
+				       const struct intel_crtc_state *new_crtc_state)
+{
+	if (!old_crtc_state->ips_enabled)
+		return false;
+
+	if (needs_modeset(&new_crtc_state->base))
+		return true;
+
+	return !new_crtc_state->ips_enabled;
+}
+
+static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_state,
+				       const struct intel_crtc_state *new_crtc_state)
+{
+	if (!new_crtc_state->ips_enabled)
+		return false;
+
+	if (needs_modeset(&new_crtc_state->base))
+		return true;
+
+	/*
+	 * We can't read out IPS on broadwell, assume the worst and
+	 * forcibly enable IPS on the first fastset.
+	 */
+	if (new_crtc_state->update_pipe &&
+	    old_crtc_state->base.adjusted_mode.private_flags & I915_MODE_FLAG_INHERITED)
+		return true;
+
+	return !old_crtc_state->ips_enabled;
+}
+
 static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
@@ -5058,6 +5011,9 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
 	if (pipe_config->update_wm_post && pipe_config->base.active)
 		intel_update_watermarks(crtc);
 
+	if (hsw_post_update_enable_ips(old_crtc_state, pipe_config))
+		hsw_enable_ips(pipe_config);
+
 	if (old_pri_state) {
 		struct intel_plane_state *primary_state =
 			intel_atomic_get_new_plane_state(to_intel_atomic_state(old_state),
@@ -5088,6 +5044,9 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state,
 	struct intel_atomic_state *old_intel_state =
 		to_intel_atomic_state(old_state);
 
+	if (hsw_pre_update_disable_ips(old_crtc_state, pipe_config))
+		hsw_disable_ips(old_crtc_state);
+
 	if (old_pri_state) {
 		struct intel_plane_state *primary_state =
 			intel_atomic_get_new_plane_state(old_intel_state,
@@ -5096,10 +5055,13 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state,
 			to_intel_plane_state(old_pri_state);
 
 		intel_fbc_pre_update(crtc, pipe_config, primary_state);
-
-		if (old_primary_state->base.visible &&
+		/*
+		 * Gen2 reports pipe underruns whenever all planes are disabled.
+		 * So disable underrun reporting before all the planes get disabled.
+		 */
+		if (IS_GEN2(dev_priv) && old_primary_state->base.visible &&
 		    (modeset || !primary_state->base.visible))
-			intel_pre_disable_primary(&crtc->base, old_crtc_state);
+			intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
 	}
 
 	/*
@@ -5362,7 +5324,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config,
 
 	if (dev_priv->display.initial_watermarks != NULL)
 		dev_priv->display.initial_watermarks(old_intel_state, intel_crtc->config);
-	intel_enable_pipe(intel_crtc);
+	intel_enable_pipe(pipe_config);
 
 	if (intel_crtc->config->has_pch_encoder)
 		ironlake_pch_enable(pipe_config);
@@ -5481,7 +5443,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
 
 	/* XXX: Do the pipe assertions at the right place for BXT DSI. */
 	if (!transcoder_is_dsi(cpu_transcoder))
-		intel_enable_pipe(intel_crtc);
+		intel_enable_pipe(pipe_config);
 
 	if (intel_crtc->config->has_pch_encoder)
 		lpt_pch_enable(pipe_config);
@@ -5547,7 +5509,7 @@ static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state,
 	drm_crtc_vblank_off(crtc);
 	assert_vblank_disabled(crtc);
 
-	intel_disable_pipe(intel_crtc);
+	intel_disable_pipe(old_crtc_state);
 
 	ironlake_pfit_disable(intel_crtc, false);
 
@@ -5599,7 +5561,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state,
 
 	/* XXX: Do the pipe assertions at the right place for BXT DSI. */
 	if (!transcoder_is_dsi(cpu_transcoder))
-		intel_disable_pipe(intel_crtc);
+		intel_disable_pipe(old_crtc_state);
 
 	if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST))
 		intel_ddi_set_vc_payload_alloc(intel_crtc->config, false);
@@ -5777,7 +5739,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config,
 
 	dev_priv->display.initial_watermarks(old_intel_state,
 					     pipe_config);
-	intel_enable_pipe(intel_crtc);
+	intel_enable_pipe(pipe_config);
 
 	assert_vblank_disabled(crtc);
 	drm_crtc_vblank_on(crtc);
@@ -5836,7 +5798,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config,
 						     intel_crtc->config);
 	else
 		intel_update_watermarks(intel_crtc);
-	intel_enable_pipe(intel_crtc);
+	intel_enable_pipe(pipe_config);
 
 	assert_vblank_disabled(crtc);
 	drm_crtc_vblank_on(crtc);
@@ -5880,7 +5842,7 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
 	drm_crtc_vblank_off(crtc);
 	assert_vblank_disabled(crtc);
 
-	intel_disable_pipe(intel_crtc);
+	intel_disable_pipe(old_crtc_state);
 
 	i9xx_pfit_disable(intel_crtc);
 
@@ -5915,6 +5877,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	enum intel_display_power_domain domain;
+	struct intel_plane *plane;
 	u64 domains;
 	struct drm_atomic_state *state;
 	struct intel_crtc_state *crtc_state;
@@ -5923,11 +5886,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	if (!intel_crtc->active)
 		return;
 
-	if (crtc->primary->state->visible) {
-		intel_pre_disable_primary_noatomic(crtc);
+	for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) {
+		const struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.state);
 
-		intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary));
-		crtc->primary->state->visible = false;
+		if (plane_state->base.visible)
+			intel_plane_disable_noatomic(intel_crtc, plane);
 	}
 
 	state = drm_atomic_state_alloc(crtc->dev);
@@ -6228,18 +6192,20 @@ static int ironlake_fdi_compute_config(struct intel_crtc *intel_crtc,
 	return ret;
 }
 
-static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv,
-				     struct intel_crtc_state *pipe_config)
+bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state)
 {
-	if (pipe_config->ips_force_disable)
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	/* IPS only exists on ULT machines and is tied to pipe A. */
+	if (!hsw_crtc_supports_ips(crtc))
 		return false;
 
-	if (pipe_config->pipe_bpp > 24)
+	if (!i915_modparams.enable_ips)
 		return false;
 
-	/* HSW can handle pixel rate up to cdclk? */
-	if (IS_HASWELL(dev_priv))
-		return true;
+	if (crtc_state->pipe_bpp > 24)
+		return false;
 
 	/*
 	 * We compare against max which means we must take
@@ -6248,19 +6214,36 @@ static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv,
 	 *
 	 * Should measure whether using a lower cdclk w/o IPS
 	 */
-	return pipe_config->pixel_rate <=
-		dev_priv->max_cdclk_freq * 95 / 100;
+	if (IS_BROADWELL(dev_priv) &&
+	    crtc_state->pixel_rate > dev_priv->max_cdclk_freq * 95 / 100)
+		return false;
+
+	return true;
 }
 
-static void hsw_compute_ips_config(struct intel_crtc *crtc,
-				   struct intel_crtc_state *pipe_config)
+static bool hsw_compute_ips_config(struct intel_crtc_state *crtc_state)
 {
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv =
+		to_i915(crtc_state->base.crtc->dev);
+	struct intel_atomic_state *intel_state =
+		to_intel_atomic_state(crtc_state->base.state);
 
-	pipe_config->ips_enabled = i915_modparams.enable_ips &&
-		hsw_crtc_supports_ips(crtc) &&
-		pipe_config_supports_ips(dev_priv, pipe_config);
+	if (!hsw_crtc_state_ips_capable(crtc_state))
+		return false;
+
+	if (crtc_state->ips_force_disable)
+		return false;
+
+	/* IPS should be fine as long as at least one plane is enabled. */
+	if (!(crtc_state->active_planes & ~BIT(PLANE_CURSOR)))
+		return false;
+
+	/* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
+	if (IS_BROADWELL(dev_priv) &&
+	    crtc_state->pixel_rate > intel_state->cdclk.logical.cdclk * 95 / 100)
+		return false;
+
+	return true;
 }
 
 static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc)
@@ -6378,9 +6361,6 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc,
 
 	intel_crtc_compute_pixel_rate(pipe_config);
 
-	if (HAS_IPS(dev_priv))
-		hsw_compute_ips_config(crtc, pipe_config);
-
 	if (pipe_config->has_pch_encoder)
 		return ironlake_fdi_compute_config(crtc, pipe_config);
 
@@ -7437,15 +7417,16 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *plane = to_intel_plane(crtc->base.primary);
+	enum i9xx_plane_id i9xx_plane = plane->i9xx_plane;
+	enum pipe pipe = crtc->pipe;
 	u32 val, base, offset;
-	int pipe = crtc->pipe, plane = crtc->plane;
 	int fourcc, pixel_format;
 	unsigned int aligned_height;
 	struct drm_framebuffer *fb;
 	struct intel_framebuffer *intel_fb;
 
-	val = I915_READ(DSPCNTR(plane));
-	if (!(val & DISPLAY_PLANE_ENABLE))
+	if (!plane->get_hw_state(plane))
 		return;
 
 	intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
@@ -7458,6 +7439,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
 
 	fb->dev = dev;
 
+	val = I915_READ(DSPCNTR(i9xx_plane));
+
 	if (INTEL_GEN(dev_priv) >= 4) {
 		if (val & DISPPLANE_TILED) {
 			plane_config->tiling = I915_TILING_X;
@@ -7469,14 +7452,17 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
 	fourcc = i9xx_format_to_fourcc(pixel_format);
 	fb->format = drm_format_info(fourcc);
 
-	if (INTEL_GEN(dev_priv) >= 4) {
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
+		offset = I915_READ(DSPOFFSET(i9xx_plane));
+		base = I915_READ(DSPSURF(i9xx_plane)) & 0xfffff000;
+	} else if (INTEL_GEN(dev_priv) >= 4) {
 		if (plane_config->tiling)
-			offset = I915_READ(DSPTILEOFF(plane));
+			offset = I915_READ(DSPTILEOFF(i9xx_plane));
 		else
-			offset = I915_READ(DSPLINOFF(plane));
-		base = I915_READ(DSPSURF(plane)) & 0xfffff000;
+			offset = I915_READ(DSPLINOFF(i9xx_plane));
+		base = I915_READ(DSPSURF(i9xx_plane)) & 0xfffff000;
 	} else {
-		base = I915_READ(DSPADDR(plane));
+		base = I915_READ(DSPADDR(i9xx_plane));
 	}
 	plane_config->base = base;
 
@@ -7484,15 +7470,15 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
 	fb->width = ((val >> 16) & 0xfff) + 1;
 	fb->height = ((val >> 0) & 0xfff) + 1;
 
-	val = I915_READ(DSPSTRIDE(pipe));
+	val = I915_READ(DSPSTRIDE(i9xx_plane));
 	fb->pitches[0] = val & 0xffffffc0;
 
 	aligned_height = intel_fb_align_height(fb, 0, fb->height);
 
 	plane_config->size = fb->pitches[0] * aligned_height;
 
-	DRM_DEBUG_KMS("pipe/plane %c/%d with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n",
-		      pipe_name(pipe), plane, fb->width, fb->height,
+	DRM_DEBUG_KMS("%s/%s with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n",
+		      crtc->base.name, plane->base.name, fb->width, fb->height,
 		      fb->format->cpp[0] * 8, base, fb->pitches[0],
 		      plane_config->size);
 
@@ -8461,13 +8447,18 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *plane = to_intel_plane(crtc->base.primary);
+	enum plane_id plane_id = plane->id;
+	enum pipe pipe = crtc->pipe;
 	u32 val, base, offset, stride_mult, tiling, alpha;
-	int pipe = crtc->pipe;
 	int fourcc, pixel_format;
 	unsigned int aligned_height;
 	struct drm_framebuffer *fb;
 	struct intel_framebuffer *intel_fb;
 
+	if (!plane->get_hw_state(plane))
+		return;
+
 	intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
 	if (!intel_fb) {
 		DRM_DEBUG_KMS("failed to alloc fb\n");
@@ -8478,14 +8469,12 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 
 	fb->dev = dev;
 
-	val = I915_READ(PLANE_CTL(pipe, 0));
-	if (!(val & PLANE_CTL_ENABLE))
-		goto error;
+	val = I915_READ(PLANE_CTL(pipe, plane_id));
 
 	pixel_format = val & PLANE_CTL_FORMAT_MASK;
 
 	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
-		alpha = I915_READ(PLANE_COLOR_CTL(pipe, 0));
+		alpha = I915_READ(PLANE_COLOR_CTL(pipe, plane_id));
 		alpha &= PLANE_COLOR_ALPHA_MASK;
 	} else {
 		alpha = val & PLANE_CTL_ALPHA_MASK;
@@ -8521,16 +8510,16 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 		goto error;
 	}
 
-	base = I915_READ(PLANE_SURF(pipe, 0)) & 0xfffff000;
+	base = I915_READ(PLANE_SURF(pipe, plane_id)) & 0xfffff000;
 	plane_config->base = base;
 
-	offset = I915_READ(PLANE_OFFSET(pipe, 0));
+	offset = I915_READ(PLANE_OFFSET(pipe, plane_id));
 
-	val = I915_READ(PLANE_SIZE(pipe, 0));
+	val = I915_READ(PLANE_SIZE(pipe, plane_id));
 	fb->height = ((val >> 16) & 0xfff) + 1;
 	fb->width = ((val >> 0) & 0x1fff) + 1;
 
-	val = I915_READ(PLANE_STRIDE(pipe, 0));
+	val = I915_READ(PLANE_STRIDE(pipe, plane_id));
 	stride_mult = intel_fb_stride_alignment(fb, 0);
 	fb->pitches[0] = (val & 0x3ff) * stride_mult;
 
@@ -8538,8 +8527,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 
 	plane_config->size = fb->pitches[0] * aligned_height;
 
-	DRM_DEBUG_KMS("pipe %c with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n",
-		      pipe_name(pipe), fb->width, fb->height,
+	DRM_DEBUG_KMS("%s/%s with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n",
+		      crtc->base.name, plane->base.name, fb->width, fb->height,
 		      fb->format->cpp[0] * 8, base, fb->pitches[0],
 		      plane_config->size);
 
@@ -8574,74 +8563,6 @@ static void ironlake_get_pfit_config(struct intel_crtc *crtc,
 	}
 }
 
-static void
-ironlake_get_initial_plane_config(struct intel_crtc *crtc,
-				  struct intel_initial_plane_config *plane_config)
-{
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	u32 val, base, offset;
-	int pipe = crtc->pipe;
-	int fourcc, pixel_format;
-	unsigned int aligned_height;
-	struct drm_framebuffer *fb;
-	struct intel_framebuffer *intel_fb;
-
-	val = I915_READ(DSPCNTR(pipe));
-	if (!(val & DISPLAY_PLANE_ENABLE))
-		return;
-
-	intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
-	if (!intel_fb) {
-		DRM_DEBUG_KMS("failed to alloc fb\n");
-		return;
-	}
-
-	fb = &intel_fb->base;
-
-	fb->dev = dev;
-
-	if (INTEL_GEN(dev_priv) >= 4) {
-		if (val & DISPPLANE_TILED) {
-			plane_config->tiling = I915_TILING_X;
-			fb->modifier = I915_FORMAT_MOD_X_TILED;
-		}
-	}
-
-	pixel_format = val & DISPPLANE_PIXFORMAT_MASK;
-	fourcc = i9xx_format_to_fourcc(pixel_format);
-	fb->format = drm_format_info(fourcc);
-
-	base = I915_READ(DSPSURF(pipe)) & 0xfffff000;
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
-		offset = I915_READ(DSPOFFSET(pipe));
-	} else {
-		if (plane_config->tiling)
-			offset = I915_READ(DSPTILEOFF(pipe));
-		else
-			offset = I915_READ(DSPLINOFF(pipe));
-	}
-	plane_config->base = base;
-
-	val = I915_READ(PIPESRC(pipe));
-	fb->width = ((val >> 16) & 0xfff) + 1;
-	fb->height = ((val >> 0) & 0xfff) + 1;
-
-	val = I915_READ(DSPSTRIDE(pipe));
-	fb->pitches[0] = val & 0xffffffc0;
-
-	aligned_height = intel_fb_align_height(fb, 0, fb->height);
-
-	plane_config->size = fb->pitches[0] * aligned_height;
-
-	DRM_DEBUG_KMS("pipe %c with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n",
-		      pipe_name(pipe), fb->width, fb->height,
-		      fb->format->cpp[0] * 8, base, fb->pitches[0],
-		      plane_config->size);
-
-	plane_config->fb = intel_fb;
-}
-
 static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 				     struct intel_crtc_state *pipe_config)
 {
@@ -9275,6 +9196,19 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 			ironlake_get_pfit_config(crtc, pipe_config);
 	}
 
+	if (hsw_crtc_supports_ips(crtc)) {
+		if (IS_HASWELL(dev_priv))
+			pipe_config->ips_enabled = I915_READ(IPS_CTL) & IPS_ENABLE;
+		else {
+			/*
+			 * We cannot readout IPS state on broadwell, set to
+			 * We cannot read out the IPS state on Broadwell;
+			 * set it to true so the first commit brings it to
+			 * a defined state.
+			pipe_config->ips_enabled = true;
+		}
+	}
+
 	if (pipe_config->cpu_transcoder != TRANSCODER_EDP &&
 	    !transcoder_is_dsi(pipe_config->cpu_transcoder)) {
 		pipe_config->pixel_multiplier =
@@ -9500,6 +9434,23 @@ static void i845_disable_cursor(struct intel_plane *plane,
 	i845_update_cursor(plane, NULL, NULL);
 }
 
+static bool i845_cursor_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	bool ret;
+
+	power_domain = POWER_DOMAIN_PIPE(PIPE_A);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
 			   const struct intel_plane_state *plane_state)
 {
@@ -9693,6 +9644,28 @@ static void i9xx_disable_cursor(struct intel_plane *plane,
 	i9xx_update_cursor(plane, NULL, NULL);
 }
 
+static bool i9xx_cursor_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	/*
+	 * Not 100% correct for planes that can move between pipes,
+	 * but that's only the case for gen2-3 which don't have any
+	 * display power wells.
+	 */
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
 
 /* VESA 640x480x72Hz mode to set on the pipe */
 static const struct drm_display_mode load_detect_mode = {
@@ -10489,6 +10462,9 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 							 pipe_config);
 	}
 
+	if (HAS_IPS(dev_priv))
+		pipe_config->ips_enabled = hsw_compute_ips_config(pipe_config);
+
 	return ret;
 }
 
@@ -11655,6 +11631,18 @@ verify_crtc_state(struct drm_crtc *crtc,
 }
 
 static void
+intel_verify_planes(struct intel_atomic_state *state)
+{
+	struct intel_plane *plane;
+	const struct intel_plane_state *plane_state;
+	int i;
+
+	for_each_new_intel_plane_in_state(state, plane,
+					  plane_state, i)
+		assert_plane(plane, plane_state->base.visible);
+}
+
+static void
 verify_single_dpll_state(struct drm_i915_private *dev_priv,
 			 struct intel_shared_dpll *pll,
 			 struct drm_crtc *crtc,
@@ -12136,7 +12124,7 @@ static int intel_atomic_check(struct drm_device *dev,
 	if (ret)
 		return ret;
 
-	intel_fbc_choose_crtc(dev_priv, state);
+	intel_fbc_choose_crtc(dev_priv, intel_state);
 	return calc_watermark_data(state);
 }
 
@@ -12447,6 +12435,9 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 		intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state);
 	}
 
+	if (intel_state->modeset)
+		intel_verify_planes(intel_state);
+
 	if (intel_state->modeset && intel_can_enable_sagv(state))
 		intel_enable_sagv(dev_priv);
 
@@ -13267,9 +13258,9 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 	 * port is hooked to pipe B. Hence we want plane A feeding pipe B.
 	 */
 	if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) < 4)
-		primary->plane = (enum plane) !pipe;
+		primary->i9xx_plane = (enum i9xx_plane_id) !pipe;
 	else
-		primary->plane = (enum plane) pipe;
+		primary->i9xx_plane = (enum i9xx_plane_id) pipe;
 	primary->id = PLANE_PRIMARY;
 	primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe);
 	primary->check_plane = intel_check_primary_plane;
@@ -13281,6 +13272,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 
 		primary->update_plane = skl_update_plane;
 		primary->disable_plane = skl_disable_plane;
+		primary->get_hw_state = skl_plane_get_hw_state;
 	} else if (INTEL_GEN(dev_priv) >= 9) {
 		intel_primary_formats = skl_primary_formats;
 		num_formats = ARRAY_SIZE(skl_primary_formats);
@@ -13291,20 +13283,23 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 
 		primary->update_plane = skl_update_plane;
 		primary->disable_plane = skl_disable_plane;
+		primary->get_hw_state = skl_plane_get_hw_state;
 	} else if (INTEL_GEN(dev_priv) >= 4) {
 		intel_primary_formats = i965_primary_formats;
 		num_formats = ARRAY_SIZE(i965_primary_formats);
 		modifiers = i9xx_format_modifiers;
 
-		primary->update_plane = i9xx_update_primary_plane;
-		primary->disable_plane = i9xx_disable_primary_plane;
+		primary->update_plane = i9xx_update_plane;
+		primary->disable_plane = i9xx_disable_plane;
+		primary->get_hw_state = i9xx_plane_get_hw_state;
 	} else {
 		intel_primary_formats = i8xx_primary_formats;
 		num_formats = ARRAY_SIZE(i8xx_primary_formats);
 		modifiers = i9xx_format_modifiers;
 
-		primary->update_plane = i9xx_update_primary_plane;
-		primary->disable_plane = i9xx_disable_primary_plane;
+		primary->update_plane = i9xx_update_plane;
+		primary->disable_plane = i9xx_disable_plane;
+		primary->get_hw_state = i9xx_plane_get_hw_state;
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9)
@@ -13327,7 +13322,8 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 					       intel_primary_formats, num_formats,
 					       modifiers,
 					       DRM_PLANE_TYPE_PRIMARY,
-					       "plane %c", plane_name(primary->plane));
+					       "plane %c",
+					       plane_name(primary->i9xx_plane));
 	if (ret)
 		goto fail;
 
@@ -13387,17 +13383,19 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv,
 	cursor->can_scale = false;
 	cursor->max_downscale = 1;
 	cursor->pipe = pipe;
-	cursor->plane = pipe;
+	cursor->i9xx_plane = (enum i9xx_plane_id) pipe;
 	cursor->id = PLANE_CURSOR;
 	cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe);
 
 	if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
 		cursor->update_plane = i845_update_cursor;
 		cursor->disable_plane = i845_disable_cursor;
+		cursor->get_hw_state = i845_cursor_get_hw_state;
 		cursor->check_plane = i845_check_cursor;
 	} else {
 		cursor->update_plane = i9xx_update_cursor;
 		cursor->disable_plane = i9xx_disable_cursor;
+		cursor->get_hw_state = i9xx_cursor_get_hw_state;
 		cursor->check_plane = i9xx_check_cursor;
 	}
 
@@ -13513,14 +13511,13 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 		goto fail;
 
 	intel_crtc->pipe = pipe;
-	intel_crtc->plane = primary->plane;
 
 	/* initialize shared scalers */
 	intel_crtc_init_scalers(intel_crtc, crtc_state);
 
 	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
-	       dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL);
-	dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = intel_crtc;
+	       dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] != NULL);
+	dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] = intel_crtc;
 	dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = intel_crtc;
 
 	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
@@ -14200,7 +14197,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv)
 	} else if (HAS_DDI(dev_priv)) {
 		dev_priv->display.get_pipe_config = haswell_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
-			ironlake_get_initial_plane_config;
+			i9xx_get_initial_plane_config;
 		dev_priv->display.crtc_compute_clock =
 			haswell_crtc_compute_clock;
 		dev_priv->display.crtc_enable = haswell_crtc_enable;
@@ -14208,7 +14205,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv)
 	} else if (HAS_PCH_SPLIT(dev_priv)) {
 		dev_priv->display.get_pipe_config = ironlake_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
-			ironlake_get_initial_plane_config;
+			i9xx_get_initial_plane_config;
 		dev_priv->display.crtc_compute_clock =
 			ironlake_crtc_compute_clock;
 		dev_priv->display.crtc_enable = ironlake_crtc_enable;
@@ -14693,6 +14690,7 @@ int intel_modeset_init(struct drm_device *dev)
 
 void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
 {
+	struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
 	/* 640x480@60Hz, ~25175 kHz */
 	struct dpll clock = {
 		.m1 = 18,
@@ -14756,42 +14754,62 @@ void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
 
 	I915_WRITE(PIPECONF(pipe), PIPECONF_ENABLE | PIPECONF_PROGRESSIVE);
 	POSTING_READ(PIPECONF(pipe));
+
+	intel_wait_for_pipe_scanline_moving(crtc);
 }
 
 void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
 {
+	struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
+
 	DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n",
 		      pipe_name(pipe));
 
-	assert_plane_disabled(dev_priv, PLANE_A);
-	assert_plane_disabled(dev_priv, PLANE_B);
+	WARN_ON(I915_READ(DSPCNTR(PLANE_A)) & DISPLAY_PLANE_ENABLE);
+	WARN_ON(I915_READ(DSPCNTR(PLANE_B)) & DISPLAY_PLANE_ENABLE);
+	WARN_ON(I915_READ(DSPCNTR(PLANE_C)) & DISPLAY_PLANE_ENABLE);
+	WARN_ON(I915_READ(CURCNTR(PIPE_A)) & CURSOR_MODE);
+	WARN_ON(I915_READ(CURCNTR(PIPE_B)) & CURSOR_MODE);
 
 	I915_WRITE(PIPECONF(pipe), 0);
 	POSTING_READ(PIPECONF(pipe));
 
-	if (wait_for(pipe_dsl_stopped(dev_priv, pipe), 100))
-		DRM_ERROR("pipe %c off wait timed out\n", pipe_name(pipe));
+	intel_wait_for_pipe_scanline_stopped(crtc);
 
 	I915_WRITE(DPLL(pipe), DPLL_VGA_MODE_DIS);
 	POSTING_READ(DPLL(pipe));
 }
 
-static bool
-intel_check_plane_mapping(struct intel_crtc *crtc)
+static bool intel_plane_mapping_ok(struct intel_crtc *crtc,
+				   struct intel_plane *plane)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	u32 val;
+	enum i9xx_plane_id i9xx_plane = plane->i9xx_plane;
+	u32 val = I915_READ(DSPCNTR(i9xx_plane));
 
-	if (INTEL_INFO(dev_priv)->num_pipes == 1)
-		return true;
+	return (val & DISPLAY_PLANE_ENABLE) == 0 ||
+		(val & DISPPLANE_SEL_PIPE_MASK) == DISPPLANE_SEL_PIPE(crtc->pipe);
+}
 
-	val = I915_READ(DSPCNTR(!crtc->plane));
+static void
+intel_sanitize_plane_mapping(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc;
 
-	if ((val & DISPLAY_PLANE_ENABLE) &&
-	    (!!(val & DISPPLANE_SEL_PIPE_MASK) == crtc->pipe))
-		return false;
+	if (INTEL_GEN(dev_priv) >= 4)
+		return;
 
-	return true;
+	for_each_intel_crtc(&dev_priv->drm, crtc) {
+		struct intel_plane *plane =
+			to_intel_plane(crtc->base.primary);
+
+		if (intel_plane_mapping_ok(crtc, plane))
+			continue;
+
+		DRM_DEBUG_KMS("%s attached to the wrong pipe, disabling plane\n",
+			      plane->base.name);
+		intel_plane_disable_noatomic(crtc, plane);
+	}
 }
 
 static bool intel_crtc_has_encoders(struct intel_crtc *crtc)
@@ -14847,33 +14865,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
 
 		/* Disable everything but the primary plane */
 		for_each_intel_plane_on_crtc(dev, crtc, plane) {
-			if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
-				continue;
+			const struct intel_plane_state *plane_state =
+				to_intel_plane_state(plane->base.state);
 
-			trace_intel_disable_plane(&plane->base, crtc);
-			plane->disable_plane(plane, crtc);
+			if (plane_state->base.visible &&
+			    plane->base.type != DRM_PLANE_TYPE_PRIMARY)
+				intel_plane_disable_noatomic(crtc, plane);
 		}
 	}
 
-	/* We need to sanitize the plane -> pipe mapping first because this will
-	 * disable the crtc (and hence change the state) if it is wrong. Note
-	 * that gen4+ has a fixed plane -> pipe mapping.  */
-	if (INTEL_GEN(dev_priv) < 4 && !intel_check_plane_mapping(crtc)) {
-		bool plane;
-
-		DRM_DEBUG_KMS("[CRTC:%d:%s] wrong plane connection detected!\n",
-			      crtc->base.base.id, crtc->base.name);
-
-		/* Pipe has the wrong plane attached and the plane is active.
-		 * Temporarily change the plane mapping and disable everything
-		 * ...  */
-		plane = crtc->plane;
-		crtc->base.primary->state->visible = true;
-		crtc->plane = !plane;
-		intel_crtc_disable_noatomic(&crtc->base, ctx);
-		crtc->plane = plane;
-	}
-
 	/* Adjust the state of the output pipe according to whether we
 	 * have active connectors/encoders. */
 	if (crtc->active && !intel_crtc_has_encoders(crtc))
@@ -14947,8 +14947,6 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
 		connector->base.dpms = DRM_MODE_DPMS_OFF;
 		connector->base.encoder = NULL;
 	}
-	/* Enabled encoders without active connectors will be fixed in
-	 * the crtc fixup. */
 }
 
 void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv)
@@ -14978,24 +14976,21 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv)
 	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
 }
 
-static bool primary_get_hw_state(struct intel_plane *plane)
-{
-	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-
-	return I915_READ(DSPCNTR(plane->plane)) & DISPLAY_PLANE_ENABLE;
-}
-
 /* FIXME read out full plane state for all planes */
 static void readout_plane_state(struct intel_crtc *crtc)
 {
-	struct intel_plane *primary = to_intel_plane(crtc->base.primary);
-	bool visible;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct intel_plane *plane;
 
-	visible = crtc->active && primary_get_hw_state(primary);
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+		struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.state);
+		bool visible = plane->get_hw_state(plane);
 
-	intel_set_plane_visible(to_intel_crtc_state(crtc->base.state),
-				to_intel_plane_state(primary->base.state),
-				visible);
+		intel_set_plane_visible(crtc_state, plane_state, visible);
+	}
 }
 
 static void intel_modeset_readout_hw_state(struct drm_device *dev)
@@ -15203,6 +15198,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 	/* HW state is read out, now we need to sanitize this mess. */
 	get_encoder_power_domains(dev_priv);
 
+	intel_sanitize_plane_mapping(dev_priv);
+
 	for_each_intel_encoder(dev, encoder) {
 		intel_sanitize_encoder(encoder);
 	}
@@ -15317,10 +15314,7 @@ static void intel_hpd_poll_fini(struct drm_device *dev)
 	struct intel_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 
-	/* First disable polling... */
-	drm_kms_helper_poll_fini(dev);
-
-	/* Then kill the work that may have been queued by hpd. */
+	/* Kill all the work that may have been queued by hpd. */
 	drm_connector_list_iter_begin(dev, &conn_iter);
 	for_each_intel_connector_iter(connector, &conn_iter) {
 		if (connector->modeset_retry_work.func)
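
(Editorial illustration, not part of the patch.) The intel_display.c hunks above replace ad-hoc DSPCNTR/PLANE_CTL/CURCNTR probing with a per-plane ->get_hw_state() hook that the state assertions, the initial plane readout and the plane-mapping sanitization all consume. The standalone sketch below models just that calling pattern; every name in it is invented for the example, and a real hook additionally grabs the pipe power domain with intel_display_power_get_if_enabled() before touching registers, as the hunks show.

/* Illustration only -- not driver code. Models the ->get_hw_state() pattern:
 * generic code asks each plane whether the hardware says it is enabled,
 * instead of decoding plane-specific registers inline. */
#include <stdbool.h>
#include <stdio.h>

struct plane {
	const char *name;
	bool hw_enabled;	/* stands in for a DSPCNTR/PLANE_CTL readout */
	bool (*get_hw_state)(const struct plane *plane);
};

static bool example_get_hw_state(const struct plane *plane)
{
	/* A real hook would first take the pipe power domain reference and
	 * report "disabled" when the power well is off. */
	return plane->hw_enabled;
}

static void assert_plane(const struct plane *plane, bool state)
{
	bool cur_state = plane->get_hw_state(plane);

	if (cur_state != state)
		fprintf(stderr, "%s assertion failure (expected %s, current %s)\n",
			plane->name, state ? "on" : "off", cur_state ? "on" : "off");
}

int main(void)
{
	struct plane primary = { "plane 1A", true, example_get_hw_state };

	assert_plane(&primary, false);	/* reports a failure, as expected */
	return 0;
}
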
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index bbf2256..35c5299 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1643,7 +1643,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 		pipe_config->has_pch_encoder = true;
 
 	pipe_config->has_drrs = false;
-	if (port == PORT_A)
+	if (IS_G4X(dev_priv) || port == PORT_A)
 		pipe_config->has_audio = false;
 	else if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO)
 		pipe_config->has_audio = intel_dp->has_audio;
@@ -1677,6 +1677,10 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 						conn_state->scaling_mode);
 	}
 
+	if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+	    adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
+		return false;
+
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
 		return false;
 
@@ -4277,6 +4281,8 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp));
 	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base;
+	struct drm_connector_state *conn_state =
+		intel_dp->attached_connector->base.state;
 	u8 link_status[DP_LINK_STATUS_SIZE];
 
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
@@ -4286,10 +4292,16 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
 		return;
 	}
 
-	if (!intel_encoder->base.crtc)
+	if (!conn_state->crtc)
 		return;
 
-	if (!to_intel_crtc(intel_encoder->base.crtc)->active)
+	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex));
+
+	if (!conn_state->crtc->state->active)
+		return;
+
+	if (conn_state->commit &&
+	    !try_wait_for_completion(&conn_state->commit->hw_done))
 		return;
 
 	/*
@@ -4364,9 +4376,8 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
 			DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n");
 	}
 
-	drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, NULL);
 	intel_dp_check_link_status(intel_dp);
-	drm_modeset_unlock(&dev_priv->drm.mode_config.connection_mutex);
+
 	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
 		DRM_DEBUG_KMS("Link Training Compliance Test requested\n");
 		/* Send a Hotplug Uevent to userspace to start modeset */
@@ -4814,8 +4825,19 @@ intel_dp_detect(struct drm_connector *connector,
 		      connector->base.id, connector->name);
 
 	/* If full detect is not performed yet, do a full detect */
-	if (!intel_dp->detect_done)
+	if (!intel_dp->detect_done) {
+		struct drm_crtc *crtc;
+		int ret;
+
+		crtc = connector->state->crtc;
+		if (crtc) {
+			ret = drm_modeset_lock(&crtc->mutex, ctx);
+			if (ret)
+				return ret;
+		}
+
 		status = intel_dp_long_pulse(intel_dp->attached_connector);
+	}
 
 	intel_dp->detect_done = false;
 
@@ -5097,7 +5119,38 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 	}
 
 	if (!intel_dp->is_mst) {
-		if (!intel_dp_short_pulse(intel_dp)) {
+		struct drm_modeset_acquire_ctx ctx;
+		struct drm_connector *connector = &intel_dp->attached_connector->base;
+		struct drm_crtc *crtc;
+		int iret;
+		bool handled = false;
+
+		drm_modeset_acquire_init(&ctx, 0);
+retry:
+		iret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, &ctx);
+		if (iret)
+			goto err;
+
+		crtc = connector->state->crtc;
+		if (crtc) {
+			iret = drm_modeset_lock(&crtc->mutex, &ctx);
+			if (iret)
+				goto err;
+		}
+
+		handled = intel_dp_short_pulse(intel_dp);
+
+err:
+		if (iret == -EDEADLK) {
+			drm_modeset_backoff(&ctx);
+			goto retry;
+		}
+
+		drm_modeset_drop_locks(&ctx);
+		drm_modeset_acquire_fini(&ctx);
+		WARN(iret, "Acquiring modeset locks failed with %i\n", iret);
+
+		if (!handled) {
 			intel_dp->detect_done = false;
 			goto put_power;
 		}
@@ -5131,8 +5184,11 @@ static void
 intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
+	enum port port = dp_to_dig_port(intel_dp)->base.port;
 
-	intel_attach_force_audio_property(connector);
+	if (!IS_G4X(dev_priv) && port != PORT_A)
+		intel_attach_force_audio_property(connector);
+
 	intel_attach_broadcast_rgb_property(connector);
 
 	if (intel_dp_is_edp(intel_dp)) {
@@ -5306,6 +5362,12 @@ intel_dp_init_panel_power_sequencer(struct intel_dp *intel_dp)
 	 */
 	final->t8 = 1;
 	final->t9 = 1;
+
+	/*
+	 * HW has only a 100msec granularity for t11_t12 so round it up
+	 * accordingly.
+	 */
+	final->t11_t12 = roundup(final->t11_t12, 100 * 10);
 }
 
 static void
@@ -6034,7 +6096,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 	drm_connector_init(dev, connector, &intel_dp_connector_funcs, type);
 	drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs);
 
-	connector->interlace_allowed = true;
+	if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
+		connector->interlace_allowed = true;
 	connector->doublescan_allowed = 0;
 
 	intel_dp_init_connector_port_info(intel_dig_port);
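
(Editorial illustration, not part of the patch.) The panel power sequencer hunk above rounds t11_t12 up because the hardware field only has a 100 ms granularity. A small worked example of that arithmetic follows; it assumes, based on the 100 * 10 factor, that these delays are kept in units of 100 us, so 1000 units correspond to 100 ms.

/* Worked example only -- mirrors roundup(t11_t12, 100 * 10) from the patch.
 * Assumption: delays are stored in 100 us units, so 1000 units == 100 ms. */
#include <stdio.h>

#define ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned int t11_t12 = 5100;			/* 510.0 ms in 100 us units */

	t11_t12 = ROUNDUP(t11_t12, 100 * 10);		/* -> 6000, i.e. 600 ms */
	printf("t11_t12 rounded up to %u units (%u ms)\n", t11_t12, t11_t12 / 10);
	return 0;
}
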
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index bf8b057..30f791f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -48,8 +48,9 @@
  * having timed out, since the timeout could be due to preemption or similar and
  * we've never had a chance to check the condition before the timeout.
  */
-#define _wait_for(COND, US, W) ({ \
+#define _wait_for(COND, US, Wmin, Wmax) ({ \
 	unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1;	\
+	long wait__ = (Wmin); /* recommended min for usleep is 10 us */	\
 	int ret__;							\
 	might_sleep();							\
 	for (;;) {							\
@@ -62,12 +63,14 @@
 			ret__ = -ETIMEDOUT;				\
 			break;						\
 		}							\
-		usleep_range((W), (W) * 2);				\
+		usleep_range(wait__, wait__ * 2);			\
+		if (wait__ < (Wmax))					\
+			wait__ <<= 1;					\
 	}								\
 	ret__;								\
 })
 
-#define wait_for(COND, MS)	  	_wait_for((COND), (MS) * 1000, 1000)
+#define wait_for(COND, MS)	_wait_for((COND), (MS) * 1000, 10, 1000)
 
 /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
 #if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
@@ -116,7 +119,7 @@
 	int ret__; \
 	BUILD_BUG_ON(!__builtin_constant_p(US)); \
 	if ((US) > 10) \
-		ret__ = _wait_for((COND), (US), 10); \
+		ret__ = _wait_for((COND), (US), 10, 10); \
 	else \
 		ret__ = _wait_for_atomic((COND), (US), 0); \
 	ret__; \
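
(Editorial illustration, not part of the patch.) The _wait_for() rework above replaces the fixed per-iteration sleep with one that starts at Wmin and doubles while it is still below Wmax, so long waits back off quickly without penalizing short ones. The snippet below just prints the resulting usleep_range() bounds for the wait_for() defaults (Wmin=10, Wmax=1000); it is a simulation, not the kernel macro.

/* Simulation only: the sleep periods _wait_for() now uses with Wmin=10 and
 * Wmax=1000. The period doubles while it is still below Wmax, so it settles
 * at 1280 us here rather than exactly 1000. */
#include <stdio.h>

int main(void)
{
	long wait = 10;		/* Wmin */
	const long wmax = 1000;	/* Wmax */
	int i;

	for (i = 0; i < 12; i++) {	/* first dozen polls of a long wait */
		printf("poll %2d: usleep_range(%ld, %ld)\n", i, wait, wait * 2);
		if (wait < wmax)
			wait <<= 1;	/* 10, 20, 40, ..., 640, 1280, 1280, ... */
	}
	return 0;
}
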
@@ -799,7 +802,6 @@ struct intel_crtc_state {
 struct intel_crtc {
 	struct drm_crtc base;
 	enum pipe pipe;
-	enum plane plane;
 	/*
 	 * Whether the crtc and the connected output pipeline is active. Implies
 	 * that crtc->enabled is set, i.e. the current mode configuration has
@@ -844,7 +846,7 @@ struct intel_crtc {
 
 struct intel_plane {
 	struct drm_plane base;
-	u8 plane;
+	enum i9xx_plane_id i9xx_plane;
 	enum plane_id id;
 	enum pipe pipe;
 	bool can_scale;
@@ -866,6 +868,7 @@ struct intel_plane {
 			     const struct intel_plane_state *plane_state);
 	void (*disable_plane)(struct intel_plane *plane,
 			      struct intel_crtc *crtc);
+	bool (*get_hw_state)(struct intel_plane *plane);
 	int (*check_plane)(struct intel_plane *plane,
 			   struct intel_crtc_state *crtc_state,
 			   struct intel_plane_state *state);
@@ -1129,7 +1132,7 @@ intel_get_crtc_for_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
 }
 
 static inline struct intel_crtc *
-intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum plane plane)
+intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum i9xx_plane_id plane)
 {
 	return dev_priv->plane_to_crtc_mapping[plane];
 }
@@ -1285,8 +1288,6 @@ intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state);
 void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state);
 void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp);
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
-bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
-				 struct intel_crtc *intel_crtc);
 void intel_ddi_get_config(struct intel_encoder *encoder,
 			  struct intel_crtc_state *pipe_config);
 
@@ -1485,6 +1486,7 @@ bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, int target_clock,
 int chv_calc_dpll_params(int refclk, struct dpll *pll_clock);
 
 bool intel_crtc_active(struct intel_crtc *crtc);
+bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state);
 void hsw_enable_ips(const struct intel_crtc_state *crtc_state);
 void hsw_disable_ips(const struct intel_crtc_state *crtc_state);
 enum intel_display_power_domain intel_port_to_power_domain(enum port port);
@@ -1651,7 +1653,7 @@ static inline void intel_fbdev_restore_mode(struct drm_device *dev)
 
 /* intel_fbc.c */
 void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
-			   struct drm_atomic_state *state);
+			   struct intel_atomic_state *state);
 bool intel_fbc_is_active(struct drm_i915_private *dev_priv);
 void intel_fbc_pre_update(struct intel_crtc *crtc,
 			  struct intel_crtc_state *crtc_state,
@@ -1906,15 +1908,10 @@ bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
 				 const struct skl_ddb_entry *ddb,
 				 int ignore);
 bool ilk_disable_lp_wm(struct drm_device *dev);
-int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
 				  struct intel_crtc_state *cstate);
 void intel_init_ipc(struct drm_i915_private *dev_priv);
 void intel_enable_ipc(struct drm_i915_private *dev_priv);
-static inline int intel_rc6_enabled(void)
-{
-	return i915_modparams.enable_rc6;
-}
 
 /* intel_sdvo.c */
 bool intel_sdvo_init(struct drm_i915_private *dev_priv,
@@ -1934,6 +1931,7 @@ void skl_update_plane(struct intel_plane *plane,
 		      const struct intel_crtc_state *crtc_state,
 		      const struct intel_plane_state *plane_state);
 void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc);
+bool skl_plane_get_hw_state(struct intel_plane *plane);
 
 /* intel_tv.c */
 void intel_tv_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 1b60df3..f67d321 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -1670,7 +1670,7 @@ static int intel_dsi_get_panel_orientation(struct intel_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	int orientation = DRM_MODE_PANEL_ORIENTATION_NORMAL;
-	enum plane plane;
+	enum i9xx_plane_id plane;
 	u32 val;
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 9897c7f..86d4c85 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -37,8 +37,6 @@
  * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
  */
 #define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
-/* Same as Haswell, but 72064 bytes now. */
-#define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)
 
 #define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
 #define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
@@ -164,9 +162,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 		case 9:
 			return GEN9_LR_CONTEXT_RENDER_SIZE;
 		case 8:
-			return i915_modparams.enable_execlists ?
-			       GEN8_LR_CONTEXT_RENDER_SIZE :
-			       GEN8_CXT_TOTAL_SIZE;
+			return GEN8_LR_CONTEXT_RENDER_SIZE;
 		case 7:
 			if (IS_HASWELL(dev_priv))
 				return HSW_CXT_TOTAL_SIZE;
@@ -209,6 +205,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
 	class_info = &intel_engine_classes[info->class];
 
+	if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS))
+		return -EINVAL;
+
+	if (GEM_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
+		return -EINVAL;
+
+	if (GEM_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
+		return -EINVAL;
+
 	GEM_BUG_ON(dev_priv->engine[id]);
 	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
 	if (!engine)
@@ -236,8 +241,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
 
+	spin_lock_init(&engine->stats.lock);
+
 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
 
+	dev_priv->engine_class[info->class][info->instance] = engine;
 	dev_priv->engine[id] = engine;
 	return 0;
 }
@@ -316,7 +324,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 			&intel_engine_classes[engine->class];
 		int (*init)(struct intel_engine_cs *engine);
 
-		if (i915_modparams.enable_execlists)
+		if (HAS_EXECLISTS(dev_priv))
 			init = class_info->init_execlists;
 		else
 			init = class_info->init_legacy;
@@ -366,18 +374,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 		if (HAS_VEBOX(dev_priv))
 			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
 	}
-	if (dev_priv->semaphore) {
-		struct page *page = i915_vma_first_page(dev_priv->semaphore);
-		void *semaphores;
-
-		/* Semaphores are in noncoherent memory, flush to be safe */
-		semaphores = kmap_atomic(page);
-		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
-		       0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
-		drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
-				       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
-		kunmap_atomic(semaphores);
-	}
 
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
@@ -1071,6 +1067,15 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 
+	/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
+	if (IS_GEN9_LP(dev_priv)) {
+		u32 val = I915_READ(GEN8_L3SQCREG1);
+
+		val &= ~L3_PRIO_CREDITS_MASK;
+		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
+		I915_WRITE(GEN8_L3SQCREG1, val);
+	}
+
 	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
 	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
 				    GEN8_LQSC_FLUSH_COHERENT_LINES));
@@ -1188,7 +1193,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
 static int bxt_init_workarounds(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	u32 val;
 	int ret;
 
 	ret = gen9_init_workarounds(engine);
@@ -1203,12 +1207,6 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
 	I915_WRITE(FF_SLICE_CS_CHICKEN2,
 		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
 
-	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
-	val = I915_READ(GEN8_L3SQCREG1);
-	val &= ~L3_PRIO_CREDITS_MASK;
-	val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
-	I915_WRITE(GEN8_L3SQCREG1, val);
-
 	/* WaToEnableHwFixForPushConstHWBug:bxt */
 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
@@ -1729,6 +1727,15 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m)
 			   I915_READ(RING_MI_MODE(engine->mmio_base)),
 			   I915_READ(RING_MI_MODE(engine->mmio_base)) & (MODE_IDLE) ? " [idle]" : "");
 	}
+	if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
+		drm_printf(m, "\tSYNC_0: 0x%08x\n",
+			   I915_READ(RING_SYNC_0(engine->mmio_base)));
+		drm_printf(m, "\tSYNC_1: 0x%08x\n",
+			   I915_READ(RING_SYNC_1(engine->mmio_base)));
+		if (HAS_VEBOX(dev_priv))
+			drm_printf(m, "\tSYNC_2: 0x%08x\n",
+				   I915_READ(RING_SYNC_2(engine->mmio_base)));
+	}
 
 	rcu_read_unlock();
 
@@ -1739,7 +1746,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m)
 	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
 		   upper_32_bits(addr), lower_32_bits(addr));
 
-	if (i915_modparams.enable_execlists) {
+	if (HAS_EXECLISTS(dev_priv)) {
 		const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
 		u32 ptr, read, write;
 		unsigned int idx;
@@ -1823,6 +1830,114 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m)
 	drm_printf(m, "\n");
 }
 
+static u8 user_class_map[] = {
+	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
+	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
+	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
+	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
+};
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
+{
+	if (class >= ARRAY_SIZE(user_class_map))
+		return NULL;
+
+	class = user_class_map[class];
+
+	GEM_BUG_ON(class > MAX_ENGINE_CLASS);
+
+	if (instance > MAX_ENGINE_INSTANCE)
+		return NULL;
+
+	return i915->engine_class[class][instance];
+}
+
+/**
+ * intel_enable_engine_stats() - Enable engine busy tracking on engine
+ * @engine: engine to enable stats collection
+ *
+ * Start collecting the engine busyness data for @engine.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int intel_enable_engine_stats(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (!intel_engine_supports_stats(engine))
+		return -ENODEV;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+	if (engine->stats.enabled == ~0)
+		goto busy;
+	if (engine->stats.enabled++ == 0)
+		engine->stats.enabled_at = ktime_get();
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+
+	return 0;
+
+busy:
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+
+	return -EBUSY;
+}
+
+static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
+{
+	ktime_t total = engine->stats.total;
+
+	/*
+	 * If the engine is executing something at the moment,
+	 * add it to the total.
+	 */
+	if (engine->stats.active)
+		total = ktime_add(total,
+				  ktime_sub(ktime_get(), engine->stats.start));
+
+	return total;
+}
+
+/**
+ * intel_engine_get_busy_time() - Return current accumulated engine busyness
+ * @engine: engine to report on
+ *
+ * Returns accumulated time @engine was busy since engine stats were enabled.
+ */
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
+{
+	ktime_t total;
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+	total = __intel_engine_get_busy_time(engine);
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+
+	return total;
+}
+
+/**
+ * intel_disable_engine_stats() - Disable engine busy tracking on engine
+ * @engine: engine to disable stats collection
+ *
+ * Stops collecting the engine busyness data for @engine.
+ */
+void intel_disable_engine_stats(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (!intel_engine_supports_stats(engine))
+		return;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+	WARN_ON_ONCE(engine->stats.enabled == 0);
+	if (--engine->stats.enabled == 0) {
+		engine->stats.total = __intel_engine_get_busy_time(engine);
+		engine->stats.active = 0;
+	}
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_engine.c"
 #endif
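
[Editor's note] The kernel-doc above spells out the new busyness tracking: enabling stats takes a reference, the context-in/out hooks added in intel_lrc.c accumulate run time into stats.total, and readers fold in the slice still in flight when stats.active is set. Below is a minimal userspace sketch of the same accounting pattern; the struct, the pthread mutex (standing in for the irq-safe spinlock), the timing source and the helper names are illustrative, not the driver's API.

/*
 * Userspace model of the engine busyness accounting added above: an enable
 * refcount, a running total, and an active counter whose start timestamp
 * lets readers include the slice still in flight.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct engine_stats {
	pthread_mutex_t lock;
	unsigned int enabled;	/* reference count of stats users */
	unsigned int active;	/* contexts currently on the engine */
	uint64_t start_ns;	/* when the engine last became busy */
	uint64_t total_ns;	/* accumulated busy time */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Mirrors the role of intel_enable_engine_stats(): take a reference */
static void stats_enable(struct engine_stats *st)
{
	pthread_mutex_lock(&st->lock);
	st->enabled++;
	pthread_mutex_unlock(&st->lock);
}

/* Called when a context starts executing on / leaves the engine */
static void context_in(struct engine_stats *st)
{
	pthread_mutex_lock(&st->lock);
	if (st->active++ == 0)
		st->start_ns = now_ns();
	pthread_mutex_unlock(&st->lock);
}

static void context_out(struct engine_stats *st)
{
	pthread_mutex_lock(&st->lock);
	if (--st->active == 0)
		st->total_ns += now_ns() - st->start_ns;
	pthread_mutex_unlock(&st->lock);
}

/* Mirrors intel_engine_get_busy_time(): fold in the in-flight slice */
static uint64_t get_busy_ns(struct engine_stats *st)
{
	uint64_t total;

	pthread_mutex_lock(&st->lock);
	total = st->total_ns;
	if (st->active)
		total += now_ns() - st->start_ns;
	pthread_mutex_unlock(&st->lock);
	return total;
}

int main(void)
{
	struct engine_stats st = { .lock = PTHREAD_MUTEX_INITIALIZER };

	stats_enable(&st);
	context_in(&st);
	/* ... work executes on the engine here ... */
	context_out(&st);
	printf("busy: %llu ns\n", (unsigned long long)get_busy_ns(&st));
	return 0;
}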
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 1a0f5e0..4aefc65 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -151,7 +151,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv)
 
 		/* Set it up... */
 		fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
-		fbc_ctl2 |= FBC_CTL_PLANE(params->crtc.plane);
+		fbc_ctl2 |= FBC_CTL_PLANE(params->crtc.i9xx_plane);
 		I915_WRITE(FBC_CONTROL2, fbc_ctl2);
 		I915_WRITE(FBC_FENCE_OFF, params->crtc.fence_y_offset);
 	}
@@ -177,7 +177,7 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv)
 	struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
 	u32 dpfc_ctl;
 
-	dpfc_ctl = DPFC_CTL_PLANE(params->crtc.plane) | DPFC_SR_EN;
+	dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane) | DPFC_SR_EN;
 	if (params->fb.format->cpp[0] == 2)
 		dpfc_ctl |= DPFC_CTL_LIMIT_2X;
 	else
@@ -224,7 +224,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
 	u32 dpfc_ctl;
 	int threshold = dev_priv->fbc.threshold;
 
-	dpfc_ctl = DPFC_CTL_PLANE(params->crtc.plane);
+	dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane);
 	if (params->fb.format->cpp[0] == 2)
 		threshold++;
 
@@ -306,7 +306,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
 
 	dpfc_ctl = 0;
 	if (IS_IVYBRIDGE(dev_priv))
-		dpfc_ctl |= IVB_DPFC_CTL_PLANE(params->crtc.plane);
+		dpfc_ctl |= IVB_DPFC_CTL_PLANE(params->crtc.i9xx_plane);
 
 	if (params->fb.format->cpp[0] == 2)
 		threshold++;
@@ -890,7 +890,7 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc,
 	params->vma = cache->vma;
 
 	params->crtc.pipe = crtc->pipe;
-	params->crtc.plane = crtc->plane;
+	params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane;
 	params->crtc.fence_y_offset = get_crtc_fence_y_offset(fbc);
 
 	params->fb.format = cache->fb.format;
@@ -1054,11 +1054,11 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv,
  * enable FBC for the chosen CRTC. If it does, it will set dev_priv->fbc.crtc.
  */
 void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
-			   struct drm_atomic_state *state)
+			   struct intel_atomic_state *state)
 {
 	struct intel_fbc *fbc = &dev_priv->fbc;
-	struct drm_plane *plane;
-	struct drm_plane_state *plane_state;
+	struct intel_plane *plane;
+	struct intel_plane_state *plane_state;
 	bool crtc_chosen = false;
 	int i;
 
@@ -1066,7 +1066,7 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
 
 	/* Does this atomic commit involve the CRTC currently tied to FBC? */
 	if (fbc->crtc &&
-	    !drm_atomic_get_existing_crtc_state(state, &fbc->crtc->base))
+	    !intel_atomic_get_new_crtc_state(state, fbc->crtc))
 		goto out;
 
 	if (!intel_fbc_can_enable(dev_priv))
@@ -1076,25 +1076,22 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
 	 * plane. We could go for fancier schemes such as checking the plane
 	 * size, but this would just affect the few platforms that don't tie FBC
 	 * to pipe or plane A. */
-	for_each_new_plane_in_state(state, plane, plane_state, i) {
-		struct intel_plane_state *intel_plane_state =
-			to_intel_plane_state(plane_state);
-		struct intel_crtc_state *intel_crtc_state;
-		struct intel_crtc *crtc = to_intel_crtc(plane_state->crtc);
+	for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
+		struct intel_crtc_state *crtc_state;
+		struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc);
 
-		if (!intel_plane_state->base.visible)
+		if (!plane_state->base.visible)
 			continue;
 
 		if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A)
 			continue;
 
-		if (fbc_on_plane_a_only(dev_priv) && crtc->plane != PLANE_A)
+		if (fbc_on_plane_a_only(dev_priv) && plane->i9xx_plane != PLANE_A)
 			continue;
 
-		intel_crtc_state = to_intel_crtc_state(
-			drm_atomic_get_existing_crtc_state(state, &crtc->base));
+		crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
 
-		intel_crtc_state->enable_fbc = true;
+		crtc_state->enable_fbc = true;
 		crtc_chosen = true;
 		break;
 	}
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index 823d0c2..d08e760 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -231,8 +231,7 @@ int intel_guc_sample_forcewake(struct intel_guc *guc)
 
 	action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE;
 	/* WaRsDisableCoarsePowerGating:skl,bxt */
-	if (!intel_rc6_enabled() ||
-	    NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
+	if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
 		action[1] = 0;
 	else
 		/* bit 0 and 1 are for Render and Media domain separately */
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 75c4cfe..5985672 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -30,8 +30,8 @@
 #include "intel_guc_fwif.h"
 #include "intel_guc_ct.h"
 #include "intel_guc_log.h"
+#include "intel_guc_reg.h"
 #include "intel_uc_fw.h"
-#include "i915_guc_reg.h"
 #include "i915_vma.h"
 
 struct guc_preempt_work {
diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c
index 69ba015..89862fa 100644
--- a/drivers/gpu/drm/i915/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/intel_guc_fw.c
@@ -30,14 +30,14 @@
 #include "intel_guc_fw.h"
 #include "i915_drv.h"
 
-#define SKL_FW_MAJOR 6
-#define SKL_FW_MINOR 1
+#define SKL_FW_MAJOR 9
+#define SKL_FW_MINOR 33
 
-#define BXT_FW_MAJOR 8
-#define BXT_FW_MINOR 7
+#define BXT_FW_MAJOR 9
+#define BXT_FW_MINOR 29
 
 #define KBL_FW_MAJOR 9
-#define KBL_FW_MINOR 14
+#define KBL_FW_MINOR 39
 
 #define GLK_FW_MAJOR 10
 #define GLK_FW_MINOR 56
@@ -130,14 +130,14 @@ static int guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma)
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct intel_uc_fw *guc_fw = &guc->fw;
 	struct sg_table *sg = vma->pages;
-	u32 rsa[UOS_RSA_SCRATCH_MAX_COUNT];
+	u32 rsa[UOS_RSA_SCRATCH_COUNT];
 	int i;
 
 	if (sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa),
 			       guc_fw->rsa_offset) != sizeof(rsa))
 		return -EINVAL;
 
-	for (i = 0; i < UOS_RSA_SCRATCH_MAX_COUNT; i++)
+	for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
 		I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/intel_guc_reg.h
similarity index 97%
rename from drivers/gpu/drm/i915/i915_guc_reg.h
rename to drivers/gpu/drm/i915/intel_guc_reg.h
index bc1ae7d..19a9247 100644
--- a/drivers/gpu/drm/i915/i915_guc_reg.h
+++ b/drivers/gpu/drm/i915/intel_guc_reg.h
@@ -21,8 +21,8 @@
  * IN THE SOFTWARE.
  *
  */
-#ifndef _I915_GUC_REG_H_
-#define _I915_GUC_REG_H_
+#ifndef _INTEL_GUC_REG_H_
+#define _INTEL_GUC_REG_H_
 
 /* Definitions of GuC H/W registers, bits, etc */
 
@@ -52,7 +52,8 @@
 #define SOFT_SCRATCH_COUNT		16
 
 #define UOS_RSA_SCRATCH(i)		_MMIO(0xc200 + (i) * 4)
-#define   UOS_RSA_SCRATCH_MAX_COUNT	  64
+#define UOS_RSA_SCRATCH_COUNT		64
+
 #define DMA_ADDR_0_LOW			_MMIO(0xc300)
 #define DMA_ADDR_0_HIGH			_MMIO(0xc304)
 #define DMA_ADDR_1_LOW			_MMIO(0xc308)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index cbf5a96..912ff14 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -718,12 +718,9 @@ static void guc_submit(struct intel_engine_cs *engine)
 static void port_assign(struct execlist_port *port,
 			struct drm_i915_gem_request *rq)
 {
-	GEM_BUG_ON(rq == port_request(port));
+	GEM_BUG_ON(port_isset(port));
 
-	if (port_isset(port))
-		i915_gem_request_put(port_request(port));
-
-	port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+	port_set(port, i915_gem_request_get(rq));
 }
 
 static void guc_dequeue(struct intel_engine_cs *engine)
@@ -743,23 +740,26 @@ static void guc_dequeue(struct intel_engine_cs *engine)
 	if (!rb)
 		goto unlock;
 
-	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && port_isset(port)) {
-		struct guc_preempt_work *preempt_work =
-			&engine->i915->guc.preempt_work[engine->id];
+	if (port_isset(port)) {
+		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
+			struct guc_preempt_work *preempt_work =
+				&engine->i915->guc.preempt_work[engine->id];
 
-		if (rb_entry(rb, struct i915_priolist, node)->priority >
-		    max(port_request(port)->priotree.priority, 0)) {
-			execlists_set_active(execlists,
-					     EXECLISTS_ACTIVE_PREEMPT);
-			queue_work(engine->i915->guc.preempt_wq,
-				   &preempt_work->work);
-			goto unlock;
-		} else if (port_isset(last_port)) {
-			goto unlock;
+			if (rb_entry(rb, struct i915_priolist, node)->priority >
+			    max(port_request(port)->priotree.priority, 0)) {
+				execlists_set_active(execlists,
+						     EXECLISTS_ACTIVE_PREEMPT);
+				queue_work(engine->i915->guc.preempt_wq,
+					   &preempt_work->work);
+				goto unlock;
+			}
 		}
 
 		port++;
+		if (port_isset(port))
+			goto unlock;
 	}
+	GEM_BUG_ON(port_isset(port));
 
 	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
@@ -1453,6 +1453,8 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 		execlists->tasklet.func = guc_submission_tasklet;
 		engine->park = guc_submission_park;
 		engine->unpark = guc_submission_unpark;
+
+		engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h
index e901192..021fe85 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/intel_guc_submission.h
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef _I915_GUC_SUBMISSION_H_
-#define _I915_GUC_SUBMISSION_H_
+#ifndef _INTEL_GUC_SUBMISSION_H_
+#define _INTEL_GUC_SUBMISSION_H_
 
 #include <linux/spinlock.h>
 
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index b4a7f31..126f7c7 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -95,11 +95,6 @@ int intel_gvt_init(struct drm_i915_private *dev_priv)
 		return 0;
 	}
 
-	if (!i915_modparams.enable_execlists) {
-		DRM_ERROR("i915 GVT-g loading failed due to disabled execlists mode\n");
-		return -EIO;
-	}
-
 	if (i915_modparams.enable_guc_submission) {
 		DRM_ERROR("i915 GVT-g loading failed due to Graphics virtualization is not yet supported with GuC submission\n");
 		return -EIO;
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 12ac270..95bbb5a 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -27,13 +27,9 @@
 static bool
 ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
 {
-	if (INTEL_GEN(engine->i915) >= 8) {
-		return (ipehr >> 23) == 0x1c;
-	} else {
-		ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
-		return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
-				 MI_SEMAPHORE_REGISTER);
-	}
+	ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
+	return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
+			 MI_SEMAPHORE_REGISTER);
 }
 
 static struct intel_engine_cs *
@@ -41,31 +37,20 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
 				 u64 offset)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
 	struct intel_engine_cs *signaller;
 	enum intel_engine_id id;
 
-	if (INTEL_GEN(dev_priv) >= 8) {
-		for_each_engine(signaller, dev_priv, id) {
-			if (engine == signaller)
-				continue;
+	for_each_engine(signaller, dev_priv, id) {
+		if (engine == signaller)
+			continue;
 
-			if (offset == signaller->semaphore.signal_ggtt[engine->hw_id])
-				return signaller;
-		}
-	} else {
-		u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
-
-		for_each_engine(signaller, dev_priv, id) {
-			if(engine == signaller)
-				continue;
-
-			if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
-				return signaller;
-		}
+		if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
+			return signaller;
 	}
 
-	DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n",
-			 engine->name, ipehr, offset);
+	DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
+			 engine->name, ipehr);
 
 	return ERR_PTR(-ENODEV);
 }
@@ -135,11 +120,6 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
 		return NULL;
 
 	*seqno = ioread32(vaddr + head + 4) + 1;
-	if (INTEL_GEN(dev_priv) >= 8) {
-		offset = ioread32(vaddr + head + 12);
-		offset <<= 32;
-		offset |= ioread32(vaddr + head + 8);
-	}
 	return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
 }
 
@@ -273,7 +253,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 		return ENGINE_WAIT_KICK;
 	}
 
-	if (INTEL_GEN(dev_priv) >= 6 && tmp & RING_WAIT_SEMAPHORE) {
+	if (IS_GEN(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
 		switch (semaphore_passed(engine)) {
 		default:
 			return ENGINE_DEAD;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index be6c39a..2a8160f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,9 +154,7 @@
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
-	 (GEN8_CTX_STATUS_ACTIVE_IDLE | \
-	  GEN8_CTX_STATUS_PREEMPTED | \
-	  GEN8_CTX_STATUS_ELEMENT_SWITCH)
+	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 
 #define CTX_LRI_HEADER_0		0x01
 #define CTX_CONTEXT_CONTROL		0x02
@@ -221,37 +219,6 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_ring *ring);
 
 /**
- * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
- * @dev_priv: i915 device private
- * @enable_execlists: value of i915.enable_execlists module parameter.
- *
- * Only certain platforms support Execlists (the prerequisites being
- * support for Logical Ring Contexts and Aliasing PPGTT or better).
- *
- * Return: 1 if Execlists is supported and has to be enabled.
- */
-int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists)
-{
-	/* On platforms with execlist available, vGPU will only
-	 * support execlist mode, no ring buffer mode.
-	 */
-	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && intel_vgpu_active(dev_priv))
-		return 1;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		return 1;
-
-	if (enable_execlists == 0)
-		return 0;
-
-	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) &&
-	    USES_PPGTT(dev_priv))
-		return 1;
-
-	return 0;
-}
-
-/**
  * intel_lr_context_descriptor_update() - calculate & cache the descriptor
  * 					  descriptor for a pinned context
  * @ctx: Context to work on
@@ -412,6 +379,20 @@ execlists_context_status_change(struct drm_i915_gem_request *rq,
 				   status, rq);
 }
 
+static inline void
+execlists_context_schedule_in(struct drm_i915_gem_request *rq)
+{
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+	intel_engine_context_in(rq->engine);
+}
+
+static inline void
+execlists_context_schedule_out(struct drm_i915_gem_request *rq)
+{
+	intel_engine_context_out(rq->engine);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+}
+
 static void
 execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 {
@@ -463,7 +444,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 		if (rq) {
 			GEM_BUG_ON(count > !n);
 			if (!count++)
-				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+				execlists_context_schedule_in(rq);
 			port_set(&port[n], port_pack(rq, count));
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
@@ -479,6 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 
 		elsp_write(desc, elsp);
 	}
+	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -531,6 +513,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 		elsp_write(0, elsp);
 
 	elsp_write(ce->lrc_desc, elsp);
+	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -577,9 +560,20 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * know the next preemption status we see corresponds
 		 * to this ELSP update.
 		 */
+		GEM_BUG_ON(!port_count(&port[0]));
 		if (port_count(&port[0]) > 1)
 			goto unlock;
 
+		/*
+		 * If we write to ELSP a second time before the HW has had
+		 * a chance to respond to the previous write, we can confuse
+		 * the HW and hit "undefined behaviour". After writing to ELSP,
+		 * we must then wait until we see a context-switch event from
+		 * the HW to indicate that it has had a chance to respond.
+		 */
+		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
+			goto unlock;
+
 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) &&
 		    rb_entry(rb, struct i915_priolist, node)->priority >
 		    max(last->priotree.priority, 0)) {
@@ -713,6 +707,7 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 		struct drm_i915_gem_request *rq = port_request(port);
 
 		GEM_BUG_ON(!execlists->active);
+		intel_engine_context_out(rq->engine);
 		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED);
 		i915_gem_request_put(rq);
 
@@ -873,10 +868,22 @@ static void execlists_submission_tasklet(unsigned long data)
 			GEM_TRACE("%s csb[%dd]: status=0x%08x:0x%08x\n",
 				  engine->name, head,
 				  status, buf[2*head + 1]);
+
+			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+				      GEN8_CTX_STATUS_PREEMPTED))
+				execlists_set_active(execlists,
+						     EXECLISTS_ACTIVE_HWACK);
+			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+				execlists_clear_active(execlists,
+						       EXECLISTS_ACTIVE_HWACK);
+
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE &&
+			/* We should never get a COMPLETED | IDLE_ACTIVE! */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+
+			if (status & GEN8_CTX_STATUS_COMPLETE &&
 			    buf[2*head + 1] == PREEMPT_ID) {
 				execlists_cancel_port_requests(execlists);
 				execlists_unwind_incomplete_requests(execlists);
@@ -907,9 +914,10 @@ static void execlists_submission_tasklet(unsigned long data)
 			GEM_BUG_ON(count == 0);
 			if (--count == 0) {
 				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+				GEM_BUG_ON(port_isset(&port[1]) &&
+					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
 				GEM_BUG_ON(!i915_gem_request_completed(rq));
-				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-
+				execlists_context_schedule_out(rq);
 				trace_i915_gem_request_out(rq);
 				i915_gem_request_put(rq);
 
@@ -1911,6 +1919,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 
 	engine->park = NULL;
 	engine->unpark = NULL;
+
+	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 }
 
 static void
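
[Editor's note] The comment added to execlists_dequeue() explains the constraint behind the new EXECLISTS_ACTIVE_HWACK bit: a second ELSP write must not be issued until the hardware has acknowledged the previous one with a context-switch event, so the bit is cleared on every ELSP write and set again when the CSB reports an idle-to-active or preemption transition. The toy state machine below models that handshake; the flag and status names echo the patch, everything else is a simplified sketch.

/*
 * Toy model of the ELSP/CSB handshake: writing ELSP clears a "hardware
 * acknowledged" flag, and only a context-switch event from the CSB sets it
 * again, gating the next submission.
 */
#include <stdbool.h>
#include <stdio.h>

#define CTX_STATUS_IDLE_ACTIVE	(1 << 0)	/* idle -> active */
#define CTX_STATUS_PREEMPTED	(1 << 1)
#define CTX_STATUS_ACTIVE_IDLE	(1 << 2)	/* active -> idle */

struct execlists_model {
	bool hwack;	/* has the HW responded to the last ELSP write? */
};

static bool try_submit(struct execlists_model *el, unsigned int ctx)
{
	if (!el->hwack) {
		printf("ctx %u deferred: waiting for HW ack\n", ctx);
		return false;
	}
	printf("ctx %u written to ELSP\n", ctx);
	el->hwack = false;	/* cleared on every ELSP write */
	return true;
}

static void process_csb(struct execlists_model *el, unsigned int status)
{
	if (status & (CTX_STATUS_IDLE_ACTIVE | CTX_STATUS_PREEMPTED))
		el->hwack = true;	/* HW has seen the ELSP write */
	if (status & CTX_STATUS_ACTIVE_IDLE)
		el->hwack = false;	/* engine went idle again */
}

int main(void)
{
	struct execlists_model el = { .hwack = true };

	try_submit(&el, 1);			/* accepted, ack now pending */
	try_submit(&el, 2);			/* deferred until the CSB event */
	process_csb(&el, CTX_STATUS_IDLE_ACTIVE);
	try_submit(&el, 2);			/* accepted */
	return 0;
}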
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 17182ce..6d4f9b9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -107,8 +107,4 @@ intel_lr_context_descriptor(struct i915_gem_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
-/* Execlists */
-int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
-				    int enable_execlists);
-
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index adc51e4..fa6831f 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -543,25 +543,6 @@ static u32 pwm_get_backlight(struct intel_connector *connector)
 	return DIV_ROUND_UP(duty_ns * 100, CRC_PMIC_PWM_PERIOD_NS);
 }
 
-static u32 intel_panel_get_backlight(struct intel_connector *connector)
-{
-	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	struct intel_panel *panel = &connector->panel;
-	u32 val = 0;
-
-	mutex_lock(&dev_priv->backlight_lock);
-
-	if (panel->backlight.enabled) {
-		val = panel->backlight.get(connector);
-		val = intel_panel_compute_brightness(connector, val);
-	}
-
-	mutex_unlock(&dev_priv->backlight_lock);
-
-	DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val);
-	return val;
-}
-
 static void lpt_set_backlight(const struct drm_connector_state *conn_state, u32 level)
 {
 	struct intel_connector *connector = to_intel_connector(conn_state->connector);
@@ -649,31 +630,6 @@ intel_panel_actually_set_backlight(const struct drm_connector_state *conn_state,
 	panel->backlight.set(conn_state, level);
 }
 
-/* set backlight brightness to level in range [0..max], scaling wrt hw min */
-static void intel_panel_set_backlight(const struct drm_connector_state *conn_state,
-				      u32 user_level, u32 user_max)
-{
-	struct intel_connector *connector = to_intel_connector(conn_state->connector);
-	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	struct intel_panel *panel = &connector->panel;
-	u32 hw_level;
-
-	if (!panel->backlight.present)
-		return;
-
-	mutex_lock(&dev_priv->backlight_lock);
-
-	WARN_ON(panel->backlight.max == 0);
-
-	hw_level = scale_user_to_hw(connector, user_level, user_max);
-	panel->backlight.level = hw_level;
-
-	if (panel->backlight.enabled)
-		intel_panel_actually_set_backlight(conn_state, hw_level);
-
-	mutex_unlock(&dev_priv->backlight_lock);
-}
-
 /* set backlight brightness to level in range [0..max], assuming hw min is
  * respected.
  */
@@ -1182,6 +1138,50 @@ void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state,
 }
 
 #if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE)
+static u32 intel_panel_get_backlight(struct intel_connector *connector)
+{
+	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+	struct intel_panel *panel = &connector->panel;
+	u32 val = 0;
+
+	mutex_lock(&dev_priv->backlight_lock);
+
+	if (panel->backlight.enabled) {
+		val = panel->backlight.get(connector);
+		val = intel_panel_compute_brightness(connector, val);
+	}
+
+	mutex_unlock(&dev_priv->backlight_lock);
+
+	DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val);
+	return val;
+}
+
+/* set backlight brightness to level in range [0..max], scaling wrt hw min */
+static void intel_panel_set_backlight(const struct drm_connector_state *conn_state,
+				      u32 user_level, u32 user_max)
+{
+	struct intel_connector *connector = to_intel_connector(conn_state->connector);
+	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+	struct intel_panel *panel = &connector->panel;
+	u32 hw_level;
+
+	if (!panel->backlight.present)
+		return;
+
+	mutex_lock(&dev_priv->backlight_lock);
+
+	WARN_ON(panel->backlight.max == 0);
+
+	hw_level = scale_user_to_hw(connector, user_level, user_max);
+	panel->backlight.level = hw_level;
+
+	if (panel->backlight.enabled)
+		intel_panel_actually_set_backlight(conn_state, hw_level);
+
+	mutex_unlock(&dev_priv->backlight_lock);
+}
+
 static int intel_backlight_device_update_status(struct backlight_device *bd)
 {
 	struct intel_connector *connector = bl_get_data(bd);
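
[Editor's note] intel_panel_set_backlight(), moved here under CONFIG_BACKLIGHT_CLASS_DEVICE because only the backlight device hooks use it, rescales a user-range level into the hardware range before applying it under the backlight lock. The driver's scale_user_to_hw() is not part of this diff, so the sketch below only shows the general shape of such a linear user-to-hardware mapping; the range values and helper name are made up.

/*
 * Illustrative linear rescale from a user brightness range [0..user_max]
 * into a hardware range [hw_min..hw_max]. Not the driver's helper.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t scale_user_to_hw_model(uint32_t user_level, uint32_t user_max,
				       uint32_t hw_min, uint32_t hw_max)
{
	if (user_max == 0)
		return hw_min;

	/* 64-bit intermediate avoids overflow for large PWM ranges */
	return hw_min + (uint32_t)((uint64_t)user_level * (hw_max - hw_min) /
				   user_max);
}

int main(void)
{
	/* e.g. a 0..255 user scale mapped onto a 400..4000 PWM range */
	printf("%u\n", scale_user_to_hw_model(128, 255, 400, 4000));
	return 0;
}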
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index 61641d4..1f5cd57 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -541,8 +541,6 @@ static void hsw_pipe_A_crc_wa(struct drm_i915_private *dev_priv,
 		 * completely disable it.
 		 */
 		pipe_config->ips_force_disable = enable;
-		if (pipe_config->ips_enabled == enable)
-			pipe_config->base.connectors_changed = true;
 	}
 
 	if (IS_HASWELL(dev_priv)) {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4d2cd43..67f3262 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -52,9 +52,6 @@
  * which brings the most power savings; deeper states save more power, but
  * require higher latency to switch to and wake up.
  */
-#define INTEL_RC6_ENABLE			(1<<0)
-#define INTEL_RC6p_ENABLE			(1<<1)
-#define INTEL_RC6pp_ENABLE			(1<<2)
 
 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
 {
@@ -512,38 +509,41 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
 	fifo_state->plane[PLANE_CURSOR] = 63;
 }
 
-static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
+static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
+			      enum i9xx_plane_id i9xx_plane)
 {
 	uint32_t dsparb = I915_READ(DSPARB);
 	int size;
 
 	size = dsparb & 0x7f;
-	if (plane)
+	if (i9xx_plane == PLANE_B)
 		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
 
-	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
-		      plane ? "B" : "A", size);
+	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
+		      dsparb, plane_name(i9xx_plane), size);
 
 	return size;
 }
 
-static int i830_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
+static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
+			      enum i9xx_plane_id i9xx_plane)
 {
 	uint32_t dsparb = I915_READ(DSPARB);
 	int size;
 
 	size = dsparb & 0x1ff;
-	if (plane)
+	if (i9xx_plane == PLANE_B)
 		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
 	size >>= 1; /* Convert to cachelines */
 
-	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
-		      plane ? "B" : "A", size);
+	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
+		      dsparb, plane_name(i9xx_plane), size);
 
 	return size;
 }
 
-static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
+static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
+			      enum i9xx_plane_id i9xx_plane)
 {
 	uint32_t dsparb = I915_READ(DSPARB);
 	int size;
@@ -551,9 +551,8 @@ static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
 	size = dsparb & 0x7f;
 	size >>= 2; /* Convert to cachelines */
 
-	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
-		      plane ? "B" : "A",
-		      size);
+	DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
+		      dsparb, plane_name(i9xx_plane), size);
 
 	return size;
 }
@@ -2277,8 +2276,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 	else
 		wm_info = &i830_a_wm_info;
 
-	fifo_size = dev_priv->display.get_fifo_size(dev_priv, 0);
-	crtc = intel_get_crtc_for_plane(dev_priv, 0);
+	fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
+	crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
 	if (intel_crtc_active(crtc)) {
 		const struct drm_display_mode *adjusted_mode =
 			&crtc->config->base.adjusted_mode;
@@ -2304,8 +2303,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc)
 	if (IS_GEN2(dev_priv))
 		wm_info = &i830_bc_wm_info;
 
-	fifo_size = dev_priv->display.get_fifo_size(dev_priv, 1);
-	crtc = intel_get_crtc_for_plane(dev_priv, 1);
+	fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
+	crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
 	if (intel_crtc_active(crtc)) {
 		const struct drm_display_mode *adjusted_mode =
 			&crtc->config->base.adjusted_mode;
@@ -2417,7 +2416,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc)
 	adjusted_mode = &crtc->config->base.adjusted_mode;
 	planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
 				       &i845_wm_info,
-				       dev_priv->display.get_fifo_size(dev_priv, 0),
+				       dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
 				       4, pessimal_latency_ns);
 	fwater_lo = I915_READ(FW_BLC) & ~0xfff;
 	fwater_lo |= (3<<8) | planea_wm;
@@ -6415,26 +6414,6 @@ static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_RP_CONTROL, 0);
 }
 
-static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode)
-{
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
-			mode = GEN6_RC_CTL_RC6_ENABLE;
-		else
-			mode = 0;
-	}
-	if (HAS_RC6p(dev_priv))
-		DRM_DEBUG_DRIVER("Enabling RC6 states: "
-				 "RC6 %s RC6p %s RC6pp %s\n",
-				 onoff(mode & GEN6_RC_CTL_RC6_ENABLE),
-				 onoff(mode & GEN6_RC_CTL_RC6p_ENABLE),
-				 onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE));
-
-	else
-		DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n",
-				 onoff(mode & GEN6_RC_CTL_RC6_ENABLE));
-}
-
 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
@@ -6497,42 +6476,30 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
 	return enable_rc6;
 }
 
-int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6)
+static bool sanitize_rc6(struct drm_i915_private *i915)
 {
-	/* No RC6 before Ironlake and code is gone for ilk. */
-	if (INTEL_INFO(dev_priv)->gen < 6)
-		return 0;
+	struct intel_device_info *info = mkwrite_device_info(i915);
 
-	if (!enable_rc6)
-		return 0;
+	/* Powersaving is controlled by the host when inside a VM */
+	if (intel_vgpu_active(i915))
+		info->has_rc6 = 0;
 
-	if (IS_GEN9_LP(dev_priv) && !bxt_check_bios_rc6_setup(dev_priv)) {
+	if (info->has_rc6 &&
+	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
 		DRM_INFO("RC6 disabled by BIOS\n");
-		return 0;
+		info->has_rc6 = 0;
 	}
 
-	/* Respect the kernel parameter if it is set */
-	if (enable_rc6 >= 0) {
-		int mask;
+	/*
+	 * We assume that we do not have any deep rc6 levels if we don't have
+	 * the previous rc6 level supported, i.e. we use HAS_RC6()
+	 * as the initial coarse check for rc6 in general, moving on to
+	 * progressively finer/deeper levels.
+	 */
+	if (!info->has_rc6 && info->has_rc6p)
+		info->has_rc6p = 0;
 
-		if (HAS_RC6p(dev_priv))
-			mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
-			       INTEL_RC6pp_ENABLE;
-		else
-			mask = INTEL_RC6_ENABLE;
-
-		if ((enable_rc6 & mask) != enable_rc6)
-			DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d "
-					 "(requested %d, valid %d)\n",
-					 enable_rc6 & mask, enable_rc6, mask);
-
-		return enable_rc6 & mask;
-	}
-
-	if (IS_IVYBRIDGE(dev_priv))
-		return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
-
-	return INTEL_RC6_ENABLE;
+	return info->has_rc6;
 }
 
 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
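
[Editor's note] The comment in sanitize_rc6() above describes a coarse-to-fine narrowing of the RC6 capability flags: virtualization turns RC6 off outright, the Gen9 LP BIOS check can veto it, and a deeper state is only kept if the shallower one survived. A small standalone sketch of that narrowing, with an illustrative struct and veto inputs rather than the driver's device info:

/*
 * Sketch of the coarse-to-fine narrowing done by sanitize_rc6(): each
 * deeper power state is only believed if the shallower one survived the
 * earlier checks.
 */
#include <stdbool.h>
#include <stdio.h>

struct rc6_caps {
	bool has_rc6;
	bool has_rc6p;
};

static bool sanitize_rc6_model(struct rc6_caps *caps, bool vgpu_active,
			       bool bios_disabled_rc6)
{
	/* Powersaving is the host's business when running inside a VM */
	if (vgpu_active)
		caps->has_rc6 = false;

	/* Firmware can veto RC6 (the Gen9 LP BIOS check in the patch) */
	if (caps->has_rc6 && bios_disabled_rc6)
		caps->has_rc6 = false;

	/* A deeper level is only kept if the shallower one survived */
	if (!caps->has_rc6)
		caps->has_rc6p = false;

	return caps->has_rc6;
}

int main(void)
{
	struct rc6_caps caps = { .has_rc6 = true, .has_rc6p = true };

	printf("rc6 usable: %d\n", sanitize_rc6_model(&caps, false, true));
	printf("rc6p kept:  %d\n", caps.has_rc6p);
	return 0;
}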
@@ -6602,9 +6569,10 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv)
 {
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
-	/* Program defaults and thresholds for RPS*/
-	I915_WRITE(GEN6_RC_VIDEO_FREQ,
-		GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
+	/* Program defaults and thresholds for RPS */
+	if (IS_GEN9(dev_priv))
+		I915_WRITE(GEN6_RC_VIDEO_FREQ,
+			GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
 
 	/* 1 second timeout*/
 	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
@@ -6624,7 +6592,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	u32 rc6_mode, rc6_mask = 0;
+	u32 rc6_mode;
 
 	/* 1a: Software RC state - RC0 */
 	I915_WRITE(GEN6_RC_STATE, 0);
@@ -6665,9 +6633,6 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
 
 	/* 3a: Enable RC6 */
-	if (intel_rc6_enabled() & INTEL_RC6_ENABLE)
-		rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
-	DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE));
 	I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
 
 	/* WaRsUseTimeoutMode:cnl (pre-prod) */
@@ -6677,7 +6642,9 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
 		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
 
 	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_HW_ENABLE | rc6_mode | rc6_mask);
+		   GEN6_RC_CTL_HW_ENABLE |
+		   GEN6_RC_CTL_RC6_ENABLE |
+		   rc6_mode);
 
 	/*
 	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
@@ -6686,8 +6653,8 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
 	if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
 		I915_WRITE(GEN9_PG_ENABLE, 0);
 	else
-		I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
-				(GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0);
+		I915_WRITE(GEN9_PG_ENABLE,
+			   GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
@@ -6696,7 +6663,6 @@ static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	uint32_t rc6_mask = 0;
 
 	/* 1a: Software RC state - RC0 */
 	I915_WRITE(GEN6_RC_STATE, 0);
@@ -6718,13 +6684,11 @@ static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
 
 	/* 3: Enable RC6 */
-	if (intel_rc6_enabled() & INTEL_RC6_ENABLE)
-		rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
-	intel_print_rc6_info(dev_priv, rc6_mask);
 
-	I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
-			GEN7_RC_CTL_TO_MODE |
-			rc6_mask);
+	I915_WRITE(GEN6_RC_CONTROL,
+		   GEN6_RC_CTL_HW_ENABLE |
+		   GEN7_RC_CTL_TO_MODE |
+		   GEN6_RC_CTL_RC6_ENABLE);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
@@ -6773,9 +6737,8 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	u32 rc6vids, rc6_mask = 0;
+	u32 rc6vids, rc6_mask;
 	u32 gtfifodbg;
-	int rc6_mode;
 	int ret;
 
 	I915_WRITE(GEN6_RC_STATE, 0);
@@ -6810,22 +6773,12 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
 	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
 
-	/* Check if we are enabling RC6 */
-	rc6_mode = intel_rc6_enabled();
-	if (rc6_mode & INTEL_RC6_ENABLE)
-		rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
-
 	/* We don't use those on Haswell */
-	if (!IS_HASWELL(dev_priv)) {
-		if (rc6_mode & INTEL_RC6p_ENABLE)
-			rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
-
-		if (rc6_mode & INTEL_RC6pp_ENABLE)
-			rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
-	}
-
-	intel_print_rc6_info(dev_priv, rc6_mask);
-
+	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+	if (HAS_RC6p(dev_priv))
+		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
+	if (HAS_RC6pp(dev_priv))
+		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
 	I915_WRITE(GEN6_RC_CONTROL,
 		   rc6_mask |
 		   GEN6_RC_CTL_EI_MODE(1) |
@@ -7268,7 +7221,7 @@ static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	u32 gtfifodbg, rc6_mode = 0, pcbr;
+	u32 gtfifodbg, rc6_mode, pcbr;
 
 	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
 					     GT_FIFO_FREE_ENTRIES_CHV);
@@ -7309,10 +7262,9 @@ static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
 	pcbr = I915_READ(VLV_PCBR);
 
 	/* 3: Enable RC6 */
-	if ((intel_rc6_enabled() & INTEL_RC6_ENABLE) &&
-	    (pcbr >> VLV_PCBR_ADDR_SHIFT))
+	rc6_mode = 0;
+	if (pcbr >> VLV_PCBR_ADDR_SHIFT)
 		rc6_mode = GEN7_RC_CTL_TO_MODE;
-
 	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@@ -7364,7 +7316,7 @@ static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	u32 gtfifodbg, rc6_mode = 0;
+	u32 gtfifodbg;
 
 	valleyview_check_pctx(dev_priv);
 
@@ -7397,12 +7349,8 @@ static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
 				      VLV_MEDIA_RC6_COUNT_EN |
 				      VLV_RENDER_RC6_COUNT_EN));
 
-	if (intel_rc6_enabled() & INTEL_RC6_ENABLE)
-		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
-
-	intel_print_rc6_info(dev_priv, rc6_mode);
-
-	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
+	I915_WRITE(GEN6_RC_CONTROL,
+		   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
@@ -7929,7 +7877,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
 	 * requirement.
 	 */
-	if (!i915_modparams.enable_rc6) {
+	if (!sanitize_rc6(dev_priv)) {
 		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
 		intel_runtime_pm_get(dev_priv);
 	}
@@ -7982,7 +7930,7 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
 	if (IS_VALLEYVIEW(dev_priv))
 		valleyview_cleanup_gt_powersave(dev_priv);
 
-	if (!i915_modparams.enable_rc6)
+	if (!HAS_RC6(dev_priv))
 		intel_runtime_pm_put(dev_priv);
 }
 
@@ -8149,7 +8097,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
 
 	mutex_lock(&dev_priv->pcu_lock);
 
-	intel_enable_rc6(dev_priv);
+	if (HAS_RC6(dev_priv))
+		intel_enable_rc6(dev_priv);
 	intel_enable_rps(dev_priv);
 	if (HAS_LLC(dev_priv))
 		intel_enable_llc_pstate(dev_priv);
@@ -9291,7 +9240,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 		ret = 0;
 		goto out;
 	}
-	ret = _wait_for(COND, timeout_base_ms * 1000, 10);
+	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
 	if (!ret)
 		goto out;
 
@@ -9393,12 +9342,13 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
 			     const i915_reg_t reg)
 {
 	u32 lower, upper, tmp;
+	unsigned long flags;
 	int loop = 2;
 
 	/* The register accessed do not need forcewake. We borrow
 	 * uncore lock to prevent concurrent access to range reg.
 	 */
-	spin_lock_irq(&dev_priv->uncore.lock);
+	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
 
 	/* vlv and chv residency counters are 40 bits in width.
 	 * With a control bit, we can choose between upper or lower
@@ -9429,39 +9379,51 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
 	 * now.
 	 */
 
-	spin_unlock_irq(&dev_priv->uncore.lock);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 
 	return lower | (u64)upper << 8;
 }
 
-u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
+u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
 			   const i915_reg_t reg)
 {
-	u64 time_hw, units, div;
+	u64 time_hw;
+	u32 mul, div;
 
-	if (!intel_rc6_enabled())
+	if (!HAS_RC6(dev_priv))
 		return 0;
 
-	intel_runtime_pm_get(dev_priv);
-
 	/* On VLV and CHV, residency time is in CZ units rather than 1.28us */
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		units = 1000;
+		mul = 1000000;
 		div = dev_priv->czclk_freq;
-
 		time_hw = vlv_residency_raw(dev_priv, reg);
-	} else if (IS_GEN9_LP(dev_priv)) {
-		units = 1000;
-		div = 1200;		/* 833.33ns */
-
-		time_hw = I915_READ(reg);
 	} else {
-		units = 128000; /* 1.28us */
-		div = 100000;
+		/* 833.33ns units on Gen9LP, 1.28us elsewhere. */
+		if (IS_GEN9_LP(dev_priv)) {
+			mul = 10000;
+			div = 12;
+		} else {
+			mul = 1280;
+			div = 1;
+		}
 
 		time_hw = I915_READ(reg);
 	}
 
-	intel_runtime_pm_put(dev_priv);
-	return DIV_ROUND_UP_ULL(time_hw * units, div);
+	return DIV_ROUND_UP_ULL(time_hw * mul, div);
+}
+
+u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
+{
+	u32 cagf;
+
+	if (INTEL_GEN(dev_priv) >= 9)
+		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+	else
+		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+
+	return cagf;
 }
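
[Editor's note] intel_rc6_residency_ns() above converts raw residency ticks to nanoseconds with a mul/div pair: VLV/CHV counters tick at the CZ clock, Gen9 LP uses 833.33 ns ticks (10000/12 ns), and other platforms use 1.28 us ticks (1280 ns). A worked example of the same arithmetic; the platform enum is illustrative, and the CZ clock is assumed to be in kHz so that a multiplier of 1000000 yields nanoseconds.

/*
 * Worked example of the tick-to-nanosecond conversion used by
 * intel_rc6_residency_ns(), with the same mul/div pairs as the patch.
 */
#include <stdint.h>
#include <stdio.h>

enum platform { PLAT_VLV_CHV, PLAT_GEN9_LP, PLAT_OTHER };

#define DIV_ROUND_UP_ULL(x, d)	(((x) + (d) - 1) / (d))

static uint64_t rc6_ticks_to_ns(enum platform plat, uint64_t ticks,
				uint32_t czclk_khz)
{
	uint32_t mul, div;

	switch (plat) {
	case PLAT_VLV_CHV:	/* counter runs at the CZ clock */
		mul = 1000000;
		div = czclk_khz;
		break;
	case PLAT_GEN9_LP:	/* 833.33 ns per tick (10000/12 ns) */
		mul = 10000;
		div = 12;
		break;
	default:		/* 1.28 us per tick */
		mul = 1280;
		div = 1;
		break;
	}

	return DIV_ROUND_UP_ULL(ticks * mul, div);
}

int main(void)
{
	/* 1000 ticks at 833.33 ns/tick rounds up to 833334 ns */
	printf("gen9lp: %llu ns\n",
	       (unsigned long long)rc6_ticks_to_ns(PLAT_GEN9_LP, 1000, 0));
	/* 1000 ticks at 1.28 us/tick is 1280000 ns */
	printf("other:  %llu ns\n",
	       (unsigned long long)rc6_ticks_to_ns(PLAT_OTHER, 1000, 0));
	return 0;
}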
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 12e734b..e208582 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -340,50 +340,6 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 	return 0;
 }
 
-static int
-gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
-{
-	u32 flags;
-	u32 *cs;
-
-	cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	flags = PIPE_CONTROL_CS_STALL;
-
-	if (mode & EMIT_FLUSH) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-	}
-	if (mode & EMIT_INVALIDATE) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
-		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
-		cs = gen8_emit_pipe_control(cs,
-					    PIPE_CONTROL_CS_STALL |
-					    PIPE_CONTROL_STALL_AT_SCOREBOARD,
-					    0);
-	}
-
-	cs = gen8_emit_pipe_control(cs, flags,
-				    i915_ggtt_offset(req->engine->scratch) +
-				    2 * CACHELINE_BYTES);
-
-	intel_ring_advance(req, cs);
-
-	return 0;
-}
-
 static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -427,7 +383,6 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
 	} else if (IS_GEN6(dev_priv)) {
 		mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
 	} else {
-		/* XXX: gen8 returns to sanity */
 		mmio = RING_HWS_PGA(engine->mmio_base);
 	}
 
@@ -437,13 +392,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
 	I915_WRITE(mmio, engine->status_page.ggtt_offset);
 	POSTING_READ(mmio);
 
-	/*
-	 * Flush the TLB for this page
-	 *
-	 * FIXME: These two bits have disappeared on gen8, so a question
-	 * arises: do we still need this and if so how should we go about
-	 * invalidating the TLB?
-	 */
+	/* Flush the TLB for this page */
 	if (IS_GEN(dev_priv, 6, 7)) {
 		i915_reg_t reg = RING_INSTPM(engine->mmio_base);
 
@@ -611,8 +560,6 @@ static void reset_ring_common(struct intel_engine_cs *engine,
 		struct intel_context *ce = &request->ctx->engine[engine->id];
 		struct i915_hw_ppgtt *ppgtt;
 
-		/* FIXME consider gen8 reset */
-
 		if (ce->state) {
 			I915_WRITE(CCID,
 				   i915_ggtt_offset(ce->state) |
@@ -644,6 +591,7 @@ static void reset_ring_common(struct intel_engine_cs *engine,
 			request->ring->head = request->postfix;
 	} else {
 		engine->legacy_active_context = NULL;
+		engine->legacy_active_ppgtt = NULL;
 	}
 }
 
@@ -713,62 +661,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
 	return init_workarounds_ring(engine);
 }
 
-static void render_ring_cleanup(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	i915_vma_unpin_and_release(&dev_priv->semaphore);
-}
-
-static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs)
-{
-	struct drm_i915_private *dev_priv = req->i915;
-	struct intel_engine_cs *waiter;
-	enum intel_engine_id id;
-
-	for_each_engine(waiter, dev_priv, id) {
-		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
-		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
-			continue;
-
-		*cs++ = GFX_OP_PIPE_CONTROL(6);
-		*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE |
-			PIPE_CONTROL_CS_STALL;
-		*cs++ = lower_32_bits(gtt_offset);
-		*cs++ = upper_32_bits(gtt_offset);
-		*cs++ = req->global_seqno;
-		*cs++ = 0;
-		*cs++ = MI_SEMAPHORE_SIGNAL |
-			MI_SEMAPHORE_TARGET(waiter->hw_id);
-		*cs++ = 0;
-	}
-
-	return cs;
-}
-
-static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs)
-{
-	struct drm_i915_private *dev_priv = req->i915;
-	struct intel_engine_cs *waiter;
-	enum intel_engine_id id;
-
-	for_each_engine(waiter, dev_priv, id) {
-		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
-		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
-			continue;
-
-		*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
-		*cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
-		*cs++ = upper_32_bits(gtt_offset);
-		*cs++ = req->global_seqno;
-		*cs++ = MI_SEMAPHORE_SIGNAL |
-			MI_SEMAPHORE_TARGET(waiter->hw_id);
-		*cs++ = 0;
-	}
-
-	return cs;
-}
-
 static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
 {
 	struct drm_i915_private *dev_priv = req->i915;
@@ -851,70 +743,6 @@ static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
 				    req->engine->semaphore.signal(req, cs));
 }
 
-static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
-					u32 *cs)
-{
-	struct intel_engine_cs *engine = req->engine;
-
-	if (engine->semaphore.signal)
-		cs = engine->semaphore.signal(req, cs);
-
-	*cs++ = GFX_OP_PIPE_CONTROL(6);
-	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
-		PIPE_CONTROL_QW_WRITE;
-	*cs++ = intel_hws_seqno_address(engine);
-	*cs++ = 0;
-	*cs++ = req->global_seqno;
-	/* We're thrashing one dword of HWS. */
-	*cs++ = 0;
-	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
-
-	req->tail = intel_ring_offset(req, cs);
-	assert_ring_tail_valid(req->ring, req->tail);
-}
-
-static const int gen8_render_emit_breadcrumb_sz = 8;
-
-/**
- * intel_ring_sync - sync the waiter to the signaller on seqno
- *
- * @waiter - ring that is waiting
- * @signaller - ring which has, or will signal
- * @seqno - seqno which the waiter will block on
- */
-
-static int
-gen8_ring_sync_to(struct drm_i915_gem_request *req,
-		  struct drm_i915_gem_request *signal)
-{
-	struct drm_i915_private *dev_priv = req->i915;
-	u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id);
-	struct i915_hw_ppgtt *ppgtt;
-	u32 *cs;
-
-	cs = intel_ring_begin(req, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT |
-		MI_SEMAPHORE_SAD_GTE_SDD;
-	*cs++ = signal->global_seqno;
-	*cs++ = lower_32_bits(offset);
-	*cs++ = upper_32_bits(offset);
-	intel_ring_advance(req, cs);
-
-	/* When the !RCS engines idle waiting upon a semaphore, they lose their
-	 * pagetables and we must reload them before executing the batch.
-	 * We do this on the i915_switch_context() following the wait and
-	 * before the dispatch.
-	 */
-	ppgtt = req->ctx->ppgtt;
-	if (ppgtt && req->engine->id != RCS)
-		ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine);
-	return 0;
-}
-
 static int
 gen6_ring_sync_to(struct drm_i915_gem_request *req,
 		  struct drm_i915_gem_request *signal)
@@ -1090,25 +918,6 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine)
 	gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask);
 }
 
-static void
-gen8_irq_enable(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	I915_WRITE_IMR(engine,
-		       ~(engine->irq_enable_mask |
-			 engine->irq_keep_mask));
-	POSTING_READ_FW(RING_IMR(engine->mmio_base));
-}
-
-static void
-gen8_irq_disable(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
-}
-
 static int
 i965_emit_bb_start(struct drm_i915_gem_request *req,
 		   u64 offset, u32 length,
@@ -1576,6 +1385,190 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 		intel_ring_reset(engine->buffer, 0);
 }
 
+static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags)
+{
+	struct drm_i915_private *i915 = rq->i915;
+	struct intel_engine_cs *engine = rq->engine;
+	enum intel_engine_id id;
+	const int num_rings =
+		/* Use an extended w/a on gen7 if signalling from other rings */
+		(HAS_LEGACY_SEMAPHORES(i915) && IS_GEN7(i915)) ?
+		INTEL_INFO(i915)->num_rings - 1 :
+		0;
+	int len;
+	u32 *cs;
+
+	flags |= MI_MM_SPACE_GTT;
+	if (IS_HASWELL(i915))
+		/* These flags are for resource streamer on HSW+ */
+		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
+	else
+		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
+
+	len = 4;
+	if (IS_GEN7(i915))
+		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
+
+	cs = intel_ring_begin(rq, len);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
+	if (IS_GEN7(i915)) {
+		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+		if (num_rings) {
+			struct intel_engine_cs *signaller;
+
+			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
+			for_each_engine(signaller, i915, id) {
+				if (signaller == engine)
+					continue;
+
+				*cs++ = i915_mmio_reg_offset(
+					   RING_PSMI_CTL(signaller->mmio_base));
+				*cs++ = _MASKED_BIT_ENABLE(
+						GEN6_PSMI_SLEEP_MSG_DISABLE);
+			}
+		}
+	}
+
+	*cs++ = MI_NOOP;
+	*cs++ = MI_SET_CONTEXT;
+	*cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags;
+	/*
+	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
+	 * WaMiSetContext_Hang:snb,ivb,vlv
+	 */
+	*cs++ = MI_NOOP;
+
+	if (IS_GEN7(i915)) {
+		if (num_rings) {
+			struct intel_engine_cs *signaller;
+			i915_reg_t last_reg = {}; /* keep gcc quiet */
+
+			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
+			for_each_engine(signaller, i915, id) {
+				if (signaller == engine)
+					continue;
+
+				last_reg = RING_PSMI_CTL(signaller->mmio_base);
+				*cs++ = i915_mmio_reg_offset(last_reg);
+				*cs++ = _MASKED_BIT_DISABLE(
+						GEN6_PSMI_SLEEP_MSG_DISABLE);
+			}
+
+			/* Insert a delay before the next switch! */
+			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+			*cs++ = i915_mmio_reg_offset(last_reg);
+			*cs++ = i915_ggtt_offset(engine->scratch);
+			*cs++ = MI_NOOP;
+		}
+		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	}
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int remap_l3(struct drm_i915_gem_request *rq, int slice)
+{
+	u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
+	int i;
+
+	if (!remap_info)
+		return 0;
+
+	cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * Note: We do not worry about the concurrent register cacheline hang
+	 * here because no other code should access these registers other than
+	 * at initialization time.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
+		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+		*cs++ = remap_info[i];
+	}
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int switch_context(struct drm_i915_gem_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_gem_context *to_ctx = rq->ctx;
+	struct i915_hw_ppgtt *to_mm =
+		to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
+	struct i915_gem_context *from_ctx = engine->legacy_active_context;
+	struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt;
+	u32 hw_flags = 0;
+	int ret, i;
+
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+
+	if (to_mm != from_mm ||
+	    (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) {
+		trace_switch_mm(engine, to_ctx);
+		ret = to_mm->switch_mm(to_mm, rq);
+		if (ret)
+			goto err;
+
+		to_mm->pd_dirty_rings &= ~intel_engine_flag(engine);
+		engine->legacy_active_ppgtt = to_mm;
+		hw_flags = MI_FORCE_RESTORE;
+	}
+
+	if (to_ctx->engine[engine->id].state &&
+	    (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) {
+		GEM_BUG_ON(engine->id != RCS);
+
+		/*
+		 * The kernel context(s) is treated as pure scratch and is not
+		 * expected to retain any state (as we sacrifice it during
+		 * suspend and on resume it may be corrupted). This is ok,
+		 * as nothing actually executes using the kernel context; it
+		 * is purely used for flushing user contexts.
+		 */
+		if (i915_gem_context_is_kernel(to_ctx))
+			hw_flags = MI_RESTORE_INHIBIT;
+
+		ret = mi_set_context(rq, hw_flags);
+		if (ret)
+			goto err_mm;
+
+		engine->legacy_active_context = to_ctx;
+	}
+
+	if (to_ctx->remap_slice) {
+		for (i = 0; i < MAX_L3_SLICES; i++) {
+			if (!(to_ctx->remap_slice & BIT(i)))
+				continue;
+
+			ret = remap_l3(rq, i);
+			if (ret)
+				goto err_ctx;
+		}
+
+		to_ctx->remap_slice = 0;
+	}
+
+	return 0;
+
+err_ctx:
+	engine->legacy_active_context = from_ctx;
+err_mm:
+	engine->legacy_active_ppgtt = from_mm;
+err:
+	return ret;
+}
+
 static int ring_request_alloc(struct drm_i915_gem_request *request)
 {
 	int ret;
@@ -1592,6 +1585,10 @@ static int ring_request_alloc(struct drm_i915_gem_request *request)
 	if (ret)
 		return ret;
 
+	ret = switch_context(request);
+	if (ret)
+		return ret;
+
 	request->reserved_space -= LEGACY_REQUEST_SIZE;
 	return 0;
 }
@@ -1792,8 +1789,6 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 		return PTR_ERR(cs);
 
 	cmd = MI_FLUSH_DW;
-	if (INTEL_GEN(req->i915) >= 8)
-		cmd += 1;
 
 	/* We always require a command barrier so that subsequent
 	 * commands, such as breadcrumb interrupts, are strictly ordered
@@ -1813,38 +1808,9 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 
 	*cs++ = cmd;
 	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
-	if (INTEL_GEN(req->i915) >= 8) {
-		*cs++ = 0; /* upper addr */
-		*cs++ = 0; /* value */
-	} else  {
-		*cs++ = 0;
-		*cs++ = MI_NOOP;
-	}
-	intel_ring_advance(req, cs);
-	return 0;
-}
-
-static int
-gen8_emit_bb_start(struct drm_i915_gem_request *req,
-		   u64 offset, u32 len,
-		   unsigned int dispatch_flags)
-{
-	bool ppgtt = USES_PPGTT(req->i915) &&
-			!(dispatch_flags & I915_DISPATCH_SECURE);
-	u32 *cs;
-
-	cs = intel_ring_begin(req, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/* FIXME(BDW): Address space and security selectors. */
-	*cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags &
-		I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
-	*cs++ = lower_32_bits(offset);
-	*cs++ = upper_32_bits(offset);
+	*cs++ = 0;
 	*cs++ = MI_NOOP;
 	intel_ring_advance(req, cs);
-
 	return 0;
 }
 
@@ -1901,8 +1867,6 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 		return PTR_ERR(cs);
 
 	cmd = MI_FLUSH_DW;
-	if (INTEL_GEN(req->i915) >= 8)
-		cmd += 1;
 
 	/* We always require a command barrier so that subsequent
 	 * commands, such as breadcrumb interrupts, are strictly ordered
@@ -1921,13 +1885,8 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 		cmd |= MI_INVALIDATE_TLB;
 	*cs++ = cmd;
 	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
-	if (INTEL_GEN(req->i915) >= 8) {
-		*cs++ = 0; /* upper addr */
-		*cs++ = 0; /* value */
-	} else  {
-		*cs++ = 0;
-		*cs++ = MI_NOOP;
-	}
+	*cs++ = 0;
+	*cs++ = MI_NOOP;
 	intel_ring_advance(req, cs);
 
 	return 0;
@@ -1936,110 +1895,61 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
 				       struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_object *obj;
-	int ret, i;
+	int i;
 
-	if (!i915_modparams.semaphores)
+	if (!HAS_LEGACY_SEMAPHORES(dev_priv))
 		return;
 
-	if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) {
-		struct i915_vma *vma;
+	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
+	engine->semaphore.sync_to = gen6_ring_sync_to;
+	engine->semaphore.signal = gen6_signal;
 
-		obj = i915_gem_object_create(dev_priv, PAGE_SIZE);
-		if (IS_ERR(obj))
-			goto err;
-
-		vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
-		if (IS_ERR(vma))
-			goto err_obj;
-
-		ret = i915_gem_object_set_to_gtt_domain(obj, false);
-		if (ret)
-			goto err_obj;
-
-		ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-		if (ret)
-			goto err_obj;
-
-		dev_priv->semaphore = vma;
-	}
-
-	if (INTEL_GEN(dev_priv) >= 8) {
-		u32 offset = i915_ggtt_offset(dev_priv->semaphore);
-
-		engine->semaphore.sync_to = gen8_ring_sync_to;
-		engine->semaphore.signal = gen8_xcs_signal;
-
-		for (i = 0; i < I915_NUM_ENGINES; i++) {
-			u32 ring_offset;
-
-			if (i != engine->id)
-				ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i);
-			else
-				ring_offset = MI_SEMAPHORE_SYNC_INVALID;
-
-			engine->semaphore.signal_ggtt[i] = ring_offset;
-		}
-	} else if (INTEL_GEN(dev_priv) >= 6) {
-		engine->semaphore.sync_to = gen6_ring_sync_to;
-		engine->semaphore.signal = gen6_signal;
-
-		/*
-		 * The current semaphore is only applied on pre-gen8
-		 * platform.  And there is no VCS2 ring on the pre-gen8
-		 * platform. So the semaphore between RCS and VCS2 is
-		 * initialized as INVALID.  Gen8 will initialize the
-		 * sema between VCS2 and RCS later.
-		 */
-		for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
-			static const struct {
-				u32 wait_mbox;
-				i915_reg_t mbox_reg;
-			} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
-				[RCS_HW] = {
-					[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
-					[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
-					[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
-				},
-				[VCS_HW] = {
-					[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
-					[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
-					[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
-				},
-				[BCS_HW] = {
-					[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
-					[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
-					[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
-				},
-				[VECS_HW] = {
-					[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
-					[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
-					[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
-				},
-			};
+	/*
+	 * These semaphores are only used on pre-gen8 platforms, and there
+	 * is no VCS2 ring on those platforms, so the semaphore between RCS
+	 * and VCS2 is initialized as INVALID.
+	 */
+	for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
+		static const struct {
 			u32 wait_mbox;
 			i915_reg_t mbox_reg;
+		} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
+			[RCS_HW] = {
+				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
+				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
+				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
+			},
+			[VCS_HW] = {
+				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
+				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
+				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
+			},
+			[BCS_HW] = {
+				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
+				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
+				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
+			},
+			[VECS_HW] = {
+				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
+				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
+				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
+			},
+		};
+		u32 wait_mbox;
+		i915_reg_t mbox_reg;
 
-			if (i == engine->hw_id) {
-				wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
-				mbox_reg = GEN6_NOSYNC;
-			} else {
-				wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
-				mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
-			}
-
-			engine->semaphore.mbox.wait[i] = wait_mbox;
-			engine->semaphore.mbox.signal[i] = mbox_reg;
+		if (i == engine->hw_id) {
+			wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
+			mbox_reg = GEN6_NOSYNC;
+		} else {
+			wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
+			mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
 		}
+
+		engine->semaphore.mbox.wait[i] = wait_mbox;
+		engine->semaphore.mbox.signal[i] = mbox_reg;
 	}
-
-	return;
-
-err_obj:
-	i915_gem_object_put(obj);
-err:
-	DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n");
-	i915_modparams.semaphores = 0;
 }
 
 static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
@@ -2047,11 +1957,7 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 {
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;
 
-	if (INTEL_GEN(dev_priv) >= 8) {
-		engine->irq_enable = gen8_irq_enable;
-		engine->irq_disable = gen8_irq_disable;
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
-	} else if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(dev_priv) >= 6) {
 		engine->irq_enable = gen6_irq_enable;
 		engine->irq_disable = gen6_irq_disable;
 		engine->irq_seqno_barrier = gen6_seqno_barrier;
@@ -2086,6 +1992,9 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 				      struct intel_engine_cs *engine)
 {
+	/* gen8+ are only supported with execlists */
+	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
+
 	intel_ring_init_irq(dev_priv, engine);
 	intel_ring_init_semaphores(dev_priv, engine);
 
@@ -2099,26 +2008,20 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 
 	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
-	if (i915_modparams.semaphores) {
+	if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
 		int num_rings;
 
 		engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
 
 		num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
-		if (INTEL_GEN(dev_priv) >= 8) {
-			engine->emit_breadcrumb_sz += num_rings * 6;
-		} else {
-			engine->emit_breadcrumb_sz += num_rings * 3;
-			if (num_rings & 1)
-				engine->emit_breadcrumb_sz++;
-		}
+		engine->emit_breadcrumb_sz += num_rings * 3;
+		if (num_rings & 1)
+			engine->emit_breadcrumb_sz++;
 	}
 
 	engine->set_default_submission = i9xx_set_default_submission;
 
-	if (INTEL_GEN(dev_priv) >= 8)
-		engine->emit_bb_start = gen8_emit_bb_start;
-	else if (INTEL_GEN(dev_priv) >= 6)
+	if (INTEL_GEN(dev_priv) >= 6)
 		engine->emit_bb_start = gen6_emit_bb_start;
 	else if (INTEL_GEN(dev_priv) >= 4)
 		engine->emit_bb_start = i965_emit_bb_start;
@@ -2138,20 +2041,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 	if (HAS_L3_DPF(dev_priv))
 		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
-	if (INTEL_GEN(dev_priv) >= 8) {
-		engine->init_context = intel_rcs_ctx_init;
-		engine->emit_breadcrumb = gen8_render_emit_breadcrumb;
-		engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz;
-		engine->emit_flush = gen8_render_ring_flush;
-		if (i915_modparams.semaphores) {
-			int num_rings;
-
-			engine->semaphore.signal = gen8_rcs_signal;
-
-			num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
-			engine->emit_breadcrumb_sz += num_rings * 8;
-		}
-	} else if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(dev_priv) >= 6) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen7_render_ring_flush;
 		if (IS_GEN6(dev_priv))
@@ -2170,7 +2060,6 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 		engine->emit_bb_start = hsw_emit_bb_start;
 
 	engine->init_hw = init_render_ring;
-	engine->cleanup = render_ring_cleanup;
 
 	ret = intel_init_ring_buffer(engine);
 	if (ret)
@@ -2200,8 +2089,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 		if (IS_GEN6(dev_priv))
 			engine->set_default_submission = gen6_bsd_set_default_submission;
 		engine->emit_flush = gen6_bsd_ring_flush;
-		if (INTEL_GEN(dev_priv) < 8)
-			engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 	} else {
 		engine->mmio_base = BSD_RING_BASE;
 		engine->emit_flush = bsd_ring_flush;
@@ -2221,8 +2109,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 	intel_ring_default_vfuncs(dev_priv, engine);
 
 	engine->emit_flush = gen6_ring_flush;
-	if (INTEL_GEN(dev_priv) < 8)
-		engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
+	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
 	return intel_init_ring_buffer(engine);
 }
@@ -2234,12 +2121,9 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	intel_ring_default_vfuncs(dev_priv, engine);
 
 	engine->emit_flush = gen6_ring_flush;
-
-	if (INTEL_GEN(dev_priv) < 8) {
-		engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
-		engine->irq_enable = hsw_vebox_irq_enable;
-		engine->irq_disable = hsw_vebox_irq_disable;
-	}
+	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
+	engine->irq_enable = hsw_vebox_irq_enable;
+	engine->irq_disable = hsw_vebox_irq_disable;
 
 	return intel_init_ring_buffer(engine);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f867aa6..c68ab3e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -6,6 +6,7 @@
 #include "i915_gem_batch_pool.h"
 #include "i915_gem_request.h"
 #include "i915_gem_timeline.h"
+#include "i915_pmu.h"
 #include "i915_selftest.h"
 
 struct drm_printer;
@@ -47,16 +48,6 @@ struct intel_hw_status_page {
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
-#define gen8_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SEMAPHORE_OFFSET(__from, __to)			     \
-	(((__from) * I915_NUM_ENGINES  + (__to)) * gen8_semaphore_seqno_size)
-#define GEN8_SIGNAL_OFFSET(__ring, to)			     \
-	(dev_priv->semaphore->node.start + \
-	 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
-#define GEN8_WAIT_OFFSET(__ring, from)			     \
-	(dev_priv->semaphore->node.start + \
-	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
-
 enum intel_engine_hangcheck_action {
 	ENGINE_IDLE = 0,
 	ENGINE_WAIT,
@@ -252,6 +243,7 @@ struct intel_engine_execlists {
 	unsigned int active;
 #define EXECLISTS_ACTIVE_USER 0
 #define EXECLISTS_ACTIVE_PREEMPT 1
+#define EXECLISTS_ACTIVE_HWACK 2
 
 	/**
 	 * @port_mask: number of execlist ports - 1
@@ -348,6 +340,43 @@ struct intel_engine_cs {
 		I915_SELFTEST_DECLARE(bool mock : 1);
 	} breadcrumbs;
 
+	struct {
+		/**
+		 * @enable: Bitmask of enabled sample events on this engine.
+		 *
+		 * Bits correspond to sample event types, for instance
+		 * I915_SAMPLE_QUEUED is bit 0 etc.
+		 */
+		u32 enable;
+		/**
+		 * @enable_count: Reference count for the enabled samplers.
+		 *
+		 * Index number corresponds to the bit number from @enable.
+		 */
+		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+		/**
+		 * @sample: Counter values for sampling events.
+		 *
+		 * Our internal timer stores the current counters in this field.
+		 */
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+		/**
+		 * @busy_stats: Whether enabling of engine stats tracking has
+		 * been requested.
+		 */
+		bool busy_stats;
+		/**
+		 * @disable_busy_stats: Work item for busy stats disabling.
+		 *
+		 * Disabling is deferred via this work item in case there are
+		 * rapid enable-disable cycles, which can happen during tool
+		 * startup (like perf stat).
+		 */
+		struct delayed_work disable_busy_stats;
+	} pmu;
+
 	/*
 	 * A pool of objects to use as shadow copies of client batch buffers
 	 * when the command parser is enabled. Prevents the client from
@@ -467,18 +496,15 @@ struct intel_engine_cs {
 	 *  ie. transpose of f(x, y)
 	 */
 	struct {
-		union {
 #define GEN6_SEMAPHORE_LAST	VECS_HW
 #define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
 #define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
-			struct {
-				/* our mbox written by others */
-				u32		wait[GEN6_NUM_SEMAPHORES];
-				/* mboxes this ring signals to */
-				i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
-			} mbox;
-			u64		signal_ggtt[I915_NUM_ENGINES];
-		};
+		struct {
+			/* our mbox written by others */
+			u32		wait[GEN6_NUM_SEMAPHORES];
+			/* mboxes this ring signals to */
+			i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
+		} mbox;
 
 		/* AKA wait() */
 		int	(*sync_to)(struct drm_i915_gem_request *req,
@@ -506,13 +532,16 @@ struct intel_engine_cs {
 	 * stream (ring).
 	 */
 	struct i915_gem_context *legacy_active_context;
+	struct i915_hw_ppgtt *legacy_active_ppgtt;
 
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
 	struct intel_engine_hangcheck hangcheck;
 
-	bool needs_cmd_parser;
+#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
+	unsigned int flags;
 
 	/*
 	 * Table of commands the command parser needs to know about
@@ -537,8 +566,50 @@ struct intel_engine_cs {
 	 * certain bits to encode the command length in the header).
 	 */
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+	struct {
+		/**
+		 * @lock: Lock protecting the below fields.
+		 */
+		spinlock_t lock;
+		/**
+		 * @enabled: Reference count indicating number of listeners.
+		 */
+		unsigned int enabled;
+		/**
+		 * @active: Number of contexts currently scheduled in.
+		 */
+		unsigned int active;
+		/**
+		 * @enabled_at: Timestamp when busy stats were enabled.
+		 */
+		ktime_t enabled_at;
+		/**
+		 * @start: Timestamp of the last idle to active transition.
+		 *
+		 * Idle is defined as active == 0, busy as active > 0.
+		 */
+		ktime_t start;
+		/**
+		 * @total: Total time this engine was busy.
+		 *
+		 * Accumulated time, not counting the most recent block in cases
+		 * where the engine is currently busy (active > 0).
+		 */
+		ktime_t total;
+	} stats;
 };
 
+static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+}
+
+static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
+}
+
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
@@ -939,4 +1010,67 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 
 void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p);
 
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+static inline void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
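+	/*
+	 * Unlocked peek to keep the common (stats disabled) case cheap;
+	 * stats.enabled is re-checked under the lock before accounting.
+	 */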
+	if (READ_ONCE(engine->stats.enabled) == 0)
+		return;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+
+	if (engine->stats.enabled > 0) {
+		if (engine->stats.active++ == 0)
+			engine->stats.start = ktime_get();
+		GEM_BUG_ON(engine->stats.active == 0);
+	}
+
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static inline void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (READ_ONCE(engine->stats.enabled) == 0)
+		return;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+
+	if (engine->stats.enabled > 0) {
+		ktime_t last;
+
+		if (engine->stats.active && --engine->stats.active == 0) {
+			/*
+			 * The active context count has dropped to zero, so the
+			 * engine is now idle; add the elapsed time to the total.
+			 */
+			last = ktime_sub(ktime_get(), engine->stats.start);
+
+			engine->stats.total = ktime_add(engine->stats.total,
+							last);
+		} else if (engine->stats.active == 0) {
+			/*
+			 * After turning on engine stats, a context out might be
+			 * the first event we see, in which case we account from
+			 * the time stats gathering was enabled.
+			 */
+			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+			engine->stats.total = ktime_add(engine->stats.total,
+							last);
+		}
+	}
+
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+int intel_enable_engine_stats(struct intel_engine_cs *engine);
+void intel_disable_engine_stats(struct intel_engine_cs *engine);
+
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index ce61570..dd485f5 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -325,6 +325,26 @@ skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+bool
+skl_plane_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum plane_id plane_id = plane->id;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
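+	/*
+	 * If the pipe's power well is off, the plane registers cannot be
+	 * read; report the plane as disabled in that case.
+	 */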
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(PLANE_CTL(pipe, plane_id)) & PLANE_CTL_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static void
 chv_update_csc(struct intel_plane *plane, uint32_t format)
 {
@@ -502,6 +522,26 @@ vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+static bool
+vlv_plane_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum plane_id plane_id = plane->id;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(SPCNTR(pipe, plane_id)) & SP_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
 			  const struct intel_plane_state *plane_state)
 {
@@ -642,6 +682,25 @@ ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+static bool
+ivb_plane_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(SPRCTL(pipe)) & SPRITE_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
 			  const struct intel_plane_state *plane_state)
 {
@@ -773,6 +832,25 @@ g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+static bool
+g4x_plane_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(DVSCNTR(pipe)) & DVS_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static int
 intel_check_sprite_plane(struct intel_plane *plane,
 			 struct intel_crtc_state *crtc_state,
@@ -1231,6 +1309,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = skl_update_plane;
 		intel_plane->disable_plane = skl_disable_plane;
+		intel_plane->get_hw_state = skl_plane_get_hw_state;
 
 		plane_formats = skl_plane_formats;
 		num_plane_formats = ARRAY_SIZE(skl_plane_formats);
@@ -1241,6 +1320,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = skl_update_plane;
 		intel_plane->disable_plane = skl_disable_plane;
+		intel_plane->get_hw_state = skl_plane_get_hw_state;
 
 		plane_formats = skl_plane_formats;
 		num_plane_formats = ARRAY_SIZE(skl_plane_formats);
@@ -1251,6 +1331,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = vlv_update_plane;
 		intel_plane->disable_plane = vlv_disable_plane;
+		intel_plane->get_hw_state = vlv_plane_get_hw_state;
 
 		plane_formats = vlv_plane_formats;
 		num_plane_formats = ARRAY_SIZE(vlv_plane_formats);
@@ -1266,6 +1347,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = ivb_update_plane;
 		intel_plane->disable_plane = ivb_disable_plane;
+		intel_plane->get_hw_state = ivb_plane_get_hw_state;
 
 		plane_formats = snb_plane_formats;
 		num_plane_formats = ARRAY_SIZE(snb_plane_formats);
@@ -1276,6 +1358,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = g4x_update_plane;
 		intel_plane->disable_plane = g4x_disable_plane;
+		intel_plane->get_hw_state = g4x_plane_get_hw_state;
 
 		modifiers = i9xx_plane_format_modifiers;
 		if (IS_GEN6(dev_priv)) {
@@ -1301,7 +1384,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 	}
 
 	intel_plane->pipe = pipe;
-	intel_plane->plane = plane;
+	intel_plane->i9xx_plane = plane;
 	intel_plane->id = PLANE_SPRITE0 + plane;
 	intel_plane->frontbuffer_bit = INTEL_FRONTBUFFER_SPRITE(pipe, plane);
 	intel_plane->check_plane = intel_check_sprite_plane;
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index 4bc82d3..b376dd3 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -105,7 +105,7 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 	}
 
 	/* now RSA */
-	if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) {
+	if (css->key_size_dw != UOS_RSA_SCRATCH_COUNT) {
 		DRM_WARN("%s: Mismatched firmware RSA key size (%u)\n",
 			 intel_uc_fw_type_repr(uc_fw->type), css->key_size_dw);
 		err = -ENOEXEC;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index b4621271..89547b61 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -565,9 +565,6 @@ void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv)
 
 void intel_uncore_sanitize(struct drm_i915_private *dev_priv)
 {
-	i915_modparams.enable_rc6 =
-		sanitize_rc6_option(dev_priv, i915_modparams.enable_rc6);
-
 	/* BIOS often leaves RC6 enabled, but disable it for hw init */
 	intel_sanitize_gt_powersave(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 01af540..e6b3104 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -27,6 +27,7 @@
 #include <linux/prime_numbers.h>
 
 #include "mock_drm.h"
+#include "i915_random.h"
 
 static const unsigned int page_sizes[] = {
 	I915_GTT_PAGE_SIZE_2M,
@@ -989,17 +990,9 @@ static int gpu_write(struct i915_vma *vma,
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
 
-	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-	if (err)
-		goto err_request;
-
-	err = i915_switch_context(rq);
-	if (err)
-		goto err_request;
-
-	err = rq->engine->emit_bb_start(rq,
-					batch->node.start, batch->node.size,
-					flags);
+	err = engine->emit_bb_start(rq,
+				    batch->node.start, batch->node.size,
+				    flags);
 	if (err)
 		goto err_request;
 
@@ -1047,19 +1040,78 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 	return err;
 }
 
+static int __igt_write_huge(struct i915_gem_context *ctx,
+			    struct intel_engine_cs *engine,
+			    struct drm_i915_gem_object *obj,
+			    u64 size, u64 offset,
+			    u32 dword, u32 val)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
+	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+	struct i915_vma *vma;
+	int err;
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_unbind(vma);
+	if (err)
+		goto out_vma_close;
+
+	err = i915_vma_pin(vma, size, 0, flags | offset);
+	if (err) {
+		/*
+		 * The ggtt may have some pages reserved so
+		 * refrain from erroring out.
+		 */
+		if (err == -ENOSPC && i915_is_ggtt(vm))
+			err = 0;
+
+		goto out_vma_close;
+	}
+
+	err = igt_check_page_sizes(vma);
+	if (err)
+		goto out_vma_unpin;
+
+	err = gpu_write(vma, ctx, engine, dword, val);
+	if (err) {
+		pr_err("gpu-write failed at offset=%llx\n", offset);
+		goto out_vma_unpin;
+	}
+
+	err = cpu_check(obj, dword, val);
+	if (err) {
+		pr_err("cpu-check failed at offset=%llx\n", offset);
+		goto out_vma_unpin;
+	}
+
+out_vma_unpin:
+	i915_vma_unpin(vma);
+out_vma_close:
+	i915_vma_close(vma);
+
+	return err;
+}
+
 static int igt_write_huge(struct i915_gem_context *ctx,
 			  struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base;
+	static struct intel_engine_cs *engines[I915_NUM_ENGINES];
 	struct intel_engine_cs *engine;
-	struct i915_vma *vma;
-	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
 	unsigned int max_page_size;
 	unsigned int id;
 	u64 max;
 	u64 num;
 	u64 size;
+	int *order;
+	int i, n;
 	int err = 0;
 
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
@@ -1071,78 +1123,56 @@ static int igt_write_huge(struct i915_gem_context *ctx,
 	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
 	max = div_u64((vm->total - size), max_page_size);
 
-	vma = i915_vma_instance(obj, vm, NULL);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
+	n = 0;
 	for_each_engine(engine, i915, id) {
-		IGT_TIMEOUT(end_time);
-
 		if (!intel_engine_can_store_dword(engine)) {
-			pr_info("store-dword-imm not supported on engine=%u\n",
-				id);
+			pr_info("store-dword-imm not supported on engine=%u\n", id);
 			continue;
 		}
-
-		/*
-		 * Try various offsets until we timeout -- we want to avoid
-		 * issues hidden by effectively always using offset = 0.
-		 */
-		for_each_prime_number_from(num, 0, max) {
-			u64 offset = num * max_page_size;
-			u32 dword;
-
-			err = i915_vma_unbind(vma);
-			if (err)
-				goto out_vma_close;
-
-			err = i915_vma_pin(vma, size, max_page_size, flags | offset);
-			if (err) {
-				/*
-				 * The ggtt may have some pages reserved so
-				 * refrain from erroring out.
-				 */
-				if (err == -ENOSPC && i915_is_ggtt(vm)) {
-					err = 0;
-					continue;
-				}
-
-				goto out_vma_close;
-			}
-
-			err = igt_check_page_sizes(vma);
-			if (err)
-				goto out_vma_unpin;
-
-			dword = offset_in_page(num) / 4;
-
-			err = gpu_write(vma, ctx, engine, dword, num + 1);
-			if (err) {
-				pr_err("gpu-write failed at offset=%llx", offset);
-				goto out_vma_unpin;
-			}
-
-			err = cpu_check(obj, dword, num + 1);
-			if (err) {
-				pr_err("cpu-check failed at offset=%llx", offset);
-				goto out_vma_unpin;
-			}
-
-			i915_vma_unpin(vma);
-
-			if (num > 0 &&
-			    igt_timeout(end_time,
-					"%s timed out on engine=%u at offset=%llx, max_page_size=%x\n",
-					__func__, id, offset, max_page_size))
-				break;
-		}
+		engines[n++] = engine;
 	}
 
-out_vma_unpin:
-	if (i915_vma_is_pinned(vma))
-		i915_vma_unpin(vma);
-out_vma_close:
-	i915_vma_close(vma);
+	if (!n)
+		return 0;
+
+	/*
+	 * To keep things interesting when alternating between engines in our
+	 * randomized order, let's also make feeding the same engine a few
+	 * times in succession a possibility by enlarging the permutation array.
+	 */
+	order = i915_random_order(n * I915_NUM_ENGINES, &prng);
+	if (!order)
+		return -ENOMEM;
+
+	/*
+	 * Try various offsets in an ascending/descending fashion until we
+	 * timeout -- we want to avoid issues hidden by effectively always using
+	 * offset = 0.
+	 */
+	i = 0;
+	for_each_prime_number_from(num, 0, max) {
+		u64 offset_low = num * max_page_size;
+		u64 offset_high = (max - num) * max_page_size;
+		u32 dword = offset_in_page(num) / 4;
+
+		engine = engines[order[i] % n];
+		i = (i + 1) % (n * I915_NUM_ENGINES);
+
+		err = __igt_write_huge(ctx, engine, obj, size, offset_low, dword, num + 1);
+		if (err)
+			break;
+
+		err = __igt_write_huge(ctx, engine, obj, size, offset_high, dword, num + 1);
+		if (err)
+			break;
+
+		if (igt_timeout(end_time,
+				"%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
+				__func__, engine->id, offset_low, offset_high, max_page_size))
+			break;
+	}
+
+	kfree(order);
 
 	return err;
 }
@@ -1719,6 +1749,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 		return PTR_ERR(file);
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
+	intel_runtime_pm_get(dev_priv);
 
 	ctx = live_context(dev_priv, file);
 	if (IS_ERR(ctx)) {
@@ -1729,6 +1760,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 	err = i915_subtests(tests, ctx);
 
 out_unlock:
+	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	mock_file_free(dev_priv, file);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index c82780a..56a803d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -158,14 +158,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 		goto err_batch;
 	}
 
-	err = engine->emit_flush(rq, EMIT_INVALIDATE);
-	if (err)
-		goto err_request;
-
-	err = i915_switch_context(rq);
-	if (err)
-		goto err_request;
-
 	flags = 0;
 	if (INTEL_GEN(vm->i915) <= 5)
 		flags |= I915_DISPATCH_SECURE;
@@ -272,6 +264,23 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 	return err;
 }
 
+static int file_add_object(struct drm_file *file,
+			    struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	GEM_BUG_ON(obj->base.handle_count);
+
+	/* tie the object to the drm_file for easy reaping */
+	err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL);
+	if (err < 0)
+		return err;
+
+	i915_gem_object_get(obj);
+	obj->base.handle_count++;
+	return 0;
+}
+
 static struct drm_i915_gem_object *
 create_test_object(struct i915_gem_context *ctx,
 		   struct drm_file *file,
@@ -281,7 +290,6 @@ create_test_object(struct i915_gem_context *ctx,
 	struct i915_address_space *vm =
 		ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base;
 	u64 size;
-	u32 handle;
 	int err;
 
 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
@@ -291,8 +299,7 @@ create_test_object(struct i915_gem_context *ctx,
 	if (IS_ERR(obj))
 		return obj;
 
-	/* tie the handle to the drm_file for easy reaping */
-	err = drm_gem_handle_create(file, &obj->base, &handle);
+	err = file_add_object(file, obj);
 	i915_gem_object_put(obj);
 	if (err)
 		return ERR_PTR(err);
@@ -369,7 +376,9 @@ static int igt_ctx_exec(void *arg)
 				}
 			}
 
+			intel_runtime_pm_get(i915);
 			err = gpu_fill(obj, ctx, engine, dw);
+			intel_runtime_pm_put(i915);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index f463105..e1ddad6 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -355,6 +355,7 @@ static int igt_evict_contexts(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
 
 	/* Reserve a block so that we know we have enough to fit a few rq */
 	memset(&hole, 0, sizeof(hole));
@@ -463,6 +464,7 @@ static int igt_evict_contexts(void *arg)
 	}
 	if (drm_mm_node_allocated(&hole))
 		drm_mm_remove_node(&hole);
+	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 6bce990..647bf2b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -459,14 +459,6 @@ empty_request(struct intel_engine_cs *engine,
 	if (IS_ERR(request))
 		return request;
 
-	err = engine->emit_flush(request, EMIT_INVALIDATE);
-	if (err)
-		goto out_request;
-
-	err = i915_switch_context(request);
-	if (err)
-		goto out_request;
-
 	err = engine->emit_bb_start(request,
 				    batch->node.start,
 				    batch->node.size,
@@ -675,12 +667,6 @@ static int live_all_engines(void *arg)
 			goto out_request;
 		}
 
-		err = engine->emit_flush(request[id], EMIT_INVALIDATE);
-		GEM_BUG_ON(err);
-
-		err = i915_switch_context(request[id]);
-		GEM_BUG_ON(err);
-
 		err = engine->emit_bb_start(request[id],
 					    batch->node.start,
 					    batch->node.size,
@@ -797,12 +783,6 @@ static int live_sequential_engines(void *arg)
 			}
 		}
 
-		err = engine->emit_flush(request[id], EMIT_INVALIDATE);
-		GEM_BUG_ON(err);
-
-		err = i915_switch_context(request[id]);
-		GEM_BUG_ON(err);
-
 		err = engine->emit_bb_start(request[id],
 					    batch->node.start,
 					    batch->node.size,
diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c
index b85872c..2088ae5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_random.c
+++ b/drivers/gpu/drm/i915/selftests/i915_random.c
@@ -57,7 +57,7 @@ unsigned int *i915_random_order(unsigned int count, struct rnd_state *state)
 {
 	unsigned int *order, i;
 
-	order = kmalloc_array(count, sizeof(*order), GFP_KERNEL);
+	order = kmalloc_array(count, sizeof(*order), GFP_KERNEL | __GFP_NOWARN);
 	if (!order)
 		return order;
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
index f10029e..7b23597 100644
--- a/drivers/gpu/drm/i915/selftests/intel_guc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
@@ -27,7 +27,7 @@
 /* max doorbell number + negative test for each client type */
 #define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM)
 
-struct intel_guc_client *clients[ATTEMPTS];
+static struct intel_guc_client *clients[ATTEMPTS];
 
 static bool available_dbs(struct intel_guc *guc, u32 priority)
 {
@@ -131,6 +131,8 @@ static int igt_guc_init_doorbell_hw(void *args)
 		pr_err("Failed to create clients\n");
 		goto unlock;
 	}
+	GEM_BUG_ON(!guc->execbuf_client);
+	GEM_BUG_ON(!guc->preempt_client);
 
 	err = validate_client(guc->execbuf_client,
 			      GUC_CLIENT_PRIORITY_KMD_NORMAL, false);
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 71ce066..1bbb8c4 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -114,14 +114,6 @@ static int emit_recurse_batch(struct hang *h,
 	if (err)
 		goto unpin_vma;
 
-	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
-	if (err)
-		goto unpin_hws;
-
-	err = i915_switch_context(rq);
-	if (err)
-		goto unpin_hws;
-
 	i915_vma_move_to_active(vma, rq, 0);
 	if (!i915_gem_object_has_active_reference(vma->obj)) {
 		i915_gem_object_get(vma->obj);
@@ -173,7 +165,6 @@ static int emit_recurse_batch(struct hang *h,
 
 	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
 
-unpin_hws:
 	i915_vma_unpin(hws);
 unpin_vma:
 	i915_vma_unpin(vma);