Merge tag 'drm-intel-next-2015-11-20-merged' of git://anongit.freedesktop.org/drm-intel into drm-next

drm-intel-next-2015-11-20-rebased:
Four weeks of material this time because of my vacation, so a bit more than usual:
- final bits of the typesafe register mmio functions (Ville); a quick sketch of the idea follows the shortlog below
- power domain fix for hdmi detection (Imre)
- tons of fixes and improvements to the psr code (Rodrigo)
- refactoring of the dp detection code (Ander)
- complete rework of the dmc loader and dc5/dc6 handling (Imre, Patrik and
  others)
- dp compliance improvements from Shubhangi Shrivastava
- stop_machine hack from Chris to fix corruptions when updating GTT ptes on bsw
- lots of fifo underrun fixes from Ville
- big pile of fbc fixes and improvements from Paulo
- fix fbdev failure paths (Tvrtko and Lukas Wunner)
- dp link training refactoring (Ander)
- interruptible prepare_plane for atomic (Maarten)
- basic kabylake support (Deepak & Rodrigo)
- don't leak ringspace on resets (Chris)
drm-intel-next-2015-10-23:
- 2nd attempt at atomic watermarks from Matt, but just prep for now
- fixes all over

* tag 'drm-intel-next-2015-11-20-merged' of git://anongit.freedesktop.org/drm-intel: (209 commits)
  drm/i915: Update DRIVER_DATE to 20151120
  drm/i915: take a power domain reference while checking the HDMI live status
  drm/i915: take a power domain ref only when needed during HDMI detect
  drm/i915: Tear down fbdev if initialization fails
  async: export current_is_async()
  Revert "drm/i915: Initialize HWS page address after GPU reset"
  drm/i915: Fix oops caused by fbdev initialization failure
  drm/i915: Fix i915_ggtt_view_equal to handle rotation correctly
  drm/i915: Stuff rotation params into view union
  drm/i915: Drop return value from intel_fill_fb_ggtt_view
  drm/i915 : Fix to remove unnecsessary checks in postclose function.
  drm/i915: add MISSING_CASE to a few port/aux power domain helpers
  drm/i915/ddi: fix intel_display_port_aux_power_domain() after HDMI detect
  drm/i915: Remove platform specific *_dp_detect() functions
  drm/i915: Don't do edp panel detection in g4x_dp_detect()
  drm/i915: Send TP1 TP2/3 even when panel claims no NO_TRAIN_ON_EXIT.
  drm/i915: PSR: Don't Skip aux handshake on DP_PSR_NO_TRAIN_ON_EXIT.
  drm/i915: Reduce PSR re-activation time for VLV/CHV.
  drm/i915: Delay first PSR activation.
  drm/i915: Type safe register read/write
  ...
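
A large share of the hunks below convert bare u32 register offsets to the new i915_reg_t wrapper. As a rough sketch of the idea (the names follow the real definitions in i915_reg.h, but treat this as an illustration, not the upstream code), the type is a one-member struct so the compiler rejects accidental mixing of MMIO offsets and plain integers, and i915_mmio_reg_offset() is the single place the raw offset gets unwrapped:

typedef struct {
	uint32_t reg;				/* raw MMIO offset */
} i915_reg_t;

#define _MMIO(r) ((const i915_reg_t){ .reg = (r) })	/* wrap a register define */

static inline uint32_t i915_mmio_reg_offset(i915_reg_t r)
{
	return r.reg;				/* unwrap where a bare offset is required */
}

Passing a u32 where an i915_reg_t is expected (or doing arithmetic directly on a register) then becomes a compile error instead of a silent bug, which is what the conversions in i915_cmd_parser.c, i915_drv.h and friends rely on.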
diff --git a/Documentation/DocBook/gpu.tmpl b/Documentation/DocBook/gpu.tmpl
index 944e65a..03f01e7 100644
--- a/Documentation/DocBook/gpu.tmpl
+++ b/Documentation/DocBook/gpu.tmpl
@@ -4177,17 +4177,21 @@
       </sect2>
     </sect1>
     <sect1>
-      <title>GuC-based Command Submission</title>
+      <title>GuC</title>
       <sect2>
-        <title>GuC</title>
+        <title>GuC-specific firmware loader</title>
 !Pdrivers/gpu/drm/i915/intel_guc_loader.c GuC-specific firmware loader
 !Idrivers/gpu/drm/i915/intel_guc_loader.c
       </sect2>
       <sect2>
-        <title>GuC Client</title>
-!Pdrivers/gpu/drm/i915/i915_guc_submission.c GuC-based command submissison
+        <title>GuC-based command submission</title>
+!Pdrivers/gpu/drm/i915/i915_guc_submission.c GuC-based command submission
 !Idrivers/gpu/drm/i915/i915_guc_submission.c
       </sect2>
+      <sect2>
+        <title>GuC Firmware Layout</title>
+!Pdrivers/gpu/drm/i915/intel_guc_fwif.h GuC Firmware Layout
+      </sect2>
     </sect1>
 
     <sect1>
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index db9a675..bca14c8 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -547,6 +547,7 @@
 	INTEL_CHV_IDS(&chv_stolen_funcs),
 	INTEL_SKL_IDS(&gen9_stolen_funcs),
 	INTEL_BXT_IDS(&gen9_stolen_funcs),
+	INTEL_KBL_IDS(&gen9_stolen_funcs),
 };
 
 static void __init intel_graphics_stolen(int num, int slot, int func)
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 051eab3..fcd77b2 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -10,6 +10,7 @@
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
 	select TMPFS
+	select STOP_MACHINE
 	select DRM_KMS_HELPER
 	select DRM_PANEL
 	select DRM_MIPI_DSI
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 44d290a..0851de07 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -77,6 +77,7 @@
 	  dvo_tfp410.o \
 	  intel_crt.o \
 	  intel_ddi.o \
+	  intel_dp_link_training.o \
 	  intel_dp_mst.o \
 	  intel_dp.o \
 	  intel_dsi.o \
diff --git a/drivers/gpu/drm/i915/dvo.h b/drivers/gpu/drm/i915/dvo.h
index 0e2c1b9..13dea42 100644
--- a/drivers/gpu/drm/i915/dvo.h
+++ b/drivers/gpu/drm/i915/dvo.h
@@ -32,7 +32,8 @@
 	const char *name;
 	int type;
 	/* DVOA/B/C output register */
-	u32 dvo_reg;
+	i915_reg_t dvo_reg;
+	i915_reg_t dvo_srcdim_reg;
 	/* GPIO register used for i2c bus to control this device */
 	u32 gpio;
 	int slave_addr;
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index db58c8d..814d894 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -407,14 +407,14 @@
  * LRI.
  */
 struct drm_i915_reg_descriptor {
-	u32 addr;
+	i915_reg_t addr;
 	u32 mask;
 	u32 value;
 };
 
 /* Convenience macro for adding 32-bit registers. */
-#define REG32(address, ...)                             \
-	{ .addr = address, __VA_ARGS__ }
+#define REG32(_reg, ...) \
+	{ .addr = (_reg), __VA_ARGS__ }
 
 /*
  * Convenience macro for adding 64-bit registers.
@@ -423,8 +423,13 @@
  * access commands only allow 32-bit accesses. Hence, we have to include
  * entries for both halves of the 64-bit registers.
  */
-#define REG64(addr)                                     \
-	REG32(addr), REG32(addr + sizeof(u32))
+#define REG64(_reg) \
+	{ .addr = _reg }, \
+	{ .addr = _reg ## _UDW }
+
+#define REG64_IDX(_reg, idx) \
+	{ .addr = _reg(idx) }, \
+	{ .addr = _reg ## _UDW(idx) }
 
 static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
 	REG64(GPGPU_THREADS_DISPATCHED),
@@ -451,14 +456,14 @@
 	REG32(GEN7_GPGPU_DISPATCHDIMX),
 	REG32(GEN7_GPGPU_DISPATCHDIMY),
 	REG32(GEN7_GPGPU_DISPATCHDIMZ),
-	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)),
-	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)),
-	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)),
-	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(3)),
-	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(0)),
-	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)),
-	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)),
-	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)),
+	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0),
+	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1),
+	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2),
+	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 3),
+	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 0),
+	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 1),
+	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 2),
+	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 3),
 	REG32(GEN7_SO_WRITE_OFFSET(0)),
 	REG32(GEN7_SO_WRITE_OFFSET(1)),
 	REG32(GEN7_SO_WRITE_OFFSET(2)),
@@ -592,7 +597,7 @@
 	bool ret = true;
 
 	for (i = 0; i < reg_count; i++) {
-		u32 curr = reg_table[i].addr;
+		u32 curr = i915_mmio_reg_offset(reg_table[i].addr);
 
 		if (curr < previous) {
 			DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
@@ -847,7 +852,7 @@
 		int i;
 
 		for (i = 0; i < count; i++) {
-			if (table[i].addr == addr)
+			if (i915_mmio_reg_offset(table[i].addr) == addr)
 				return &table[i];
 		}
 	}
@@ -1023,7 +1028,7 @@
 			 * to the register. Hence, limit OACONTROL writes to
 			 * only MI_LOAD_REGISTER_IMM commands.
 			 */
-			if (reg_addr == OACONTROL) {
+			if (reg_addr == i915_mmio_reg_offset(OACONTROL)) {
 				if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
 					DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n");
 					return false;
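
For reference, the reworked 64-bit descriptor macros above lean on the convention that the upper dword of a 64-bit register is a separate define carrying a _UDW suffix. An illustrative expansion (not literal preprocessor output):

/*
 * Illustration only, assuming the _UDW suffix convention of the register
 * definitions:
 *
 *   REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1)
 *     =>  { .addr = GEN7_SO_NUM_PRIMS_WRITTEN(1) },
 *         { .addr = GEN7_SO_NUM_PRIMS_WRITTEN_UDW(1) },
 *
 * i.e. one drm_i915_reg_descriptor per 32-bit half, so the command parser
 * can whitelist LRI writes to either half, and the whole table keeps
 * working now that .addr is an i915_reg_t instead of a bare u32.
 */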
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a3b22bd..411a9c6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1252,18 +1252,21 @@
 
 		max_freq = (IS_BROXTON(dev) ? rp_state_cap >> 0 :
 			    rp_state_cap >> 16) & 0xff;
-		max_freq *= (IS_SKYLAKE(dev) ? GEN9_FREQ_SCALER : 1);
+		max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
+			     GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 
 		max_freq = (rp_state_cap & 0xff00) >> 8;
-		max_freq *= (IS_SKYLAKE(dev) ? GEN9_FREQ_SCALER : 1);
+		max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
+			     GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 
 		max_freq = (IS_BROXTON(dev) ? rp_state_cap >> 16 :
 			    rp_state_cap >> 0) & 0xff;
-		max_freq *= (IS_SKYLAKE(dev) ? GEN9_FREQ_SCALER : 1);
+		max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
+			     GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 		seq_printf(m, "Max overclocked frequency: %dMHz\n",
@@ -1523,7 +1526,7 @@
 		seq_printf(m, "RC information accurate: %s\n", yesno(count < 51));
 	}
 
-	gt_core_status = readl(dev_priv->regs + GEN6_GT_CORE_STATUS);
+	gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS);
 	trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true);
 
 	rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
@@ -1640,7 +1643,7 @@
 		seq_puts(m, "FBC enabled\n");
 	else
 		seq_printf(m, "FBC disabled: %s\n",
-			  intel_no_fbc_reason_str(dev_priv->fbc.no_fbc_reason));
+			   dev_priv->fbc.no_fbc_reason);
 
 	if (INTEL_INFO(dev_priv)->gen >= 7)
 		seq_printf(m, "Compressing: %s\n",
@@ -1801,7 +1804,7 @@
 	if (ret)
 		goto out;
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		/* Convert GT frequency to 50 HZ units */
 		min_gpu_freq =
 			dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER;
@@ -1821,7 +1824,8 @@
 				       &ia_freq);
 		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
 			   intel_gpu_freq(dev_priv, (gpu_freq *
-				(IS_SKYLAKE(dev) ? GEN9_FREQ_SCALER : 1))),
+				(IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
+				 GEN9_FREQ_SCALER : 1))),
 			   ((ia_freq >> 0) & 0xff) * 100,
 			   ((ia_freq >> 8) & 0xff) * 100);
 	}
@@ -1873,17 +1877,19 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	ifbdev = dev_priv->fbdev;
-	fb = to_intel_framebuffer(ifbdev->helper.fb);
+	if (ifbdev) {
+		fb = to_intel_framebuffer(ifbdev->helper.fb);
 
-	seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
-		   fb->base.width,
-		   fb->base.height,
-		   fb->base.depth,
-		   fb->base.bits_per_pixel,
-		   fb->base.modifier[0],
-		   atomic_read(&fb->base.refcount.refcount));
-	describe_obj(m, fb->obj);
-	seq_putc(m, '\n');
+		seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
+			   fb->base.width,
+			   fb->base.height,
+			   fb->base.depth,
+			   fb->base.bits_per_pixel,
+			   fb->base.modifier[0],
+			   atomic_read(&fb->base.refcount.refcount));
+		describe_obj(m, fb->obj);
+		seq_putc(m, '\n');
+	}
 #endif
 
 	mutex_lock(&dev->mode_config.fb_lock);
@@ -2402,6 +2408,12 @@
 		guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted);
 	seq_printf(m, "\tversion found: %d.%d\n",
 		guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found);
+	seq_printf(m, "\theader: offset is %d; size = %d\n",
+		guc_fw->header_offset, guc_fw->header_size);
+	seq_printf(m, "\tuCode: offset is %d; size = %d\n",
+		guc_fw->ucode_offset, guc_fw->ucode_size);
+	seq_printf(m, "\tRSA: offset is %d; size = %d\n",
+		guc_fw->rsa_offset, guc_fw->rsa_size);
 
 	tmp = I915_READ(GUC_STATUS);
 
@@ -2550,7 +2562,7 @@
 		   yesno(work_busy(&dev_priv->psr.work.work)));
 
 	if (HAS_DDI(dev))
-		enabled = I915_READ(EDP_PSR_CTL(dev)) & EDP_PSR_ENABLE;
+		enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE;
 	else {
 		for_each_pipe(dev_priv, pipe) {
 			stat[pipe] = I915_READ(VLV_PSRSTAT(pipe)) &
@@ -2572,7 +2584,7 @@
 
 	/* CHV PSR has no kind of performance counter */
 	if (HAS_DDI(dev)) {
-		psrperf = I915_READ(EDP_PSR_PERF_CNT(dev)) &
+		psrperf = I915_READ(EDP_PSR_PERF_CNT) &
 			EDP_PSR_PERF_CNT_MASK;
 
 		seq_printf(m, "Performance_Counter: %u\n", psrperf);
@@ -2696,24 +2708,16 @@
 		return "TRANSCODER_C";
 	case POWER_DOMAIN_TRANSCODER_EDP:
 		return "TRANSCODER_EDP";
-	case POWER_DOMAIN_PORT_DDI_A_2_LANES:
-		return "PORT_DDI_A_2_LANES";
-	case POWER_DOMAIN_PORT_DDI_A_4_LANES:
-		return "PORT_DDI_A_4_LANES";
-	case POWER_DOMAIN_PORT_DDI_B_2_LANES:
-		return "PORT_DDI_B_2_LANES";
-	case POWER_DOMAIN_PORT_DDI_B_4_LANES:
-		return "PORT_DDI_B_4_LANES";
-	case POWER_DOMAIN_PORT_DDI_C_2_LANES:
-		return "PORT_DDI_C_2_LANES";
-	case POWER_DOMAIN_PORT_DDI_C_4_LANES:
-		return "PORT_DDI_C_4_LANES";
-	case POWER_DOMAIN_PORT_DDI_D_2_LANES:
-		return "PORT_DDI_D_2_LANES";
-	case POWER_DOMAIN_PORT_DDI_D_4_LANES:
-		return "PORT_DDI_D_4_LANES";
-	case POWER_DOMAIN_PORT_DDI_E_2_LANES:
-		return "PORT_DDI_E_2_LANES";
+	case POWER_DOMAIN_PORT_DDI_A_LANES:
+		return "PORT_DDI_A_LANES";
+	case POWER_DOMAIN_PORT_DDI_B_LANES:
+		return "PORT_DDI_B_LANES";
+	case POWER_DOMAIN_PORT_DDI_C_LANES:
+		return "PORT_DDI_C_LANES";
+	case POWER_DOMAIN_PORT_DDI_D_LANES:
+		return "PORT_DDI_D_LANES";
+	case POWER_DOMAIN_PORT_DDI_E_LANES:
+		return "PORT_DDI_E_LANES";
 	case POWER_DOMAIN_PORT_DSI:
 		return "PORT_DSI";
 	case POWER_DOMAIN_PORT_CRT:
@@ -2734,6 +2738,10 @@
 		return "AUX_C";
 	case POWER_DOMAIN_AUX_D:
 		return "AUX_D";
+	case POWER_DOMAIN_GMBUS:
+		return "GMBUS";
+	case POWER_DOMAIN_MODESET:
+		return "MODESET";
 	case POWER_DOMAIN_INIT:
 		return "INIT";
 	default:
@@ -2777,6 +2785,51 @@
 	return 0;
 }
 
+static int i915_dmc_info(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_csr *csr;
+
+	if (!HAS_CSR(dev)) {
+		seq_puts(m, "not supported\n");
+		return 0;
+	}
+
+	csr = &dev_priv->csr;
+
+	intel_runtime_pm_get(dev_priv);
+
+	seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL));
+	seq_printf(m, "path: %s\n", csr->fw_path);
+
+	if (!csr->dmc_payload)
+		goto out;
+
+	seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version),
+		   CSR_VERSION_MINOR(csr->version));
+
+	if (IS_SKYLAKE(dev) && csr->version >= CSR_VERSION(1, 6)) {
+		seq_printf(m, "DC3 -> DC5 count: %d\n",
+			   I915_READ(SKL_CSR_DC3_DC5_COUNT));
+		seq_printf(m, "DC5 -> DC6 count: %d\n",
+			   I915_READ(SKL_CSR_DC5_DC6_COUNT));
+	} else if (IS_BROXTON(dev) && csr->version >= CSR_VERSION(1, 4)) {
+		seq_printf(m, "DC3 -> DC5 count: %d\n",
+			   I915_READ(BXT_CSR_DC3_DC5_COUNT));
+	}
+
+out:
+	seq_printf(m, "program base: 0x%08x\n", I915_READ(CSR_PROGRAM(0)));
+	seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
+	seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
+
+	intel_runtime_pm_put(dev_priv);
+
+	return 0;
+}
+
 static void intel_seq_print_mode(struct seq_file *m, int tabs,
 				 struct drm_display_mode *mode)
 {
@@ -2944,6 +2997,107 @@
 	return cursor_active(dev, pipe);
 }
 
+static const char *plane_type(enum drm_plane_type type)
+{
+	switch (type) {
+	case DRM_PLANE_TYPE_OVERLAY:
+		return "OVL";
+	case DRM_PLANE_TYPE_PRIMARY:
+		return "PRI";
+	case DRM_PLANE_TYPE_CURSOR:
+		return "CUR";
+	/*
+	 * Deliberately omitting default: to generate compiler warnings
+	 * when a new drm_plane_type gets added.
+	 */
+	}
+
+	return "unknown";
+}
+
+static const char *plane_rotation(unsigned int rotation)
+{
+	static char buf[48];
+	/*
+	 * According to doc only one DRM_ROTATE_ is allowed but this
+	 * will print them all to visualize if the values are misused
+	 */
+	snprintf(buf, sizeof(buf),
+		 "%s%s%s%s%s%s(0x%08x)",
+		 (rotation & BIT(DRM_ROTATE_0)) ? "0 " : "",
+		 (rotation & BIT(DRM_ROTATE_90)) ? "90 " : "",
+		 (rotation & BIT(DRM_ROTATE_180)) ? "180 " : "",
+		 (rotation & BIT(DRM_ROTATE_270)) ? "270 " : "",
+		 (rotation & BIT(DRM_REFLECT_X)) ? "FLIPX " : "",
+		 (rotation & BIT(DRM_REFLECT_Y)) ? "FLIPY " : "",
+		 rotation);
+
+	return buf;
+}
+
+static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct intel_plane *intel_plane;
+
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		struct drm_plane_state *state;
+		struct drm_plane *plane = &intel_plane->base;
+
+		if (!plane->state) {
+			seq_puts(m, "plane->state is NULL!\n");
+			continue;
+		}
+
+		state = plane->state;
+
+		seq_printf(m, "\t--Plane id %d: type=%s, crtc_pos=%4dx%4d, crtc_size=%4dx%4d, src_pos=%d.%04ux%d.%04u, src_size=%d.%04ux%d.%04u, format=%s, rotation=%s\n",
+			   plane->base.id,
+			   plane_type(intel_plane->base.type),
+			   state->crtc_x, state->crtc_y,
+			   state->crtc_w, state->crtc_h,
+			   (state->src_x >> 16),
+			   ((state->src_x & 0xffff) * 15625) >> 10,
+			   (state->src_y >> 16),
+			   ((state->src_y & 0xffff) * 15625) >> 10,
+			   (state->src_w >> 16),
+			   ((state->src_w & 0xffff) * 15625) >> 10,
+			   (state->src_h >> 16),
+			   ((state->src_h & 0xffff) * 15625) >> 10,
+			   state->fb ? drm_get_format_name(state->fb->pixel_format) : "N/A",
+			   plane_rotation(state->rotation));
+	}
+}
+
+static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc)
+{
+	struct intel_crtc_state *pipe_config;
+	int num_scalers = intel_crtc->num_scalers;
+	int i;
+
+	pipe_config = to_intel_crtc_state(intel_crtc->base.state);
+
+	/* Not all platforms have a scaler */
+	if (num_scalers) {
+		seq_printf(m, "\tnum_scalers=%d, scaler_users=%x scaler_id=%d",
+			   num_scalers,
+			   pipe_config->scaler_state.scaler_users,
+			   pipe_config->scaler_state.scaler_id);
+
+		for (i = 0; i < SKL_NUM_SCALERS; i++) {
+			struct intel_scaler *sc =
+					&pipe_config->scaler_state.scalers[i];
+
+			seq_printf(m, ", scalers[%d]: use=%s, mode=%x",
+				   i, yesno(sc->in_use), sc->mode);
+		}
+		seq_puts(m, "\n");
+	} else {
+		seq_puts(m, "\tNo scalers available on this platform\n");
+	}
+}
+
 static int i915_display_info(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = m->private;
@@ -2963,10 +3117,12 @@
 
 		pipe_config = to_intel_crtc_state(crtc->base.state);
 
-		seq_printf(m, "CRTC %d: pipe: %c, active=%s (size=%dx%d)\n",
+		seq_printf(m, "CRTC %d: pipe: %c, active=%s, (size=%dx%d), dither=%s, bpp=%d\n",
 			   crtc->base.base.id, pipe_name(crtc->pipe),
 			   yesno(pipe_config->base.active),
-			   pipe_config->pipe_src_w, pipe_config->pipe_src_h);
+			   pipe_config->pipe_src_w, pipe_config->pipe_src_h,
+			   yesno(pipe_config->dither), pipe_config->pipe_bpp);
+
 		if (pipe_config->base.active) {
 			intel_crtc_info(m, crtc);
 
@@ -2976,6 +3132,8 @@
 				   x, y, crtc->base.cursor->state->crtc_w,
 				   crtc->base.cursor->state->crtc_h,
 				   crtc->cursor_addr, yesno(active));
+			intel_scaler_info(m, crtc);
+			intel_plane_info(m, crtc);
 		}
 
 		seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s \n",
@@ -3110,7 +3268,8 @@
 
 	seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count);
 	for (i = 0; i < dev_priv->workarounds.count; ++i) {
-		u32 addr, mask, value, read;
+		i915_reg_t addr;
+		u32 mask, value, read;
 		bool ok;
 
 		addr = dev_priv->workarounds.reg[i].addr;
@@ -3119,7 +3278,7 @@
 		read = I915_READ(addr);
 		ok = (value & mask) == (read & mask);
 		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
-			   addr, value, mask, read, ok ? "OK" : "FAIL");
+			   i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
 	}
 
 	intel_runtime_pm_put(dev_priv);
@@ -5023,7 +5182,7 @@
 
 		stat->slice_total++;
 
-		if (IS_SKYLAKE(dev))
+		if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 			ss_cnt = INTEL_INFO(dev)->subslice_per_slice;
 
 		for (ss = 0; ss < ss_max; ss++) {
@@ -5236,6 +5395,7 @@
 	{"i915_energy_uJ", i915_energy_uJ, 0},
 	{"i915_runtime_pm_status", i915_runtime_pm_status, 0},
 	{"i915_power_domain_info", i915_power_domain_info, 0},
+	{"i915_dmc_info", i915_dmc_info, 0},
 	{"i915_display_info", i915_display_info, 0},
 	{"i915_semaphore_status", i915_semaphore_status, 0},
 	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index b4741d1..a81c766 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -28,7 +28,6 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/async.h>
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
@@ -338,7 +337,7 @@
 		i915_resume_switcheroo(dev);
 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 	} else {
-		pr_err("switched off\n");
+		pr_info("switched off\n");
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		i915_suspend_switcheroo(dev, pmm);
 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
@@ -396,7 +395,9 @@
 	if (ret)
 		goto cleanup_vga_switcheroo;
 
-	intel_power_domains_init_hw(dev_priv);
+	intel_power_domains_init_hw(dev_priv, false);
+
+	intel_csr_ucode_init(dev_priv);
 
 	ret = intel_irq_install(dev_priv);
 	if (ret)
@@ -437,7 +438,7 @@
 	 * scanning against hotplug events. Hence do this first and ignore the
 	 * tiny window where we will loose hotplug notifactions.
 	 */
-	async_schedule(intel_fbdev_initial_config, dev_priv);
+	intel_fbdev_initial_config_async(dev);
 
 	drm_kms_helper_poll_init(dev);
 
@@ -663,7 +664,8 @@
 	 * supports EU power gating on devices with more than one EU
 	 * pair per subslice.
 	*/
-	info->has_slice_pg = (IS_SKYLAKE(dev) && (info->slice_total > 1));
+	info->has_slice_pg = ((IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) &&
+			       (info->slice_total > 1));
 	info->has_subslice_pg = (IS_BROXTON(dev) && (info->subslice_total > 1));
 	info->has_eu_pg = (info->eu_per_subslice > 2);
 }
@@ -890,7 +892,6 @@
 	spin_lock_init(&dev_priv->mmio_flip_lock);
 	mutex_init(&dev_priv->sb_lock);
 	mutex_init(&dev_priv->modeset_restore_lock);
-	mutex_init(&dev_priv->csr_lock);
 	mutex_init(&dev_priv->av_mutex);
 
 	intel_pm_setup(dev);
@@ -937,9 +938,6 @@
 
 	intel_uncore_init(dev);
 
-	/* Load CSR Firmware for SKL */
-	intel_csr_ucode_init(dev);
-
 	ret = i915_gem_gtt_init(dev);
 	if (ret)
 		goto out_freecsr;
@@ -1113,7 +1111,7 @@
 out_gtt:
 	i915_global_gtt_cleanup(dev);
 out_freecsr:
-	intel_csr_ucode_fini(dev);
+	intel_csr_ucode_fini(dev_priv);
 	intel_uncore_fini(dev);
 	pci_iounmap(dev->pdev, dev_priv->regs);
 put_bridge:
@@ -1131,6 +1129,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
+	intel_fbdev_fini(dev);
+
 	i915_audio_component_cleanup(dev_priv);
 
 	ret = i915_gem_suspend(dev);
@@ -1153,8 +1153,6 @@
 
 	acpi_video_unregister();
 
-	intel_fbdev_fini(dev);
-
 	drm_vblank_cleanup(dev);
 
 	intel_modeset_cleanup(dev);
@@ -1196,7 +1194,7 @@
 	intel_fbc_cleanup_cfb(dev_priv);
 	i915_gem_cleanup_stolen(dev);
 
-	intel_csr_ucode_fini(dev);
+	intel_csr_ucode_fini(dev_priv);
 
 	intel_teardown_gmbus(dev);
 	intel_teardown_mchbar(dev);
@@ -1264,8 +1262,6 @@
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	if (file_priv && file_priv->bsd_ring)
-		file_priv->bsd_ring = NULL;
 	kfree(file_priv);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 760e0ce..6344dfb 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -383,6 +383,7 @@
 
 static const struct intel_device_info intel_broxton_info = {
 	.is_preliminary = 1,
+	.is_broxton = 1,
 	.gen = 9,
 	.need_gfx_hws = 1, .has_hotplug = 1,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
@@ -394,50 +395,81 @@
 	IVB_CURSOR_OFFSETS,
 };
 
+static const struct intel_device_info intel_kabylake_info = {
+	.is_preliminary = 1,
+	.is_kabylake = 1,
+	.gen = 9,
+	.num_pipes = 3,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+	.has_llc = 1,
+	.has_ddi = 1,
+	.has_fpga_dbg = 1,
+	.has_fbc = 1,
+	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
+};
+
+static const struct intel_device_info intel_kabylake_gt3_info = {
+	.is_preliminary = 1,
+	.is_kabylake = 1,
+	.gen = 9,
+	.num_pipes = 3,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
+	.has_llc = 1,
+	.has_ddi = 1,
+	.has_fpga_dbg = 1,
+	.has_fbc = 1,
+	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
  * and subvendor IDs, we need it to come before the more general IVB
  * PCI ID matches, otherwise we'll use the wrong info struct above.
  */
-#define INTEL_PCI_IDS \
-	INTEL_I830_IDS(&intel_i830_info),	\
-	INTEL_I845G_IDS(&intel_845g_info),	\
-	INTEL_I85X_IDS(&intel_i85x_info),	\
-	INTEL_I865G_IDS(&intel_i865g_info),	\
-	INTEL_I915G_IDS(&intel_i915g_info),	\
-	INTEL_I915GM_IDS(&intel_i915gm_info),	\
-	INTEL_I945G_IDS(&intel_i945g_info),	\
-	INTEL_I945GM_IDS(&intel_i945gm_info),	\
-	INTEL_I965G_IDS(&intel_i965g_info),	\
-	INTEL_G33_IDS(&intel_g33_info),		\
-	INTEL_I965GM_IDS(&intel_i965gm_info),	\
-	INTEL_GM45_IDS(&intel_gm45_info), 	\
-	INTEL_G45_IDS(&intel_g45_info), 	\
-	INTEL_PINEVIEW_IDS(&intel_pineview_info),	\
-	INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info),	\
-	INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info),	\
-	INTEL_SNB_D_IDS(&intel_sandybridge_d_info),	\
-	INTEL_SNB_M_IDS(&intel_sandybridge_m_info),	\
-	INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */ \
-	INTEL_IVB_M_IDS(&intel_ivybridge_m_info),	\
-	INTEL_IVB_D_IDS(&intel_ivybridge_d_info),	\
-	INTEL_HSW_D_IDS(&intel_haswell_d_info), \
-	INTEL_HSW_M_IDS(&intel_haswell_m_info), \
-	INTEL_VLV_M_IDS(&intel_valleyview_m_info),	\
-	INTEL_VLV_D_IDS(&intel_valleyview_d_info),	\
-	INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info),	\
-	INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info),	\
-	INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info),	\
-	INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info), \
-	INTEL_CHV_IDS(&intel_cherryview_info),	\
-	INTEL_SKL_GT1_IDS(&intel_skylake_info),	\
-	INTEL_SKL_GT2_IDS(&intel_skylake_info),	\
-	INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info),	\
-	INTEL_BXT_IDS(&intel_broxton_info)
-
-static const struct pci_device_id pciidlist[] = {		/* aka */
-	INTEL_PCI_IDS,
+static const struct pci_device_id pciidlist[] = {
+	INTEL_I830_IDS(&intel_i830_info),
+	INTEL_I845G_IDS(&intel_845g_info),
+	INTEL_I85X_IDS(&intel_i85x_info),
+	INTEL_I865G_IDS(&intel_i865g_info),
+	INTEL_I915G_IDS(&intel_i915g_info),
+	INTEL_I915GM_IDS(&intel_i915gm_info),
+	INTEL_I945G_IDS(&intel_i945g_info),
+	INTEL_I945GM_IDS(&intel_i945gm_info),
+	INTEL_I965G_IDS(&intel_i965g_info),
+	INTEL_G33_IDS(&intel_g33_info),
+	INTEL_I965GM_IDS(&intel_i965gm_info),
+	INTEL_GM45_IDS(&intel_gm45_info),
+	INTEL_G45_IDS(&intel_g45_info),
+	INTEL_PINEVIEW_IDS(&intel_pineview_info),
+	INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info),
+	INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info),
+	INTEL_SNB_D_IDS(&intel_sandybridge_d_info),
+	INTEL_SNB_M_IDS(&intel_sandybridge_m_info),
+	INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */
+	INTEL_IVB_M_IDS(&intel_ivybridge_m_info),
+	INTEL_IVB_D_IDS(&intel_ivybridge_d_info),
+	INTEL_HSW_D_IDS(&intel_haswell_d_info),
+	INTEL_HSW_M_IDS(&intel_haswell_m_info),
+	INTEL_VLV_M_IDS(&intel_valleyview_m_info),
+	INTEL_VLV_D_IDS(&intel_valleyview_d_info),
+	INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info),
+	INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info),
+	INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info),
+	INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info),
+	INTEL_CHV_IDS(&intel_cherryview_info),
+	INTEL_SKL_GT1_IDS(&intel_skylake_info),
+	INTEL_SKL_GT2_IDS(&intel_skylake_info),
+	INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info),
+	INTEL_BXT_IDS(&intel_broxton_info),
+	INTEL_KBL_GT1_IDS(&intel_kabylake_info),
+	INTEL_KBL_GT2_IDS(&intel_kabylake_info),
+	INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info),
+	INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info),
 	{0, 0, 0}
 };
 
@@ -463,7 +495,7 @@
 	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
 		ret = PCH_LPT;
 		DRM_DEBUG_KMS("Assuming LynxPoint PCH\n");
-	} else if (IS_SKYLAKE(dev)) {
+	} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		ret = PCH_SPT;
 		DRM_DEBUG_KMS("Assuming SunrisePoint PCH\n");
 	}
@@ -526,11 +558,13 @@
 			} else if (id == INTEL_PCH_SPT_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_SPT;
 				DRM_DEBUG_KMS("Found SunrisePoint PCH\n");
-				WARN_ON(!IS_SKYLAKE(dev));
+				WARN_ON(!IS_SKYLAKE(dev) &&
+					!IS_KABYLAKE(dev));
 			} else if (id == INTEL_PCH_SPT_LP_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_SPT;
 				DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n");
-				WARN_ON(!IS_SKYLAKE(dev));
+				WARN_ON(!IS_SKYLAKE(dev) &&
+					!IS_KABYLAKE(dev));
 			} else if (id == INTEL_PCH_P2X_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = intel_virt_detect_pch(dev);
 			} else
@@ -570,26 +604,6 @@
 	return true;
 }
 
-void i915_firmware_load_error_print(const char *fw_path, int err)
-{
-	DRM_ERROR("failed to load firmware %s (%d)\n", fw_path, err);
-
-	/*
-	 * If the reason is not known assume -ENOENT since that's the most
-	 * usual failure mode.
-	 */
-	if (!err)
-		err = -ENOENT;
-
-	if (!(IS_BUILTIN(CONFIG_DRM_I915) && err == -ENOENT))
-		return;
-
-	DRM_ERROR(
-	  "The driver is built-in, so to load the firmware you need to\n"
-	  "include it either in the kernel (see CONFIG_EXTRA_FIRMWARE) or\n"
-	  "in your initrd/initramfs image.\n");
-}
-
 static void intel_suspend_encoders(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -608,7 +622,6 @@
 static int intel_suspend_complete(struct drm_i915_private *dev_priv);
 static int vlv_resume_prepare(struct drm_i915_private *dev_priv,
 			      bool rpm_resume);
-static int skl_resume_prepare(struct drm_i915_private *dev_priv);
 static int bxt_resume_prepare(struct drm_i915_private *dev_priv);
 
 
@@ -679,6 +692,9 @@
 
 	intel_display_set_init_power(dev_priv, false);
 
+	if (HAS_CSR(dev_priv))
+		flush_work(&dev_priv->csr.work);
+
 	return 0;
 }
 
@@ -687,10 +703,13 @@
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
 	int ret;
 
+	intel_power_domains_suspend(dev_priv);
+
 	ret = intel_suspend_complete(dev_priv);
 
 	if (ret) {
 		DRM_ERROR("Suspend complete failed: %d\n", ret);
+		intel_power_domains_init_hw(dev_priv, true);
 
 		return ret;
 	}
@@ -838,13 +857,11 @@
 
 	if (IS_BROXTON(dev))
 		ret = bxt_resume_prepare(dev_priv);
-	else if (IS_SKYLAKE(dev_priv))
-		ret = skl_resume_prepare(dev_priv);
 	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		hsw_disable_pc8(dev_priv);
 
 	intel_uncore_sanitize(dev);
-	intel_power_domains_init_hw(dev_priv);
+	intel_power_domains_init_hw(dev_priv, true);
 
 	return ret;
 }
@@ -1051,15 +1068,6 @@
 	return i915_drm_resume(drm_dev);
 }
 
-static int skl_suspend_complete(struct drm_i915_private *dev_priv)
-{
-	/* Enabling DC6 is not a hard requirement to enter runtime D3 */
-
-	skl_uninit_cdclk(dev_priv);
-
-	return 0;
-}
-
 static int hsw_suspend_complete(struct drm_i915_private *dev_priv)
 {
 	hsw_enable_pc8(dev_priv);
@@ -1099,16 +1107,6 @@
 	return 0;
 }
 
-static int skl_resume_prepare(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-
-	skl_init_cdclk(dev_priv);
-	intel_csr_load_program(dev);
-
-	return 0;
-}
-
 /*
  * Save all Gunit registers that may be lost after a D3 and a subsequent
  * S0i[R123] transition. The list of registers needing a save/restore is
@@ -1572,8 +1570,6 @@
 
 	if (IS_BROXTON(dev))
 		ret = bxt_resume_prepare(dev_priv);
-	else if (IS_SKYLAKE(dev))
-		ret = skl_resume_prepare(dev_priv);
 	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		hsw_disable_pc8(dev_priv);
 	else if (IS_VALLEYVIEW(dev_priv))
@@ -1616,8 +1612,6 @@
 
 	if (IS_BROXTON(dev_priv))
 		ret = bxt_suspend_complete(dev_priv);
-	else if (IS_SKYLAKE(dev_priv))
-		ret = skl_suspend_complete(dev_priv);
 	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		ret = hsw_suspend_complete(dev_priv);
 	else if (IS_VALLEYVIEW(dev_priv))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fd88060..15c6dc0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -57,7 +57,7 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20151010"
+#define DRIVER_DATE		"20151120"
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -180,15 +180,11 @@
 	POWER_DOMAIN_TRANSCODER_B,
 	POWER_DOMAIN_TRANSCODER_C,
 	POWER_DOMAIN_TRANSCODER_EDP,
-	POWER_DOMAIN_PORT_DDI_A_2_LANES,
-	POWER_DOMAIN_PORT_DDI_A_4_LANES,
-	POWER_DOMAIN_PORT_DDI_B_2_LANES,
-	POWER_DOMAIN_PORT_DDI_B_4_LANES,
-	POWER_DOMAIN_PORT_DDI_C_2_LANES,
-	POWER_DOMAIN_PORT_DDI_C_4_LANES,
-	POWER_DOMAIN_PORT_DDI_D_2_LANES,
-	POWER_DOMAIN_PORT_DDI_D_4_LANES,
-	POWER_DOMAIN_PORT_DDI_E_2_LANES,
+	POWER_DOMAIN_PORT_DDI_A_LANES,
+	POWER_DOMAIN_PORT_DDI_B_LANES,
+	POWER_DOMAIN_PORT_DDI_C_LANES,
+	POWER_DOMAIN_PORT_DDI_D_LANES,
+	POWER_DOMAIN_PORT_DDI_E_LANES,
 	POWER_DOMAIN_PORT_DSI,
 	POWER_DOMAIN_PORT_CRT,
 	POWER_DOMAIN_PORT_OTHER,
@@ -199,6 +195,8 @@
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_AUX_D,
+	POWER_DOMAIN_GMBUS,
+	POWER_DOMAIN_MODESET,
 	POWER_DOMAIN_INIT,
 
 	POWER_DOMAIN_NUM,
@@ -630,11 +628,9 @@
 			  int target, int refclk,
 			  struct dpll *match_clock,
 			  struct dpll *best_clock);
+	int (*compute_pipe_wm)(struct intel_crtc *crtc,
+			       struct drm_atomic_state *state);
 	void (*update_wm)(struct drm_crtc *crtc);
-	void (*update_sprite_wm)(struct drm_plane *plane,
-				 struct drm_crtc *crtc,
-				 uint32_t sprite_width, uint32_t sprite_height,
-				 int pixel_size, bool enable, bool scaled);
 	int (*modeset_calc_cdclk)(struct drm_atomic_state *state);
 	void (*modeset_commit_cdclk)(struct drm_atomic_state *state);
 	/* Returns the active state of the crtc, and if the crtc is active,
@@ -692,18 +688,18 @@
 	void (*force_wake_put)(struct drm_i915_private *dev_priv,
 							enum forcewake_domains domains);
 
-	uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
-	uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
-	uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
-	uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
+	uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
+	uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
+	uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
+	uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
 
-	void (*mmio_writeb)(struct drm_i915_private *dev_priv, off_t offset,
+	void (*mmio_writeb)(struct drm_i915_private *dev_priv, i915_reg_t r,
 				uint8_t val, bool trace);
-	void (*mmio_writew)(struct drm_i915_private *dev_priv, off_t offset,
+	void (*mmio_writew)(struct drm_i915_private *dev_priv, i915_reg_t r,
 				uint16_t val, bool trace);
-	void (*mmio_writel)(struct drm_i915_private *dev_priv, off_t offset,
+	void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r,
 				uint32_t val, bool trace);
-	void (*mmio_writeq)(struct drm_i915_private *dev_priv, off_t offset,
+	void (*mmio_writeq)(struct drm_i915_private *dev_priv, i915_reg_t r,
 				uint64_t val, bool trace);
 };
 
@@ -720,11 +716,11 @@
 		enum forcewake_domain_id id;
 		unsigned wake_count;
 		struct timer_list timer;
-		u32 reg_set;
+		i915_reg_t reg_set;
 		u32 val_set;
 		u32 val_clear;
-		u32 reg_ack;
-		u32 reg_post;
+		i915_reg_t reg_ack;
+		i915_reg_t reg_post;
 		u32 val_reset;
 	} fw_domain[FW_DOMAIN_ID_COUNT];
 };
@@ -739,20 +735,19 @@
 #define for_each_fw_domain(domain__, dev_priv__, i__) \
 	for_each_fw_domain_mask(domain__, FORCEWAKE_ALL, dev_priv__, i__)
 
-enum csr_state {
-	FW_UNINITIALIZED = 0,
-	FW_LOADED,
-	FW_FAILED
-};
+#define CSR_VERSION(major, minor)	((major) << 16 | (minor))
+#define CSR_VERSION_MAJOR(version)	((version) >> 16)
+#define CSR_VERSION_MINOR(version)	((version) & 0xffff)
 
 struct intel_csr {
+	struct work_struct work;
 	const char *fw_path;
 	uint32_t *dmc_payload;
 	uint32_t dmc_fw_size;
+	uint32_t version;
 	uint32_t mmio_count;
-	uint32_t mmioaddr[8];
+	i915_reg_t mmioaddr[8];
 	uint32_t mmiodata[8];
-	enum csr_state state;
 };
 
 #define DEV_INFO_FOR_EACH_FLAG(func, sep) \
@@ -770,6 +765,8 @@
 	func(is_valleyview) sep \
 	func(is_haswell) sep \
 	func(is_skylake) sep \
+	func(is_broxton) sep \
+	func(is_kabylake) sep \
 	func(is_preliminary) sep \
 	func(has_fbc) sep \
 	func(has_pipe_cxsr) sep \
@@ -928,24 +925,7 @@
 		struct drm_framebuffer *fb;
 	} *fbc_work;
 
-	enum no_fbc_reason {
-		FBC_OK, /* FBC is enabled */
-		FBC_UNSUPPORTED, /* FBC is not supported by this chipset */
-		FBC_NO_OUTPUT, /* no outputs enabled to compress */
-		FBC_STOLEN_TOO_SMALL, /* not enough space for buffers */
-		FBC_UNSUPPORTED_MODE, /* interlace or doublescanned mode */
-		FBC_MODE_TOO_LARGE, /* mode too large for compression */
-		FBC_BAD_PLANE, /* fbc not supported on plane */
-		FBC_NOT_TILED, /* buffer not tiled */
-		FBC_MULTIPLE_PIPES, /* more than one pipe active */
-		FBC_MODULE_PARAM,
-		FBC_CHIP_DEFAULT, /* disabled by default on this chip */
-		FBC_ROTATION, /* rotation is not supported */
-		FBC_IN_DBG_MASTER, /* kernel debugger is active */
-		FBC_BAD_STRIDE, /* stride is not supported */
-		FBC_PIXEL_RATE, /* pixel rate is too big */
-		FBC_PIXEL_FORMAT /* pixel format is invalid */
-	} no_fbc_reason;
+	const char *no_fbc_reason;
 
 	bool (*fbc_enabled)(struct drm_i915_private *dev_priv);
 	void (*enable_fbc)(struct intel_crtc *crtc);
@@ -1019,7 +999,7 @@
 	struct i2c_adapter adapter;
 	u32 force_bit;
 	u32 reg0;
-	u32 gpio_reg;
+	i915_reg_t gpio_reg;
 	struct i2c_algo_bit_data bit_algo;
 	struct drm_i915_private *dev_priv;
 };
@@ -1668,7 +1648,7 @@
 };
 
 struct i915_wa_reg {
-	u32 addr;
+	i915_reg_t addr;
 	u32 value;
 	/* bitmask representing WA bits */
 	u32 mask;
@@ -1697,6 +1677,13 @@
 	struct drm_i915_gem_request     *request;
 };
 
+/* used in computing the new watermarks state */
+struct intel_wm_config {
+	unsigned int num_pipes_active;
+	bool sprites_enabled;
+	bool sprites_scaled;
+};
+
 struct drm_i915_private {
 	struct drm_device *dev;
 	struct kmem_cache *objects;
@@ -1717,9 +1704,6 @@
 
 	struct intel_csr csr;
 
-	/* Display CSR-related protection */
-	struct mutex csr_lock;
-
 	struct intel_gmbus gmbus[GMBUS_NUM_PINS];
 
 	/** gmbus_mutex protects against concurrent usage of the single hw gmbus
@@ -1734,6 +1718,8 @@
 	/* MMIO base address for MIPI regs */
 	uint32_t mipi_mmio_base;
 
+	uint32_t psr_mmio_base;
+
 	wait_queue_head_t gmbus_wait_queue;
 
 	struct pci_dev *bridge_dev;
@@ -1921,6 +1907,9 @@
 		 */
 		uint16_t skl_latency[8];
 
+		/* Committed wm config */
+		struct intel_wm_config config;
+
 		/*
 		 * The skl_wm_values structure is a bit too big for stack
 		 * allocation, so we keep the staging struct where we store
@@ -2435,6 +2424,15 @@
 #define INTEL_DEVID(p)	(INTEL_INFO(p)->device_id)
 #define INTEL_REVID(p)	(__I915__(p)->dev->pdev->revision)
 
+#define REVID_FOREVER		0xff
+/*
+ * Return true if revision is in range [since,until] inclusive.
+ *
+ * Use 0 for open-ended since, and REVID_FOREVER for open-ended until.
+ */
+#define IS_REVID(p, since, until) \
+	(INTEL_REVID(p) >= (since) && INTEL_REVID(p) <= (until))
+
 #define IS_I830(dev)		(INTEL_DEVID(dev) == 0x3577)
 #define IS_845G(dev)		(INTEL_DEVID(dev) == 0x2562)
 #define IS_I85X(dev)		(INTEL_INFO(dev)->is_i85x)
@@ -2461,7 +2459,8 @@
 #define IS_HASWELL(dev)	(INTEL_INFO(dev)->is_haswell)
 #define IS_BROADWELL(dev)	(!INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev))
 #define IS_SKYLAKE(dev)	(INTEL_INFO(dev)->is_skylake)
-#define IS_BROXTON(dev)	(!INTEL_INFO(dev)->is_skylake && IS_GEN9(dev))
+#define IS_BROXTON(dev)		(INTEL_INFO(dev)->is_broxton)
+#define IS_KABYLAKE(dev)	(INTEL_INFO(dev)->is_kabylake)
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
 #define IS_HSW_EARLY_SDV(dev)	(IS_HASWELL(dev) && \
 				 (INTEL_DEVID(dev) & 0xFF00) == 0x0C00)
@@ -2496,16 +2495,21 @@
 
 #define IS_PRELIMINARY_HW(intel_info) ((intel_info)->is_preliminary)
 
-#define SKL_REVID_A0		(0x0)
-#define SKL_REVID_B0		(0x1)
-#define SKL_REVID_C0		(0x2)
-#define SKL_REVID_D0		(0x3)
-#define SKL_REVID_E0		(0x4)
-#define SKL_REVID_F0		(0x5)
+#define SKL_REVID_A0		0x0
+#define SKL_REVID_B0		0x1
+#define SKL_REVID_C0		0x2
+#define SKL_REVID_D0		0x3
+#define SKL_REVID_E0		0x4
+#define SKL_REVID_F0		0x5
 
-#define BXT_REVID_A0		(0x0)
-#define BXT_REVID_B0		(0x3)
-#define BXT_REVID_C0		(0x9)
+#define IS_SKL_REVID(p, since, until) (IS_SKYLAKE(p) && IS_REVID(p, since, until))
+
+#define BXT_REVID_A0		0x0
+#define BXT_REVID_A1		0x1
+#define BXT_REVID_B0		0x3
+#define BXT_REVID_C0		0x9
+
+#define IS_BXT_REVID(p, since, until) (IS_BROXTON(p) && IS_REVID(p, since, until))
 
 /*
  * The genX designation typically refers to the render engine, so render
@@ -2577,10 +2581,10 @@
 #define HAS_FPGA_DBG_UNCLAIMED(dev)	(INTEL_INFO(dev)->has_fpga_dbg)
 #define HAS_PSR(dev)		(IS_HASWELL(dev) || IS_BROADWELL(dev) || \
 				 IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev) || \
-				 IS_SKYLAKE(dev))
+				 IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 #define HAS_RUNTIME_PM(dev)	(IS_GEN6(dev) || IS_HASWELL(dev) || \
 				 IS_BROADWELL(dev) || IS_VALLEYVIEW(dev) || \
-				 IS_SKYLAKE(dev))
+				 IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 #define HAS_RC6(dev)		(INTEL_INFO(dev)->gen >= 6)
 #define HAS_RC6p(dev)		(INTEL_INFO(dev)->gen == 6 || IS_IVYBRIDGE(dev))
 
@@ -2640,6 +2644,7 @@
 	int panel_use_ssc;
 	int vbt_sdvo_panel_type;
 	int enable_rc6;
+	int enable_dc;
 	int enable_fbc;
 	int enable_ppgtt;
 	int enable_execlists;
@@ -2688,7 +2693,6 @@
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
-void i915_firmware_load_error_print(const char *fw_path, int err);
 
 /* intel_hotplug.c */
 void intel_hpd_irq_handler(struct drm_device *dev, u32 pin_mask, u32 long_mask);
@@ -2995,8 +2999,6 @@
 int __must_check
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     struct intel_engine_cs *pipelined,
-				     struct drm_i915_gem_request **pipelined_request,
 				     const struct i915_ggtt_view *view);
 void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
 					      const struct i915_ggtt_view *view);
@@ -3351,7 +3353,6 @@
 extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
 				  bool enable);
 extern void intel_detect_pch(struct drm_device *dev);
-extern int intel_trans_dp_port_sel(struct drm_crtc *crtc);
 extern int intel_enable_rc6(const struct drm_device *dev);
 
 extern bool i915_semaphore_is_enabled(struct drm_device *dev);
@@ -3434,6 +3435,32 @@
 #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
+#define __raw_read(x, s) \
+static inline uint##x##_t __raw_i915_read##x(struct drm_i915_private *dev_priv, \
+					     i915_reg_t reg) \
+{ \
+	return read##s(dev_priv->regs + i915_mmio_reg_offset(reg)); \
+}
+
+#define __raw_write(x, s) \
+static inline void __raw_i915_write##x(struct drm_i915_private *dev_priv, \
+				       i915_reg_t reg, uint##x##_t val) \
+{ \
+	write##s(val, dev_priv->regs + i915_mmio_reg_offset(reg)); \
+}
+__raw_read(8, b)
+__raw_read(16, w)
+__raw_read(32, l)
+__raw_read(64, q)
+
+__raw_write(8, b)
+__raw_write(16, w)
+__raw_write(32, l)
+__raw_write(64, q)
+
+#undef __raw_read
+#undef __raw_write
+
 /* These are untraced mmio-accessors that are only valid to be used inside
  * criticial sections inside IRQ handlers where forcewake is explicitly
  * controlled.
@@ -3441,8 +3468,8 @@
  * Note: Should only be used between intel_uncore_forcewake_irqlock() and
  * intel_uncore_forcewake_irqunlock().
  */
-#define I915_READ_FW(reg__) readl(dev_priv->regs + (reg__))
-#define I915_WRITE_FW(reg__, val__) writel(val__, dev_priv->regs + (reg__))
+#define I915_READ_FW(reg__) __raw_i915_read32(dev_priv, (reg__))
+#define I915_WRITE_FW(reg__, val__) __raw_i915_write32(dev_priv, (reg__), (val__))
 #define POSTING_READ_FW(reg__) (void)I915_READ_FW(reg__)
 
 /* "Broadcast RGB" property */
@@ -3450,7 +3477,7 @@
 #define INTEL_BROADCAST_RGB_FULL 1
 #define INTEL_BROADCAST_RGB_LIMITED 2
 
-static inline uint32_t i915_vgacntrl_reg(struct drm_device *dev)
+static inline i915_reg_t i915_vgacntrl_reg(struct drm_device *dev)
 {
 	if (IS_VALLEYVIEW(dev))
 		return VLV_VGACNTRL;
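
The __raw_read()/__raw_write() generators above stamp out one untraced accessor per access width; for the 32-bit case the result is effectively the functions below, which is why I915_READ_FW()/I915_WRITE_FW() can now take an i915_reg_t rather than a bare offset. Approximate expansion, shown for illustration rather than as literal preprocessor output:

static inline uint32_t __raw_i915_read32(struct drm_i915_private *dev_priv,
					 i915_reg_t reg)
{
	return readl(dev_priv->regs + i915_mmio_reg_offset(reg));
}

static inline void __raw_i915_write32(struct drm_i915_private *dev_priv,
				      i915_reg_t reg, uint32_t val)
{
	writel(val, dev_priv->regs + i915_mmio_reg_offset(reg));
}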
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 91bb1fc..33adc8f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2737,6 +2737,8 @@
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 					struct intel_engine_cs *ring)
 {
+	struct intel_ringbuffer *buffer;
+
 	while (!list_empty(&ring->active_list)) {
 		struct drm_i915_gem_object *obj;
 
@@ -2752,18 +2754,23 @@
 	 * are the ones that keep the context and ringbuffer backing objects
 	 * pinned in place.
 	 */
-	while (!list_empty(&ring->execlist_queue)) {
-		struct drm_i915_gem_request *submit_req;
 
-		submit_req = list_first_entry(&ring->execlist_queue,
-				struct drm_i915_gem_request,
-				execlist_link);
-		list_del(&submit_req->execlist_link);
+	if (i915.enable_execlists) {
+		spin_lock_irq(&ring->execlist_lock);
+		while (!list_empty(&ring->execlist_queue)) {
+			struct drm_i915_gem_request *submit_req;
 
-		if (submit_req->ctx != ring->default_context)
-			intel_lr_context_unpin(submit_req);
+			submit_req = list_first_entry(&ring->execlist_queue,
+					struct drm_i915_gem_request,
+					execlist_link);
+			list_del(&submit_req->execlist_link);
 
-		i915_gem_request_unreference(submit_req);
+			if (submit_req->ctx != ring->default_context)
+				intel_lr_context_unpin(submit_req);
+
+			i915_gem_request_unreference(submit_req);
+		}
+		spin_unlock_irq(&ring->execlist_lock);
 	}
 
 	/*
@@ -2782,6 +2789,18 @@
 
 		i915_gem_request_retire(request);
 	}
+
+	/* Having flushed all requests from all queues, we know that all
+	 * ringbuffers must now be empty. However, since we do not reclaim
+	 * all space when retiring the request (to prevent HEADs colliding
+	 * with rapid ringbuffer wraparound) the amount of available space
+	 * upon reset is less than when we start. Do one more pass over
+	 * all the ringbuffers to reset last_retired_head.
+	 */
+	list_for_each_entry(buffer, &ring->buffers, link) {
+		buffer->last_retired_head = buffer->tail;
+		intel_ring_update_space(buffer);
+	}
 }
 
 void i915_gem_reset(struct drm_device *dev)
@@ -3826,7 +3845,7 @@
 		 * cacheline, whereas normally such cachelines would get
 		 * invalidated.
 		 */
-		if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)
+		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 			return -ENODEV;
 
 		level = I915_CACHE_LLC;
@@ -3869,17 +3888,11 @@
 int
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     struct intel_engine_cs *pipelined,
-				     struct drm_i915_gem_request **pipelined_request,
 				     const struct i915_ggtt_view *view)
 {
 	u32 old_read_domains, old_write_domain;
 	int ret;
 
-	ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
-	if (ret)
-		return ret;
-
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
 	 */
@@ -4476,10 +4489,8 @@
 {
 	struct i915_vma *vma;
 	list_for_each_entry(vma, &obj->vma_list, vma_link) {
-		if (i915_is_ggtt(vma->vm) &&
-		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
-			continue;
-		if (vma->vm == vm)
+		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
+		    vma->vm == vm)
 			return vma;
 	}
 	return NULL;
@@ -4568,7 +4579,6 @@
 	struct intel_engine_cs *ring = req->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
 	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
 	int i, ret;
 
@@ -4584,10 +4594,10 @@
 	 * here because no other code should access these registers other than
 	 * at initialization time.
 	 */
-	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
+	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, reg_base + i);
-		intel_ring_emit(ring, remap_info[i/4]);
+		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
+		intel_ring_emit(ring, remap_info[i]);
 	}
 
 	intel_ring_advance(ring);
@@ -4755,18 +4765,9 @@
 	if (HAS_GUC_UCODE(dev)) {
 		ret = intel_guc_ucode_load(dev);
 		if (ret) {
-			/*
-			 * If we got an error and GuC submission is enabled, map
-			 * the error to -EIO so the GPU will be declared wedged.
-			 * OTOH, if we didn't intend to use the GuC anyway, just
-			 * discard the error and carry on.
-			 */
-			DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
-				  i915.enable_guc_submission ? "" :
-				  " (ignored)");
-			ret = i915.enable_guc_submission ? -EIO : 0;
-			if (ret)
-				goto out;
+			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
+			ret = -EIO;
+			goto out;
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8c688a5..4b94004 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -556,7 +556,7 @@
 				if (signaller == ring)
 					continue;
 
-				intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+				intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base));
 				intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
 			}
 		}
@@ -581,7 +581,7 @@
 				if (signaller == ring)
 					continue;
 
-				intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+				intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base));
 				intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
 			}
 		}
@@ -925,6 +925,14 @@
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
 		args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
 		break;
+	case I915_CONTEXT_PARAM_GTT_SIZE:
+		if (ctx->ppgtt)
+			args->value = ctx->ppgtt->base.total;
+		else if (to_i915(dev)->mm.aliasing_ppgtt)
+			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
+		else
+			args->value = to_i915(dev)->gtt.base.total;
+		break;
 	default:
 		ret = -EINVAL;
 		break;
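
The new I915_CONTEXT_PARAM_GTT_SIZE case reports how much GPU address space a context has (its own PPGTT, the aliasing PPGTT, or the global GTT, in that order of preference). A hypothetical userspace query via the standard context-getparam ioctl might look like the sketch below; ctx_gtt_size() is an invented name, it assumes libdrm headers new enough to define I915_CONTEXT_PARAM_GTT_SIZE, and error handling is trimmed:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <i915_drm.h>		/* from libdrm */

static uint64_t ctx_gtt_size(int drm_fd, uint32_t ctx_id)
{
	struct drm_i915_gem_context_param p;

	memset(&p, 0, sizeof(p));
	p.ctx_id = ctx_id;
	p.param = I915_CONTEXT_PARAM_GTT_SIZE;

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p))
		return 0;	/* old kernel or invalid context */

	return p.value;		/* total GPU VA available to the context, in bytes */
}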
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6ed7d63a..a4c243c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1114,7 +1114,7 @@
 
 	for (i = 0; i < 4; i++) {
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
+		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
 		intel_ring_emit(ring, 0);
 	}
 
@@ -1241,7 +1241,7 @@
 
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, INSTPM);
+		intel_ring_emit_reg(ring, INSTPM);
 		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
 		intel_ring_advance(ring);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 40a10b2..b80d045 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -59,7 +59,7 @@
 				 struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int fence_reg_lo, fence_reg_hi;
+	i915_reg_t fence_reg_lo, fence_reg_hi;
 	int fence_pitch_shift;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 43f35d1..1f7e6b9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/seq_file.h>
+#include <linux/stop_machine.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
@@ -104,9 +105,11 @@
 {
 	bool has_aliasing_ppgtt;
 	bool has_full_ppgtt;
+	bool has_full_48bit_ppgtt;
 
 	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
 	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
+	has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;
 
 	if (intel_vgpu_active(dev))
 		has_full_ppgtt = false; /* emulation is too hard */
@@ -125,6 +128,9 @@
 	if (enable_ppgtt == 2 && has_full_ppgtt)
 		return 2;
 
+	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
+		return 3;
+
 #ifdef CONFIG_INTEL_IOMMU
 	/* Disable ppgtt on SNB if VT-d is on. */
 	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
@@ -141,7 +147,7 @@
 	}
 
 	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
-		return 2;
+		return has_full_48bit_ppgtt ? 3 : 2;
 	else
 		return has_aliasing_ppgtt ? 1 : 0;
 }
@@ -661,10 +667,10 @@
 		return ret;
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
+	intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(ring, entry));
 	intel_ring_emit(ring, upper_32_bits(addr));
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
+	intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(ring, entry));
 	intel_ring_emit(ring, lower_32_bits(addr));
 	intel_ring_advance(ring);
 
@@ -904,14 +910,13 @@
 	enum vgt_g2v_type msg;
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned int offset = vgtif_reg(pdp0_lo);
 	int i;
 
 	if (USES_FULL_48BIT_PPGTT(dev)) {
 		u64 daddr = px_dma(&ppgtt->pml4);
 
-		I915_WRITE(offset, lower_32_bits(daddr));
-		I915_WRITE(offset + 4, upper_32_bits(daddr));
+		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
+		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
 
 		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
 				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
@@ -919,10 +924,8 @@
 		for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
 			u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
 
-			I915_WRITE(offset, lower_32_bits(daddr));
-			I915_WRITE(offset + 4, upper_32_bits(daddr));
-
-			offset += 8;
+			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
+			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
 		}
 
 		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
@@ -1662,9 +1665,9 @@
 		return ret;
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
-	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
+	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
-	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
+	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
 	intel_ring_emit(ring, get_pd_offset(ppgtt));
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1699,9 +1702,9 @@
 		return ret;
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
-	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
+	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
-	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
+	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
 	intel_ring_emit(ring, get_pd_offset(ppgtt));
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -2528,6 +2531,26 @@
 	return 0;
 }
 
+struct ggtt_bind_vma__cb {
+	struct i915_vma *vma;
+	enum i915_cache_level cache_level;
+	u32 flags;
+};
+
+static int ggtt_bind_vma__cb(void *_arg)
+{
+	struct ggtt_bind_vma__cb *arg = _arg;
+	return ggtt_bind_vma(arg->vma, arg->cache_level, arg->flags);
+}
+
+static int ggtt_bind_vma__BKL(struct i915_vma *vma,
+			      enum i915_cache_level cache_level,
+			      u32 flags)
+{
+	struct ggtt_bind_vma__cb arg = { vma, cache_level, flags };
+	return stop_machine(ggtt_bind_vma__cb, &arg, NULL);
+}
+
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 				 enum i915_cache_level cache_level,
 				 u32 flags)
@@ -2995,6 +3018,9 @@
 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
 
+	if (IS_CHERRYVIEW(dev))
+		dev_priv->gtt.base.bind_vma = ggtt_bind_vma__BKL;
+
 	return ret;
 }
 
@@ -3302,7 +3328,7 @@
 intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
 			  struct drm_i915_gem_object *obj)
 {
-	struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
+	struct intel_rotation_info *rot_info = &ggtt_view->params.rotation_info;
 	unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
 	unsigned int size_pages_uv;
 	struct sg_page_iter sg_iter;
@@ -3534,7 +3560,7 @@
 	if (view->type == I915_GGTT_VIEW_NORMAL) {
 		return obj->base.size;
 	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
-		return view->rotation_info.size;
+		return view->params.rotation_info.size;
 	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
 		return view->params.partial.size << PAGE_SHIFT;
 	} else {
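
    The ggtt_bind_vma__BKL() addition above packs the three bind arguments into a
    small struct and hands them to stop_machine(), which runs the callback on one
    CPU while every other online CPU spins with interrupts disabled, so nothing
    else executes while the GGTT PTEs are rewritten; the wrapper is then installed
    as the bind hook on Cherryview only. A minimal, self-contained sketch of the
    same funnelling pattern, with hypothetical names (update_ptes, struct
    update_args) standing in for the real bind path:

	#include <linux/stop_machine.h>

	struct update_args {
		void *table;
		unsigned long start, count;
	};

	/* Stand-in for the real PTE rewrite. */
	static int update_ptes(void *table, unsigned long start, unsigned long count)
	{
		return 0;
	}

	/* stop_machine() callbacks take a single void *, so the real
	 * arguments travel inside a struct on the caller's stack. */
	static int update_ptes__cb(void *data)
	{
		struct update_args *args = data;

		return update_ptes(args->table, args->start, args->count);
	}

	static int update_ptes_serialized(void *table, unsigned long start,
					  unsigned long count)
	{
		struct update_args args = { table, start, count };

		/* NULL cpumask: run on any one CPU, hold all the others. */
		return stop_machine(update_ptes__cb, &args, NULL);
	}
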
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a216397..877c32c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -156,13 +156,10 @@
 			u64 offset;
 			unsigned int size;
 		} partial;
+		struct intel_rotation_info rotation_info;
 	} params;
 
 	struct sg_table *pages;
-
-	union {
-		struct intel_rotation_info rotation_info;
-	};
 };
 
 extern const struct i915_ggtt_view i915_ggtt_view_normal;
@@ -556,7 +553,7 @@
 
 	if (a->type != b->type)
 		return false;
-	if (a->type == I915_GGTT_VIEW_PARTIAL)
+	if (a->type != I915_GGTT_VIEW_NORMAL)
 		return !memcmp(&a->params, &b->params, sizeof(a->params));
 	return true;
 }
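
    With intel_rotation_info folded into the params union above,
    i915_ggtt_view_equal() can memcmp that union for any non-normal view instead
    of special-casing partial views, so rotated views now compare by their
    parameters too. A hypothetical miniature of the idea (not the driver's actual
    structures), assuming views are zero-initialized so the memcmp across the
    whole union is well defined:

	#include <linux/string.h>
	#include <linux/types.h>

	enum view_type { VIEW_NORMAL, VIEW_ROTATED, VIEW_PARTIAL };

	struct view {
		enum view_type type;
		union {
			struct { u64 offset; unsigned int size; } partial;
			struct { unsigned int width, height; } rotated;
		} params;
	};

	static bool view_equal(const struct view *a, const struct view *b)
	{
		if (a->type != b->type)
			return false;
		if (a->type != VIEW_NORMAL)	/* partial and rotated alike */
			return !memcmp(&a->params, &b->params, sizeof(a->params));
		return true;			/* normal views carry no parameters */
	}
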
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index cdacf3f..598ed2f 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -433,7 +433,8 @@
 					 &reserved_size);
 		break;
 	default:
-		if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
+		if (IS_BROADWELL(dev_priv) ||
+		    IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev))
 			bdw_get_stolen_reserved(dev_priv, &reserved_base,
 						&reserved_size);
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 8a6717c..7410f6c 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -176,6 +176,8 @@
 		return -EINVAL;
 	}
 
+	intel_runtime_pm_get(dev_priv);
+
 	mutex_lock(&dev->struct_mutex);
 	if (obj->pin_display || obj->framebuffer_references) {
 		ret = -EBUSY;
@@ -269,6 +271,8 @@
 	drm_gem_object_unreference(&obj->base);
 	mutex_unlock(&dev->struct_mutex);
 
+	intel_runtime_pm_put(dev_priv);
+
 	return ret;
 }
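
    The set-tiling path above now takes a runtime-PM reference before
    struct_mutex and drops it after the unlock, presumably so the device cannot
    runtime-suspend while the tiling change touches hardware state. A rough
    sketch of that bracketing shape using the generic pm_runtime helpers
    (set_tiling, struct some_obj and the busy flag are hypothetical; the driver
    uses its own intel_runtime_pm_get/put wrappers):

	#include <linux/pm_runtime.h>
	#include <linux/mutex.h>
	#include <linux/device.h>
	#include <linux/errno.h>
	#include <linux/types.h>

	struct some_obj {
		bool busy;
		int tiling_mode;
	};

	static DEFINE_MUTEX(obj_lock);

	static int set_tiling(struct device *dev, struct some_obj *obj, int mode)
	{
		int ret;

		/* Keep the device powered across the locked section below. */
		ret = pm_runtime_get_sync(dev);
		if (ret < 0) {
			pm_runtime_put_noidle(dev);
			return ret;
		}

		mutex_lock(&obj_lock);
		if (obj->busy) {
			ret = -EBUSY;
			goto out;
		}
		obj->tiling_mode = mode;	/* hardware updates would go here */
		ret = 0;
	out:
		mutex_unlock(&obj_lock);

		/* Every path past the _get has to drop the reference. */
		pm_runtime_put(dev);
		return ret;
	}
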
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2f04e4f..06ca408 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -366,6 +366,17 @@
 	err_printf(m, "Suspend count: %u\n", error->suspend_count);
 	err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device);
 	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
+
+	if (HAS_CSR(dev)) {
+		struct intel_csr *csr = &dev_priv->csr;
+
+		err_printf(m, "DMC loaded: %s\n",
+			   yesno(csr->dmc_payload != NULL));
+		err_printf(m, "DMC fw version: %d.%d\n",
+			   CSR_VERSION_MAJOR(csr->version),
+			   CSR_VERSION_MINOR(csr->version));
+	}
+
 	err_printf(m, "EIR: 0x%08x\n", error->eir);
 	err_printf(m, "IER: 0x%08x\n", error->ier);
 	if (INTEL_INFO(dev)->gen >= 8) {
@@ -862,7 +873,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-		ering->rc_psmi = I915_READ(ring->mmio_base + 0x50);
+		ering->rc_psmi = I915_READ(RING_PSMI_CTL(ring->mmio_base));
 		ering->fault_reg = I915_READ(RING_FAULT_REG(ring));
 		if (INTEL_INFO(dev)->gen >= 8)
 			gen8_record_semaphore_state(dev_priv, error, ring, ering);
@@ -899,7 +910,7 @@
 	ering->ctl = I915_READ_CTL(ring);
 
 	if (I915_NEED_GFX_HWS(dev)) {
-		int mmio;
+		i915_reg_t mmio;
 
 		if (IS_GEN7(dev)) {
 			switch (ring->id) {
@@ -1071,6 +1082,25 @@
 		list_for_each_entry(request, &ring->request_list, list) {
 			struct drm_i915_error_request *erq;
 
+			if (count >= error->ring[i].num_requests) {
+				/*
+				 * If the ring request list changed between
+				 * the point where the error request list was
+				 * created and dimensioned and this point,
+				 * just exit early to avoid crashes.
+				 *
+				 * We don't need to communicate that the
+				 * request list changed state during error
+				 * state capture and that the error state is
+				 * slightly incorrect as a consequence since we
+				 * are typically only interested in the request
+				 * list state at the point of error state
+				 * capture, not in any changes happening during
+				 * the capture.
+				 */
+				break;
+			}
+
 			erq = &error->ring[i].requests[count++];
 			erq->seqno = request->seqno;
 			erq->jiffies = request->emitted_jiffies;
@@ -1181,7 +1211,7 @@
 	if (IS_VALLEYVIEW(dev)) {
 		error->gtier[0] = I915_READ(GTIER);
 		error->ier = I915_READ(VLV_IER);
-		error->forcewake = I915_READ(FORCEWAKE_VLV);
+		error->forcewake = I915_READ_FW(FORCEWAKE_VLV);
 	}
 
 	if (IS_GEN7(dev))
@@ -1193,14 +1223,14 @@
 	}
 
 	if (IS_GEN6(dev)) {
-		error->forcewake = I915_READ(FORCEWAKE);
+		error->forcewake = I915_READ_FW(FORCEWAKE);
 		error->gab_ctl = I915_READ(GAB_CTL);
 		error->gfx_mode = I915_READ(GFX_MODE);
 	}
 
 	/* 2: Registers which belong to multiple generations */
 	if (INTEL_INFO(dev)->gen >= 7)
-		error->forcewake = I915_READ(FORCEWAKE_MT);
+		error->forcewake = I915_READ_FW(FORCEWAKE_MT);
 
 	if (INTEL_INFO(dev)->gen >= 6) {
 		error->derrmr = I915_READ(DERRMR);
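
    The error-capture hunks above switch the FORCEWAKE register samples from
    I915_READ to I915_READ_FW. As I understand it, the _FW variants are plain
    mmio accesses that skip the forcewake get/put bookkeeping done by the regular
    accessors, which is the sensible choice when the register being read is the
    forcewake control itself. A hypothetical sketch of the two flavours (not the
    driver's uncore code):

	#include <linux/io.h>
	#include <linux/types.h>

	struct gpu {
		void __iomem *regs;
	};

	static void forcewake_get(struct gpu *gpu) { /* wake the GT power well */ }
	static void forcewake_put(struct gpu *gpu) { /* allow it to sleep again */ }

	/* Managed read: pays the forcewake cost so GT registers read back sanely. */
	static u32 gpu_read(struct gpu *gpu, u32 offset)
	{
		u32 val;

		forcewake_get(gpu);
		val = readl(gpu->regs + offset);
		forcewake_put(gpu);
		return val;
	}

	/* Raw read: a bare readl(), used when forcewake handling is pointless,
	 * e.g. when sampling the forcewake register itself. */
	static u32 gpu_read_fw(struct gpu *gpu, u32 offset)
	{
		return readl(gpu->regs + offset);
	}

    The same raw accessor later replaces the local __raw_i915_read32() helper in
    the scanline/vblank hot paths of i915_irq.c.
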
diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h
index c4cb1c0..685c799 100644
--- a/drivers/gpu/drm/i915/i915_guc_reg.h
+++ b/drivers/gpu/drm/i915/i915_guc_reg.h
@@ -26,7 +26,7 @@
 
 /* Definitions of GuC H/W registers, bits, etc */
 
-#define GUC_STATUS			0xc000
+#define GUC_STATUS			_MMIO(0xc000)
 #define   GS_BOOTROM_SHIFT		1
 #define   GS_BOOTROM_MASK		  (0x7F << GS_BOOTROM_SHIFT)
 #define   GS_BOOTROM_RSA_FAILED		  (0x50 << GS_BOOTROM_SHIFT)
@@ -39,40 +39,41 @@
 #define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
 #define   GS_MIA_CORE_STATE		  (1 << GS_MIA_SHIFT)
 
-#define SOFT_SCRATCH(n)			(0xc180 + ((n) * 4))
+#define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
 
-#define UOS_RSA_SCRATCH(i)		(0xc200 + (i) * 4)
-#define DMA_ADDR_0_LOW			0xc300
-#define DMA_ADDR_0_HIGH			0xc304
-#define DMA_ADDR_1_LOW			0xc308
-#define DMA_ADDR_1_HIGH			0xc30c
+#define UOS_RSA_SCRATCH(i)		_MMIO(0xc200 + (i) * 4)
+#define   UOS_RSA_SCRATCH_MAX_COUNT	  64
+#define DMA_ADDR_0_LOW			_MMIO(0xc300)
+#define DMA_ADDR_0_HIGH			_MMIO(0xc304)
+#define DMA_ADDR_1_LOW			_MMIO(0xc308)
+#define DMA_ADDR_1_HIGH			_MMIO(0xc30c)
 #define   DMA_ADDRESS_SPACE_WOPCM	  (7 << 16)
 #define   DMA_ADDRESS_SPACE_GTT		  (8 << 16)
-#define DMA_COPY_SIZE			0xc310
-#define DMA_CTRL			0xc314
+#define DMA_COPY_SIZE			_MMIO(0xc310)
+#define DMA_CTRL			_MMIO(0xc314)
 #define   UOS_MOVE			  (1<<4)
 #define   START_DMA			  (1<<0)
-#define DMA_GUC_WOPCM_OFFSET		0xc340
+#define DMA_GUC_WOPCM_OFFSET		_MMIO(0xc340)
 #define   GUC_WOPCM_OFFSET_VALUE	  0x80000	/* 512KB */
-#define GUC_MAX_IDLE_COUNT		0xC3E4
+#define GUC_MAX_IDLE_COUNT		_MMIO(0xC3E4)
 
-#define GUC_WOPCM_SIZE			0xc050
+#define GUC_WOPCM_SIZE			_MMIO(0xc050)
 #define   GUC_WOPCM_SIZE_VALUE  	  (0x80 << 12)	/* 512KB */
 
 /* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */
 #define	GUC_WOPCM_TOP			(GUC_WOPCM_SIZE_VALUE)
 
-#define GEN8_GT_PM_CONFIG		0x138140
-#define GEN9LP_GT_PM_CONFIG		0x138140
-#define GEN9_GT_PM_CONFIG		0x13816c
+#define GEN8_GT_PM_CONFIG		_MMIO(0x138140)
+#define GEN9LP_GT_PM_CONFIG		_MMIO(0x138140)
+#define GEN9_GT_PM_CONFIG		_MMIO(0x13816c)
 #define   GT_DOORBELL_ENABLE		  (1<<0)
 
-#define GEN8_GTCR			0x4274
+#define GEN8_GTCR			_MMIO(0x4274)
 #define   GEN8_GTCR_INVALIDATE		  (1<<0)
 
-#define GUC_ARAT_C6DIS			0xA178
+#define GUC_ARAT_C6DIS			_MMIO(0xA178)
 
-#define GUC_SHIM_CONTROL		0xc064
+#define GUC_SHIM_CONTROL		_MMIO(0xc064)
 #define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	(1<<0)
 #define   GUC_ENABLE_READ_CACHE_LOGIC		(1<<1)
 #define   GUC_ENABLE_MIA_CACHING		(1<<2)
@@ -89,21 +90,21 @@
 				 GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	| \
 				 GUC_ENABLE_MIA_CLOCK_GATING)
 
-#define HOST2GUC_INTERRUPT		0xc4c8
+#define HOST2GUC_INTERRUPT		_MMIO(0xc4c8)
 #define   HOST2GUC_TRIGGER		  (1<<0)
 
 #define DRBMISC1			0x1984
 #define   DOORBELL_ENABLE		  (1<<0)
 
-#define GEN8_DRBREGL(x)			(0x1000 + (x) * 8)
+#define GEN8_DRBREGL(x)			_MMIO(0x1000 + (x) * 8)
 #define   GEN8_DRB_VALID		  (1<<0)
-#define GEN8_DRBREGU(x)			(GEN8_DRBREGL(x) + 4)
+#define GEN8_DRBREGU(x)			_MMIO(0x1000 + (x) * 8 + 4)
 
-#define DE_GUCRMR			0x44054
+#define DE_GUCRMR			_MMIO(0x44054)
 
-#define GUC_BCS_RCS_IER			0xC550
-#define GUC_VCS2_VCS1_IER		0xC554
-#define GUC_WD_VECS_IER			0xC558
-#define GUC_PM_P24C_IER			0xC55C
+#define GUC_BCS_RCS_IER			_MMIO(0xC550)
+#define GUC_VCS2_VCS1_IER		_MMIO(0xC554)
+#define GUC_WD_VECS_IER			_MMIO(0xC558)
+#define GUC_PM_P24C_IER			_MMIO(0xC55C)
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 036b42b..ed9f100 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -27,7 +27,7 @@
 #include "intel_guc.h"
 
 /**
- * DOC: GuC Client
+ * DOC: GuC-based command submission
  *
  * i915_guc_client:
  * We use the term client to avoid confusion with contexts. A i915_guc_client is
@@ -161,9 +161,9 @@
 	data[0] = HOST2GUC_ACTION_SAMPLE_FORCEWAKE;
 	/* WaRsDisableCoarsePowerGating:skl,bxt */
 	if (!intel_enable_rc6(dev_priv->dev) ||
-	    (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
-	    (IS_SKL_GT3(dev) && (INTEL_REVID(dev) <= SKL_REVID_E0)) ||
-	    (IS_SKL_GT4(dev) && (INTEL_REVID(dev) <= SKL_REVID_E0)))
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1) ||
+	    (IS_SKL_GT3(dev) && IS_SKL_REVID(dev, 0, SKL_REVID_E0)) ||
+	    (IS_SKL_GT4(dev) && IS_SKL_REVID(dev, 0, SKL_REVID_E0)))
 		data[1] = 0;
 	else
 		/* bit 0 and 1 are for Render and Media domain separately */
@@ -258,7 +258,7 @@
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct guc_doorbell_info *doorbell;
 	void *base;
-	int drbreg = GEN8_DRBREGL(client->doorbell_id);
+	i915_reg_t drbreg = GEN8_DRBREGL(client->doorbell_id);
 	int value;
 
 	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
@@ -588,8 +588,7 @@
 /**
  * i915_guc_submit() - Submit commands through GuC
  * @client:	the guc client where commands will go through
- * @ctx:	LRC where commands come from
- * @ring:	HW engine that will excute the commands
+ * @rq:		request associated with the commands
  *
  * Return:	0 if succeed
  */
@@ -731,7 +730,8 @@
  * 		The kernel client to replace ExecList submission is created with
  * 		NORMAL priority. Priority of a client for scheduler can be HIGH,
  * 		while a preemption context can use CRITICAL.
- * @ctx		the context to own the client (we use the default render context)
+ * @ctx:	the context that owns the client (we use the default render
+ * 		context)
  *
  * Return:	An i915_guc_client object if success.
  */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0d228f9..c8ba949 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -139,7 +139,8 @@
 /*
  * We should clear IMR at preinstall/uninstall, and just check at postinstall.
  */
-static void gen5_assert_iir_is_zero(struct drm_i915_private *dev_priv, u32 reg)
+static void gen5_assert_iir_is_zero(struct drm_i915_private *dev_priv,
+				    i915_reg_t reg)
 {
 	u32 val = I915_READ(reg);
 
@@ -147,7 +148,7 @@
 		return;
 
 	WARN(1, "Interrupt register 0x%x is not zero: 0x%08x\n",
-	     reg, val);
+	     i915_mmio_reg_offset(reg), val);
 	I915_WRITE(reg, 0xffffffff);
 	POSTING_READ(reg);
 	I915_WRITE(reg, 0xffffffff);
@@ -283,17 +284,17 @@
 	ilk_update_gt_irq(dev_priv, mask, 0);
 }
 
-static u32 gen6_pm_iir(struct drm_i915_private *dev_priv)
+static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
 {
 	return INTEL_INFO(dev_priv)->gen >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
 }
 
-static u32 gen6_pm_imr(struct drm_i915_private *dev_priv)
+static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv)
 {
 	return INTEL_INFO(dev_priv)->gen >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR;
 }
 
-static u32 gen6_pm_ier(struct drm_i915_private *dev_priv)
+static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv)
 {
 	return INTEL_INFO(dev_priv)->gen >= 8 ? GEN8_GT_IER(2) : GEN6_PMIER;
 }
@@ -350,7 +351,7 @@
 void gen6_reset_rps_interrupts(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t reg = gen6_pm_iir(dev_priv);
+	i915_reg_t reg = gen6_pm_iir(dev_priv);
 
 	spin_lock_irq(&dev_priv->irq_lock);
 	I915_WRITE(reg, dev_priv->pm_rps_events);
@@ -477,7 +478,7 @@
 __i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
 		       u32 enable_mask, u32 status_mask)
 {
-	u32 reg = PIPESTAT(pipe);
+	i915_reg_t reg = PIPESTAT(pipe);
 	u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK;
 
 	assert_spin_locked(&dev_priv->irq_lock);
@@ -504,7 +505,7 @@
 __i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
 		        u32 enable_mask, u32 status_mask)
 {
-	u32 reg = PIPESTAT(pipe);
+	i915_reg_t reg = PIPESTAT(pipe);
 	u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK;
 
 	assert_spin_locked(&dev_priv->irq_lock);
@@ -665,8 +666,7 @@
 static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long high_frame;
-	unsigned long low_frame;
+	i915_reg_t high_frame, low_frame;
 	u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal;
 	struct intel_crtc *intel_crtc =
 		to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
@@ -717,9 +717,7 @@
 	return I915_READ(PIPE_FRMCOUNT_G4X(pipe));
 }
 
-/* raw reads, only for fast reads of display block, no need for forcewake etc. */
-#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
-
+/* I915_READ_FW, only for fast reads of display block, no need for forcewake etc. */
 static int __intel_get_crtc_scanline(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -733,9 +731,9 @@
 		vtotal /= 2;
 
 	if (IS_GEN2(dev))
-		position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN2;
+		position = I915_READ_FW(PIPEDSL(pipe)) & DSL_LINEMASK_GEN2;
 	else
-		position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN3;
+		position = I915_READ_FW(PIPEDSL(pipe)) & DSL_LINEMASK_GEN3;
 
 	/*
 	 * On HSW, the DSL reg (0x70000) appears to return 0 if we
@@ -827,7 +825,7 @@
 		 * We can split this into vertical and horizontal
 		 * scanout position.
 		 */
-		position = (__raw_i915_read32(dev_priv, PIPEFRAMEPIXEL(pipe)) & PIPE_PIXEL_MASK) >> PIPE_PIXEL_SHIFT;
+		position = (I915_READ_FW(PIPEFRAMEPIXEL(pipe)) & PIPE_PIXEL_MASK) >> PIPE_PIXEL_SHIFT;
 
 		/* convert to pixel counts */
 		vbl_start *= htotal;
@@ -1188,7 +1186,7 @@
 	POSTING_READ(GEN7_MISCCPCTL);
 
 	while ((slice = ffs(dev_priv->l3_parity.which_slice)) != 0) {
-		u32 reg;
+		i915_reg_t reg;
 
 		slice--;
 		if (WARN_ON_ONCE(slice >= NUM_L3_SLICES(dev_priv->dev)))
@@ -1196,7 +1194,7 @@
 
 		dev_priv->l3_parity.which_slice &= ~(1<<slice);
 
-		reg = GEN7_L3CDERRST1 + (slice * 0x200);
+		reg = GEN7_L3CDERRST1(slice);
 
 		error_status = I915_READ(reg);
 		row = GEN7_PARITY_ERROR_ROW(error_status);
@@ -1290,70 +1288,69 @@
 		ivybridge_parity_error_irq_handler(dev, gt_iir);
 }
 
+static __always_inline void
+gen8_cs_irq_handler(struct intel_engine_cs *ring, u32 iir, int test_shift)
+{
+	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
+		notify_ring(ring);
+	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
+		intel_lrc_irq_handler(ring);
+}
+
 static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
 				       u32 master_ctl)
 {
 	irqreturn_t ret = IRQ_NONE;
 
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		u32 tmp = I915_READ_FW(GEN8_GT_IIR(0));
-		if (tmp) {
-			I915_WRITE_FW(GEN8_GT_IIR(0), tmp);
+		u32 iir = I915_READ_FW(GEN8_GT_IIR(0));
+		if (iir) {
+			I915_WRITE_FW(GEN8_GT_IIR(0), iir);
 			ret = IRQ_HANDLED;
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[RCS]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[RCS]);
+			gen8_cs_irq_handler(&dev_priv->ring[RCS],
+					iir, GEN8_RCS_IRQ_SHIFT);
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[BCS]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[BCS]);
+			gen8_cs_irq_handler(&dev_priv->ring[BCS],
+					iir, GEN8_BCS_IRQ_SHIFT);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT0)!\n");
 	}
 
 	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
-		u32 tmp = I915_READ_FW(GEN8_GT_IIR(1));
-		if (tmp) {
-			I915_WRITE_FW(GEN8_GT_IIR(1), tmp);
+		u32 iir = I915_READ_FW(GEN8_GT_IIR(1));
+		if (iir) {
+			I915_WRITE_FW(GEN8_GT_IIR(1), iir);
 			ret = IRQ_HANDLED;
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[VCS]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[VCS]);
+			gen8_cs_irq_handler(&dev_priv->ring[VCS],
+					iir, GEN8_VCS1_IRQ_SHIFT);
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[VCS2]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[VCS2]);
+			gen8_cs_irq_handler(&dev_priv->ring[VCS2],
+					iir, GEN8_VCS2_IRQ_SHIFT);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT1)!\n");
 	}
 
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		u32 tmp = I915_READ_FW(GEN8_GT_IIR(3));
-		if (tmp) {
-			I915_WRITE_FW(GEN8_GT_IIR(3), tmp);
+		u32 iir = I915_READ_FW(GEN8_GT_IIR(3));
+		if (iir) {
+			I915_WRITE_FW(GEN8_GT_IIR(3), iir);
 			ret = IRQ_HANDLED;
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[VECS]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[VECS]);
+			gen8_cs_irq_handler(&dev_priv->ring[VECS],
+					iir, GEN8_VECS_IRQ_SHIFT);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT3)!\n");
 	}
 
 	if (master_ctl & GEN8_GT_PM_IRQ) {
-		u32 tmp = I915_READ_FW(GEN8_GT_IIR(2));
-		if (tmp & dev_priv->pm_rps_events) {
+		u32 iir = I915_READ_FW(GEN8_GT_IIR(2));
+		if (iir & dev_priv->pm_rps_events) {
 			I915_WRITE_FW(GEN8_GT_IIR(2),
-				      tmp & dev_priv->pm_rps_events);
+				      iir & dev_priv->pm_rps_events);
 			ret = IRQ_HANDLED;
-			gen6_rps_irq_handler(dev_priv, tmp);
+			gen6_rps_irq_handler(dev_priv, iir);
 		} else
 			DRM_ERROR("The master control interrupt lied (PM)!\n");
 	}
@@ -1625,7 +1622,7 @@
 
 	spin_lock(&dev_priv->irq_lock);
 	for_each_pipe(dev_priv, pipe) {
-		int reg;
+		i915_reg_t reg;
 		u32 mask, iir_bit = 0;
 
 		/*
@@ -2354,9 +2351,13 @@
 				spt_irq_handler(dev, pch_iir);
 			else
 				cpt_irq_handler(dev, pch_iir);
-		} else
-			DRM_ERROR("The master control interrupt lied (SDE)!\n");
-
+		} else {
+			/*
+			 * As on previous PCHs, there seems to be something
+			 * fishy going on with forwarding PCH interrupts.
+			 */
+			DRM_DEBUG_DRIVER("The master control interrupt lied (SDE)!\n");
+		}
 	}
 
 	I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
@@ -3869,7 +3870,7 @@
 			DRM_DEBUG("Command parser error, iir 0x%08x\n", iir);
 
 		for_each_pipe(dev_priv, pipe) {
-			int reg = PIPESTAT(pipe);
+			i915_reg_t reg = PIPESTAT(pipe);
 			pipe_stats[pipe] = I915_READ(reg);
 
 			/*
@@ -4050,7 +4051,7 @@
 			DRM_DEBUG("Command parser error, iir 0x%08x\n", iir);
 
 		for_each_pipe(dev_priv, pipe) {
-			int reg = PIPESTAT(pipe);
+			i915_reg_t reg = PIPESTAT(pipe);
 			pipe_stats[pipe] = I915_READ(reg);
 
 			/* Clear the PIPE*STAT regs before the IIR */
@@ -4272,7 +4273,7 @@
 			DRM_DEBUG("Command parser error, iir 0x%08x\n", iir);
 
 		for_each_pipe(dev_priv, pipe) {
-			int reg = PIPESTAT(pipe);
+			i915_reg_t reg = PIPESTAT(pipe);
 			pipe_stats[pipe] = I915_READ(reg);
 
 			/*
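
    The interrupt changes above convert every raw u32 register variable into the
    i915_reg_t handle that the i915_reg.h diff below introduces. As a rough
    illustration of what the one-field struct buys, here is a self-contained
    userspace sketch (hypothetical names, not the driver's macros): a bare
    integer no longer converts to a register handle, and arithmetic on a handle
    no longer compiles. That is also why derived registers such as GEN8_DRBREGU
    earlier in this series now spell out their own _MMIO() offset rather than
    adding 4 to the low-dword macro.

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical miniature of the typed-register idea. */
	typedef struct { uint32_t reg; } reg_t;
	#define REG(r) ((const reg_t){ .reg = (r) })

	static inline uint32_t reg_offset(reg_t r) { return r.reg; }

	/* A read helper that only accepts the wrapped type. */
	static uint32_t read_reg(const uint32_t *mmio, reg_t r)
	{
		return mmio[reg_offset(r) / 4];
	}

	int main(void)
	{
		uint32_t fake_mmio[16] = { [2] = 0xdeadbeef };

		printf("%#x\n", (unsigned)read_reg(fake_mmio, REG(0x8)));

		/* read_reg(fake_mmio, 0x8);   would not compile: int is not reg_t  */
		/* REG(0x8) + 4;               would not compile: no '+' on structs */
		return 0;
	}
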
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 4be13a5..835d609 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -32,6 +32,7 @@
 	.panel_use_ssc = -1,
 	.vbt_sdvo_panel_type = -1,
 	.enable_rc6 = -1,
+	.enable_dc = -1,
 	.enable_fbc = -1,
 	.enable_execlists = -1,
 	.enable_hangcheck = true,
@@ -80,6 +81,11 @@
 	"For example, 3 would enable rc6 and deep rc6, and 7 would enable everything. "
 	"default: -1 (use per-chip default)");
 
+module_param_named_unsafe(enable_dc, i915.enable_dc, int, 0400);
+MODULE_PARM_DESC(enable_dc,
+	"Enable power-saving display C-states. "
+	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)");
+
 module_param_named_unsafe(enable_fbc, i915.enable_fbc, int, 0600);
 MODULE_PARM_DESC(enable_fbc,
 	"Enable frame buffer compression for power savings "
@@ -112,7 +118,7 @@
 module_param_named_unsafe(enable_ppgtt, i915.enable_ppgtt, int, 0400);
 MODULE_PARM_DESC(enable_ppgtt,
 	"Override PPGTT usage. "
-	"(-1=auto [default], 0=disabled, 1=aliasing, 2=full)");
+	"(-1=auto [default], 0=disabled, 1=aliasing, 2=full, 3=full with extended address space)");
 
 module_param_named_unsafe(enable_execlists, i915.enable_execlists, int, 0400);
 MODULE_PARM_DESC(enable_execlists,
@@ -126,7 +132,7 @@
 MODULE_PARM_DESC(preliminary_hw_support,
 	"Enable preliminary hardware support.");
 
-module_param_named_unsafe(disable_power_well, i915.disable_power_well, int, 0600);
+module_param_named_unsafe(disable_power_well, i915.disable_power_well, int, 0400);
 MODULE_PARM_DESC(disable_power_well,
 	"Disable display power wells when possible "
 	"(-1=auto [default], 0=power wells always on, 1=power wells disabled when possible)");
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bc7b8fa..1a12d44 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -25,14 +25,43 @@
 #ifndef _I915_REG_H_
 #define _I915_REG_H_
 
+typedef struct {
+	uint32_t reg;
+} i915_reg_t;
+
+#define _MMIO(r) ((const i915_reg_t){ .reg = (r) })
+
+#define INVALID_MMIO_REG _MMIO(0)
+
+static inline uint32_t i915_mmio_reg_offset(i915_reg_t reg)
+{
+	return reg.reg;
+}
+
+static inline bool i915_mmio_reg_equal(i915_reg_t a, i915_reg_t b)
+{
+	return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b);
+}
+
+static inline bool i915_mmio_reg_valid(i915_reg_t reg)
+{
+	return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
+}
+
 #define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a)))
+#define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b))
 #define _PLANE(plane, a, b) _PIPE(plane, a, b)
-#define _TRANSCODER(tran, a, b) ((a) + (tran)*((b)-(a)))
+#define _MMIO_PLANE(plane, a, b) _MMIO_PIPE(plane, a, b)
+#define _TRANS(tran, a, b) ((a) + (tran)*((b)-(a)))
+#define _MMIO_TRANS(tran, a, b) _MMIO(_TRANS(tran, a, b))
 #define _PORT(port, a, b) ((a) + (port)*((b)-(a)))
+#define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b))
 #define _PIPE3(pipe, a, b, c) ((pipe) == PIPE_A ? (a) : \
 			       (pipe) == PIPE_B ? (b) : (c))
+#define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PIPE3(pipe, a, b, c))
 #define _PORT3(port, a, b, c) ((port) == PORT_A ? (a) : \
 			       (port) == PORT_B ? (b) : (c))
+#define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PORT3(pipe, a, b, c))
 
 #define _MASKED_FIELD(mask, value) ({					   \
 	if (__builtin_constant_p(mask))					   \
@@ -105,14 +134,14 @@
 #define  GRDOM_RESET_STATUS (1<<1)
 #define  GRDOM_RESET_ENABLE (1<<0)
 
-#define ILK_GDSR (MCHBAR_MIRROR_BASE + 0x2ca4)
+#define ILK_GDSR _MMIO(MCHBAR_MIRROR_BASE + 0x2ca4)
 #define  ILK_GRDOM_FULL		(0<<1)
 #define  ILK_GRDOM_RENDER	(1<<1)
 #define  ILK_GRDOM_MEDIA	(3<<1)
 #define  ILK_GRDOM_MASK		(3<<1)
 #define  ILK_GRDOM_RESET_ENABLE (1<<0)
 
-#define GEN6_MBCUNIT_SNPCR	0x900c /* for LLC config */
+#define GEN6_MBCUNIT_SNPCR	_MMIO(0x900c) /* for LLC config */
 #define   GEN6_MBC_SNPCR_SHIFT	21
 #define   GEN6_MBC_SNPCR_MASK	(3<<21)
 #define   GEN6_MBC_SNPCR_MAX	(0<<21)
@@ -120,31 +149,31 @@
 #define   GEN6_MBC_SNPCR_LOW	(2<<21)
 #define   GEN6_MBC_SNPCR_MIN	(3<<21) /* only 1/16th of the cache is shared */
 
-#define VLV_G3DCTL		0x9024
-#define VLV_GSCKGCTL		0x9028
+#define VLV_G3DCTL		_MMIO(0x9024)
+#define VLV_GSCKGCTL		_MMIO(0x9028)
 
-#define GEN6_MBCTL		0x0907c
+#define GEN6_MBCTL		_MMIO(0x0907c)
 #define   GEN6_MBCTL_ENABLE_BOOT_FETCH	(1 << 4)
 #define   GEN6_MBCTL_CTX_FETCH_NEEDED	(1 << 3)
 #define   GEN6_MBCTL_BME_UPDATE_ENABLE	(1 << 2)
 #define   GEN6_MBCTL_MAE_UPDATE_ENABLE	(1 << 1)
 #define   GEN6_MBCTL_BOOT_FETCH_MECH	(1 << 0)
 
-#define GEN6_GDRST	0x941c
+#define GEN6_GDRST	_MMIO(0x941c)
 #define  GEN6_GRDOM_FULL		(1 << 0)
 #define  GEN6_GRDOM_RENDER		(1 << 1)
 #define  GEN6_GRDOM_MEDIA		(1 << 2)
 #define  GEN6_GRDOM_BLT			(1 << 3)
 
-#define RING_PP_DIR_BASE(ring)		((ring)->mmio_base+0x228)
-#define RING_PP_DIR_BASE_READ(ring)	((ring)->mmio_base+0x518)
-#define RING_PP_DIR_DCLV(ring)		((ring)->mmio_base+0x220)
+#define RING_PP_DIR_BASE(ring)		_MMIO((ring)->mmio_base+0x228)
+#define RING_PP_DIR_BASE_READ(ring)	_MMIO((ring)->mmio_base+0x518)
+#define RING_PP_DIR_DCLV(ring)		_MMIO((ring)->mmio_base+0x220)
 #define   PP_DIR_DCLV_2G		0xffffffff
 
-#define GEN8_RING_PDP_UDW(ring, n)	((ring)->mmio_base+0x270 + ((n) * 8 + 4))
-#define GEN8_RING_PDP_LDW(ring, n)	((ring)->mmio_base+0x270 + (n) * 8)
+#define GEN8_RING_PDP_UDW(ring, n)	_MMIO((ring)->mmio_base+0x270 + (n) * 8 + 4)
+#define GEN8_RING_PDP_LDW(ring, n)	_MMIO((ring)->mmio_base+0x270 + (n) * 8)
 
-#define GEN8_R_PWR_CLK_STATE		0x20C8
+#define GEN8_R_PWR_CLK_STATE		_MMIO(0x20C8)
 #define   GEN8_RPCS_ENABLE		(1 << 31)
 #define   GEN8_RPCS_S_CNT_ENABLE	(1 << 18)
 #define   GEN8_RPCS_S_CNT_SHIFT		15
@@ -157,7 +186,7 @@
 #define   GEN8_RPCS_EU_MIN_SHIFT	0
 #define   GEN8_RPCS_EU_MIN_MASK		(0xf << GEN8_RPCS_EU_MIN_SHIFT)
 
-#define GAM_ECOCHK			0x4090
+#define GAM_ECOCHK			_MMIO(0x4090)
 #define   BDW_DISABLE_HDC_INVALIDATION	(1<<25)
 #define   ECOCHK_SNB_BIT		(1<<10)
 #define   ECOCHK_DIS_TLB		(1<<8)
@@ -170,15 +199,15 @@
 #define   ECOCHK_PPGTT_WT_HSW		(0x2<<3)
 #define   ECOCHK_PPGTT_WB_HSW		(0x3<<3)
 
-#define GAC_ECO_BITS			0x14090
+#define GAC_ECO_BITS			_MMIO(0x14090)
 #define   ECOBITS_SNB_BIT		(1<<13)
 #define   ECOBITS_PPGTT_CACHE64B	(3<<8)
 #define   ECOBITS_PPGTT_CACHE4B		(0<<8)
 
-#define GAB_CTL				0x24000
+#define GAB_CTL				_MMIO(0x24000)
 #define   GAB_CTL_CONT_AFTER_PAGEFAULT	(1<<8)
 
-#define GEN6_STOLEN_RESERVED		0x1082C0
+#define GEN6_STOLEN_RESERVED		_MMIO(0x1082C0)
 #define GEN6_STOLEN_RESERVED_ADDR_MASK	(0xFFF << 20)
 #define GEN7_STOLEN_RESERVED_ADDR_MASK	(0x3FFF << 18)
 #define GEN6_STOLEN_RESERVED_SIZE_MASK	(3 << 4)
@@ -200,6 +229,7 @@
 #define VGA_ST01_MDA 0x3ba
 #define VGA_ST01_CGA 0x3da
 
+#define _VGA_MSR_WRITE _MMIO(0x3c2)
 #define VGA_MSR_WRITE 0x3c2
 #define VGA_MSR_READ 0x3cc
 #define   VGA_MSR_MEM_EN (1<<1)
@@ -377,10 +407,12 @@
 #define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
 #define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 
-#define MI_PREDICATE_SRC0	(0x2400)
-#define MI_PREDICATE_SRC1	(0x2408)
+#define MI_PREDICATE_SRC0	_MMIO(0x2400)
+#define MI_PREDICATE_SRC0_UDW	_MMIO(0x2400 + 4)
+#define MI_PREDICATE_SRC1	_MMIO(0x2408)
+#define MI_PREDICATE_SRC1_UDW	_MMIO(0x2408 + 4)
 
-#define MI_PREDICATE_RESULT_2	(0x2214)
+#define MI_PREDICATE_RESULT_2	_MMIO(0x2214)
 #define  LOWER_SLICE_ENABLED	(1<<0)
 #define  LOWER_SLICE_DISABLED	(0<<0)
 
@@ -509,49 +541,61 @@
 /*
  * Registers used only by the command parser
  */
-#define BCS_SWCTRL 0x22200
+#define BCS_SWCTRL _MMIO(0x22200)
 
-#define GPGPU_THREADS_DISPATCHED        0x2290
-#define HS_INVOCATION_COUNT             0x2300
-#define DS_INVOCATION_COUNT             0x2308
-#define IA_VERTICES_COUNT               0x2310
-#define IA_PRIMITIVES_COUNT             0x2318
-#define VS_INVOCATION_COUNT             0x2320
-#define GS_INVOCATION_COUNT             0x2328
-#define GS_PRIMITIVES_COUNT             0x2330
-#define CL_INVOCATION_COUNT             0x2338
-#define CL_PRIMITIVES_COUNT             0x2340
-#define PS_INVOCATION_COUNT             0x2348
-#define PS_DEPTH_COUNT                  0x2350
+#define GPGPU_THREADS_DISPATCHED        _MMIO(0x2290)
+#define GPGPU_THREADS_DISPATCHED_UDW	_MMIO(0x2290 + 4)
+#define HS_INVOCATION_COUNT             _MMIO(0x2300)
+#define HS_INVOCATION_COUNT_UDW		_MMIO(0x2300 + 4)
+#define DS_INVOCATION_COUNT             _MMIO(0x2308)
+#define DS_INVOCATION_COUNT_UDW		_MMIO(0x2308 + 4)
+#define IA_VERTICES_COUNT               _MMIO(0x2310)
+#define IA_VERTICES_COUNT_UDW		_MMIO(0x2310 + 4)
+#define IA_PRIMITIVES_COUNT             _MMIO(0x2318)
+#define IA_PRIMITIVES_COUNT_UDW		_MMIO(0x2318 + 4)
+#define VS_INVOCATION_COUNT             _MMIO(0x2320)
+#define VS_INVOCATION_COUNT_UDW		_MMIO(0x2320 + 4)
+#define GS_INVOCATION_COUNT             _MMIO(0x2328)
+#define GS_INVOCATION_COUNT_UDW		_MMIO(0x2328 + 4)
+#define GS_PRIMITIVES_COUNT             _MMIO(0x2330)
+#define GS_PRIMITIVES_COUNT_UDW		_MMIO(0x2330 + 4)
+#define CL_INVOCATION_COUNT             _MMIO(0x2338)
+#define CL_INVOCATION_COUNT_UDW		_MMIO(0x2338 + 4)
+#define CL_PRIMITIVES_COUNT             _MMIO(0x2340)
+#define CL_PRIMITIVES_COUNT_UDW		_MMIO(0x2340 + 4)
+#define PS_INVOCATION_COUNT             _MMIO(0x2348)
+#define PS_INVOCATION_COUNT_UDW		_MMIO(0x2348 + 4)
+#define PS_DEPTH_COUNT                  _MMIO(0x2350)
+#define PS_DEPTH_COUNT_UDW		_MMIO(0x2350 + 4)
 
 /* There are the 4 64-bit counter registers, one for each stream output */
-#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
+#define GEN7_SO_NUM_PRIMS_WRITTEN(n)		_MMIO(0x5200 + (n) * 8)
+#define GEN7_SO_NUM_PRIMS_WRITTEN_UDW(n)	_MMIO(0x5200 + (n) * 8 + 4)
 
-#define GEN7_SO_PRIM_STORAGE_NEEDED(n)  (0x5240 + (n) * 8)
+#define GEN7_SO_PRIM_STORAGE_NEEDED(n)		_MMIO(0x5240 + (n) * 8)
+#define GEN7_SO_PRIM_STORAGE_NEEDED_UDW(n)	_MMIO(0x5240 + (n) * 8 + 4)
 
-#define GEN7_3DPRIM_END_OFFSET          0x2420
-#define GEN7_3DPRIM_START_VERTEX        0x2430
-#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
-#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
-#define GEN7_3DPRIM_START_INSTANCE      0x243C
-#define GEN7_3DPRIM_BASE_VERTEX         0x2440
+#define GEN7_3DPRIM_END_OFFSET          _MMIO(0x2420)
+#define GEN7_3DPRIM_START_VERTEX        _MMIO(0x2430)
+#define GEN7_3DPRIM_VERTEX_COUNT        _MMIO(0x2434)
+#define GEN7_3DPRIM_INSTANCE_COUNT      _MMIO(0x2438)
+#define GEN7_3DPRIM_START_INSTANCE      _MMIO(0x243C)
+#define GEN7_3DPRIM_BASE_VERTEX         _MMIO(0x2440)
 
-#define GEN7_GPGPU_DISPATCHDIMX         0x2500
-#define GEN7_GPGPU_DISPATCHDIMY         0x2504
-#define GEN7_GPGPU_DISPATCHDIMZ         0x2508
+#define GEN7_GPGPU_DISPATCHDIMX         _MMIO(0x2500)
+#define GEN7_GPGPU_DISPATCHDIMY         _MMIO(0x2504)
+#define GEN7_GPGPU_DISPATCHDIMZ         _MMIO(0x2508)
 
-#define OACONTROL 0x2360
+#define OACONTROL _MMIO(0x2360)
 
 #define _GEN7_PIPEA_DE_LOAD_SL	0x70068
 #define _GEN7_PIPEB_DE_LOAD_SL	0x71068
-#define GEN7_PIPE_DE_LOAD_SL(pipe) _PIPE(pipe, \
-					 _GEN7_PIPEA_DE_LOAD_SL, \
-					 _GEN7_PIPEB_DE_LOAD_SL)
+#define GEN7_PIPE_DE_LOAD_SL(pipe) _MMIO_PIPE(pipe, _GEN7_PIPEA_DE_LOAD_SL, _GEN7_PIPEB_DE_LOAD_SL)
 
 /*
  * Reset registers
  */
-#define DEBUG_RESET_I830		0x6070
+#define DEBUG_RESET_I830		_MMIO(0x6070)
 #define  DEBUG_RESET_FULL		(1<<7)
 #define  DEBUG_RESET_RENDER		(1<<8)
 #define  DEBUG_RESET_DISPLAY		(1<<9)
@@ -559,7 +603,7 @@
 /*
  * IOSF sideband
  */
-#define VLV_IOSF_DOORBELL_REQ			(VLV_DISPLAY_BASE + 0x2100)
+#define VLV_IOSF_DOORBELL_REQ			_MMIO(VLV_DISPLAY_BASE + 0x2100)
 #define   IOSF_DEVFN_SHIFT			24
 #define   IOSF_OPCODE_SHIFT			16
 #define   IOSF_PORT_SHIFT			8
@@ -576,8 +620,8 @@
 #define   IOSF_PORT_CCU				0xA9
 #define   IOSF_PORT_GPS_CORE			0x48
 #define   IOSF_PORT_FLISDSI			0x1B
-#define VLV_IOSF_DATA				(VLV_DISPLAY_BASE + 0x2104)
-#define VLV_IOSF_ADDR				(VLV_DISPLAY_BASE + 0x2108)
+#define VLV_IOSF_DATA				_MMIO(VLV_DISPLAY_BASE + 0x2104)
+#define VLV_IOSF_ADDR				_MMIO(VLV_DISPLAY_BASE + 0x2108)
 
 /* See configdb bunit SB addr map */
 #define BUNIT_REG_BISOC				0x11
@@ -609,6 +653,7 @@
 
 /* See the PUNIT HAS v0.8 for the below bits */
 enum punit_power_well {
+	/* These numbers are fixed and must match the position of the pw bits */
 	PUNIT_POWER_WELL_RENDER			= 0,
 	PUNIT_POWER_WELL_MEDIA			= 1,
 	PUNIT_POWER_WELL_DISP2D			= 3,
@@ -621,10 +666,12 @@
 	PUNIT_POWER_WELL_DPIO_RX1		= 11,
 	PUNIT_POWER_WELL_DPIO_CMN_D		= 12,
 
-	PUNIT_POWER_WELL_NUM,
+	/* Not actual bit groups. Used as IDs for lookup_power_well() */
+	PUNIT_POWER_WELL_ALWAYS_ON,
 };
 
 enum skl_disp_power_wells {
+	/* These numbers are fixed and must match the position of the pw bits */
 	SKL_DISP_PW_MISC_IO,
 	SKL_DISP_PW_DDI_A_E,
 	SKL_DISP_PW_DDI_B,
@@ -632,6 +679,10 @@
 	SKL_DISP_PW_DDI_D,
 	SKL_DISP_PW_1 = 14,
 	SKL_DISP_PW_2,
+
+	/* Not actual bit groups. Used as IDs for lookup_power_well() */
+	SKL_DISP_PW_ALWAYS_ON,
+	SKL_DISP_PW_DC_OFF,
 };
 
 #define SKL_POWER_WELL_STATE(pw) (1 << ((pw) * 2))
@@ -832,7 +883,7 @@
  */
 #define DPIO_DEVFN			0
 
-#define DPIO_CTL			(VLV_DISPLAY_BASE + 0x2110)
+#define DPIO_CTL			_MMIO(VLV_DISPLAY_BASE + 0x2110)
 #define  DPIO_MODSEL1			(1<<3) /* if ref clk b == 27 */
 #define  DPIO_MODSEL0			(1<<2) /* if ref clk a == 27 */
 #define  DPIO_SFR_BYPASS		(1<<1)
@@ -1185,9 +1236,9 @@
 #define   DPIO_UPAR_SHIFT		30
 
 /* BXT PHY registers */
-#define _BXT_PHY(phy, a, b)		_PIPE((phy), (a), (b))
+#define _BXT_PHY(phy, a, b)		_MMIO_PIPE((phy), (a), (b))
 
-#define BXT_P_CR_GT_DISP_PWRON		0x138090
+#define BXT_P_CR_GT_DISP_PWRON		_MMIO(0x138090)
 #define   GT_DISPLAY_POWER_ON(phy)	(1 << (phy))
 
 #define _PHY_CTL_FAMILY_EDP		0x64C80
@@ -1203,7 +1254,7 @@
 #define   PORT_PLL_ENABLE		(1 << 31)
 #define   PORT_PLL_LOCK			(1 << 30)
 #define   PORT_PLL_REF_SEL		(1 << 27)
-#define BXT_PORT_PLL_ENABLE(port)	_PORT(port, _PORT_PLL_A, _PORT_PLL_B)
+#define BXT_PORT_PLL_ENABLE(port)	_MMIO_PORT(port, _PORT_PLL_A, _PORT_PLL_B)
 
 #define _PORT_PLL_EBB_0_A		0x162034
 #define _PORT_PLL_EBB_0_B		0x6C034
@@ -1214,7 +1265,7 @@
 #define   PORT_PLL_P2_SHIFT		8
 #define   PORT_PLL_P2_MASK		(0x1f << PORT_PLL_P2_SHIFT)
 #define   PORT_PLL_P2(x)		((x)  << PORT_PLL_P2_SHIFT)
-#define BXT_PORT_PLL_EBB_0(port)	_PORT3(port, _PORT_PLL_EBB_0_A, \
+#define BXT_PORT_PLL_EBB_0(port)	_MMIO_PORT3(port, _PORT_PLL_EBB_0_A, \
 						_PORT_PLL_EBB_0_B,	\
 						_PORT_PLL_EBB_0_C)
 
@@ -1223,7 +1274,7 @@
 #define _PORT_PLL_EBB_4_C		0x6C344
 #define   PORT_PLL_10BIT_CLK_ENABLE	(1 << 13)
 #define   PORT_PLL_RECALIBRATE		(1 << 14)
-#define BXT_PORT_PLL_EBB_4(port)	_PORT3(port, _PORT_PLL_EBB_4_A, \
+#define BXT_PORT_PLL_EBB_4(port)	_MMIO_PORT3(port, _PORT_PLL_EBB_4_A, \
 						_PORT_PLL_EBB_4_B,	\
 						_PORT_PLL_EBB_4_C)
 
@@ -1259,7 +1310,7 @@
 #define _PORT_PLL_BASE(port)		_PORT3(port, _PORT_PLL_0_A,	\
 						_PORT_PLL_0_B,		\
 						_PORT_PLL_0_C)
-#define BXT_PORT_PLL(port, idx)		(_PORT_PLL_BASE(port) + (idx) * 4)
+#define BXT_PORT_PLL(port, idx)		_MMIO(_PORT_PLL_BASE(port) + (idx) * 4)
 
 /* BXT PHY common lane registers */
 #define _PORT_CL1CM_DW0_A		0x162000
@@ -1297,7 +1348,7 @@
 							_PORT_CL1CM_DW30_A)
 
 /* Defined for PHY0 only */
-#define BXT_PORT_CL2CM_DW6_BC		0x6C358
+#define BXT_PORT_CL2CM_DW6_BC		_MMIO(0x6C358)
 #define   DW6_OLDO_DYN_PWR_DOWN_EN	(1 << 28)
 
 /* BXT PHY Ref registers */
@@ -1337,10 +1388,10 @@
 #define _PORT_PCS_DW10_GRP_A		0x162C28
 #define _PORT_PCS_DW10_GRP_B		0x6CC28
 #define _PORT_PCS_DW10_GRP_C		0x6CE28
-#define BXT_PORT_PCS_DW10_LN01(port)	_PORT3(port, _PORT_PCS_DW10_LN01_A, \
+#define BXT_PORT_PCS_DW10_LN01(port)	_MMIO_PORT3(port, _PORT_PCS_DW10_LN01_A, \
 						     _PORT_PCS_DW10_LN01_B, \
 						     _PORT_PCS_DW10_LN01_C)
-#define BXT_PORT_PCS_DW10_GRP(port)	_PORT3(port, _PORT_PCS_DW10_GRP_A,  \
+#define BXT_PORT_PCS_DW10_GRP(port)	_MMIO_PORT3(port, _PORT_PCS_DW10_GRP_A,  \
 						     _PORT_PCS_DW10_GRP_B,  \
 						     _PORT_PCS_DW10_GRP_C)
 #define   TX2_SWING_CALC_INIT		(1 << 31)
@@ -1357,13 +1408,13 @@
 #define _PORT_PCS_DW12_GRP_C		0x6CE30
 #define   LANESTAGGER_STRAP_OVRD	(1 << 6)
 #define   LANE_STAGGER_MASK		0x1F
-#define BXT_PORT_PCS_DW12_LN01(port)	_PORT3(port, _PORT_PCS_DW12_LN01_A, \
+#define BXT_PORT_PCS_DW12_LN01(port)	_MMIO_PORT3(port, _PORT_PCS_DW12_LN01_A, \
 						     _PORT_PCS_DW12_LN01_B, \
 						     _PORT_PCS_DW12_LN01_C)
-#define BXT_PORT_PCS_DW12_LN23(port)	_PORT3(port, _PORT_PCS_DW12_LN23_A, \
+#define BXT_PORT_PCS_DW12_LN23(port)	_MMIO_PORT3(port, _PORT_PCS_DW12_LN23_A, \
 						     _PORT_PCS_DW12_LN23_B, \
 						     _PORT_PCS_DW12_LN23_C)
-#define BXT_PORT_PCS_DW12_GRP(port)	_PORT3(port, _PORT_PCS_DW12_GRP_A, \
+#define BXT_PORT_PCS_DW12_GRP(port)	_MMIO_PORT3(port, _PORT_PCS_DW12_GRP_A, \
 						     _PORT_PCS_DW12_GRP_B, \
 						     _PORT_PCS_DW12_GRP_C)
 
@@ -1377,10 +1428,10 @@
 #define _PORT_TX_DW2_GRP_A		0x162D08
 #define _PORT_TX_DW2_GRP_B		0x6CD08
 #define _PORT_TX_DW2_GRP_C		0x6CF08
-#define BXT_PORT_TX_DW2_GRP(port)	_PORT3(port, _PORT_TX_DW2_GRP_A,  \
+#define BXT_PORT_TX_DW2_GRP(port)	_MMIO_PORT3(port, _PORT_TX_DW2_GRP_A,  \
 						     _PORT_TX_DW2_GRP_B,  \
 						     _PORT_TX_DW2_GRP_C)
-#define BXT_PORT_TX_DW2_LN0(port)	_PORT3(port, _PORT_TX_DW2_LN0_A,  \
+#define BXT_PORT_TX_DW2_LN0(port)	_MMIO_PORT3(port, _PORT_TX_DW2_LN0_A,  \
 						     _PORT_TX_DW2_LN0_B,  \
 						     _PORT_TX_DW2_LN0_C)
 #define   MARGIN_000_SHIFT		16
@@ -1394,10 +1445,10 @@
 #define _PORT_TX_DW3_GRP_A		0x162D0C
 #define _PORT_TX_DW3_GRP_B		0x6CD0C
 #define _PORT_TX_DW3_GRP_C		0x6CF0C
-#define BXT_PORT_TX_DW3_GRP(port)	_PORT3(port, _PORT_TX_DW3_GRP_A,  \
+#define BXT_PORT_TX_DW3_GRP(port)	_MMIO_PORT3(port, _PORT_TX_DW3_GRP_A,  \
 						     _PORT_TX_DW3_GRP_B,  \
 						     _PORT_TX_DW3_GRP_C)
-#define BXT_PORT_TX_DW3_LN0(port)	_PORT3(port, _PORT_TX_DW3_LN0_A,  \
+#define BXT_PORT_TX_DW3_LN0(port)	_MMIO_PORT3(port, _PORT_TX_DW3_LN0_A,  \
 						     _PORT_TX_DW3_LN0_B,  \
 						     _PORT_TX_DW3_LN0_C)
 #define   SCALE_DCOMP_METHOD		(1 << 26)
@@ -1409,10 +1460,10 @@
 #define _PORT_TX_DW4_GRP_A		0x162D10
 #define _PORT_TX_DW4_GRP_B		0x6CD10
 #define _PORT_TX_DW4_GRP_C		0x6CF10
-#define BXT_PORT_TX_DW4_LN0(port)	_PORT3(port, _PORT_TX_DW4_LN0_A,  \
+#define BXT_PORT_TX_DW4_LN0(port)	_MMIO_PORT3(port, _PORT_TX_DW4_LN0_A,  \
 						     _PORT_TX_DW4_LN0_B,  \
 						     _PORT_TX_DW4_LN0_C)
-#define BXT_PORT_TX_DW4_GRP(port)	_PORT3(port, _PORT_TX_DW4_GRP_A,  \
+#define BXT_PORT_TX_DW4_GRP(port)	_MMIO_PORT3(port, _PORT_TX_DW4_GRP_A,  \
 						     _PORT_TX_DW4_GRP_B,  \
 						     _PORT_TX_DW4_GRP_C)
 #define   DEEMPH_SHIFT			24
@@ -1423,17 +1474,17 @@
 #define _PORT_TX_DW14_LN0_C		0x6C938
 #define   LATENCY_OPTIM_SHIFT		30
 #define   LATENCY_OPTIM			(1 << LATENCY_OPTIM_SHIFT)
-#define BXT_PORT_TX_DW14_LN(port, lane)	(_PORT3((port), _PORT_TX_DW14_LN0_A,   \
+#define BXT_PORT_TX_DW14_LN(port, lane)	_MMIO(_PORT3((port), _PORT_TX_DW14_LN0_A,   \
 							_PORT_TX_DW14_LN0_B,   \
 							_PORT_TX_DW14_LN0_C) + \
 					 _BXT_LANE_OFFSET(lane))
 
 /* UAIMI scratch pad register 1 */
-#define UAIMI_SPR1			0x4F074
+#define UAIMI_SPR1			_MMIO(0x4F074)
 /* SKL VccIO mask */
 #define SKL_VCCIO_MASK			0x1
 /* SKL balance leg register */
-#define DISPIO_CR_TX_BMU_CR0		0x6C00C
+#define DISPIO_CR_TX_BMU_CR0		_MMIO(0x6C00C)
 /* I_boost values */
 #define BALANCE_LEG_SHIFT(port)		(8+3*(port))
 #define BALANCE_LEG_MASK(port)		(7<<(8+3*(port)))
@@ -1450,7 +1501,7 @@
  * [0-15] @ 0x100000 gen6,vlv,chv
  * [0-31] @ 0x100000 gen7+
  */
-#define FENCE_REG(i)			(0x2000 + (((i) & 8) << 9) + ((i) & 7) * 4)
+#define FENCE_REG(i)			_MMIO(0x2000 + (((i) & 8) << 9) + ((i) & 7) * 4)
 #define   I830_FENCE_START_MASK		0x07f80000
 #define   I830_FENCE_TILING_Y_SHIFT	12
 #define   I830_FENCE_SIZE_BITS(size)	((ffs((size) >> 19) - 1) << 8)
@@ -1463,21 +1514,21 @@
 #define   I915_FENCE_START_MASK		0x0ff00000
 #define   I915_FENCE_SIZE_BITS(size)	((ffs((size) >> 20) - 1) << 8)
 
-#define FENCE_REG_965_LO(i)		(0x03000 + (i) * 8)
-#define FENCE_REG_965_HI(i)		(0x03000 + (i) * 8 + 4)
+#define FENCE_REG_965_LO(i)		_MMIO(0x03000 + (i) * 8)
+#define FENCE_REG_965_HI(i)		_MMIO(0x03000 + (i) * 8 + 4)
 #define   I965_FENCE_PITCH_SHIFT	2
 #define   I965_FENCE_TILING_Y_SHIFT	1
 #define   I965_FENCE_REG_VALID		(1<<0)
 #define   I965_FENCE_MAX_PITCH_VAL	0x0400
 
-#define FENCE_REG_GEN6_LO(i)	(0x100000 + (i) * 8)
-#define FENCE_REG_GEN6_HI(i)	(0x100000 + (i) * 8 + 4)
+#define FENCE_REG_GEN6_LO(i)		_MMIO(0x100000 + (i) * 8)
+#define FENCE_REG_GEN6_HI(i)		_MMIO(0x100000 + (i) * 8 + 4)
 #define   GEN6_FENCE_PITCH_SHIFT	32
 #define   GEN7_FENCE_MAX_PITCH_VAL	0x0800
 
 
 /* control register for cpu gtt access */
-#define TILECTL				0x101000
+#define TILECTL				_MMIO(0x101000)
 #define   TILECTL_SWZCTL			(1 << 0)
 #define   TILECTL_TLBPF			(1 << 1)
 #define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
@@ -1486,30 +1537,30 @@
 /*
  * Instruction and interrupt control regs
  */
-#define PGTBL_CTL	0x02020
+#define PGTBL_CTL	_MMIO(0x02020)
 #define   PGTBL_ADDRESS_LO_MASK	0xfffff000 /* bits [31:12] */
 #define   PGTBL_ADDRESS_HI_MASK	0x000000f0 /* bits [35:32] (gen4) */
-#define PGTBL_ER	0x02024
-#define PRB0_BASE (0x2030-0x30)
-#define PRB1_BASE (0x2040-0x30) /* 830,gen3 */
-#define PRB2_BASE (0x2050-0x30) /* gen3 */
-#define SRB0_BASE (0x2100-0x30) /* gen2 */
-#define SRB1_BASE (0x2110-0x30) /* gen2 */
-#define SRB2_BASE (0x2120-0x30) /* 830 */
-#define SRB3_BASE (0x2130-0x30) /* 830 */
+#define PGTBL_ER	_MMIO(0x02024)
+#define PRB0_BASE	(0x2030-0x30)
+#define PRB1_BASE	(0x2040-0x30) /* 830,gen3 */
+#define PRB2_BASE	(0x2050-0x30) /* gen3 */
+#define SRB0_BASE	(0x2100-0x30) /* gen2 */
+#define SRB1_BASE	(0x2110-0x30) /* gen2 */
+#define SRB2_BASE	(0x2120-0x30) /* 830 */
+#define SRB3_BASE	(0x2130-0x30) /* 830 */
 #define RENDER_RING_BASE	0x02000
 #define BSD_RING_BASE		0x04000
 #define GEN6_BSD_RING_BASE	0x12000
 #define GEN8_BSD2_RING_BASE	0x1c000
 #define VEBOX_RING_BASE		0x1a000
 #define BLT_RING_BASE		0x22000
-#define RING_TAIL(base)		((base)+0x30)
-#define RING_HEAD(base)		((base)+0x34)
-#define RING_START(base)	((base)+0x38)
-#define RING_CTL(base)		((base)+0x3c)
-#define RING_SYNC_0(base)	((base)+0x40)
-#define RING_SYNC_1(base)	((base)+0x44)
-#define RING_SYNC_2(base)	((base)+0x48)
+#define RING_TAIL(base)		_MMIO((base)+0x30)
+#define RING_HEAD(base)		_MMIO((base)+0x34)
+#define RING_START(base)	_MMIO((base)+0x38)
+#define RING_CTL(base)		_MMIO((base)+0x3c)
+#define RING_SYNC_0(base)	_MMIO((base)+0x40)
+#define RING_SYNC_1(base)	_MMIO((base)+0x44)
+#define RING_SYNC_2(base)	_MMIO((base)+0x48)
 #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
 #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
 #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
@@ -1522,51 +1573,52 @@
 #define GEN6_VEBSYNC	(RING_SYNC_0(VEBOX_RING_BASE))
 #define GEN6_VERSYNC	(RING_SYNC_1(VEBOX_RING_BASE))
 #define GEN6_VEVSYNC	(RING_SYNC_2(VEBOX_RING_BASE))
-#define GEN6_NOSYNC 0
-#define RING_PSMI_CTL(base)	((base)+0x50)
-#define RING_MAX_IDLE(base)	((base)+0x54)
-#define RING_HWS_PGA(base)	((base)+0x80)
-#define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
-#define RING_RESET_CTL(base)	((base)+0xd0)
+#define GEN6_NOSYNC	INVALID_MMIO_REG
+#define RING_PSMI_CTL(base)	_MMIO((base)+0x50)
+#define RING_MAX_IDLE(base)	_MMIO((base)+0x54)
+#define RING_HWS_PGA(base)	_MMIO((base)+0x80)
+#define RING_HWS_PGA_GEN6(base)	_MMIO((base)+0x2080)
+#define RING_RESET_CTL(base)	_MMIO((base)+0xd0)
 #define   RESET_CTL_REQUEST_RESET  (1 << 0)
 #define   RESET_CTL_READY_TO_RESET (1 << 1)
 
-#define HSW_GTT_CACHE_EN	0x4024
+#define HSW_GTT_CACHE_EN	_MMIO(0x4024)
 #define   GTT_CACHE_EN_ALL	0xF0007FFF
-#define GEN7_WR_WATERMARK	0x4028
-#define GEN7_GFX_PRIO_CTRL	0x402C
-#define ARB_MODE		0x4030
+#define GEN7_WR_WATERMARK	_MMIO(0x4028)
+#define GEN7_GFX_PRIO_CTRL	_MMIO(0x402C)
+#define ARB_MODE		_MMIO(0x4030)
 #define   ARB_MODE_SWIZZLE_SNB	(1<<4)
 #define   ARB_MODE_SWIZZLE_IVB	(1<<5)
-#define GEN7_GFX_PEND_TLB0	0x4034
-#define GEN7_GFX_PEND_TLB1	0x4038
+#define GEN7_GFX_PEND_TLB0	_MMIO(0x4034)
+#define GEN7_GFX_PEND_TLB1	_MMIO(0x4038)
 /* L3, CVS, ZTLB, RCC, CASC LRA min, max values */
-#define GEN7_LRA_LIMITS(i)	(0x403C + (i) * 4)
+#define GEN7_LRA_LIMITS(i)	_MMIO(0x403C + (i) * 4)
 #define GEN7_LRA_LIMITS_REG_NUM	13
-#define GEN7_MEDIA_MAX_REQ_COUNT	0x4070
-#define GEN7_GFX_MAX_REQ_COUNT		0x4074
+#define GEN7_MEDIA_MAX_REQ_COUNT	_MMIO(0x4070)
+#define GEN7_GFX_MAX_REQ_COUNT		_MMIO(0x4074)
 
-#define GAMTARBMODE		0x04a08
+#define GAMTARBMODE		_MMIO(0x04a08)
 #define   ARB_MODE_BWGTLB_DISABLE (1<<9)
 #define   ARB_MODE_SWIZZLE_BDW	(1<<1)
-#define RENDER_HWS_PGA_GEN7	(0x04080)
-#define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
+#define RENDER_HWS_PGA_GEN7	_MMIO(0x04080)
+#define RING_FAULT_REG(ring)	_MMIO(0x4094 + 0x100*(ring)->id)
 #define   RING_FAULT_GTTSEL_MASK (1<<11)
 #define   RING_FAULT_SRCID(x)	(((x) >> 3) & 0xff)
 #define   RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3)
 #define   RING_FAULT_VALID	(1<<0)
-#define DONE_REG		0x40b0
-#define GEN8_PRIVATE_PAT_LO	0x40e0
-#define GEN8_PRIVATE_PAT_HI	(0x40e0 + 4)
-#define BSD_HWS_PGA_GEN7	(0x04180)
-#define BLT_HWS_PGA_GEN7	(0x04280)
-#define VEBOX_HWS_PGA_GEN7	(0x04380)
-#define RING_ACTHD(base)	((base)+0x74)
-#define RING_ACTHD_UDW(base)	((base)+0x5c)
-#define RING_NOPID(base)	((base)+0x94)
-#define RING_IMR(base)		((base)+0xa8)
-#define RING_HWSTAM(base)	((base)+0x98)
-#define RING_TIMESTAMP(base)	((base)+0x358)
+#define DONE_REG		_MMIO(0x40b0)
+#define GEN8_PRIVATE_PAT_LO	_MMIO(0x40e0)
+#define GEN8_PRIVATE_PAT_HI	_MMIO(0x40e0 + 4)
+#define BSD_HWS_PGA_GEN7	_MMIO(0x04180)
+#define BLT_HWS_PGA_GEN7	_MMIO(0x04280)
+#define VEBOX_HWS_PGA_GEN7	_MMIO(0x04380)
+#define RING_ACTHD(base)	_MMIO((base)+0x74)
+#define RING_ACTHD_UDW(base)	_MMIO((base)+0x5c)
+#define RING_NOPID(base)	_MMIO((base)+0x94)
+#define RING_IMR(base)		_MMIO((base)+0xa8)
+#define RING_HWSTAM(base)	_MMIO((base)+0x98)
+#define RING_TIMESTAMP(base)		_MMIO((base)+0x358)
+#define RING_TIMESTAMP_UDW(base)	_MMIO((base)+0x358 + 4)
 #define   TAIL_ADDR		0x001FFFF8
 #define   HEAD_WRAP_COUNT	0xFFE00000
 #define   HEAD_WRAP_ONE		0x00200000
@@ -1583,57 +1635,65 @@
 #define   RING_WAIT		(1<<11) /* gen3+, PRBx_CTL */
 #define   RING_WAIT_SEMAPHORE	(1<<10) /* gen6+ */
 
-#define GEN7_TLB_RD_ADDR	0x4700
+#define GEN7_TLB_RD_ADDR	_MMIO(0x4700)
 
 #if 0
-#define PRB0_TAIL	0x02030
-#define PRB0_HEAD	0x02034
-#define PRB0_START	0x02038
-#define PRB0_CTL	0x0203c
-#define PRB1_TAIL	0x02040 /* 915+ only */
-#define PRB1_HEAD	0x02044 /* 915+ only */
-#define PRB1_START	0x02048 /* 915+ only */
-#define PRB1_CTL	0x0204c /* 915+ only */
+#define PRB0_TAIL	_MMIO(0x2030)
+#define PRB0_HEAD	_MMIO(0x2034)
+#define PRB0_START	_MMIO(0x2038)
+#define PRB0_CTL	_MMIO(0x203c)
+#define PRB1_TAIL	_MMIO(0x2040) /* 915+ only */
+#define PRB1_HEAD	_MMIO(0x2044) /* 915+ only */
+#define PRB1_START	_MMIO(0x2048) /* 915+ only */
+#define PRB1_CTL	_MMIO(0x204c) /* 915+ only */
 #endif
-#define IPEIR_I965	0x02064
-#define IPEHR_I965	0x02068
-#define GEN7_SC_INSTDONE	0x07100
-#define GEN7_SAMPLER_INSTDONE	0x0e160
-#define GEN7_ROW_INSTDONE	0x0e164
+#define IPEIR_I965	_MMIO(0x2064)
+#define IPEHR_I965	_MMIO(0x2068)
+#define GEN7_SC_INSTDONE	_MMIO(0x7100)
+#define GEN7_SAMPLER_INSTDONE	_MMIO(0xe160)
+#define GEN7_ROW_INSTDONE	_MMIO(0xe164)
 #define I915_NUM_INSTDONE_REG	4
-#define RING_IPEIR(base)	((base)+0x64)
-#define RING_IPEHR(base)	((base)+0x68)
+#define RING_IPEIR(base)	_MMIO((base)+0x64)
+#define RING_IPEHR(base)	_MMIO((base)+0x68)
 /*
  * On GEN4, only the render ring INSTDONE exists and has a different
  * layout than the GEN7+ version.
  * The GEN2 counterpart of this register is GEN2_INSTDONE.
  */
-#define RING_INSTDONE(base)	((base)+0x6c)
-#define RING_INSTPS(base)	((base)+0x70)
-#define RING_DMA_FADD(base)	((base)+0x78)
-#define RING_DMA_FADD_UDW(base)	((base)+0x60) /* gen8+ */
-#define RING_INSTPM(base)	((base)+0xc0)
-#define RING_MI_MODE(base)	((base)+0x9c)
-#define INSTPS		0x02070 /* 965+ only */
-#define GEN4_INSTDONE1	0x0207c /* 965+ only, aka INSTDONE_2 on SNB */
-#define ACTHD_I965	0x02074
-#define HWS_PGA		0x02080
+#define RING_INSTDONE(base)	_MMIO((base)+0x6c)
+#define RING_INSTPS(base)	_MMIO((base)+0x70)
+#define RING_DMA_FADD(base)	_MMIO((base)+0x78)
+#define RING_DMA_FADD_UDW(base)	_MMIO((base)+0x60) /* gen8+ */
+#define RING_INSTPM(base)	_MMIO((base)+0xc0)
+#define RING_MI_MODE(base)	_MMIO((base)+0x9c)
+#define INSTPS		_MMIO(0x2070) /* 965+ only */
+#define GEN4_INSTDONE1	_MMIO(0x207c) /* 965+ only, aka INSTDONE_2 on SNB */
+#define ACTHD_I965	_MMIO(0x2074)
+#define HWS_PGA		_MMIO(0x2080)
 #define HWS_ADDRESS_MASK	0xfffff000
 #define HWS_START_ADDRESS_SHIFT	4
-#define PWRCTXA		0x2088 /* 965GM+ only */
+#define PWRCTXA		_MMIO(0x2088) /* 965GM+ only */
 #define   PWRCTX_EN	(1<<0)
-#define IPEIR		0x02088
-#define IPEHR		0x0208c
-#define GEN2_INSTDONE	0x02090
-#define NOPID		0x02094
-#define HWSTAM		0x02098
-#define DMA_FADD_I8XX	0x020d0
-#define RING_BBSTATE(base)	((base)+0x110)
-#define RING_BBADDR(base)	((base)+0x140)
-#define RING_BBADDR_UDW(base)	((base)+0x168) /* gen8+ */
+#define IPEIR		_MMIO(0x2088)
+#define IPEHR		_MMIO(0x208c)
+#define GEN2_INSTDONE	_MMIO(0x2090)
+#define NOPID		_MMIO(0x2094)
+#define HWSTAM		_MMIO(0x2098)
+#define DMA_FADD_I8XX	_MMIO(0x20d0)
+#define RING_BBSTATE(base)	_MMIO((base)+0x110)
+#define   RING_BB_PPGTT		(1 << 5)
+#define RING_SBBADDR(base)	_MMIO((base)+0x114) /* hsw+ */
+#define RING_SBBSTATE(base)	_MMIO((base)+0x118) /* hsw+ */
+#define RING_SBBADDR_UDW(base)	_MMIO((base)+0x11c) /* gen8+ */
+#define RING_BBADDR(base)	_MMIO((base)+0x140)
+#define RING_BBADDR_UDW(base)	_MMIO((base)+0x168) /* gen8+ */
+#define RING_BB_PER_CTX_PTR(base)	_MMIO((base)+0x1c0) /* gen8+ */
+#define RING_INDIRECT_CTX(base)		_MMIO((base)+0x1c4) /* gen8+ */
+#define RING_INDIRECT_CTX_OFFSET(base)	_MMIO((base)+0x1c8) /* gen8+ */
+#define RING_CTX_TIMESTAMP(base)	_MMIO((base)+0x3a8) /* gen8+ */
 
-#define ERROR_GEN6	0x040a0
-#define GEN7_ERR_INT	0x44040
+#define ERROR_GEN6	_MMIO(0x40a0)
+#define GEN7_ERR_INT	_MMIO(0x44040)
 #define   ERR_INT_POISON		(1<<31)
 #define   ERR_INT_MMIO_UNCLAIMED	(1<<13)
 #define   ERR_INT_PIPE_CRC_DONE_C	(1<<8)
@@ -1645,13 +1705,13 @@
 #define   ERR_INT_FIFO_UNDERRUN_A	(1<<0)
 #define   ERR_INT_FIFO_UNDERRUN(pipe)	(1<<((pipe)*3))
 
-#define GEN8_FAULT_TLB_DATA0		0x04b10
-#define GEN8_FAULT_TLB_DATA1		0x04b14
+#define GEN8_FAULT_TLB_DATA0		_MMIO(0x4b10)
+#define GEN8_FAULT_TLB_DATA1		_MMIO(0x4b14)
 
-#define FPGA_DBG		0x42300
+#define FPGA_DBG		_MMIO(0x42300)
 #define   FPGA_DBG_RM_NOCLAIM	(1<<31)
 
-#define DERRMR		0x44050
+#define DERRMR		_MMIO(0x44050)
 /* Note that HBLANK events are reserved on bdw+ */
 #define   DERRMR_PIPEA_SCANLINE		(1<<0)
 #define   DERRMR_PIPEA_PRI_FLIP_DONE	(1<<1)
@@ -1675,29 +1735,29 @@
  * for various sorts of correct behavior.  The top 16 bits of each are
  * the enables for writing to the corresponding low bit.
  */
-#define _3D_CHICKEN	0x02084
+#define _3D_CHICKEN	_MMIO(0x2084)
 #define  _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB	(1 << 10)
-#define _3D_CHICKEN2	0x0208c
+#define _3D_CHICKEN2	_MMIO(0x208c)
 /* Disables pipelining of read flushes past the SF-WIZ interface.
  * Required on all Ironlake steppings according to the B-Spec, but the
  * particular danger of not doing so is not specified.
  */
 # define _3D_CHICKEN2_WM_READ_PIPELINED			(1 << 14)
-#define _3D_CHICKEN3	0x02090
+#define _3D_CHICKEN3	_MMIO(0x2090)
 #define  _3D_CHICKEN_SF_DISABLE_OBJEND_CULL		(1 << 10)
 #define  _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL		(1 << 5)
 #define  _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(x)	((x)<<1) /* gen8+ */
 #define  _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH	(1 << 1) /* gen6 */
 
-#define MI_MODE		0x0209c
+#define MI_MODE		_MMIO(0x209c)
 # define VS_TIMER_DISPATCH				(1 << 6)
 # define MI_FLUSH_ENABLE				(1 << 12)
 # define ASYNC_FLIP_PERF_DISABLE			(1 << 14)
 # define MODE_IDLE					(1 << 9)
 # define STOP_RING					(1 << 8)
 
-#define GEN6_GT_MODE	0x20d0
-#define GEN7_GT_MODE	0x7008
+#define GEN6_GT_MODE	_MMIO(0x20d0)
+#define GEN7_GT_MODE	_MMIO(0x7008)
 #define   GEN6_WIZ_HASHING(hi, lo)			(((hi) << 9) | ((lo) << 7))
 #define   GEN6_WIZ_HASHING_8x8				GEN6_WIZ_HASHING(0, 0)
 #define   GEN6_WIZ_HASHING_8x4				GEN6_WIZ_HASHING(0, 1)
@@ -1707,9 +1767,9 @@
 #define   GEN9_IZ_HASHING_MASK(slice)			(0x3 << ((slice) * 2))
 #define   GEN9_IZ_HASHING(slice, val)			((val) << ((slice) * 2))
 
-#define GFX_MODE	0x02520
-#define GFX_MODE_GEN7	0x0229c
-#define RING_MODE_GEN7(ring)	((ring)->mmio_base+0x29c)
+#define GFX_MODE	_MMIO(0x2520)
+#define GFX_MODE_GEN7	_MMIO(0x229c)
+#define RING_MODE_GEN7(ring)	_MMIO((ring)->mmio_base+0x29c)
 #define   GFX_RUN_LIST_ENABLE		(1<<15)
 #define   GFX_INTERRUPT_STEERING	(1<<14)
 #define   GFX_TLB_INVALIDATE_EXPLICIT	(1<<13)
@@ -1727,36 +1787,36 @@
 #define VLV_DISPLAY_BASE 0x180000
 #define VLV_MIPI_BASE VLV_DISPLAY_BASE
 
-#define VLV_GU_CTL0	(VLV_DISPLAY_BASE + 0x2030)
-#define VLV_GU_CTL1	(VLV_DISPLAY_BASE + 0x2034)
-#define SCPD0		0x0209c /* 915+ only */
-#define IER		0x020a0
-#define IIR		0x020a4
-#define IMR		0x020a8
-#define ISR		0x020ac
-#define VLV_GUNIT_CLOCK_GATE	(VLV_DISPLAY_BASE + 0x2060)
+#define VLV_GU_CTL0	_MMIO(VLV_DISPLAY_BASE + 0x2030)
+#define VLV_GU_CTL1	_MMIO(VLV_DISPLAY_BASE + 0x2034)
+#define SCPD0		_MMIO(0x209c) /* 915+ only */
+#define IER		_MMIO(0x20a0)
+#define IIR		_MMIO(0x20a4)
+#define IMR		_MMIO(0x20a8)
+#define ISR		_MMIO(0x20ac)
+#define VLV_GUNIT_CLOCK_GATE	_MMIO(VLV_DISPLAY_BASE + 0x2060)
 #define   GINT_DIS		(1<<22)
 #define   GCFG_DIS		(1<<8)
-#define VLV_GUNIT_CLOCK_GATE2	(VLV_DISPLAY_BASE + 0x2064)
-#define VLV_IIR_RW	(VLV_DISPLAY_BASE + 0x2084)
-#define VLV_IER		(VLV_DISPLAY_BASE + 0x20a0)
-#define VLV_IIR		(VLV_DISPLAY_BASE + 0x20a4)
-#define VLV_IMR		(VLV_DISPLAY_BASE + 0x20a8)
-#define VLV_ISR		(VLV_DISPLAY_BASE + 0x20ac)
-#define VLV_PCBR	(VLV_DISPLAY_BASE + 0x2120)
+#define VLV_GUNIT_CLOCK_GATE2	_MMIO(VLV_DISPLAY_BASE + 0x2064)
+#define VLV_IIR_RW	_MMIO(VLV_DISPLAY_BASE + 0x2084)
+#define VLV_IER		_MMIO(VLV_DISPLAY_BASE + 0x20a0)
+#define VLV_IIR		_MMIO(VLV_DISPLAY_BASE + 0x20a4)
+#define VLV_IMR		_MMIO(VLV_DISPLAY_BASE + 0x20a8)
+#define VLV_ISR		_MMIO(VLV_DISPLAY_BASE + 0x20ac)
+#define VLV_PCBR	_MMIO(VLV_DISPLAY_BASE + 0x2120)
 #define VLV_PCBR_ADDR_SHIFT	12
 
 #define   DISPLAY_PLANE_FLIP_PENDING(plane) (1<<(11-(plane))) /* A and B only */
-#define EIR		0x020b0
-#define EMR		0x020b4
-#define ESR		0x020b8
+#define EIR		_MMIO(0x20b0)
+#define EMR		_MMIO(0x20b4)
+#define ESR		_MMIO(0x20b8)
 #define   GM45_ERROR_PAGE_TABLE				(1<<5)
 #define   GM45_ERROR_MEM_PRIV				(1<<4)
 #define   I915_ERROR_PAGE_TABLE				(1<<4)
 #define   GM45_ERROR_CP_PRIV				(1<<3)
 #define   I915_ERROR_MEMORY_REFRESH			(1<<1)
 #define   I915_ERROR_INSTRUCTION			(1<<0)
-#define INSTPM	        0x020c0
+#define INSTPM	        _MMIO(0x20c0)
 #define   INSTPM_SELF_EN (1<<12) /* 915GM only */
 #define   INSTPM_AGPBUSY_INT_EN (1<<11) /* gen3: when disabled, pending interrupts
 					will not assert AGPBUSY# and will only
@@ -1764,14 +1824,14 @@
 #define   INSTPM_FORCE_ORDERING				(1<<7) /* GEN6+ */
 #define   INSTPM_TLB_INVALIDATE	(1<<9)
 #define   INSTPM_SYNC_FLUSH	(1<<5)
-#define ACTHD	        0x020c8
-#define MEM_MODE	0x020cc
+#define ACTHD	        _MMIO(0x20c8)
+#define MEM_MODE	_MMIO(0x20cc)
 #define   MEM_DISPLAY_B_TRICKLE_FEED_DISABLE (1<<3) /* 830 only */
 #define   MEM_DISPLAY_A_TRICKLE_FEED_DISABLE (1<<2) /* 830/845 only */
 #define   MEM_DISPLAY_TRICKLE_FEED_DISABLE (1<<2) /* 85x only */
-#define FW_BLC		0x020d8
-#define FW_BLC2		0x020dc
-#define FW_BLC_SELF	0x020e0 /* 915+ only */
+#define FW_BLC		_MMIO(0x20d8)
+#define FW_BLC2		_MMIO(0x20dc)
+#define FW_BLC_SELF	_MMIO(0x20e0) /* 915+ only */
 #define   FW_BLC_SELF_EN_MASK      (1<<31)
 #define   FW_BLC_SELF_FIFO_MASK    (1<<16) /* 945 only */
 #define   FW_BLC_SELF_EN           (1<<15) /* 945 only */
@@ -1779,7 +1839,7 @@
 #define MM_FIFO_WATERMARK   0x0001F000
 #define LM_BURST_LENGTH     0x00000700
 #define LM_FIFO_WATERMARK   0x0000001F
-#define MI_ARB_STATE	0x020e4 /* 915+ only */
+#define MI_ARB_STATE	_MMIO(0x20e4) /* 915+ only */
 
 /* Make render/texture TLB fetches lower priorty than associated data
  *   fetches. This is not turned on by default
@@ -1843,11 +1903,11 @@
 #define   MI_ARB_DISPLAY_PRIORITY_A_B		(0 << 0)	/* display A > display B */
 #define   MI_ARB_DISPLAY_PRIORITY_B_A		(1 << 0)	/* display B > display A */
 
-#define MI_STATE	0x020e4 /* gen2 only */
+#define MI_STATE	_MMIO(0x20e4) /* gen2 only */
 #define   MI_AGPBUSY_INT_EN			(1 << 1) /* 85x only */
 #define   MI_AGPBUSY_830_MODE			(1 << 0) /* 85x only */
 
-#define CACHE_MODE_0	0x02120 /* 915+ only */
+#define CACHE_MODE_0	_MMIO(0x2120) /* 915+ only */
 #define   CM0_PIPELINED_RENDER_FLUSH_DISABLE (1<<8)
 #define   CM0_IZ_OPT_DISABLE      (1<<6)
 #define   CM0_ZR_OPT_DISABLE      (1<<5)
@@ -1856,32 +1916,32 @@
 #define   CM0_COLOR_EVICT_DISABLE (1<<3)
 #define   CM0_DEPTH_WRITE_DISABLE (1<<1)
 #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
-#define GFX_FLSH_CNTL	0x02170 /* 915+ only */
-#define GFX_FLSH_CNTL_GEN6	0x101008
+#define GFX_FLSH_CNTL	_MMIO(0x2170) /* 915+ only */
+#define GFX_FLSH_CNTL_GEN6	_MMIO(0x101008)
 #define   GFX_FLSH_CNTL_EN	(1<<0)
-#define ECOSKPD		0x021d0
+#define ECOSKPD		_MMIO(0x21d0)
 #define   ECO_GATING_CX_ONLY	(1<<3)
 #define   ECO_FLIP_DONE		(1<<0)
 
-#define CACHE_MODE_0_GEN7	0x7000 /* IVB+ */
+#define CACHE_MODE_0_GEN7	_MMIO(0x7000) /* IVB+ */
 #define RC_OP_FLUSH_ENABLE (1<<0)
 #define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
-#define CACHE_MODE_1		0x7004 /* IVB+ */
+#define CACHE_MODE_1		_MMIO(0x7004) /* IVB+ */
 #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE	(1<<6)
 #define   GEN8_4x4_STC_OPTIMIZATION_DISABLE	(1<<6)
 #define   GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE	(1<<1)
 
-#define GEN6_BLITTER_ECOSKPD	0x221d0
+#define GEN6_BLITTER_ECOSKPD	_MMIO(0x221d0)
 #define   GEN6_BLITTER_LOCK_SHIFT			16
 #define   GEN6_BLITTER_FBC_NOTIFY			(1<<3)
 
-#define GEN6_RC_SLEEP_PSMI_CONTROL	0x2050
+#define GEN6_RC_SLEEP_PSMI_CONTROL	_MMIO(0x2050)
 #define   GEN6_PSMI_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN8_RC_SEMA_IDLE_MSG_DISABLE	(1 << 12)
 #define   GEN8_FF_DOP_CLOCK_GATE_DISABLE	(1<<10)
 
 /* Fuse readout registers for GT */
-#define CHV_FUSE_GT			(VLV_DISPLAY_BASE + 0x2168)
+#define CHV_FUSE_GT			_MMIO(VLV_DISPLAY_BASE + 0x2168)
 #define   CHV_FGT_DISABLE_SS0		(1 << 10)
 #define   CHV_FGT_DISABLE_SS1		(1 << 11)
 #define   CHV_FGT_EU_DIS_SS0_R0_SHIFT	16
@@ -1893,7 +1953,7 @@
 #define   CHV_FGT_EU_DIS_SS1_R1_SHIFT	28
 #define   CHV_FGT_EU_DIS_SS1_R1_MASK	(0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT)
 
-#define GEN8_FUSE2			0x9120
+#define GEN8_FUSE2			_MMIO(0x9120)
 #define   GEN8_F2_SS_DIS_SHIFT		21
 #define   GEN8_F2_SS_DIS_MASK		(0x7 << GEN8_F2_SS_DIS_SHIFT)
 #define   GEN8_F2_S_ENA_SHIFT		25
@@ -1902,22 +1962,22 @@
 #define   GEN9_F2_SS_DIS_SHIFT		20
 #define   GEN9_F2_SS_DIS_MASK		(0xf << GEN9_F2_SS_DIS_SHIFT)
 
-#define GEN8_EU_DISABLE0		0x9134
+#define GEN8_EU_DISABLE0		_MMIO(0x9134)
 #define   GEN8_EU_DIS0_S0_MASK		0xffffff
 #define   GEN8_EU_DIS0_S1_SHIFT		24
 #define   GEN8_EU_DIS0_S1_MASK		(0xff << GEN8_EU_DIS0_S1_SHIFT)
 
-#define GEN8_EU_DISABLE1		0x9138
+#define GEN8_EU_DISABLE1		_MMIO(0x9138)
 #define   GEN8_EU_DIS1_S1_MASK		0xffff
 #define   GEN8_EU_DIS1_S2_SHIFT		16
 #define   GEN8_EU_DIS1_S2_MASK		(0xffff << GEN8_EU_DIS1_S2_SHIFT)
 
-#define GEN8_EU_DISABLE2		0x913c
+#define GEN8_EU_DISABLE2		_MMIO(0x913c)
 #define   GEN8_EU_DIS2_S2_MASK		0xff
 
-#define GEN9_EU_DISABLE(slice)		(0x9134 + (slice)*0x4)
+#define GEN9_EU_DISABLE(slice)		_MMIO(0x9134 + (slice)*0x4)
 
-#define GEN6_BSD_SLEEP_PSMI_CONTROL	0x12050
+#define GEN6_BSD_SLEEP_PSMI_CONTROL	_MMIO(0x12050)
 #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN6_BSD_SLEEP_FLUSH_DISABLE	(1 << 2)
 #define   GEN6_BSD_SLEEP_INDICATOR	(1 << 3)
@@ -1995,9 +2055,9 @@
 #define I915_ASLE_INTERRUPT				(1<<0)
 #define I915_BSD_USER_INTERRUPT				(1<<25)
 
-#define GEN6_BSD_RNCID			0x12198
+#define GEN6_BSD_RNCID			_MMIO(0x12198)
 
-#define GEN7_FF_THREAD_MODE		0x20a0
+#define GEN7_FF_THREAD_MODE		_MMIO(0x20a0)
 #define   GEN7_FF_SCHED_MASK		0x0077070
 #define   GEN8_FF_DS_REF_CNT_FFME	(1 << 19)
 #define   GEN7_FF_TS_SCHED_HS1		(0x5<<16)
@@ -2018,9 +2078,9 @@
  * Framebuffer compression (915+ only)
  */
 
-#define FBC_CFB_BASE		0x03200 /* 4k page aligned */
-#define FBC_LL_BASE		0x03204 /* 4k page aligned */
-#define FBC_CONTROL		0x03208
+#define FBC_CFB_BASE		_MMIO(0x3200) /* 4k page aligned */
+#define FBC_LL_BASE		_MMIO(0x3204) /* 4k page aligned */
+#define FBC_CONTROL		_MMIO(0x3208)
 #define   FBC_CTL_EN		(1<<31)
 #define   FBC_CTL_PERIODIC	(1<<30)
 #define   FBC_CTL_INTERVAL_SHIFT (16)
@@ -2028,14 +2088,14 @@
 #define   FBC_CTL_C3_IDLE	(1<<13)
 #define   FBC_CTL_STRIDE_SHIFT	(5)
 #define   FBC_CTL_FENCENO_SHIFT	(0)
-#define FBC_COMMAND		0x0320c
+#define FBC_COMMAND		_MMIO(0x320c)
 #define   FBC_CMD_COMPRESS	(1<<0)
-#define FBC_STATUS		0x03210
+#define FBC_STATUS		_MMIO(0x3210)
 #define   FBC_STAT_COMPRESSING	(1<<31)
 #define   FBC_STAT_COMPRESSED	(1<<30)
 #define   FBC_STAT_MODIFIED	(1<<29)
 #define   FBC_STAT_CURRENT_LINE_SHIFT	(0)
-#define FBC_CONTROL2		0x03214
+#define FBC_CONTROL2		_MMIO(0x3214)
 #define   FBC_CTL_FENCE_DBL	(0<<4)
 #define   FBC_CTL_IDLE_IMM	(0<<2)
 #define   FBC_CTL_IDLE_FULL	(1<<2)
@@ -2043,17 +2103,17 @@
 #define   FBC_CTL_IDLE_DEBUG	(3<<2)
 #define   FBC_CTL_CPU_FENCE	(1<<1)
 #define   FBC_CTL_PLANE(plane)	((plane)<<0)
-#define FBC_FENCE_OFF		0x03218 /* BSpec typo has 321Bh */
-#define FBC_TAG(i)		(0x03300 + (i) * 4)
+#define FBC_FENCE_OFF		_MMIO(0x3218) /* BSpec typo has 321Bh */
+#define FBC_TAG(i)		_MMIO(0x3300 + (i) * 4)
 
-#define FBC_STATUS2		0x43214
+#define FBC_STATUS2		_MMIO(0x43214)
 #define  FBC_COMPRESSION_MASK	0x7ff
 
 #define FBC_LL_SIZE		(1536)
 
 /* Framebuffer compression for GM45+ */
-#define DPFC_CB_BASE		0x3200
-#define DPFC_CONTROL		0x3208
+#define DPFC_CB_BASE		_MMIO(0x3200)
+#define DPFC_CONTROL		_MMIO(0x3208)
 #define   DPFC_CTL_EN		(1<<31)
 #define   DPFC_CTL_PLANE(plane)	((plane)<<30)
 #define   IVB_DPFC_CTL_PLANE(plane)	((plane)<<29)
@@ -2064,37 +2124,37 @@
 #define   DPFC_CTL_LIMIT_1X	(0<<6)
 #define   DPFC_CTL_LIMIT_2X	(1<<6)
 #define   DPFC_CTL_LIMIT_4X	(2<<6)
-#define DPFC_RECOMP_CTL		0x320c
+#define DPFC_RECOMP_CTL		_MMIO(0x320c)
 #define   DPFC_RECOMP_STALL_EN	(1<<27)
 #define   DPFC_RECOMP_STALL_WM_SHIFT (16)
 #define   DPFC_RECOMP_STALL_WM_MASK (0x07ff0000)
 #define   DPFC_RECOMP_TIMER_COUNT_SHIFT (0)
 #define   DPFC_RECOMP_TIMER_COUNT_MASK (0x0000003f)
-#define DPFC_STATUS		0x3210
+#define DPFC_STATUS		_MMIO(0x3210)
 #define   DPFC_INVAL_SEG_SHIFT  (16)
 #define   DPFC_INVAL_SEG_MASK	(0x07ff0000)
 #define   DPFC_COMP_SEG_SHIFT	(0)
 #define   DPFC_COMP_SEG_MASK	(0x000003ff)
-#define DPFC_STATUS2		0x3214
-#define DPFC_FENCE_YOFF		0x3218
-#define DPFC_CHICKEN		0x3224
+#define DPFC_STATUS2		_MMIO(0x3214)
+#define DPFC_FENCE_YOFF		_MMIO(0x3218)
+#define DPFC_CHICKEN		_MMIO(0x3224)
 #define   DPFC_HT_MODIFY	(1<<31)
 
 /* Framebuffer compression for Ironlake */
-#define ILK_DPFC_CB_BASE	0x43200
-#define ILK_DPFC_CONTROL	0x43208
+#define ILK_DPFC_CB_BASE	_MMIO(0x43200)
+#define ILK_DPFC_CONTROL	_MMIO(0x43208)
 #define   FBC_CTL_FALSE_COLOR	(1<<10)
 /* The bit 28-8 is reserved */
 #define   DPFC_RESERVED		(0x1FFFFF00)
-#define ILK_DPFC_RECOMP_CTL	0x4320c
-#define ILK_DPFC_STATUS		0x43210
-#define ILK_DPFC_FENCE_YOFF	0x43218
-#define ILK_DPFC_CHICKEN	0x43224
-#define ILK_FBC_RT_BASE		0x2128
+#define ILK_DPFC_RECOMP_CTL	_MMIO(0x4320c)
+#define ILK_DPFC_STATUS		_MMIO(0x43210)
+#define ILK_DPFC_FENCE_YOFF	_MMIO(0x43218)
+#define ILK_DPFC_CHICKEN	_MMIO(0x43224)
+#define ILK_FBC_RT_BASE		_MMIO(0x2128)
 #define   ILK_FBC_RT_VALID	(1<<0)
 #define   SNB_FBC_FRONT_BUFFER	(1<<1)
 
-#define ILK_DISPLAY_CHICKEN1	0x42000
+#define ILK_DISPLAY_CHICKEN1	_MMIO(0x42000)
 #define   ILK_FBCQ_DIS		(1<<22)
 #define	  ILK_PABSTRETCH_DIS	(1<<21)
 
@@ -2104,31 +2164,31 @@
  *
  * The following two registers are of type GTTMMADR
  */
-#define SNB_DPFC_CTL_SA		0x100100
+#define SNB_DPFC_CTL_SA		_MMIO(0x100100)
 #define   SNB_CPU_FENCE_ENABLE	(1<<29)
-#define DPFC_CPU_FENCE_OFFSET	0x100104
+#define DPFC_CPU_FENCE_OFFSET	_MMIO(0x100104)
 
 /* Framebuffer compression for Ivybridge */
-#define IVB_FBC_RT_BASE			0x7020
+#define IVB_FBC_RT_BASE			_MMIO(0x7020)
 
-#define IPS_CTL		0x43408
+#define IPS_CTL		_MMIO(0x43408)
 #define   IPS_ENABLE	(1 << 31)
 
-#define MSG_FBC_REND_STATE	0x50380
+#define MSG_FBC_REND_STATE	_MMIO(0x50380)
 #define   FBC_REND_NUKE		(1<<2)
 #define   FBC_REND_CACHE_CLEAN	(1<<1)
 
 /*
  * GPIO regs
  */
-#define GPIOA			0x5010
-#define GPIOB			0x5014
-#define GPIOC			0x5018
-#define GPIOD			0x501c
-#define GPIOE			0x5020
-#define GPIOF			0x5024
-#define GPIOG			0x5028
-#define GPIOH			0x502c
+#define GPIOA			_MMIO(0x5010)
+#define GPIOB			_MMIO(0x5014)
+#define GPIOC			_MMIO(0x5018)
+#define GPIOD			_MMIO(0x501c)
+#define GPIOE			_MMIO(0x5020)
+#define GPIOF			_MMIO(0x5024)
+#define GPIOG			_MMIO(0x5028)
+#define GPIOH			_MMIO(0x502c)
 # define GPIO_CLOCK_DIR_MASK		(1 << 0)
 # define GPIO_CLOCK_DIR_IN		(0 << 1)
 # define GPIO_CLOCK_DIR_OUT		(1 << 1)
@@ -2144,7 +2204,7 @@
 # define GPIO_DATA_VAL_IN		(1 << 12)
 # define GPIO_DATA_PULLUP_DISABLE	(1 << 13)
 
-#define GMBUS0			(dev_priv->gpio_mmio_base + 0x5100) /* clock/port select */
+#define GMBUS0			_MMIO(dev_priv->gpio_mmio_base + 0x5100) /* clock/port select */
 #define   GMBUS_RATE_100KHZ	(0<<8)
 #define   GMBUS_RATE_50KHZ	(1<<8)
 #define   GMBUS_RATE_400KHZ	(2<<8) /* reserved on Pineview */
@@ -2163,7 +2223,7 @@
 #define   GMBUS_PIN_2_BXT	2
 #define   GMBUS_PIN_3_BXT	3
 #define   GMBUS_NUM_PINS	7 /* including 0 */
-#define GMBUS1			(dev_priv->gpio_mmio_base + 0x5104) /* command/status */
+#define GMBUS1			_MMIO(dev_priv->gpio_mmio_base + 0x5104) /* command/status */
 #define   GMBUS_SW_CLR_INT	(1<<31)
 #define   GMBUS_SW_RDY		(1<<30)
 #define   GMBUS_ENT		(1<<29) /* enable timeout */
@@ -2177,7 +2237,7 @@
 #define   GMBUS_SLAVE_ADDR_SHIFT 1
 #define   GMBUS_SLAVE_READ	(1<<0)
 #define   GMBUS_SLAVE_WRITE	(0<<0)
-#define GMBUS2			(dev_priv->gpio_mmio_base + 0x5108) /* status */
+#define GMBUS2			_MMIO(dev_priv->gpio_mmio_base + 0x5108) /* status */
 #define   GMBUS_INUSE		(1<<15)
 #define   GMBUS_HW_WAIT_PHASE	(1<<14)
 #define   GMBUS_STALL_TIMEOUT	(1<<13)
@@ -2185,14 +2245,14 @@
 #define   GMBUS_HW_RDY		(1<<11)
 #define   GMBUS_SATOER		(1<<10)
 #define   GMBUS_ACTIVE		(1<<9)
-#define GMBUS3			(dev_priv->gpio_mmio_base + 0x510c) /* data buffer bytes 3-0 */
-#define GMBUS4			(dev_priv->gpio_mmio_base + 0x5110) /* interrupt mask (Pineview+) */
+#define GMBUS3			_MMIO(dev_priv->gpio_mmio_base + 0x510c) /* data buffer bytes 3-0 */
+#define GMBUS4			_MMIO(dev_priv->gpio_mmio_base + 0x5110) /* interrupt mask (Pineview+) */
 #define   GMBUS_SLAVE_TIMEOUT_EN (1<<4)
 #define   GMBUS_NAK_EN		(1<<3)
 #define   GMBUS_IDLE_EN		(1<<2)
 #define   GMBUS_HW_WAIT_EN	(1<<1)
 #define   GMBUS_HW_RDY_EN	(1<<0)
-#define GMBUS5			(dev_priv->gpio_mmio_base + 0x5120) /* byte index */
+#define GMBUS5			_MMIO(dev_priv->gpio_mmio_base + 0x5120) /* byte index */
 #define   GMBUS_2BYTE_INDEX_EN	(1<<31)
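Note that GMBUS0-GMBUS5, like the other macros here that reference a dev_priv field, still expand to an expression that needs a dev_priv local in scope at the use site even after being wrapped in _MMIO(). A hypothetical caller sketch, with illustrative names only:

	/* hypothetical sketch: GMBUS0 expands to
	 * _MMIO(dev_priv->gpio_mmio_base + 0x5100), so dev_priv must be
	 * visible in the calling function */
	static void gmbus_example_select(struct drm_i915_private *dev_priv,
					 u32 pin)
	{
		I915_WRITE(GMBUS0, GMBUS_RATE_100KHZ | pin);
	}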
 
 /*
@@ -2201,11 +2261,11 @@
 #define _DPLL_A (dev_priv->info.display_mmio_offset + 0x6014)
 #define _DPLL_B (dev_priv->info.display_mmio_offset + 0x6018)
 #define _CHV_DPLL_C (dev_priv->info.display_mmio_offset + 0x6030)
-#define DPLL(pipe) _PIPE3((pipe), _DPLL_A, _DPLL_B, _CHV_DPLL_C)
+#define DPLL(pipe) _MMIO_PIPE3((pipe), _DPLL_A, _DPLL_B, _CHV_DPLL_C)
 
-#define VGA0	0x6000
-#define VGA1	0x6004
-#define VGA_PD	0x6010
+#define VGA0	_MMIO(0x6000)
+#define VGA1	_MMIO(0x6004)
+#define VGA_PD	_MMIO(0x6010)
 #define   VGA0_PD_P2_DIV_4	(1 << 7)
 #define   VGA0_PD_P1_DIV_2	(1 << 5)
 #define   VGA0_PD_P1_SHIFT	0
@@ -2241,9 +2301,9 @@
 #define   DPLL_FPA01_P1_POST_DIV_MASK_I830	0x001f0000
 
 /* Additional CHV pll/phy registers */
-#define DPIO_PHY_STATUS			(VLV_DISPLAY_BASE + 0x6240)
+#define DPIO_PHY_STATUS			_MMIO(VLV_DISPLAY_BASE + 0x6240)
 #define   DPLL_PORTD_READY_MASK		(0xf)
-#define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100)
+#define DISPLAY_PHY_CONTROL _MMIO(VLV_DISPLAY_BASE + 0x60100)
 #define   PHY_CH_POWER_DOWN_OVRD_EN(phy, ch)	(1 << (2*(phy)+(ch)+27))
 #define   PHY_LDO_DELAY_0NS			0x0
 #define   PHY_LDO_DELAY_200NS			0x1
@@ -2254,7 +2314,7 @@
 #define   PHY_CH_DEEP_PSR			0x7
 #define   PHY_CH_POWER_MODE(mode, phy, ch)	((mode) << (6*(phy)+3*(ch)+2))
 #define   PHY_COM_LANE_RESET_DEASSERT(phy)	(1 << (phy))
-#define DISPLAY_PHY_STATUS (VLV_DISPLAY_BASE + 0x60104)
+#define DISPLAY_PHY_STATUS _MMIO(VLV_DISPLAY_BASE + 0x60104)
 #define   PHY_POWERGOOD(phy)	(((phy) == DPIO_PHY0) ? (1<<31) : (1<<30))
 #define   PHY_STATUS_CMN_LDO(phy, ch)                   (1 << (6-(6*(phy)+3*(ch))))
 #define   PHY_STATUS_SPLINE_LDO(phy, ch, spline)        (1 << (8-(6*(phy)+3*(ch)+(spline))))
@@ -2300,7 +2360,7 @@
 #define _DPLL_A_MD (dev_priv->info.display_mmio_offset + 0x601c)
 #define _DPLL_B_MD (dev_priv->info.display_mmio_offset + 0x6020)
 #define _CHV_DPLL_C_MD (dev_priv->info.display_mmio_offset + 0x603c)
-#define DPLL_MD(pipe) _PIPE3((pipe), _DPLL_A_MD, _DPLL_B_MD, _CHV_DPLL_C_MD)
+#define DPLL_MD(pipe) _MMIO_PIPE3((pipe), _DPLL_A_MD, _DPLL_B_MD, _CHV_DPLL_C_MD)
 
 /*
  * UDI pixel divider, controlling how many pixels are stuffed into a packet.
@@ -2339,12 +2399,12 @@
 #define   DPLL_MD_VGA_UDI_MULTIPLIER_MASK	0x0000003f
 #define   DPLL_MD_VGA_UDI_MULTIPLIER_SHIFT	0
 
-#define _FPA0	0x06040
-#define _FPA1	0x06044
-#define _FPB0	0x06048
-#define _FPB1	0x0604c
-#define FP0(pipe) _PIPE(pipe, _FPA0, _FPB0)
-#define FP1(pipe) _PIPE(pipe, _FPA1, _FPB1)
+#define _FPA0	0x6040
+#define _FPA1	0x6044
+#define _FPB0	0x6048
+#define _FPB1	0x604c
+#define FP0(pipe) _MMIO_PIPE(pipe, _FPA0, _FPB0)
+#define FP1(pipe) _MMIO_PIPE(pipe, _FPA1, _FPB1)
 #define   FP_N_DIV_MASK		0x003f0000
 #define   FP_N_PINEVIEW_DIV_MASK	0x00ff0000
 #define   FP_N_DIV_SHIFT		16
@@ -2353,7 +2413,7 @@
 #define   FP_M2_DIV_MASK	0x0000003f
 #define   FP_M2_PINEVIEW_DIV_MASK	0x000000ff
 #define   FP_M2_DIV_SHIFT		 0
-#define DPLL_TEST	0x606c
+#define DPLL_TEST	_MMIO(0x606c)
 #define   DPLLB_TEST_SDVO_DIV_1		(0 << 22)
 #define   DPLLB_TEST_SDVO_DIV_2		(1 << 22)
 #define   DPLLB_TEST_SDVO_DIV_4		(2 << 22)
@@ -2364,12 +2424,12 @@
 #define   DPLLA_TEST_N_BYPASS		(1 << 3)
 #define   DPLLA_TEST_M_BYPASS		(1 << 2)
 #define   DPLLA_INPUT_BUFFER_ENABLE	(1 << 0)
-#define D_STATE		0x6104
+#define D_STATE		_MMIO(0x6104)
 #define  DSTATE_GFX_RESET_I830			(1<<6)
 #define  DSTATE_PLL_D3_OFF			(1<<3)
 #define  DSTATE_GFX_CLOCK_GATING		(1<<1)
 #define  DSTATE_DOT_CLOCK_GATING		(1<<0)
-#define DSPCLK_GATE_D	(dev_priv->info.display_mmio_offset + 0x6200)
+#define DSPCLK_GATE_D	_MMIO(dev_priv->info.display_mmio_offset + 0x6200)
 # define DPUNIT_B_CLOCK_GATE_DISABLE		(1 << 30) /* 965 */
 # define VSUNIT_CLOCK_GATE_DISABLE		(1 << 29) /* 965 */
 # define VRHUNIT_CLOCK_GATE_DISABLE		(1 << 28) /* 965 */
@@ -2408,7 +2468,7 @@
 # define ZVUNIT_CLOCK_GATE_DISABLE		(1 << 0) /* 830 */
 # define OVLUNIT_CLOCK_GATE_DISABLE		(1 << 0) /* 845,865 */
 
-#define RENCLK_GATE_D1		0x6204
+#define RENCLK_GATE_D1		_MMIO(0x6204)
 # define BLITTER_CLOCK_GATE_DISABLE		(1 << 13) /* 945GM only */
 # define MPEG_CLOCK_GATE_DISABLE		(1 << 12) /* 945GM only */
 # define PC_FE_CLOCK_GATE_DISABLE		(1 << 11)
@@ -2472,35 +2532,35 @@
 # define I965_FT_CLOCK_GATE_DISABLE		(1 << 1)
 # define I965_DM_CLOCK_GATE_DISABLE		(1 << 0)
 
-#define RENCLK_GATE_D2		0x6208
+#define RENCLK_GATE_D2		_MMIO(0x6208)
 #define VF_UNIT_CLOCK_GATE_DISABLE		(1 << 9)
 #define GS_UNIT_CLOCK_GATE_DISABLE		(1 << 7)
 #define CL_UNIT_CLOCK_GATE_DISABLE		(1 << 6)
 
-#define VDECCLK_GATE_D		0x620C		/* g4x only */
+#define VDECCLK_GATE_D		_MMIO(0x620C)		/* g4x only */
 #define  VCP_UNIT_CLOCK_GATE_DISABLE		(1 << 4)
 
-#define RAMCLK_GATE_D		0x6210		/* CRL only */
-#define DEUC			0x6214          /* CRL only */
+#define RAMCLK_GATE_D		_MMIO(0x6210)		/* CRL only */
+#define DEUC			_MMIO(0x6214)          /* CRL only */
 
-#define FW_BLC_SELF_VLV		(VLV_DISPLAY_BASE + 0x6500)
+#define FW_BLC_SELF_VLV		_MMIO(VLV_DISPLAY_BASE + 0x6500)
 #define  FW_CSPWRDWNEN		(1<<15)
 
-#define MI_ARB_VLV		(VLV_DISPLAY_BASE + 0x6504)
+#define MI_ARB_VLV		_MMIO(VLV_DISPLAY_BASE + 0x6504)
 
-#define CZCLK_CDCLK_FREQ_RATIO	(VLV_DISPLAY_BASE + 0x6508)
+#define CZCLK_CDCLK_FREQ_RATIO	_MMIO(VLV_DISPLAY_BASE + 0x6508)
 #define   CDCLK_FREQ_SHIFT	4
 #define   CDCLK_FREQ_MASK	(0x1f << CDCLK_FREQ_SHIFT)
 #define   CZCLK_FREQ_MASK	0xf
 
-#define GCI_CONTROL		(VLV_DISPLAY_BASE + 0x650C)
+#define GCI_CONTROL		_MMIO(VLV_DISPLAY_BASE + 0x650C)
 #define   PFI_CREDIT_63		(9 << 28)		/* chv only */
 #define   PFI_CREDIT_31		(8 << 28)		/* chv only */
 #define   PFI_CREDIT(x)		(((x) - 8) << 28)	/* 8-15 */
 #define   PFI_CREDIT_RESEND	(1 << 27)
 #define   VGA_FAST_MODE_DISABLE	(1 << 14)
 
-#define GMBUSFREQ_VLV		(VLV_DISPLAY_BASE + 0x6510)
+#define GMBUSFREQ_VLV		_MMIO(VLV_DISPLAY_BASE + 0x6510)
 
 /*
  * Palette regs
@@ -2508,8 +2568,8 @@
 #define PALETTE_A_OFFSET 0xa000
 #define PALETTE_B_OFFSET 0xa800
 #define CHV_PALETTE_C_OFFSET 0xc000
-#define PALETTE(pipe, i) (dev_priv->info.palette_offsets[pipe] + \
-			  dev_priv->info.display_mmio_offset + (i) * 4)
+#define PALETTE(pipe, i) _MMIO(dev_priv->info.palette_offsets[pipe] +	\
+			      dev_priv->info.display_mmio_offset + (i) * 4)
 
 /* MCH MMIO space */
 
@@ -2527,37 +2587,37 @@
 
 #define MCHBAR_MIRROR_BASE_SNB	0x140000
 
-#define CTG_STOLEN_RESERVED		(MCHBAR_MIRROR_BASE + 0x34)
-#define ELK_STOLEN_RESERVED		(MCHBAR_MIRROR_BASE + 0x48)
+#define CTG_STOLEN_RESERVED		_MMIO(MCHBAR_MIRROR_BASE + 0x34)
+#define ELK_STOLEN_RESERVED		_MMIO(MCHBAR_MIRROR_BASE + 0x48)
 #define G4X_STOLEN_RESERVED_ADDR1_MASK	(0xFFFF << 16)
 #define G4X_STOLEN_RESERVED_ADDR2_MASK	(0xFFF << 4)
 
 /* Memory controller frequency in MCHBAR for Haswell (possible SNB+) */
-#define DCLK (MCHBAR_MIRROR_BASE_SNB + 0x5e04)
+#define DCLK _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5e04)
 
 /* 915-945 and GM965 MCH register controlling DRAM channel access */
-#define DCC			0x10200
+#define DCC			_MMIO(MCHBAR_MIRROR_BASE + 0x200)
 #define DCC_ADDRESSING_MODE_SINGLE_CHANNEL		(0 << 0)
 #define DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC	(1 << 0)
 #define DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED	(2 << 0)
 #define DCC_ADDRESSING_MODE_MASK			(3 << 0)
 #define DCC_CHANNEL_XOR_DISABLE				(1 << 10)
 #define DCC_CHANNEL_XOR_BIT_17				(1 << 9)
-#define DCC2			0x10204
+#define DCC2			_MMIO(MCHBAR_MIRROR_BASE + 0x204)
 #define DCC2_MODIFIED_ENHANCED_DISABLE			(1 << 20)
 
 /* Pineview MCH register contains DDR3 setting */
-#define CSHRDDR3CTL            0x101a8
+#define CSHRDDR3CTL            _MMIO(MCHBAR_MIRROR_BASE + 0x1a8)
 #define CSHRDDR3CTL_DDR3       (1 << 2)
 
 /* 965 MCH register controlling DRAM channel configuration */
-#define C0DRB3			0x10206
-#define C1DRB3			0x10606
+#define C0DRB3			_MMIO(MCHBAR_MIRROR_BASE + 0x206)
+#define C1DRB3			_MMIO(MCHBAR_MIRROR_BASE + 0x606)
 
 /* snb MCH registers for reading the DRAM channel configuration */
-#define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
-#define MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
-#define MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define MAD_DIMM_C0			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5004)
+#define MAD_DIMM_C1			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5008)
+#define MAD_DIMM_C2			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x500C)
 #define   MAD_DIMM_ECC_MASK		(0x3 << 24)
 #define   MAD_DIMM_ECC_OFF		(0x0 << 24)
 #define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
@@ -2577,14 +2637,14 @@
 #define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
 
 /* snb MCH registers for priority tuning */
-#define MCH_SSKPD			(MCHBAR_MIRROR_BASE_SNB + 0x5d10)
+#define MCH_SSKPD			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5d10)
 #define   MCH_SSKPD_WM0_MASK		0x3f
 #define   MCH_SSKPD_WM0_VAL		0xc
 
-#define MCH_SECP_NRG_STTS		(MCHBAR_MIRROR_BASE_SNB + 0x592c)
+#define MCH_SECP_NRG_STTS		_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x592c)
 
 /* Clocking configuration register */
-#define CLKCFG			0x10c00
+#define CLKCFG			_MMIO(MCHBAR_MIRROR_BASE + 0xc00)
 #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */
 #define CLKCFG_FSB_533					(1 << 0)	/* hrawclk 133 */
 #define CLKCFG_FSB_667					(3 << 0)	/* hrawclk 166 */
@@ -2600,26 +2660,26 @@
 #define CLKCFG_MEM_800					(3 << 4)
 #define CLKCFG_MEM_MASK					(7 << 4)
 
-#define HPLLVCO                 (MCHBAR_MIRROR_BASE + 0xc38)
-#define HPLLVCO_MOBILE          (MCHBAR_MIRROR_BASE + 0xc0f)
+#define HPLLVCO                 _MMIO(MCHBAR_MIRROR_BASE + 0xc38)
+#define HPLLVCO_MOBILE          _MMIO(MCHBAR_MIRROR_BASE + 0xc0f)
 
-#define TSC1			0x11001
+#define TSC1			_MMIO(0x11001)
 #define   TSE			(1<<0)
-#define TR1			0x11006
-#define TSFS			0x11020
+#define TR1			_MMIO(0x11006)
+#define TSFS			_MMIO(0x11020)
 #define   TSFS_SLOPE_MASK	0x0000ff00
 #define   TSFS_SLOPE_SHIFT	8
 #define   TSFS_INTR_MASK	0x000000ff
 
-#define CRSTANDVID		0x11100
-#define PXVFREQ(i)		(0x11110 + (i) * 4) /* P[0-15]VIDFREQ (0x1114c) (Ironlake) */
+#define CRSTANDVID		_MMIO(0x11100)
+#define PXVFREQ(fstart)		_MMIO(0x11110 + (fstart) * 4)  /* P[0-15]VIDFREQ (0x1114c) (Ironlake) */
 #define   PXVFREQ_PX_MASK	0x7f000000
 #define   PXVFREQ_PX_SHIFT	24
-#define VIDFREQ_BASE		0x11110
-#define VIDFREQ1		0x11110 /* VIDFREQ1-4 (0x1111c) (Cantiga) */
-#define VIDFREQ2		0x11114
-#define VIDFREQ3		0x11118
-#define VIDFREQ4		0x1111c
+#define VIDFREQ_BASE		_MMIO(0x11110)
+#define VIDFREQ1		_MMIO(0x11110) /* VIDFREQ1-4 (0x1111c) (Cantiga) */
+#define VIDFREQ2		_MMIO(0x11114)
+#define VIDFREQ3		_MMIO(0x11118)
+#define VIDFREQ4		_MMIO(0x1111c)
 #define   VIDFREQ_P0_MASK	0x1f000000
 #define   VIDFREQ_P0_SHIFT	24
 #define   VIDFREQ_P0_CSCLK_MASK	0x00f00000
@@ -2631,8 +2691,8 @@
 #define   VIDFREQ_P1_CSCLK_MASK	0x000000f0
 #define   VIDFREQ_P1_CSCLK_SHIFT 4
 #define   VIDFREQ_P1_CRCLK_MASK	0x0000000f
-#define INTTOEXT_BASE_ILK	0x11300
-#define INTTOEXT_BASE		0x11120 /* INTTOEXT1-8 (0x1113c) */
+#define INTTOEXT_BASE_ILK	_MMIO(0x11300)
+#define INTTOEXT_BASE		_MMIO(0x11120) /* INTTOEXT1-8 (0x1113c) */
 #define   INTTOEXT_MAP3_SHIFT	24
 #define   INTTOEXT_MAP3_MASK	(0x1f << INTTOEXT_MAP3_SHIFT)
 #define   INTTOEXT_MAP2_SHIFT	16
@@ -2641,7 +2701,7 @@
 #define   INTTOEXT_MAP1_MASK	(0x1f << INTTOEXT_MAP1_SHIFT)
 #define   INTTOEXT_MAP0_SHIFT	0
 #define   INTTOEXT_MAP0_MASK	(0x1f << INTTOEXT_MAP0_SHIFT)
-#define MEMSWCTL		0x11170 /* Ironlake only */
+#define MEMSWCTL		_MMIO(0x11170) /* Ironlake only */
 #define   MEMCTL_CMD_MASK	0xe000
 #define   MEMCTL_CMD_SHIFT	13
 #define   MEMCTL_CMD_RCLK_OFF	0
@@ -2656,8 +2716,8 @@
 #define   MEMCTL_FREQ_SHIFT	8
 #define   MEMCTL_SFCAVM		(1<<7)
 #define   MEMCTL_TGT_VID_MASK	0x007f
-#define MEMIHYST		0x1117c
-#define MEMINTREN		0x11180 /* 16 bits */
+#define MEMIHYST		_MMIO(0x1117c)
+#define MEMINTREN		_MMIO(0x11180) /* 16 bits */
 #define   MEMINT_RSEXIT_EN	(1<<8)
 #define   MEMINT_CX_SUPR_EN	(1<<7)
 #define   MEMINT_CONT_BUSY_EN	(1<<6)
@@ -2667,7 +2727,7 @@
 #define   MEMINT_UP_EVAL_EN	(1<<2)
 #define   MEMINT_DOWN_EVAL_EN	(1<<1)
 #define   MEMINT_SW_CMD_EN	(1<<0)
-#define MEMINTRSTR		0x11182 /* 16 bits */
+#define MEMINTRSTR		_MMIO(0x11182) /* 16 bits */
 #define   MEM_RSEXIT_MASK	0xc000
 #define   MEM_RSEXIT_SHIFT	14
 #define   MEM_CONT_BUSY_MASK	0x3000
@@ -2687,7 +2747,7 @@
 #define   MEM_INT_STEER_CMR	1
 #define   MEM_INT_STEER_SMI	2
 #define   MEM_INT_STEER_SCI	3
-#define MEMINTRSTS		0x11184
+#define MEMINTRSTS		_MMIO(0x11184)
 #define   MEMINT_RSEXIT		(1<<7)
 #define   MEMINT_CONT_BUSY	(1<<6)
 #define   MEMINT_AVG_BUSY	(1<<5)
@@ -2696,7 +2756,7 @@
 #define   MEMINT_UP_EVAL	(1<<2)
 #define   MEMINT_DOWN_EVAL	(1<<1)
 #define   MEMINT_SW_CMD		(1<<0)
-#define MEMMODECTL		0x11190
+#define MEMMODECTL		_MMIO(0x11190)
 #define   MEMMODE_BOOST_EN	(1<<31)
 #define   MEMMODE_BOOST_FREQ_MASK 0x0f000000 /* jitter for boost, 0-15 */
 #define   MEMMODE_BOOST_FREQ_SHIFT 24
@@ -2713,8 +2773,8 @@
 #define   MEMMODE_FMAX_MASK	0x000000f0 /* max jitter, 0-15 */
 #define   MEMMODE_FMAX_SHIFT	4
 #define   MEMMODE_FMIN_MASK	0x0000000f /* min jitter, 0-15 */
-#define RCBMAXAVG		0x1119c
-#define MEMSWCTL2		0x1119e /* Cantiga only */
+#define RCBMAXAVG		_MMIO(0x1119c)
+#define MEMSWCTL2		_MMIO(0x1119e) /* Cantiga only */
 #define   SWMEMCMD_RENDER_OFF	(0 << 13)
 #define   SWMEMCMD_RENDER_ON	(1 << 13)
 #define   SWMEMCMD_SWFREQ	(2 << 13)
@@ -2726,11 +2786,11 @@
 #define   SWFREQ_MASK		0x0380 /* P0-7 */
 #define   SWFREQ_SHIFT		7
 #define   TARVID_MASK		0x001f
-#define MEMSTAT_CTG		0x111a0
-#define RCBMINAVG		0x111a0
-#define RCUPEI			0x111b0
-#define RCDNEI			0x111b4
-#define RSTDBYCTL		0x111b8
+#define MEMSTAT_CTG		_MMIO(0x111a0)
+#define RCBMINAVG		_MMIO(0x111a0)
+#define RCUPEI			_MMIO(0x111b0)
+#define RCDNEI			_MMIO(0x111b4)
+#define RSTDBYCTL		_MMIO(0x111b8)
 #define   RS1EN			(1<<31)
 #define   RS2EN			(1<<30)
 #define   RS3EN			(1<<29)
@@ -2774,10 +2834,10 @@
 #define   RS_CSTATE_C367_RS2	(3<<4)
 #define   REDSAVES		(1<<3) /* no context save if was idle during rs0 */
 #define   REDRESTORES		(1<<2) /* no restore if was idle during rs0 */
-#define VIDCTL			0x111c0
-#define VIDSTS			0x111c8
-#define VIDSTART		0x111cc /* 8 bits */
-#define MEMSTAT_ILK			0x111f8
+#define VIDCTL			_MMIO(0x111c0)
+#define VIDSTS			_MMIO(0x111c8)
+#define VIDSTART		_MMIO(0x111cc) /* 8 bits */
+#define MEMSTAT_ILK		_MMIO(0x111f8)
 #define   MEMSTAT_VID_MASK	0x7f00
 #define   MEMSTAT_VID_SHIFT	8
 #define   MEMSTAT_PSTATE_MASK	0x00f8
@@ -2788,55 +2848,55 @@
 #define   MEMSTAT_SRC_CTL_TRB	1
 #define   MEMSTAT_SRC_CTL_THM	2
 #define   MEMSTAT_SRC_CTL_STDBY 3
-#define RCPREVBSYTUPAVG		0x113b8
-#define RCPREVBSYTDNAVG		0x113bc
-#define PMMISC			0x11214
+#define RCPREVBSYTUPAVG		_MMIO(0x113b8)
+#define RCPREVBSYTDNAVG		_MMIO(0x113bc)
+#define PMMISC			_MMIO(0x11214)
 #define   MCPPCE_EN		(1<<0) /* enable PM_MSG from PCH->MPC */
-#define SDEW			0x1124c
-#define CSIEW0			0x11250
-#define CSIEW1			0x11254
-#define CSIEW2			0x11258
-#define PEW(i)			(0x1125c + (i) * 4) /* 5 registers */
-#define DEW(i)			(0x11270 + (i) * 4) /* 3 registers */
-#define MCHAFE			0x112c0
-#define CSIEC			0x112e0
-#define DMIEC			0x112e4
-#define DDREC			0x112e8
-#define PEG0EC			0x112ec
-#define PEG1EC			0x112f0
-#define GFXEC			0x112f4
-#define RPPREVBSYTUPAVG		0x113b8
-#define RPPREVBSYTDNAVG		0x113bc
-#define ECR			0x11600
+#define SDEW			_MMIO(0x1124c)
+#define CSIEW0			_MMIO(0x11250)
+#define CSIEW1			_MMIO(0x11254)
+#define CSIEW2			_MMIO(0x11258)
+#define PEW(i)			_MMIO(0x1125c + (i) * 4) /* 5 registers */
+#define DEW(i)			_MMIO(0x11270 + (i) * 4) /* 3 registers */
+#define MCHAFE			_MMIO(0x112c0)
+#define CSIEC			_MMIO(0x112e0)
+#define DMIEC			_MMIO(0x112e4)
+#define DDREC			_MMIO(0x112e8)
+#define PEG0EC			_MMIO(0x112ec)
+#define PEG1EC			_MMIO(0x112f0)
+#define GFXEC			_MMIO(0x112f4)
+#define RPPREVBSYTUPAVG		_MMIO(0x113b8)
+#define RPPREVBSYTDNAVG		_MMIO(0x113bc)
+#define ECR			_MMIO(0x11600)
 #define   ECR_GPFE		(1<<31)
 #define   ECR_IMONE		(1<<30)
 #define   ECR_CAP_MASK		0x0000001f /* Event range, 0-31 */
-#define OGW0			0x11608
-#define OGW1			0x1160c
-#define EG0			0x11610
-#define EG1			0x11614
-#define EG2			0x11618
-#define EG3			0x1161c
-#define EG4			0x11620
-#define EG5			0x11624
-#define EG6			0x11628
-#define EG7			0x1162c
-#define PXW(i)			(0x11664 + (i) * 4) /* 4 registers */
-#define PXWL(i)			(0x11680 + (i) * 4) /* 8 registers */
-#define LCFUSE02		0x116c0
+#define OGW0			_MMIO(0x11608)
+#define OGW1			_MMIO(0x1160c)
+#define EG0			_MMIO(0x11610)
+#define EG1			_MMIO(0x11614)
+#define EG2			_MMIO(0x11618)
+#define EG3			_MMIO(0x1161c)
+#define EG4			_MMIO(0x11620)
+#define EG5			_MMIO(0x11624)
+#define EG6			_MMIO(0x11628)
+#define EG7			_MMIO(0x1162c)
+#define PXW(i)			_MMIO(0x11664 + (i) * 4) /* 4 registers */
+#define PXWL(i)			_MMIO(0x11680 + (i) * 8) /* 8 registers */
+#define LCFUSE02		_MMIO(0x116c0)
 #define   LCFUSE_HIV_MASK	0x000000ff
-#define CSIPLL0			0x12c10
-#define DDRMPLL1		0X12c20
-#define PEG_BAND_GAP_DATA	0x14d68
+#define CSIPLL0			_MMIO(0x12c10)
+#define DDRMPLL1		_MMIO(0X12c20)
+#define PEG_BAND_GAP_DATA	_MMIO(0x14d68)
 
-#define GEN6_GT_THREAD_STATUS_REG 0x13805c
+#define GEN6_GT_THREAD_STATUS_REG _MMIO(0x13805c)
 #define GEN6_GT_THREAD_STATUS_CORE_MASK 0x7
 
-#define GEN6_GT_PERF_STATUS	(MCHBAR_MIRROR_BASE_SNB + 0x5948)
-#define BXT_GT_PERF_STATUS      (MCHBAR_MIRROR_BASE_SNB + 0x7070)
-#define GEN6_RP_STATE_LIMITS	(MCHBAR_MIRROR_BASE_SNB + 0x5994)
-#define GEN6_RP_STATE_CAP	(MCHBAR_MIRROR_BASE_SNB + 0x5998)
-#define BXT_RP_STATE_CAP        0x138170
+#define GEN6_GT_PERF_STATUS	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5948)
+#define BXT_GT_PERF_STATUS      _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7070)
+#define GEN6_RP_STATE_LIMITS	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994)
+#define GEN6_RP_STATE_CAP	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998)
+#define BXT_RP_STATE_CAP        _MMIO(0x138170)
 
 #define INTERVAL_1_28_US(us)	(((us) * 100) >> 7)
 #define INTERVAL_1_33_US(us)	(((us) * 3)   >> 2)
@@ -2850,7 +2910,7 @@
 /*
  * Logical Context regs
  */
-#define CCID			0x2180
+#define CCID			_MMIO(0x2180)
 #define   CCID_EN		(1<<0)
 /*
  * Notes on SNB/IVB/VLV context size:
@@ -2865,7 +2925,7 @@
  * - GT1 size just indicates how much of render context
  *   doesn't need saving on GT1
  */
-#define CXT_SIZE		0x21a0
+#define CXT_SIZE		_MMIO(0x21a0)
 #define GEN6_CXT_POWER_SIZE(cxt_reg)	(((cxt_reg) >> 24) & 0x3f)
 #define GEN6_CXT_RING_SIZE(cxt_reg)	(((cxt_reg) >> 18) & 0x3f)
 #define GEN6_CXT_RENDER_SIZE(cxt_reg)	(((cxt_reg) >> 12) & 0x3f)
@@ -2874,7 +2934,7 @@
 #define GEN6_CXT_TOTAL_SIZE(cxt_reg)	(GEN6_CXT_RING_SIZE(cxt_reg) + \
 					GEN6_CXT_EXTENDED_SIZE(cxt_reg) + \
 					GEN6_CXT_PIPELINE_SIZE(cxt_reg))
-#define GEN7_CXT_SIZE		0x21a8
+#define GEN7_CXT_SIZE		_MMIO(0x21a8)
 #define GEN7_CXT_POWER_SIZE(ctx_reg)	(((ctx_reg) >> 25) & 0x7f)
 #define GEN7_CXT_RING_SIZE(ctx_reg)	(((ctx_reg) >> 22) & 0x7)
 #define GEN7_CXT_RENDER_SIZE(ctx_reg)	(((ctx_reg) >> 16) & 0x3f)
@@ -2894,23 +2954,23 @@
 /* Same as Haswell, but 72064 bytes now. */
 #define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)
 
-#define CHV_CLK_CTL1			0x101100
-#define VLV_CLK_CTL2			0x101104
+#define CHV_CLK_CTL1			_MMIO(0x101100)
+#define VLV_CLK_CTL2			_MMIO(0x101104)
 #define   CLK_CTL2_CZCOUNT_30NS_SHIFT	28
 
 /*
  * Overlay regs
  */
 
-#define OVADD			0x30000
-#define DOVSTA			0x30008
+#define OVADD			_MMIO(0x30000)
+#define DOVSTA			_MMIO(0x30008)
 #define OC_BUF			(0x3<<20)
-#define OGAMC5			0x30010
-#define OGAMC4			0x30014
-#define OGAMC3			0x30018
-#define OGAMC2			0x3001c
-#define OGAMC1			0x30020
-#define OGAMC0			0x30024
+#define OGAMC5			_MMIO(0x30010)
+#define OGAMC4			_MMIO(0x30014)
+#define OGAMC3			_MMIO(0x30018)
+#define OGAMC2			_MMIO(0x3001c)
+#define OGAMC1			_MMIO(0x30020)
+#define OGAMC0			_MMIO(0x30024)
 
 /*
  * Display engine regs
@@ -2970,28 +3030,18 @@
 #define _PIPE_CRC_RES_4_B_IVB		0x61070
 #define _PIPE_CRC_RES_5_B_IVB		0x61074
 
-#define PIPE_CRC_CTL(pipe) _TRANSCODER2(pipe, _PIPE_CRC_CTL_A)
-#define PIPE_CRC_RES_1_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_1_A_IVB)
-#define PIPE_CRC_RES_2_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_2_A_IVB)
-#define PIPE_CRC_RES_3_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_3_A_IVB)
-#define PIPE_CRC_RES_4_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_4_A_IVB)
-#define PIPE_CRC_RES_5_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_5_A_IVB)
+#define PIPE_CRC_CTL(pipe)		_MMIO_TRANS2(pipe, _PIPE_CRC_CTL_A)
+#define PIPE_CRC_RES_1_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_1_A_IVB)
+#define PIPE_CRC_RES_2_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_2_A_IVB)
+#define PIPE_CRC_RES_3_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_3_A_IVB)
+#define PIPE_CRC_RES_4_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_4_A_IVB)
+#define PIPE_CRC_RES_5_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_5_A_IVB)
 
-#define PIPE_CRC_RES_RED(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_RED_A)
-#define PIPE_CRC_RES_GREEN(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_GREEN_A)
-#define PIPE_CRC_RES_BLUE(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_BLUE_A)
-#define PIPE_CRC_RES_RES1_I915(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_RES1_A_I915)
-#define PIPE_CRC_RES_RES2_G4X(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_RES2_A_G4X)
+#define PIPE_CRC_RES_RED(pipe)		_MMIO_TRANS2(pipe, _PIPE_CRC_RES_RED_A)
+#define PIPE_CRC_RES_GREEN(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_GREEN_A)
+#define PIPE_CRC_RES_BLUE(pipe)		_MMIO_TRANS2(pipe, _PIPE_CRC_RES_BLUE_A)
+#define PIPE_CRC_RES_RES1_I915(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_RES1_A_I915)
+#define PIPE_CRC_RES_RES2_G4X(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_RES2_A_G4X)
 
 /* Pipe A timing regs */
 #define _HTOTAL_A	0x60000
@@ -3023,20 +3073,20 @@
 #define CHV_TRANSCODER_C_OFFSET 0x63000
 #define TRANSCODER_EDP_OFFSET 0x6f000
 
-#define _TRANSCODER2(pipe, reg) (dev_priv->info.trans_offsets[(pipe)] - \
+#define _MMIO_TRANS2(pipe, reg) _MMIO(dev_priv->info.trans_offsets[(pipe)] - \
 	dev_priv->info.trans_offsets[TRANSCODER_A] + (reg) + \
 	dev_priv->info.display_mmio_offset)
 
-#define HTOTAL(trans) _TRANSCODER2(trans, _HTOTAL_A)
-#define HBLANK(trans) _TRANSCODER2(trans, _HBLANK_A)
-#define HSYNC(trans) _TRANSCODER2(trans, _HSYNC_A)
-#define VTOTAL(trans) _TRANSCODER2(trans, _VTOTAL_A)
-#define VBLANK(trans) _TRANSCODER2(trans, _VBLANK_A)
-#define VSYNC(trans) _TRANSCODER2(trans, _VSYNC_A)
-#define BCLRPAT(trans) _TRANSCODER2(trans, _BCLRPAT_A)
-#define VSYNCSHIFT(trans) _TRANSCODER2(trans, _VSYNCSHIFT_A)
-#define PIPESRC(trans) _TRANSCODER2(trans, _PIPEASRC)
-#define PIPE_MULT(trans) _TRANSCODER2(trans, _PIPE_MULT_A)
+#define HTOTAL(trans)		_MMIO_TRANS2(trans, _HTOTAL_A)
+#define HBLANK(trans)		_MMIO_TRANS2(trans, _HBLANK_A)
+#define HSYNC(trans)		_MMIO_TRANS2(trans, _HSYNC_A)
+#define VTOTAL(trans)		_MMIO_TRANS2(trans, _VTOTAL_A)
+#define VBLANK(trans)		_MMIO_TRANS2(trans, _VBLANK_A)
+#define VSYNC(trans)		_MMIO_TRANS2(trans, _VSYNC_A)
+#define BCLRPAT(trans)		_MMIO_TRANS2(trans, _BCLRPAT_A)
+#define VSYNCSHIFT(trans)	_MMIO_TRANS2(trans, _VSYNCSHIFT_A)
+#define PIPESRC(trans)		_MMIO_TRANS2(trans, _PIPEASRC)
+#define PIPE_MULT(trans)	_MMIO_TRANS2(trans, _PIPE_MULT_A)
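The per-pipe and per-transcoder macros compose the same wrapper around the existing offset arithmetic: _MMIO_TRANS2(), defined a few lines up, indexes dev_priv->info.trans_offsets[] and wraps the result, while the simpler two-instance helpers are assumed to interpolate between the A and B offsets. A sketch of the latter, with the _PIPE() definition taken as given from earlier in this header:

	/* sketch: _PIPE() picks the instance by linear offset from the A
	 * instance, and _MMIO_PIPE() wraps the result in the typed
	 * register constructor */
	#define _PIPE(pipe, a, b)	((a) + (pipe) * ((b) - (a)))
	#define _MMIO_PIPE(pipe, a, b)	_MMIO(_PIPE(pipe, a, b))

	/* e.g. FP0(pipe) earlier in this file: pipe 0 -> 0x6040,
	 * pipe 1 -> 0x6048 */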
 
 /* VLV eDP PSR registers */
 #define _PSRCTLA				(VLV_DISPLAY_BASE + 0x60090)
@@ -3052,14 +3102,14 @@
 #define  VLV_EDP_PSR_DBL_FRAME			(1<<10)
 #define  VLV_EDP_PSR_FRAME_COUNT_MASK		(0xff<<16)
 #define  VLV_EDP_PSR_IDLE_FRAME_SHIFT		16
-#define VLV_PSRCTL(pipe) _PIPE(pipe, _PSRCTLA, _PSRCTLB)
+#define VLV_PSRCTL(pipe)	_MMIO_PIPE(pipe, _PSRCTLA, _PSRCTLB)
 
 #define _VSCSDPA			(VLV_DISPLAY_BASE + 0x600a0)
 #define _VSCSDPB			(VLV_DISPLAY_BASE + 0x610a0)
 #define  VLV_EDP_PSR_SDP_FREQ_MASK	(3<<30)
 #define  VLV_EDP_PSR_SDP_FREQ_ONCE	(1<<31)
 #define  VLV_EDP_PSR_SDP_FREQ_EVFRAME	(1<<30)
-#define VLV_VSCSDP(pipe)	_PIPE(pipe, _VSCSDPA, _VSCSDPB)
+#define VLV_VSCSDP(pipe)	_MMIO_PIPE(pipe, _VSCSDPA, _VSCSDPB)
 
 #define _PSRSTATA			(VLV_DISPLAY_BASE + 0x60094)
 #define _PSRSTATB			(VLV_DISPLAY_BASE + 0x61094)
@@ -3072,11 +3122,12 @@
 #define  VLV_EDP_PSR_ACTIVE_SF_UPDATE	(4<<0)
 #define  VLV_EDP_PSR_EXIT		(5<<0)
 #define  VLV_EDP_PSR_IN_TRANS		(1<<7)
-#define VLV_PSRSTAT(pipe) _PIPE(pipe, _PSRSTATA, _PSRSTATB)
+#define VLV_PSRSTAT(pipe)	_MMIO_PIPE(pipe, _PSRSTATA, _PSRSTATB)
 
 /* HSW+ eDP PSR registers */
-#define EDP_PSR_BASE(dev)                       (IS_HASWELL(dev) ? 0x64800 : 0x6f800)
-#define EDP_PSR_CTL(dev)			(EDP_PSR_BASE(dev) + 0)
+#define HSW_EDP_PSR_BASE	0x64800
+#define BDW_EDP_PSR_BASE	0x6f800
+#define EDP_PSR_CTL				_MMIO(dev_priv->psr_mmio_base + 0)
 #define   EDP_PSR_ENABLE			(1<<31)
 #define   BDW_PSR_SINGLE_FRAME			(1<<30)
 #define   EDP_PSR_LINK_STANDBY			(1<<27)
@@ -3099,14 +3150,10 @@
 #define   EDP_PSR_TP1_TIME_0us			(3<<4)
 #define   EDP_PSR_IDLE_FRAME_SHIFT		0
 
-#define EDP_PSR_AUX_CTL(dev)			(EDP_PSR_BASE(dev) + 0x10)
-#define EDP_PSR_AUX_DATA1(dev)			(EDP_PSR_BASE(dev) + 0x14)
-#define EDP_PSR_AUX_DATA2(dev)			(EDP_PSR_BASE(dev) + 0x18)
-#define EDP_PSR_AUX_DATA3(dev)			(EDP_PSR_BASE(dev) + 0x1c)
-#define EDP_PSR_AUX_DATA4(dev)			(EDP_PSR_BASE(dev) + 0x20)
-#define EDP_PSR_AUX_DATA5(dev)			(EDP_PSR_BASE(dev) + 0x24)
+#define EDP_PSR_AUX_CTL				_MMIO(dev_priv->psr_mmio_base + 0x10)
+#define EDP_PSR_AUX_DATA(i)			_MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */
 
-#define EDP_PSR_STATUS_CTL(dev)			(EDP_PSR_BASE(dev) + 0x40)
+#define EDP_PSR_STATUS_CTL			_MMIO(dev_priv->psr_mmio_base + 0x40)
 #define   EDP_PSR_STATUS_STATE_MASK		(7<<29)
 #define   EDP_PSR_STATUS_STATE_IDLE		(0<<29)
 #define   EDP_PSR_STATUS_STATE_SRDONACK		(1<<29)
@@ -3130,15 +3177,15 @@
 #define   EDP_PSR_STATUS_SENDING_TP1		(1<<4)
 #define   EDP_PSR_STATUS_IDLE_MASK		0xf
 
-#define EDP_PSR_PERF_CNT(dev)		(EDP_PSR_BASE(dev) + 0x44)
+#define EDP_PSR_PERF_CNT		_MMIO(dev_priv->psr_mmio_base + 0x44)
 #define   EDP_PSR_PERF_CNT_MASK		0xffffff
 
-#define EDP_PSR_DEBUG_CTL(dev)		(EDP_PSR_BASE(dev) + 0x60)
+#define EDP_PSR_DEBUG_CTL		_MMIO(dev_priv->psr_mmio_base + 0x60)
 #define   EDP_PSR_DEBUG_MASK_LPSP	(1<<27)
 #define   EDP_PSR_DEBUG_MASK_MEMUP	(1<<26)
 #define   EDP_PSR_DEBUG_MASK_HPD	(1<<25)
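The HSW+ PSR registers above drop their dev argument: instead of recomputing EDP_PSR_BASE(dev) at every use, the offsets resolve through dev_priv->psr_mmio_base, which the driver is expected to initialise once from the two base defines. A hedged sketch of that setup; the exact init site and platform check are assumptions, not shown in this diff:

	/* sketch only: pick the PSR MMIO block once at init time */
	dev_priv->psr_mmio_base = IS_HASWELL(dev) ?
				  HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE;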
 
-#define EDP_PSR2_CTL			0x6f900
+#define EDP_PSR2_CTL			_MMIO(0x6f900)
 #define   EDP_PSR2_ENABLE		(1<<31)
 #define   EDP_SU_TRACK_ENABLE		(1<<30)
 #define   EDP_MAX_SU_DISABLE_TIME(t)	((t)<<20)
@@ -3153,9 +3200,9 @@
 #define   EDP_PSR2_IDLE_MASK		0xf
 
 /* VGA port control */
-#define ADPA			0x61100
-#define PCH_ADPA                0xe1100
-#define VLV_ADPA		(VLV_DISPLAY_BASE + ADPA)
+#define ADPA			_MMIO(0x61100)
+#define PCH_ADPA                _MMIO(0xe1100)
+#define VLV_ADPA		_MMIO(VLV_DISPLAY_BASE + 0x61100)
 
 #define   ADPA_DAC_ENABLE	(1<<31)
 #define   ADPA_DAC_DISABLE	0
@@ -3201,7 +3248,7 @@
 
 
 /* Hotplug control (945+ only) */
-#define PORT_HOTPLUG_EN		(dev_priv->info.display_mmio_offset + 0x61110)
+#define PORT_HOTPLUG_EN		_MMIO(dev_priv->info.display_mmio_offset + 0x61110)
 #define   PORTB_HOTPLUG_INT_EN			(1 << 29)
 #define   PORTC_HOTPLUG_INT_EN			(1 << 28)
 #define   PORTD_HOTPLUG_INT_EN			(1 << 27)
@@ -3231,7 +3278,7 @@
 #define CRT_HOTPLUG_DETECT_VOLTAGE_325MV	(0 << 2)
 #define CRT_HOTPLUG_DETECT_VOLTAGE_475MV	(1 << 2)
 
-#define PORT_HOTPLUG_STAT	(dev_priv->info.display_mmio_offset + 0x61114)
+#define PORT_HOTPLUG_STAT	_MMIO(dev_priv->info.display_mmio_offset + 0x61114)
 /*
  * HDMI/DP bits are gen4+
  *
@@ -3296,21 +3343,23 @@
 
 /* SDVO and HDMI port control.
  * The same register may be used for SDVO or HDMI */
-#define GEN3_SDVOB	0x61140
-#define GEN3_SDVOC	0x61160
+#define _GEN3_SDVOB	0x61140
+#define _GEN3_SDVOC	0x61160
+#define GEN3_SDVOB	_MMIO(_GEN3_SDVOB)
+#define GEN3_SDVOC	_MMIO(_GEN3_SDVOC)
 #define GEN4_HDMIB	GEN3_SDVOB
 #define GEN4_HDMIC	GEN3_SDVOC
-#define VLV_HDMIB	(VLV_DISPLAY_BASE + GEN4_HDMIB)
-#define VLV_HDMIC	(VLV_DISPLAY_BASE + GEN4_HDMIC)
-#define CHV_HDMID	(VLV_DISPLAY_BASE + 0x6116C)
-#define PCH_SDVOB	0xe1140
+#define VLV_HDMIB	_MMIO(VLV_DISPLAY_BASE + 0x61140)
+#define VLV_HDMIC	_MMIO(VLV_DISPLAY_BASE + 0x61160)
+#define CHV_HDMID	_MMIO(VLV_DISPLAY_BASE + 0x6116C)
+#define PCH_SDVOB	_MMIO(0xe1140)
 #define PCH_HDMIB	PCH_SDVOB
-#define PCH_HDMIC	0xe1150
-#define PCH_HDMID	0xe1160
+#define PCH_HDMIC	_MMIO(0xe1150)
+#define PCH_HDMID	_MMIO(0xe1160)
 
-#define PORT_DFT_I9XX				0x61150
+#define PORT_DFT_I9XX				_MMIO(0x61150)
 #define   DC_BALANCE_RESET			(1 << 25)
-#define PORT_DFT2_G4X		(dev_priv->info.display_mmio_offset + 0x61154)
+#define PORT_DFT2_G4X		_MMIO(dev_priv->info.display_mmio_offset + 0x61154)
 #define   DC_BALANCE_RESET_VLV			(1 << 31)
 #define   PIPE_SCRAMBLE_RESET_MASK		((1 << 14) | (0x3 << 0))
 #define   PIPE_C_SCRAMBLE_RESET			(1 << 14) /* chv */
@@ -3370,9 +3419,12 @@
 
 
 /* DVO port control */
-#define DVOA			0x61120
-#define DVOB			0x61140
-#define DVOC			0x61160
+#define _DVOA			0x61120
+#define DVOA			_MMIO(_DVOA)
+#define _DVOB			0x61140
+#define DVOB			_MMIO(_DVOB)
+#define _DVOC			0x61160
+#define DVOC			_MMIO(_DVOC)
 #define   DVO_ENABLE			(1 << 31)
 #define   DVO_PIPE_B_SELECT		(1 << 30)
 #define   DVO_PIPE_STALL_UNUSED		(0 << 28)
@@ -3397,14 +3449,14 @@
 #define   DVO_OUTPUT_CSTATE_PIXELS	(1 << 1)	/* SDG only */
 #define   DVO_OUTPUT_SOURCE_SIZE_PIXELS	(1 << 0)	/* SDG only */
 #define   DVO_PRESERVE_MASK		(0x7<<24)
-#define DVOA_SRCDIM		0x61124
-#define DVOB_SRCDIM		0x61144
-#define DVOC_SRCDIM		0x61164
+#define DVOA_SRCDIM		_MMIO(0x61124)
+#define DVOB_SRCDIM		_MMIO(0x61144)
+#define DVOC_SRCDIM		_MMIO(0x61164)
 #define   DVO_SRCDIM_HORIZONTAL_SHIFT	12
 #define   DVO_SRCDIM_VERTICAL_SHIFT	0
 
 /* LVDS port control */
-#define LVDS			0x61180
+#define LVDS			_MMIO(0x61180)
 /*
  * Enables the LVDS port.  This bit must be set before DPLLs are enabled, as
  * the DPLL semantics change when the LVDS is assigned to that pipe.
@@ -3454,13 +3506,13 @@
 #define   LVDS_B0B3_POWER_UP		(3 << 2)
 
 /* Video Data Island Packet control */
-#define VIDEO_DIP_DATA		0x61178
+#define VIDEO_DIP_DATA		_MMIO(0x61178)
 /* Read the description of VIDEO_DIP_DATA (before Haswell) or VIDEO_DIP_ECC
  * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte
  * of the infoframe structure specified by CEA-861. */
 #define   VIDEO_DIP_DATA_SIZE	32
 #define   VIDEO_DIP_VSC_DATA_SIZE	36
-#define VIDEO_DIP_CTL		0x61170
+#define VIDEO_DIP_CTL		_MMIO(0x61170)
 /* Pre HSW: */
 #define   VIDEO_DIP_ENABLE		(1 << 31)
 #define   VIDEO_DIP_PORT(port)		((port) << 29)
@@ -3487,7 +3539,7 @@
 #define   VIDEO_DIP_ENABLE_SPD_HSW	(1 << 0)
 
 /* Panel power sequencing */
-#define PP_STATUS	0x61200
+#define PP_STATUS	_MMIO(0x61200)
 #define   PP_ON		(1 << 31)
 /*
  * Indicates that all dependencies of the panel are on:
@@ -3513,14 +3565,14 @@
 #define   PP_SEQUENCE_STATE_ON_S1_2	(0xa << 0)
 #define   PP_SEQUENCE_STATE_ON_S1_3	(0xb << 0)
 #define   PP_SEQUENCE_STATE_RESET	(0xf << 0)
-#define PP_CONTROL	0x61204
+#define PP_CONTROL	_MMIO(0x61204)
 #define   POWER_TARGET_ON	(1 << 0)
-#define PP_ON_DELAYS	0x61208
-#define PP_OFF_DELAYS	0x6120c
-#define PP_DIVISOR	0x61210
+#define PP_ON_DELAYS	_MMIO(0x61208)
+#define PP_OFF_DELAYS	_MMIO(0x6120c)
+#define PP_DIVISOR	_MMIO(0x61210)
 
 /* Panel fitting */
-#define PFIT_CONTROL	(dev_priv->info.display_mmio_offset + 0x61230)
+#define PFIT_CONTROL	_MMIO(dev_priv->info.display_mmio_offset + 0x61230)
 #define   PFIT_ENABLE		(1 << 31)
 #define   PFIT_PIPE_MASK	(3 << 29)
 #define   PFIT_PIPE_SHIFT	29
@@ -3538,7 +3590,7 @@
 #define   PFIT_SCALING_PROGRAMMED (1 << 26)
 #define   PFIT_SCALING_PILLAR	(2 << 26)
 #define   PFIT_SCALING_LETTER	(3 << 26)
-#define PFIT_PGM_RATIOS	(dev_priv->info.display_mmio_offset + 0x61234)
+#define PFIT_PGM_RATIOS _MMIO(dev_priv->info.display_mmio_offset + 0x61234)
 /* Pre-965 */
 #define		PFIT_VERT_SCALE_SHIFT		20
 #define		PFIT_VERT_SCALE_MASK		0xfff00000
@@ -3550,25 +3602,25 @@
 #define		PFIT_HORIZ_SCALE_SHIFT_965	0
 #define		PFIT_HORIZ_SCALE_MASK_965	0x00001fff
 
-#define PFIT_AUTO_RATIOS (dev_priv->info.display_mmio_offset + 0x61238)
+#define PFIT_AUTO_RATIOS _MMIO(dev_priv->info.display_mmio_offset + 0x61238)
 
 #define _VLV_BLC_PWM_CTL2_A (dev_priv->info.display_mmio_offset + 0x61250)
 #define _VLV_BLC_PWM_CTL2_B (dev_priv->info.display_mmio_offset + 0x61350)
-#define VLV_BLC_PWM_CTL2(pipe) _PIPE(pipe, _VLV_BLC_PWM_CTL2_A, \
-				     _VLV_BLC_PWM_CTL2_B)
+#define VLV_BLC_PWM_CTL2(pipe) _MMIO_PIPE(pipe, _VLV_BLC_PWM_CTL2_A, \
+					 _VLV_BLC_PWM_CTL2_B)
 
 #define _VLV_BLC_PWM_CTL_A (dev_priv->info.display_mmio_offset + 0x61254)
 #define _VLV_BLC_PWM_CTL_B (dev_priv->info.display_mmio_offset + 0x61354)
-#define VLV_BLC_PWM_CTL(pipe) _PIPE(pipe, _VLV_BLC_PWM_CTL_A, \
-				    _VLV_BLC_PWM_CTL_B)
+#define VLV_BLC_PWM_CTL(pipe) _MMIO_PIPE(pipe, _VLV_BLC_PWM_CTL_A, \
+					_VLV_BLC_PWM_CTL_B)
 
 #define _VLV_BLC_HIST_CTL_A (dev_priv->info.display_mmio_offset + 0x61260)
 #define _VLV_BLC_HIST_CTL_B (dev_priv->info.display_mmio_offset + 0x61360)
-#define VLV_BLC_HIST_CTL(pipe) _PIPE(pipe, _VLV_BLC_HIST_CTL_A, \
-				     _VLV_BLC_HIST_CTL_B)
+#define VLV_BLC_HIST_CTL(pipe) _MMIO_PIPE(pipe, _VLV_BLC_HIST_CTL_A, \
+					 _VLV_BLC_HIST_CTL_B)
 
 /* Backlight control */
-#define BLC_PWM_CTL2	(dev_priv->info.display_mmio_offset + 0x61250) /* 965+ only */
+#define BLC_PWM_CTL2	_MMIO(dev_priv->info.display_mmio_offset + 0x61250) /* 965+ only */
 #define   BLM_PWM_ENABLE		(1 << 31)
 #define   BLM_COMBINATION_MODE		(1 << 30) /* gen4 only */
 #define   BLM_PIPE_SELECT		(1 << 29)
@@ -3591,7 +3643,7 @@
 #define   BLM_PHASE_IN_COUNT_MASK	(0xff << 8)
 #define   BLM_PHASE_IN_INCR_SHIFT	(0)
 #define   BLM_PHASE_IN_INCR_MASK	(0xff << 0)
-#define BLC_PWM_CTL	(dev_priv->info.display_mmio_offset + 0x61254)
+#define BLC_PWM_CTL	_MMIO(dev_priv->info.display_mmio_offset + 0x61254)
 /*
  * This is the most significant 15 bits of the number of backlight cycles in a
  * complete cycle of the modulated backlight control.
@@ -3613,25 +3665,25 @@
 #define   BACKLIGHT_DUTY_CYCLE_MASK_PNV		(0xfffe)
 #define   BLM_POLARITY_PNV			(1 << 0) /* pnv only */
 
-#define BLC_HIST_CTL	(dev_priv->info.display_mmio_offset + 0x61260)
+#define BLC_HIST_CTL	_MMIO(dev_priv->info.display_mmio_offset + 0x61260)
 #define  BLM_HISTOGRAM_ENABLE			(1 << 31)
 
 /* New registers for PCH-split platforms. Safe where new bits show up, the
  * register layout matches with gen4 BLC_PWM_CTL[12]. */
-#define BLC_PWM_CPU_CTL2	0x48250
-#define BLC_PWM_CPU_CTL		0x48254
+#define BLC_PWM_CPU_CTL2	_MMIO(0x48250)
+#define BLC_PWM_CPU_CTL		_MMIO(0x48254)
 
-#define HSW_BLC_PWM2_CTL	0x48350
+#define HSW_BLC_PWM2_CTL	_MMIO(0x48350)
 
 /* PCH CTL1 is totally different, all but the below bits are reserved. CTL2 is
  * like the normal CTL from gen4 and earlier. Hooray for confusing naming. */
-#define BLC_PWM_PCH_CTL1	0xc8250
+#define BLC_PWM_PCH_CTL1	_MMIO(0xc8250)
 #define   BLM_PCH_PWM_ENABLE			(1 << 31)
 #define   BLM_PCH_OVERRIDE_ENABLE		(1 << 30)
 #define   BLM_PCH_POLARITY			(1 << 29)
-#define BLC_PWM_PCH_CTL2	0xc8254
+#define BLC_PWM_PCH_CTL2	_MMIO(0xc8254)
 
-#define UTIL_PIN_CTL		0x48400
+#define UTIL_PIN_CTL		_MMIO(0x48400)
 #define   UTIL_PIN_ENABLE	(1 << 31)
 
 #define   UTIL_PIN_PIPE(x)     ((x) << 29)
@@ -3651,18 +3703,18 @@
 #define _BXT_BLC_PWM_FREQ2			0xC8354
 #define _BXT_BLC_PWM_DUTY2			0xC8358
 
-#define BXT_BLC_PWM_CTL(controller)    _PIPE(controller, \
+#define BXT_BLC_PWM_CTL(controller)    _MMIO_PIPE(controller,		\
 					_BXT_BLC_PWM_CTL1, _BXT_BLC_PWM_CTL2)
-#define BXT_BLC_PWM_FREQ(controller)   _PIPE(controller, \
+#define BXT_BLC_PWM_FREQ(controller)   _MMIO_PIPE(controller, \
 					_BXT_BLC_PWM_FREQ1, _BXT_BLC_PWM_FREQ2)
-#define BXT_BLC_PWM_DUTY(controller)   _PIPE(controller, \
+#define BXT_BLC_PWM_DUTY(controller)   _MMIO_PIPE(controller, \
 					_BXT_BLC_PWM_DUTY1, _BXT_BLC_PWM_DUTY2)
 
-#define PCH_GTC_CTL		0xe7000
+#define PCH_GTC_CTL		_MMIO(0xe7000)
 #define   PCH_GTC_ENABLE	(1 << 31)
 
 /* TV port control */
-#define TV_CTL			0x68000
+#define TV_CTL			_MMIO(0x68000)
 /* Enables the TV encoder */
 # define TV_ENC_ENABLE			(1 << 31)
 /* Sources the TV encoder input from pipe B instead of A. */
@@ -3729,7 +3781,7 @@
 # define TV_TEST_MODE_MONITOR_DETECT	(7 << 0)
 # define TV_TEST_MODE_MASK		(7 << 0)
 
-#define TV_DAC			0x68004
+#define TV_DAC			_MMIO(0x68004)
 # define TV_DAC_SAVE		0x00ffff00
 /*
  * Reports that DAC state change logic has reported change (RO).
@@ -3780,13 +3832,13 @@
  * where 2-bit exponents are unsigned n, and 3-bit exponents are signed n with
  * -1 (0x3) being the only legal negative value.
  */
-#define TV_CSC_Y		0x68010
+#define TV_CSC_Y		_MMIO(0x68010)
 # define TV_RY_MASK			0x07ff0000
 # define TV_RY_SHIFT			16
 # define TV_GY_MASK			0x00000fff
 # define TV_GY_SHIFT			0
 
-#define TV_CSC_Y2		0x68014
+#define TV_CSC_Y2		_MMIO(0x68014)
 # define TV_BY_MASK			0x07ff0000
 # define TV_BY_SHIFT			16
 /*
@@ -3797,13 +3849,13 @@
 # define TV_AY_MASK			0x000003ff
 # define TV_AY_SHIFT			0
 
-#define TV_CSC_U		0x68018
+#define TV_CSC_U		_MMIO(0x68018)
 # define TV_RU_MASK			0x07ff0000
 # define TV_RU_SHIFT			16
 # define TV_GU_MASK			0x000007ff
 # define TV_GU_SHIFT			0
 
-#define TV_CSC_U2		0x6801c
+#define TV_CSC_U2		_MMIO(0x6801c)
 # define TV_BU_MASK			0x07ff0000
 # define TV_BU_SHIFT			16
 /*
@@ -3814,13 +3866,13 @@
 # define TV_AU_MASK			0x000003ff
 # define TV_AU_SHIFT			0
 
-#define TV_CSC_V		0x68020
+#define TV_CSC_V		_MMIO(0x68020)
 # define TV_RV_MASK			0x0fff0000
 # define TV_RV_SHIFT			16
 # define TV_GV_MASK			0x000007ff
 # define TV_GV_SHIFT			0
 
-#define TV_CSC_V2		0x68024
+#define TV_CSC_V2		_MMIO(0x68024)
 # define TV_BV_MASK			0x07ff0000
 # define TV_BV_SHIFT			16
 /*
@@ -3831,7 +3883,7 @@
 # define TV_AV_MASK			0x000007ff
 # define TV_AV_SHIFT			0
 
-#define TV_CLR_KNOBS		0x68028
+#define TV_CLR_KNOBS		_MMIO(0x68028)
 /* 2s-complement brightness adjustment */
 # define TV_BRIGHTNESS_MASK		0xff000000
 # define TV_BRIGHTNESS_SHIFT		24
@@ -3845,7 +3897,7 @@
 # define TV_HUE_MASK			0x000000ff
 # define TV_HUE_SHIFT			0
 
-#define TV_CLR_LEVEL		0x6802c
+#define TV_CLR_LEVEL		_MMIO(0x6802c)
 /* Controls the DAC level for black */
 # define TV_BLACK_LEVEL_MASK		0x01ff0000
 # define TV_BLACK_LEVEL_SHIFT		16
@@ -3853,7 +3905,7 @@
 # define TV_BLANK_LEVEL_MASK		0x000001ff
 # define TV_BLANK_LEVEL_SHIFT		0
 
-#define TV_H_CTL_1		0x68030
+#define TV_H_CTL_1		_MMIO(0x68030)
 /* Number of pixels in the hsync. */
 # define TV_HSYNC_END_MASK		0x1fff0000
 # define TV_HSYNC_END_SHIFT		16
@@ -3861,7 +3913,7 @@
 # define TV_HTOTAL_MASK			0x00001fff
 # define TV_HTOTAL_SHIFT		0
 
-#define TV_H_CTL_2		0x68034
+#define TV_H_CTL_2		_MMIO(0x68034)
 /* Enables the colorburst (needed for non-component color) */
 # define TV_BURST_ENA			(1 << 31)
 /* Offset of the colorburst from the start of hsync, in pixels minus one. */
@@ -3871,7 +3923,7 @@
 # define TV_HBURST_LEN_SHIFT		0
 # define TV_HBURST_LEN_MASK		0x0001fff
 
-#define TV_H_CTL_3		0x68038
+#define TV_H_CTL_3		_MMIO(0x68038)
 /* End of hblank, measured in pixels minus one from start of hsync */
 # define TV_HBLANK_END_SHIFT		16
 # define TV_HBLANK_END_MASK		0x1fff0000
@@ -3879,7 +3931,7 @@
 # define TV_HBLANK_START_SHIFT		0
 # define TV_HBLANK_START_MASK		0x0001fff
 
-#define TV_V_CTL_1		0x6803c
+#define TV_V_CTL_1		_MMIO(0x6803c)
 /* XXX */
 # define TV_NBR_END_SHIFT		16
 # define TV_NBR_END_MASK		0x07ff0000
@@ -3890,7 +3942,7 @@
 # define TV_VI_END_F2_SHIFT		0
 # define TV_VI_END_F2_MASK		0x0000003f
 
-#define TV_V_CTL_2		0x68040
+#define TV_V_CTL_2		_MMIO(0x68040)
 /* Length of vsync, in half lines */
 # define TV_VSYNC_LEN_MASK		0x07ff0000
 # define TV_VSYNC_LEN_SHIFT		16
@@ -3906,7 +3958,7 @@
 # define TV_VSYNC_START_F2_MASK		0x0000007f
 # define TV_VSYNC_START_F2_SHIFT	0
 
-#define TV_V_CTL_3		0x68044
+#define TV_V_CTL_3		_MMIO(0x68044)
 /* Enables generation of the equalization signal */
 # define TV_EQUAL_ENA			(1 << 31)
 /* Length of vsync, in half lines */
@@ -3924,7 +3976,7 @@
 # define TV_VEQ_START_F2_MASK		0x000007f
 # define TV_VEQ_START_F2_SHIFT		0
 
-#define TV_V_CTL_4		0x68048
+#define TV_V_CTL_4		_MMIO(0x68048)
 /*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
@@ -3938,7 +3990,7 @@
 # define TV_VBURST_END_F1_MASK		0x000000ff
 # define TV_VBURST_END_F1_SHIFT		0
 
-#define TV_V_CTL_5		0x6804c
+#define TV_V_CTL_5		_MMIO(0x6804c)
 /*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
@@ -3952,7 +4004,7 @@
 # define TV_VBURST_END_F2_MASK		0x000000ff
 # define TV_VBURST_END_F2_SHIFT		0
 
-#define TV_V_CTL_6		0x68050
+#define TV_V_CTL_6		_MMIO(0x68050)
 /*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
@@ -3966,7 +4018,7 @@
 # define TV_VBURST_END_F3_MASK		0x000000ff
 # define TV_VBURST_END_F3_SHIFT		0
 
-#define TV_V_CTL_7		0x68054
+#define TV_V_CTL_7		_MMIO(0x68054)
 /*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
@@ -3980,7 +4032,7 @@
 # define TV_VBURST_END_F4_MASK		0x000000ff
 # define TV_VBURST_END_F4_SHIFT		0
 
-#define TV_SC_CTL_1		0x68060
+#define TV_SC_CTL_1		_MMIO(0x68060)
 /* Turns on the first subcarrier phase generation DDA */
 # define TV_SC_DDA1_EN			(1 << 31)
 /* Turns on the first subcarrier phase generation DDA */
@@ -4002,7 +4054,7 @@
 # define TV_SCDDA1_INC_MASK		0x00000fff
 # define TV_SCDDA1_INC_SHIFT		0
 
-#define TV_SC_CTL_2		0x68064
+#define TV_SC_CTL_2		_MMIO(0x68064)
 /* Sets the rollover for the second subcarrier phase generation DDA */
 # define TV_SCDDA2_SIZE_MASK		0x7fff0000
 # define TV_SCDDA2_SIZE_SHIFT		16
@@ -4010,7 +4062,7 @@
 # define TV_SCDDA2_INC_MASK		0x00007fff
 # define TV_SCDDA2_INC_SHIFT		0
 
-#define TV_SC_CTL_3		0x68068
+#define TV_SC_CTL_3		_MMIO(0x68068)
 /* Sets the rollover for the third subcarrier phase generation DDA */
 # define TV_SCDDA3_SIZE_MASK		0x7fff0000
 # define TV_SCDDA3_SIZE_SHIFT		16
@@ -4018,7 +4070,7 @@
 # define TV_SCDDA3_INC_MASK		0x00007fff
 # define TV_SCDDA3_INC_SHIFT		0
 
-#define TV_WIN_POS		0x68070
+#define TV_WIN_POS		_MMIO(0x68070)
 /* X coordinate of the display from the start of horizontal active */
 # define TV_XPOS_MASK			0x1fff0000
 # define TV_XPOS_SHIFT			16
@@ -4026,7 +4078,7 @@
 # define TV_YPOS_MASK			0x00000fff
 # define TV_YPOS_SHIFT			0
 
-#define TV_WIN_SIZE		0x68074
+#define TV_WIN_SIZE		_MMIO(0x68074)
 /* Horizontal size of the display window, measured in pixels*/
 # define TV_XSIZE_MASK			0x1fff0000
 # define TV_XSIZE_SHIFT			16
@@ -4038,7 +4090,7 @@
 # define TV_YSIZE_MASK			0x00000fff
 # define TV_YSIZE_SHIFT			0
 
-#define TV_FILTER_CTL_1		0x68080
+#define TV_FILTER_CTL_1		_MMIO(0x68080)
 /*
  * Enables automatic scaling calculation.
  *
@@ -4071,7 +4123,7 @@
 # define TV_HSCALE_FRAC_MASK		0x00003fff
 # define TV_HSCALE_FRAC_SHIFT		0
 
-#define TV_FILTER_CTL_2		0x68084
+#define TV_FILTER_CTL_2		_MMIO(0x68084)
 /*
  * Sets the integer part of the 3.15 fixed-point vertical scaling factor.
  *
@@ -4087,7 +4139,7 @@
 # define TV_VSCALE_FRAC_MASK		0x00007fff
 # define TV_VSCALE_FRAC_SHIFT		0
 
-#define TV_FILTER_CTL_3		0x68088
+#define TV_FILTER_CTL_3		_MMIO(0x68088)
 /*
  * Sets the integer part of the 3.15 fixed-point vertical scaling factor.
  *
@@ -4107,7 +4159,7 @@
 # define TV_VSCALE_IP_FRAC_MASK		0x00007fff
 # define TV_VSCALE_IP_FRAC_SHIFT		0
 
-#define TV_CC_CONTROL		0x68090
+#define TV_CC_CONTROL		_MMIO(0x68090)
 # define TV_CC_ENABLE			(1 << 31)
 /*
  * Specifies which field to send the CC data in.
@@ -4123,7 +4175,7 @@
 # define TV_CC_LINE_MASK		0x0000003f
 # define TV_CC_LINE_SHIFT		0
 
-#define TV_CC_DATA		0x68094
+#define TV_CC_DATA		_MMIO(0x68094)
 # define TV_CC_RDY			(1 << 31)
 /* Second word of CC data to be transmitted. */
 # define TV_CC_DATA_2_MASK		0x007f0000
@@ -4132,20 +4184,20 @@
 # define TV_CC_DATA_1_MASK		0x0000007f
 # define TV_CC_DATA_1_SHIFT		0
 
-#define TV_H_LUMA(i)		(0x68100 + (i) * 4) /* 60 registers */
-#define TV_H_CHROMA(i)		(0x68200 + (i) * 4) /* 60 registers */
-#define TV_V_LUMA(i)		(0x68300 + (i) * 4) /* 43 registers */
-#define TV_V_CHROMA(i)		(0x68400 + (i) * 4) /* 43 registers */
+#define TV_H_LUMA(i)		_MMIO(0x68100 + (i) * 4) /* 60 registers */
+#define TV_H_CHROMA(i)		_MMIO(0x68200 + (i) * 4) /* 60 registers */
+#define TV_V_LUMA(i)		_MMIO(0x68300 + (i) * 4) /* 43 registers */
+#define TV_V_CHROMA(i)		_MMIO(0x68400 + (i) * 4) /* 43 registers */
 
 /* Display Port */
-#define DP_A				0x64000 /* eDP */
-#define DP_B				0x64100
-#define DP_C				0x64200
-#define DP_D				0x64300
+#define DP_A			_MMIO(0x64000) /* eDP */
+#define DP_B			_MMIO(0x64100)
+#define DP_C			_MMIO(0x64200)
+#define DP_D			_MMIO(0x64300)
 
-#define VLV_DP_B			(VLV_DISPLAY_BASE + DP_B)
-#define VLV_DP_C			(VLV_DISPLAY_BASE + DP_C)
-#define CHV_DP_D			(VLV_DISPLAY_BASE + DP_D)
+#define VLV_DP_B		_MMIO(VLV_DISPLAY_BASE + 0x64100)
+#define VLV_DP_C		_MMIO(VLV_DISPLAY_BASE + 0x64200)
+#define CHV_DP_D		_MMIO(VLV_DISPLAY_BASE + 0x64300)
 
 #define   DP_PORT_EN			(1 << 31)
 #define   DP_PIPEB_SELECT		(1 << 30)
@@ -4199,7 +4251,7 @@
 
 /* eDP */
 #define   DP_PLL_FREQ_270MHZ		(0 << 16)
-#define   DP_PLL_FREQ_160MHZ		(1 << 16)
+#define   DP_PLL_FREQ_162MHZ		(1 << 16)
 #define   DP_PLL_FREQ_MASK		(3 << 16)
 
 /* locked once port is enabled */
@@ -4232,33 +4284,36 @@
  * is 20 bytes in each direction, hence the 5 fixed
  * data registers
  */
-#define DPA_AUX_CH_CTL			0x64010
-#define DPA_AUX_CH_DATA1		0x64014
-#define DPA_AUX_CH_DATA2		0x64018
-#define DPA_AUX_CH_DATA3		0x6401c
-#define DPA_AUX_CH_DATA4		0x64020
-#define DPA_AUX_CH_DATA5		0x64024
+#define _DPA_AUX_CH_CTL		(dev_priv->info.display_mmio_offset + 0x64010)
+#define _DPA_AUX_CH_DATA1	(dev_priv->info.display_mmio_offset + 0x64014)
+#define _DPA_AUX_CH_DATA2	(dev_priv->info.display_mmio_offset + 0x64018)
+#define _DPA_AUX_CH_DATA3	(dev_priv->info.display_mmio_offset + 0x6401c)
+#define _DPA_AUX_CH_DATA4	(dev_priv->info.display_mmio_offset + 0x64020)
+#define _DPA_AUX_CH_DATA5	(dev_priv->info.display_mmio_offset + 0x64024)
 
-#define DPB_AUX_CH_CTL			0x64110
-#define DPB_AUX_CH_DATA1		0x64114
-#define DPB_AUX_CH_DATA2		0x64118
-#define DPB_AUX_CH_DATA3		0x6411c
-#define DPB_AUX_CH_DATA4		0x64120
-#define DPB_AUX_CH_DATA5		0x64124
+#define _DPB_AUX_CH_CTL		(dev_priv->info.display_mmio_offset + 0x64110)
+#define _DPB_AUX_CH_DATA1	(dev_priv->info.display_mmio_offset + 0x64114)
+#define _DPB_AUX_CH_DATA2	(dev_priv->info.display_mmio_offset + 0x64118)
+#define _DPB_AUX_CH_DATA3	(dev_priv->info.display_mmio_offset + 0x6411c)
+#define _DPB_AUX_CH_DATA4	(dev_priv->info.display_mmio_offset + 0x64120)
+#define _DPB_AUX_CH_DATA5	(dev_priv->info.display_mmio_offset + 0x64124)
 
-#define DPC_AUX_CH_CTL			0x64210
-#define DPC_AUX_CH_DATA1		0x64214
-#define DPC_AUX_CH_DATA2		0x64218
-#define DPC_AUX_CH_DATA3		0x6421c
-#define DPC_AUX_CH_DATA4		0x64220
-#define DPC_AUX_CH_DATA5		0x64224
+#define _DPC_AUX_CH_CTL		(dev_priv->info.display_mmio_offset + 0x64210)
+#define _DPC_AUX_CH_DATA1	(dev_priv->info.display_mmio_offset + 0x64214)
+#define _DPC_AUX_CH_DATA2	(dev_priv->info.display_mmio_offset + 0x64218)
+#define _DPC_AUX_CH_DATA3	(dev_priv->info.display_mmio_offset + 0x6421c)
+#define _DPC_AUX_CH_DATA4	(dev_priv->info.display_mmio_offset + 0x64220)
+#define _DPC_AUX_CH_DATA5	(dev_priv->info.display_mmio_offset + 0x64224)
 
-#define DPD_AUX_CH_CTL			0x64310
-#define DPD_AUX_CH_DATA1		0x64314
-#define DPD_AUX_CH_DATA2		0x64318
-#define DPD_AUX_CH_DATA3		0x6431c
-#define DPD_AUX_CH_DATA4		0x64320
-#define DPD_AUX_CH_DATA5		0x64324
+#define _DPD_AUX_CH_CTL		(dev_priv->info.display_mmio_offset + 0x64310)
+#define _DPD_AUX_CH_DATA1	(dev_priv->info.display_mmio_offset + 0x64314)
+#define _DPD_AUX_CH_DATA2	(dev_priv->info.display_mmio_offset + 0x64318)
+#define _DPD_AUX_CH_DATA3	(dev_priv->info.display_mmio_offset + 0x6431c)
+#define _DPD_AUX_CH_DATA4	(dev_priv->info.display_mmio_offset + 0x64320)
+#define _DPD_AUX_CH_DATA5	(dev_priv->info.display_mmio_offset + 0x64324)
+
+#define DP_AUX_CH_CTL(port)	_MMIO_PORT(port, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL)
+#define DP_AUX_CH_DATA(port, i)	_MMIO(_PORT(port, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */
 
 #define   DP_AUX_CH_CTL_SEND_BUSY	    (1 << 31)
 #define   DP_AUX_CH_CTL_DONE		    (1 << 30)
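(As context for the conversions in this patch: every raw MMIO offset is being wrapped in _MMIO() so that register parameters become a distinct C type rather than plain integers. A minimal sketch of that pattern, assuming the i915_reg_t/_MMIO shape this series introduces — treat it as illustrative, not a verbatim quote of i915_reg.h:

typedef struct {
	u32 reg;
} i915_reg_t;

/* Wrap a raw MMIO offset in the typed handle. */
#define _MMIO(r) ((const i915_reg_t){ .reg = (r) })

/* Recover the raw offset, e.g. for range checks or debug output. */
static inline u32 i915_mmio_reg_offset(i915_reg_t reg)
{
	return reg.reg;
}

static inline bool i915_mmio_reg_equal(i915_reg_t a, i915_reg_t b)
{
	return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b);
}

Because _MMIO() builds a struct via a compound literal, passing a bare integer where an i915_reg_t is expected becomes a compile-time error, which is what makes these mechanical conversions easy to verify.)
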
@@ -4335,10 +4390,10 @@
 #define _PIPEB_LINK_N_G4X	0x71064
 #define   PIPEA_DP_LINK_N_MASK			(0xffffff)
 
-#define PIPE_DATA_M_G4X(pipe) _PIPE(pipe, _PIPEA_DATA_M_G4X, _PIPEB_DATA_M_G4X)
-#define PIPE_DATA_N_G4X(pipe) _PIPE(pipe, _PIPEA_DATA_N_G4X, _PIPEB_DATA_N_G4X)
-#define PIPE_LINK_M_G4X(pipe) _PIPE(pipe, _PIPEA_LINK_M_G4X, _PIPEB_LINK_M_G4X)
-#define PIPE_LINK_N_G4X(pipe) _PIPE(pipe, _PIPEA_LINK_N_G4X, _PIPEB_LINK_N_G4X)
+#define PIPE_DATA_M_G4X(pipe) _MMIO_PIPE(pipe, _PIPEA_DATA_M_G4X, _PIPEB_DATA_M_G4X)
+#define PIPE_DATA_N_G4X(pipe) _MMIO_PIPE(pipe, _PIPEA_DATA_N_G4X, _PIPEB_DATA_N_G4X)
+#define PIPE_LINK_M_G4X(pipe) _MMIO_PIPE(pipe, _PIPEA_LINK_M_G4X, _PIPEB_LINK_M_G4X)
+#define PIPE_LINK_N_G4X(pipe) _MMIO_PIPE(pipe, _PIPEA_LINK_N_G4X, _PIPEB_LINK_N_G4X)
 
 /* Display & cursor control */
 
@@ -4454,15 +4509,15 @@
  */
 #define PIPE_EDP_OFFSET	0x7f000
 
-#define _PIPE2(pipe, reg) (dev_priv->info.pipe_offsets[pipe] - \
+#define _MMIO_PIPE2(pipe, reg) _MMIO(dev_priv->info.pipe_offsets[pipe] - \
 	dev_priv->info.pipe_offsets[PIPE_A] + (reg) + \
 	dev_priv->info.display_mmio_offset)
 
-#define PIPECONF(pipe) _PIPE2(pipe, _PIPEACONF)
-#define PIPEDSL(pipe)  _PIPE2(pipe, _PIPEADSL)
-#define PIPEFRAME(pipe) _PIPE2(pipe, _PIPEAFRAMEHIGH)
-#define PIPEFRAMEPIXEL(pipe)  _PIPE2(pipe, _PIPEAFRAMEPIXEL)
-#define PIPESTAT(pipe) _PIPE2(pipe, _PIPEASTAT)
+#define PIPECONF(pipe)		_MMIO_PIPE2(pipe, _PIPEACONF)
+#define PIPEDSL(pipe)		_MMIO_PIPE2(pipe, _PIPEADSL)
+#define PIPEFRAME(pipe)		_MMIO_PIPE2(pipe, _PIPEAFRAMEHIGH)
+#define PIPEFRAMEPIXEL(pipe)	_MMIO_PIPE2(pipe, _PIPEAFRAMEPIXEL)
+#define PIPESTAT(pipe)		_MMIO_PIPE2(pipe, _PIPEASTAT)
 
 #define _PIPE_MISC_A			0x70030
 #define _PIPE_MISC_B			0x71030
@@ -4474,9 +4529,9 @@
 #define   PIPEMISC_DITHER_ENABLE	(1<<4)
 #define   PIPEMISC_DITHER_TYPE_MASK	(3<<2)
 #define   PIPEMISC_DITHER_TYPE_SP	(0<<2)
-#define PIPEMISC(pipe) _PIPE2(pipe, _PIPE_MISC_A)
+#define PIPEMISC(pipe)			_MMIO_PIPE2(pipe, _PIPE_MISC_A)
 
-#define VLV_DPFLIPSTAT				(VLV_DISPLAY_BASE + 0x70028)
+#define VLV_DPFLIPSTAT				_MMIO(VLV_DISPLAY_BASE + 0x70028)
 #define   PIPEB_LINE_COMPARE_INT_EN		(1<<29)
 #define   PIPEB_HLINE_INT_EN			(1<<28)
 #define   PIPEB_VBLANK_INT_EN			(1<<27)
@@ -4497,7 +4552,7 @@
 #define   SPRITEE_FLIPDONE_INT_EN		(1<<9)
 #define   PLANEC_FLIPDONE_INT_EN		(1<<8)
 
-#define DPINVGTT				(VLV_DISPLAY_BASE + 0x7002c) /* VLV/CHV only */
+#define DPINVGTT				_MMIO(VLV_DISPLAY_BASE + 0x7002c) /* VLV/CHV only */
 #define   SPRITEF_INVALID_GTT_INT_EN		(1<<27)
 #define   SPRITEE_INVALID_GTT_INT_EN		(1<<26)
 #define   PLANEC_INVALID_GTT_INT_EN		(1<<25)
@@ -4527,7 +4582,7 @@
 #define   DPINVGTT_STATUS_MASK			0xff
 #define   DPINVGTT_STATUS_MASK_CHV		0xfff
 
-#define DSPARB			(dev_priv->info.display_mmio_offset + 0x70030)
+#define DSPARB			_MMIO(dev_priv->info.display_mmio_offset + 0x70030)
 #define   DSPARB_CSTART_MASK	(0x7f << 7)
 #define   DSPARB_CSTART_SHIFT	7
 #define   DSPARB_BSTART_MASK	(0x7f)
@@ -4542,7 +4597,7 @@
 #define   DSPARB_SPRITEC_MASK_VLV	(0xff << 16)
 #define   DSPARB_SPRITED_SHIFT_VLV	24
 #define   DSPARB_SPRITED_MASK_VLV	(0xff << 24)
-#define DSPARB2			(VLV_DISPLAY_BASE + 0x70060) /* vlv/chv */
+#define DSPARB2				_MMIO(VLV_DISPLAY_BASE + 0x70060) /* vlv/chv */
 #define   DSPARB_SPRITEA_HI_SHIFT_VLV	0
 #define   DSPARB_SPRITEA_HI_MASK_VLV	(0x1 << 0)
 #define   DSPARB_SPRITEB_HI_SHIFT_VLV	4
@@ -4555,14 +4610,14 @@
 #define   DSPARB_SPRITEE_HI_MASK_VLV	(0x1 << 16)
 #define   DSPARB_SPRITEF_HI_SHIFT_VLV	20
 #define   DSPARB_SPRITEF_HI_MASK_VLV	(0x1 << 20)
-#define DSPARB3			(VLV_DISPLAY_BASE + 0x7006c) /* chv */
+#define DSPARB3				_MMIO(VLV_DISPLAY_BASE + 0x7006c) /* chv */
 #define   DSPARB_SPRITEE_SHIFT_VLV	0
 #define   DSPARB_SPRITEE_MASK_VLV	(0xff << 0)
 #define   DSPARB_SPRITEF_SHIFT_VLV	8
 #define   DSPARB_SPRITEF_MASK_VLV	(0xff << 8)
 
 /* pnv/gen4/g4x/vlv/chv */
-#define DSPFW1			(dev_priv->info.display_mmio_offset + 0x70034)
+#define DSPFW1		_MMIO(dev_priv->info.display_mmio_offset + 0x70034)
 #define   DSPFW_SR_SHIFT		23
 #define   DSPFW_SR_MASK			(0x1ff<<23)
 #define   DSPFW_CURSORB_SHIFT		16
@@ -4573,7 +4628,7 @@
 #define   DSPFW_PLANEA_SHIFT		0
 #define   DSPFW_PLANEA_MASK		(0x7f<<0)
 #define   DSPFW_PLANEA_MASK_VLV		(0xff<<0) /* vlv/chv */
-#define DSPFW2			(dev_priv->info.display_mmio_offset + 0x70038)
+#define DSPFW2		_MMIO(dev_priv->info.display_mmio_offset + 0x70038)
 #define   DSPFW_FBC_SR_EN		(1<<31)	  /* g4x */
 #define   DSPFW_FBC_SR_SHIFT		28
 #define   DSPFW_FBC_SR_MASK		(0x7<<28) /* g4x */
@@ -4589,7 +4644,7 @@
 #define   DSPFW_SPRITEA_SHIFT		0
 #define   DSPFW_SPRITEA_MASK		(0x7f<<0) /* g4x */
 #define   DSPFW_SPRITEA_MASK_VLV	(0xff<<0) /* vlv/chv */
-#define DSPFW3			(dev_priv->info.display_mmio_offset + 0x7003c)
+#define DSPFW3		_MMIO(dev_priv->info.display_mmio_offset + 0x7003c)
 #define   DSPFW_HPLL_SR_EN		(1<<31)
 #define   PINEVIEW_SELF_REFRESH_EN	(1<<30)
 #define   DSPFW_CURSOR_SR_SHIFT		24
@@ -4600,14 +4655,14 @@
 #define   DSPFW_HPLL_SR_MASK		(0x1ff<<0)
 
 /* vlv/chv */
-#define DSPFW4			(VLV_DISPLAY_BASE + 0x70070)
+#define DSPFW4		_MMIO(VLV_DISPLAY_BASE + 0x70070)
 #define   DSPFW_SPRITEB_WM1_SHIFT	16
 #define   DSPFW_SPRITEB_WM1_MASK	(0xff<<16)
 #define   DSPFW_CURSORA_WM1_SHIFT	8
 #define   DSPFW_CURSORA_WM1_MASK	(0x3f<<8)
 #define   DSPFW_SPRITEA_WM1_SHIFT	0
 #define   DSPFW_SPRITEA_WM1_MASK	(0xff<<0)
-#define DSPFW5			(VLV_DISPLAY_BASE + 0x70074)
+#define DSPFW5		_MMIO(VLV_DISPLAY_BASE + 0x70074)
 #define   DSPFW_PLANEB_WM1_SHIFT	24
 #define   DSPFW_PLANEB_WM1_MASK		(0xff<<24)
 #define   DSPFW_PLANEA_WM1_SHIFT	16
@@ -4616,11 +4671,11 @@
 #define   DSPFW_CURSORB_WM1_MASK	(0x3f<<8)
 #define   DSPFW_CURSOR_SR_WM1_SHIFT	0
 #define   DSPFW_CURSOR_SR_WM1_MASK	(0x3f<<0)
-#define DSPFW6			(VLV_DISPLAY_BASE + 0x70078)
+#define DSPFW6		_MMIO(VLV_DISPLAY_BASE + 0x70078)
 #define   DSPFW_SR_WM1_SHIFT		0
 #define   DSPFW_SR_WM1_MASK		(0x1ff<<0)
-#define DSPFW7			(VLV_DISPLAY_BASE + 0x7007c)
-#define DSPFW7_CHV		(VLV_DISPLAY_BASE + 0x700b4) /* wtf #1? */
+#define DSPFW7		_MMIO(VLV_DISPLAY_BASE + 0x7007c)
+#define DSPFW7_CHV	_MMIO(VLV_DISPLAY_BASE + 0x700b4) /* wtf #1? */
 #define   DSPFW_SPRITED_WM1_SHIFT	24
 #define   DSPFW_SPRITED_WM1_MASK	(0xff<<24)
 #define   DSPFW_SPRITED_SHIFT		16
@@ -4629,7 +4684,7 @@
 #define   DSPFW_SPRITEC_WM1_MASK	(0xff<<8)
 #define   DSPFW_SPRITEC_SHIFT		0
 #define   DSPFW_SPRITEC_MASK_VLV	(0xff<<0)
-#define DSPFW8_CHV		(VLV_DISPLAY_BASE + 0x700b8)
+#define DSPFW8_CHV	_MMIO(VLV_DISPLAY_BASE + 0x700b8)
 #define   DSPFW_SPRITEF_WM1_SHIFT	24
 #define   DSPFW_SPRITEF_WM1_MASK	(0xff<<24)
 #define   DSPFW_SPRITEF_SHIFT		16
@@ -4638,7 +4693,7 @@
 #define   DSPFW_SPRITEE_WM1_MASK	(0xff<<8)
 #define   DSPFW_SPRITEE_SHIFT		0
 #define   DSPFW_SPRITEE_MASK_VLV	(0xff<<0)
-#define DSPFW9_CHV		(VLV_DISPLAY_BASE + 0x7007c) /* wtf #2? */
+#define DSPFW9_CHV	_MMIO(VLV_DISPLAY_BASE + 0x7007c) /* wtf #2? */
 #define   DSPFW_PLANEC_WM1_SHIFT	24
 #define   DSPFW_PLANEC_WM1_MASK		(0xff<<24)
 #define   DSPFW_PLANEC_SHIFT		16
@@ -4649,7 +4704,7 @@
 #define   DSPFW_CURSORC_MASK		(0x3f<<0)
 
 /* vlv/chv high order bits */
-#define DSPHOWM			(VLV_DISPLAY_BASE + 0x70064)
+#define DSPHOWM		_MMIO(VLV_DISPLAY_BASE + 0x70064)
 #define   DSPFW_SR_HI_SHIFT		24
 #define   DSPFW_SR_HI_MASK		(3<<24) /* 2 bits for chv, 1 for vlv */
 #define   DSPFW_SPRITEF_HI_SHIFT	23
@@ -4670,7 +4725,7 @@
 #define   DSPFW_SPRITEA_HI_MASK		(1<<4)
 #define   DSPFW_PLANEA_HI_SHIFT		0
 #define   DSPFW_PLANEA_HI_MASK		(1<<0)
-#define DSPHOWM1		(VLV_DISPLAY_BASE + 0x70068)
+#define DSPHOWM1	_MMIO(VLV_DISPLAY_BASE + 0x70068)
 #define   DSPFW_SR_WM1_HI_SHIFT		24
 #define   DSPFW_SR_WM1_HI_MASK		(3<<24) /* 2 bits for chv, 1 for vlv */
 #define   DSPFW_SPRITEF_WM1_HI_SHIFT	23
@@ -4693,7 +4748,7 @@
 #define   DSPFW_PLANEA_WM1_HI_MASK	(1<<0)
 
 /* drain latency register values*/
-#define VLV_DDL(pipe)			(VLV_DISPLAY_BASE + 0x70050 + 4 * (pipe))
+#define VLV_DDL(pipe)			_MMIO(VLV_DISPLAY_BASE + 0x70050 + 4 * (pipe))
 #define DDL_CURSOR_SHIFT		24
 #define DDL_SPRITE_SHIFT(sprite)	(8+8*(sprite))
 #define DDL_PLANE_SHIFT			0
@@ -4701,7 +4756,7 @@
 #define DDL_PRECISION_LOW		(0<<7)
 #define DRAIN_LATENCY_MASK		0x7f
 
-#define CBR1_VLV			(VLV_DISPLAY_BASE + 0x70400)
+#define CBR1_VLV			_MMIO(VLV_DISPLAY_BASE + 0x70400)
 #define  CBR_PND_DEADLINE_DISABLE	(1<<31)
 #define  CBR_PWM_CLOCK_MUX_SELECT	(1<<30)
 
@@ -4739,51 +4794,51 @@
 #define I965_CURSOR_DFT_WM	8
 
 /* Watermark register definitions for SKL */
-#define CUR_WM_A_0		0x70140
-#define CUR_WM_B_0		0x71140
-#define PLANE_WM_1_A_0		0x70240
-#define PLANE_WM_1_B_0		0x71240
-#define PLANE_WM_2_A_0		0x70340
-#define PLANE_WM_2_B_0		0x71340
-#define PLANE_WM_TRANS_1_A_0	0x70268
-#define PLANE_WM_TRANS_1_B_0	0x71268
-#define PLANE_WM_TRANS_2_A_0	0x70368
-#define PLANE_WM_TRANS_2_B_0	0x71368
-#define CUR_WM_TRANS_A_0	0x70168
-#define CUR_WM_TRANS_B_0	0x71168
+#define _CUR_WM_A_0		0x70140
+#define _CUR_WM_B_0		0x71140
+#define _PLANE_WM_1_A_0		0x70240
+#define _PLANE_WM_1_B_0		0x71240
+#define _PLANE_WM_2_A_0		0x70340
+#define _PLANE_WM_2_B_0		0x71340
+#define _PLANE_WM_TRANS_1_A_0	0x70268
+#define _PLANE_WM_TRANS_1_B_0	0x71268
+#define _PLANE_WM_TRANS_2_A_0	0x70368
+#define _PLANE_WM_TRANS_2_B_0	0x71368
+#define _CUR_WM_TRANS_A_0	0x70168
+#define _CUR_WM_TRANS_B_0	0x71168
 #define   PLANE_WM_EN		(1 << 31)
 #define   PLANE_WM_LINES_SHIFT	14
 #define   PLANE_WM_LINES_MASK	0x1f
 #define   PLANE_WM_BLOCKS_MASK	0x3ff
 
-#define CUR_WM_0(pipe) _PIPE(pipe, CUR_WM_A_0, CUR_WM_B_0)
-#define CUR_WM(pipe, level) (CUR_WM_0(pipe) + ((4) * (level)))
-#define CUR_WM_TRANS(pipe) _PIPE(pipe, CUR_WM_TRANS_A_0, CUR_WM_TRANS_B_0)
+#define _CUR_WM_0(pipe) _PIPE(pipe, _CUR_WM_A_0, _CUR_WM_B_0)
+#define CUR_WM(pipe, level) _MMIO(_CUR_WM_0(pipe) + ((4) * (level)))
+#define CUR_WM_TRANS(pipe) _MMIO_PIPE(pipe, _CUR_WM_TRANS_A_0, _CUR_WM_TRANS_B_0)
 
-#define _PLANE_WM_1(pipe) _PIPE(pipe, PLANE_WM_1_A_0, PLANE_WM_1_B_0)
-#define _PLANE_WM_2(pipe) _PIPE(pipe, PLANE_WM_2_A_0, PLANE_WM_2_B_0)
+#define _PLANE_WM_1(pipe) _PIPE(pipe, _PLANE_WM_1_A_0, _PLANE_WM_1_B_0)
+#define _PLANE_WM_2(pipe) _PIPE(pipe, _PLANE_WM_2_A_0, _PLANE_WM_2_B_0)
 #define _PLANE_WM_BASE(pipe, plane)	\
 			_PLANE(plane, _PLANE_WM_1(pipe), _PLANE_WM_2(pipe))
 #define PLANE_WM(pipe, plane, level)	\
-			(_PLANE_WM_BASE(pipe, plane) + ((4) * (level)))
+			_MMIO(_PLANE_WM_BASE(pipe, plane) + ((4) * (level)))
 #define _PLANE_WM_TRANS_1(pipe)	\
-			_PIPE(pipe, PLANE_WM_TRANS_1_A_0, PLANE_WM_TRANS_1_B_0)
+			_PIPE(pipe, _PLANE_WM_TRANS_1_A_0, _PLANE_WM_TRANS_1_B_0)
 #define _PLANE_WM_TRANS_2(pipe)	\
-			_PIPE(pipe, PLANE_WM_TRANS_2_A_0, PLANE_WM_TRANS_2_B_0)
+			_PIPE(pipe, _PLANE_WM_TRANS_2_A_0, _PLANE_WM_TRANS_2_B_0)
 #define PLANE_WM_TRANS(pipe, plane)	\
-		_PLANE(plane, _PLANE_WM_TRANS_1(pipe), _PLANE_WM_TRANS_2(pipe))
+	_MMIO(_PLANE(plane, _PLANE_WM_TRANS_1(pipe), _PLANE_WM_TRANS_2(pipe)))
 
 /* define the Watermark register on Ironlake */
-#define WM0_PIPEA_ILK		0x45100
+#define WM0_PIPEA_ILK		_MMIO(0x45100)
 #define  WM0_PIPE_PLANE_MASK	(0xffff<<16)
 #define  WM0_PIPE_PLANE_SHIFT	16
 #define  WM0_PIPE_SPRITE_MASK	(0xff<<8)
 #define  WM0_PIPE_SPRITE_SHIFT	8
 #define  WM0_PIPE_CURSOR_MASK	(0xff)
 
-#define WM0_PIPEB_ILK		0x45104
-#define WM0_PIPEC_IVB		0x45200
-#define WM1_LP_ILK		0x45108
+#define WM0_PIPEB_ILK		_MMIO(0x45104)
+#define WM0_PIPEC_IVB		_MMIO(0x45200)
+#define WM1_LP_ILK		_MMIO(0x45108)
 #define  WM1_LP_SR_EN		(1<<31)
 #define  WM1_LP_LATENCY_SHIFT	24
 #define  WM1_LP_LATENCY_MASK	(0x7f<<24)
@@ -4793,13 +4848,13 @@
 #define  WM1_LP_SR_MASK		(0x7ff<<8)
 #define  WM1_LP_SR_SHIFT	8
 #define  WM1_LP_CURSOR_MASK	(0xff)
-#define WM2_LP_ILK		0x4510c
+#define WM2_LP_ILK		_MMIO(0x4510c)
 #define  WM2_LP_EN		(1<<31)
-#define WM3_LP_ILK		0x45110
+#define WM3_LP_ILK		_MMIO(0x45110)
 #define  WM3_LP_EN		(1<<31)
-#define WM1S_LP_ILK		0x45120
-#define WM2S_LP_IVB		0x45124
-#define WM3S_LP_IVB		0x45128
+#define WM1S_LP_ILK		_MMIO(0x45120)
+#define WM2S_LP_IVB		_MMIO(0x45124)
+#define WM3S_LP_IVB		_MMIO(0x45128)
 #define  WM1S_LP_EN		(1<<31)
 
 #define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
@@ -4807,7 +4862,7 @@
 	 ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
 
 /* Memory latency timer register */
-#define MLTR_ILK		0x11222
+#define MLTR_ILK		_MMIO(0x11222)
 #define  MLTR_WM1_SHIFT		0
 #define  MLTR_WM2_SHIFT		8
 /* the unit of memory self-refresh latency time is 0.5us */
@@ -4815,7 +4870,7 @@
 
 
 /* the address where we get all kinds of latency value */
-#define SSKPD			0x5d10
+#define SSKPD			_MMIO(0x5d10)
 #define SSKPD_WM_MASK		0x3f
 #define SSKPD_WM0_SHIFT		0
 #define SSKPD_WM1_SHIFT		8
@@ -4848,8 +4903,8 @@
 /* GM45+ just has to be different */
 #define _PIPEA_FRMCOUNT_G4X	0x70040
 #define _PIPEA_FLIPCOUNT_G4X	0x70044
-#define PIPE_FRMCOUNT_G4X(pipe) _PIPE2(pipe, _PIPEA_FRMCOUNT_G4X)
-#define PIPE_FLIPCOUNT_G4X(pipe) _PIPE2(pipe, _PIPEA_FLIPCOUNT_G4X)
+#define PIPE_FRMCOUNT_G4X(pipe) _MMIO_PIPE2(pipe, _PIPEA_FRMCOUNT_G4X)
+#define PIPE_FLIPCOUNT_G4X(pipe) _MMIO_PIPE2(pipe, _PIPEA_FLIPCOUNT_G4X)
 
 /* Cursor A & B regs */
 #define _CURACNTR		0x70080
@@ -4887,7 +4942,7 @@
 #define   CURSOR_POS_SIGN       0x8000
 #define   CURSOR_X_SHIFT        0
 #define   CURSOR_Y_SHIFT        16
-#define CURSIZE			0x700a0
+#define CURSIZE			_MMIO(0x700a0)
 #define _CURBCNTR		0x700c0
 #define _CURBBASE		0x700c4
 #define _CURBPOS		0x700c8
@@ -4896,7 +4951,7 @@
 #define _CURBBASE_IVB		0x71084
 #define _CURBPOS_IVB		0x71088
 
-#define _CURSOR2(pipe, reg) (dev_priv->info.cursor_offsets[(pipe)] - \
+#define _CURSOR2(pipe, reg) _MMIO(dev_priv->info.cursor_offsets[(pipe)] - \
 	dev_priv->info.cursor_offsets[PIPE_A] + (reg) + \
 	dev_priv->info.display_mmio_offset)
 
@@ -4957,16 +5012,16 @@
 #define _DSPAOFFSET				0x701A4 /* HSW */
 #define _DSPASURFLIVE				0x701AC
 
-#define DSPCNTR(plane) _PIPE2(plane, _DSPACNTR)
-#define DSPADDR(plane) _PIPE2(plane, _DSPAADDR)
-#define DSPSTRIDE(plane) _PIPE2(plane, _DSPASTRIDE)
-#define DSPPOS(plane) _PIPE2(plane, _DSPAPOS)
-#define DSPSIZE(plane) _PIPE2(plane, _DSPASIZE)
-#define DSPSURF(plane) _PIPE2(plane, _DSPASURF)
-#define DSPTILEOFF(plane) _PIPE2(plane, _DSPATILEOFF)
-#define DSPLINOFF(plane) DSPADDR(plane)
-#define DSPOFFSET(plane) _PIPE2(plane, _DSPAOFFSET)
-#define DSPSURFLIVE(plane) _PIPE2(plane, _DSPASURFLIVE)
+#define DSPCNTR(plane)		_MMIO_PIPE2(plane, _DSPACNTR)
+#define DSPADDR(plane)		_MMIO_PIPE2(plane, _DSPAADDR)
+#define DSPSTRIDE(plane)	_MMIO_PIPE2(plane, _DSPASTRIDE)
+#define DSPPOS(plane)		_MMIO_PIPE2(plane, _DSPAPOS)
+#define DSPSIZE(plane)		_MMIO_PIPE2(plane, _DSPASIZE)
+#define DSPSURF(plane)		_MMIO_PIPE2(plane, _DSPASURF)
+#define DSPTILEOFF(plane)	_MMIO_PIPE2(plane, _DSPATILEOFF)
+#define DSPLINOFF(plane)	DSPADDR(plane)
+#define DSPOFFSET(plane)	_MMIO_PIPE2(plane, _DSPAOFFSET)
+#define DSPSURFLIVE(plane)	_MMIO_PIPE2(plane, _DSPASURFLIVE)
 
 /* CHV pipe B blender and primary plane */
 #define _CHV_BLEND_A		0x60a00
@@ -4980,11 +5035,11 @@
 #define _PRIMCNSTALPHA_A	0x60a10
 #define   PRIM_CONST_ALPHA_ENABLE	(1<<31)
 
-#define CHV_BLEND(pipe) _TRANSCODER2(pipe, _CHV_BLEND_A)
-#define CHV_CANVAS(pipe) _TRANSCODER2(pipe, _CHV_CANVAS_A)
-#define PRIMPOS(plane) _TRANSCODER2(plane, _PRIMPOS_A)
-#define PRIMSIZE(plane) _TRANSCODER2(plane, _PRIMSIZE_A)
-#define PRIMCNSTALPHA(plane) _TRANSCODER2(plane, _PRIMCNSTALPHA_A)
+#define CHV_BLEND(pipe)		_MMIO_TRANS2(pipe, _CHV_BLEND_A)
+#define CHV_CANVAS(pipe)	_MMIO_TRANS2(pipe, _CHV_CANVAS_A)
+#define PRIMPOS(plane)		_MMIO_TRANS2(plane, _PRIMPOS_A)
+#define PRIMSIZE(plane)		_MMIO_TRANS2(plane, _PRIMSIZE_A)
+#define PRIMCNSTALPHA(plane)	_MMIO_TRANS2(plane, _PRIMCNSTALPHA_A)
 
 /* Display/Sprite base address macros */
 #define DISP_BASEADDR_MASK	(0xfffff000)
@@ -5002,9 +5057,10 @@
  * [10:1f] all
  * [30:32] all
  */
-#define SWF0(i)	(dev_priv->info.display_mmio_offset + 0x70410 + (i) * 4)
-#define SWF1(i)	(dev_priv->info.display_mmio_offset + 0x71410 + (i) * 4)
-#define SWF3(i)	(dev_priv->info.display_mmio_offset + 0x72414 + (i) * 4)
+#define SWF0(i)	_MMIO(dev_priv->info.display_mmio_offset + 0x70410 + (i) * 4)
+#define SWF1(i)	_MMIO(dev_priv->info.display_mmio_offset + 0x71410 + (i) * 4)
+#define SWF3(i)	_MMIO(dev_priv->info.display_mmio_offset + 0x72414 + (i) * 4)
+#define SWF_ILK(i)	_MMIO(0x4F000 + (i) * 4)
 
 /* Pipe B */
 #define _PIPEBDSL		(dev_priv->info.display_mmio_offset + 0x71000)
@@ -5086,18 +5142,18 @@
 #define _DVSBSCALE		0x73204
 #define _DVSBGAMC		0x73300
 
-#define DVSCNTR(pipe) _PIPE(pipe, _DVSACNTR, _DVSBCNTR)
-#define DVSLINOFF(pipe) _PIPE(pipe, _DVSALINOFF, _DVSBLINOFF)
-#define DVSSTRIDE(pipe) _PIPE(pipe, _DVSASTRIDE, _DVSBSTRIDE)
-#define DVSPOS(pipe) _PIPE(pipe, _DVSAPOS, _DVSBPOS)
-#define DVSSURF(pipe) _PIPE(pipe, _DVSASURF, _DVSBSURF)
-#define DVSKEYMAX(pipe) _PIPE(pipe, _DVSAKEYMAXVAL, _DVSBKEYMAXVAL)
-#define DVSSIZE(pipe) _PIPE(pipe, _DVSASIZE, _DVSBSIZE)
-#define DVSSCALE(pipe) _PIPE(pipe, _DVSASCALE, _DVSBSCALE)
-#define DVSTILEOFF(pipe) _PIPE(pipe, _DVSATILEOFF, _DVSBTILEOFF)
-#define DVSKEYVAL(pipe) _PIPE(pipe, _DVSAKEYVAL, _DVSBKEYVAL)
-#define DVSKEYMSK(pipe) _PIPE(pipe, _DVSAKEYMSK, _DVSBKEYMSK)
-#define DVSSURFLIVE(pipe) _PIPE(pipe, _DVSASURFLIVE, _DVSBSURFLIVE)
+#define DVSCNTR(pipe) _MMIO_PIPE(pipe, _DVSACNTR, _DVSBCNTR)
+#define DVSLINOFF(pipe) _MMIO_PIPE(pipe, _DVSALINOFF, _DVSBLINOFF)
+#define DVSSTRIDE(pipe) _MMIO_PIPE(pipe, _DVSASTRIDE, _DVSBSTRIDE)
+#define DVSPOS(pipe) _MMIO_PIPE(pipe, _DVSAPOS, _DVSBPOS)
+#define DVSSURF(pipe) _MMIO_PIPE(pipe, _DVSASURF, _DVSBSURF)
+#define DVSKEYMAX(pipe) _MMIO_PIPE(pipe, _DVSAKEYMAXVAL, _DVSBKEYMAXVAL)
+#define DVSSIZE(pipe) _MMIO_PIPE(pipe, _DVSASIZE, _DVSBSIZE)
+#define DVSSCALE(pipe) _MMIO_PIPE(pipe, _DVSASCALE, _DVSBSCALE)
+#define DVSTILEOFF(pipe) _MMIO_PIPE(pipe, _DVSATILEOFF, _DVSBTILEOFF)
+#define DVSKEYVAL(pipe) _MMIO_PIPE(pipe, _DVSAKEYVAL, _DVSBKEYVAL)
+#define DVSKEYMSK(pipe) _MMIO_PIPE(pipe, _DVSAKEYMSK, _DVSBKEYMSK)
+#define DVSSURFLIVE(pipe) _MMIO_PIPE(pipe, _DVSASURFLIVE, _DVSBSURFLIVE)
 
 #define _SPRA_CTL		0x70280
 #define   SPRITE_ENABLE			(1<<31)
@@ -5160,20 +5216,20 @@
 #define _SPRB_SCALE		0x71304
 #define _SPRB_GAMC		0x71400
 
-#define SPRCTL(pipe) _PIPE(pipe, _SPRA_CTL, _SPRB_CTL)
-#define SPRLINOFF(pipe) _PIPE(pipe, _SPRA_LINOFF, _SPRB_LINOFF)
-#define SPRSTRIDE(pipe) _PIPE(pipe, _SPRA_STRIDE, _SPRB_STRIDE)
-#define SPRPOS(pipe) _PIPE(pipe, _SPRA_POS, _SPRB_POS)
-#define SPRSIZE(pipe) _PIPE(pipe, _SPRA_SIZE, _SPRB_SIZE)
-#define SPRKEYVAL(pipe) _PIPE(pipe, _SPRA_KEYVAL, _SPRB_KEYVAL)
-#define SPRKEYMSK(pipe) _PIPE(pipe, _SPRA_KEYMSK, _SPRB_KEYMSK)
-#define SPRSURF(pipe) _PIPE(pipe, _SPRA_SURF, _SPRB_SURF)
-#define SPRKEYMAX(pipe) _PIPE(pipe, _SPRA_KEYMAX, _SPRB_KEYMAX)
-#define SPRTILEOFF(pipe) _PIPE(pipe, _SPRA_TILEOFF, _SPRB_TILEOFF)
-#define SPROFFSET(pipe) _PIPE(pipe, _SPRA_OFFSET, _SPRB_OFFSET)
-#define SPRSCALE(pipe) _PIPE(pipe, _SPRA_SCALE, _SPRB_SCALE)
-#define SPRGAMC(pipe) _PIPE(pipe, _SPRA_GAMC, _SPRB_GAMC)
-#define SPRSURFLIVE(pipe) _PIPE(pipe, _SPRA_SURFLIVE, _SPRB_SURFLIVE)
+#define SPRCTL(pipe) _MMIO_PIPE(pipe, _SPRA_CTL, _SPRB_CTL)
+#define SPRLINOFF(pipe) _MMIO_PIPE(pipe, _SPRA_LINOFF, _SPRB_LINOFF)
+#define SPRSTRIDE(pipe) _MMIO_PIPE(pipe, _SPRA_STRIDE, _SPRB_STRIDE)
+#define SPRPOS(pipe) _MMIO_PIPE(pipe, _SPRA_POS, _SPRB_POS)
+#define SPRSIZE(pipe) _MMIO_PIPE(pipe, _SPRA_SIZE, _SPRB_SIZE)
+#define SPRKEYVAL(pipe) _MMIO_PIPE(pipe, _SPRA_KEYVAL, _SPRB_KEYVAL)
+#define SPRKEYMSK(pipe) _MMIO_PIPE(pipe, _SPRA_KEYMSK, _SPRB_KEYMSK)
+#define SPRSURF(pipe) _MMIO_PIPE(pipe, _SPRA_SURF, _SPRB_SURF)
+#define SPRKEYMAX(pipe) _MMIO_PIPE(pipe, _SPRA_KEYMAX, _SPRB_KEYMAX)
+#define SPRTILEOFF(pipe) _MMIO_PIPE(pipe, _SPRA_TILEOFF, _SPRB_TILEOFF)
+#define SPROFFSET(pipe) _MMIO_PIPE(pipe, _SPRA_OFFSET, _SPRB_OFFSET)
+#define SPRSCALE(pipe) _MMIO_PIPE(pipe, _SPRA_SCALE, _SPRB_SCALE)
+#define SPRGAMC(pipe) _MMIO_PIPE(pipe, _SPRA_GAMC, _SPRB_GAMC)
+#define SPRSURFLIVE(pipe) _MMIO_PIPE(pipe, _SPRA_SURFLIVE, _SPRB_SURFLIVE)
 
 #define _SPACNTR		(VLV_DISPLAY_BASE + 0x72180)
 #define   SP_ENABLE			(1<<31)
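(The per-pipe sprite macros above follow the two-instance pattern used throughout this patch: _PIPE() interpolates between the instance-A and instance-B offsets, and _MMIO_PIPE() wraps the result in the typed handle. A rough sketch, assuming the usual i915 helper shapes:

/* Pick the per-pipe offset by interpolating between instance A and B. */
#define _PIPE(pipe, a, b)	((a) + (pipe) * ((b) - (a)))
#define _MMIO_PIPE(pipe, a, b)	_MMIO(_PIPE(pipe, a, b))

So, for example, SPRCTL(PIPE_B) expands to _MMIO(_SPRA_CTL + 1 * (_SPRB_CTL - _SPRA_CTL)), i.e. the pipe-B sprite control offset wrapped as an i915_reg_t; only the type of the macro's result changes, not the offset arithmetic.)
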
@@ -5223,18 +5279,18 @@
 #define _SPBCONSTALPHA		(VLV_DISPLAY_BASE + 0x722a8)
 #define _SPBGAMC		(VLV_DISPLAY_BASE + 0x722f4)
 
-#define SPCNTR(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPACNTR, _SPBCNTR)
-#define SPLINOFF(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPALINOFF, _SPBLINOFF)
-#define SPSTRIDE(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPASTRIDE, _SPBSTRIDE)
-#define SPPOS(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAPOS, _SPBPOS)
-#define SPSIZE(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPASIZE, _SPBSIZE)
-#define SPKEYMINVAL(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAKEYMINVAL, _SPBKEYMINVAL)
-#define SPKEYMSK(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAKEYMSK, _SPBKEYMSK)
-#define SPSURF(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPASURF, _SPBSURF)
-#define SPKEYMAXVAL(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAKEYMAXVAL, _SPBKEYMAXVAL)
-#define SPTILEOFF(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPATILEOFF, _SPBTILEOFF)
-#define SPCONSTALPHA(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPACONSTALPHA, _SPBCONSTALPHA)
-#define SPGAMC(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAGAMC, _SPBGAMC)
+#define SPCNTR(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPACNTR, _SPBCNTR)
+#define SPLINOFF(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPALINOFF, _SPBLINOFF)
+#define SPSTRIDE(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPASTRIDE, _SPBSTRIDE)
+#define SPPOS(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAPOS, _SPBPOS)
+#define SPSIZE(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPASIZE, _SPBSIZE)
+#define SPKEYMINVAL(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAKEYMINVAL, _SPBKEYMINVAL)
+#define SPKEYMSK(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAKEYMSK, _SPBKEYMSK)
+#define SPSURF(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPASURF, _SPBSURF)
+#define SPKEYMAXVAL(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAKEYMAXVAL, _SPBKEYMAXVAL)
+#define SPTILEOFF(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPATILEOFF, _SPBTILEOFF)
+#define SPCONSTALPHA(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPACONSTALPHA, _SPBCONSTALPHA)
+#define SPGAMC(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAGAMC, _SPBGAMC)
 
 /*
  * CHV pipe B sprite CSC
@@ -5243,29 +5299,29 @@
  * |yg| = |c3 c4 c5| x |yg + yg_ioff| + |yg_ooff|
  * |cb|   |c6 c7 c8|   |cb + cr_ioff|   |cb_ooff|
  */
-#define SPCSCYGOFF(sprite)	(VLV_DISPLAY_BASE + 0x6d900 + (sprite) * 0x1000)
-#define SPCSCCBOFF(sprite)	(VLV_DISPLAY_BASE + 0x6d904 + (sprite) * 0x1000)
-#define SPCSCCROFF(sprite)	(VLV_DISPLAY_BASE + 0x6d908 + (sprite) * 0x1000)
+#define SPCSCYGOFF(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d900 + (sprite) * 0x1000)
+#define SPCSCCBOFF(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d904 + (sprite) * 0x1000)
+#define SPCSCCROFF(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d908 + (sprite) * 0x1000)
 #define  SPCSC_OOFF(x)		(((x) & 0x7ff) << 16) /* s11 */
 #define  SPCSC_IOFF(x)		(((x) & 0x7ff) << 0) /* s11 */
 
-#define SPCSCC01(sprite)	(VLV_DISPLAY_BASE + 0x6d90c + (sprite) * 0x1000)
-#define SPCSCC23(sprite)	(VLV_DISPLAY_BASE + 0x6d910 + (sprite) * 0x1000)
-#define SPCSCC45(sprite)	(VLV_DISPLAY_BASE + 0x6d914 + (sprite) * 0x1000)
-#define SPCSCC67(sprite)	(VLV_DISPLAY_BASE + 0x6d918 + (sprite) * 0x1000)
-#define SPCSCC8(sprite)		(VLV_DISPLAY_BASE + 0x6d91c + (sprite) * 0x1000)
+#define SPCSCC01(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d90c + (sprite) * 0x1000)
+#define SPCSCC23(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d910 + (sprite) * 0x1000)
+#define SPCSCC45(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d914 + (sprite) * 0x1000)
+#define SPCSCC67(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d918 + (sprite) * 0x1000)
+#define SPCSCC8(sprite)		_MMIO(VLV_DISPLAY_BASE + 0x6d91c + (sprite) * 0x1000)
 #define  SPCSC_C1(x)		(((x) & 0x7fff) << 16) /* s3.12 */
 #define  SPCSC_C0(x)		(((x) & 0x7fff) << 0) /* s3.12 */
 
-#define SPCSCYGICLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d920 + (sprite) * 0x1000)
-#define SPCSCCBICLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d924 + (sprite) * 0x1000)
-#define SPCSCCRICLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d928 + (sprite) * 0x1000)
+#define SPCSCYGICLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d920 + (sprite) * 0x1000)
+#define SPCSCCBICLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d924 + (sprite) * 0x1000)
+#define SPCSCCRICLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d928 + (sprite) * 0x1000)
 #define  SPCSC_IMAX(x)		(((x) & 0x7ff) << 16) /* s11 */
 #define  SPCSC_IMIN(x)		(((x) & 0x7ff) << 0) /* s11 */
 
-#define SPCSCYGOCLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d92c + (sprite) * 0x1000)
-#define SPCSCCBOCLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d930 + (sprite) * 0x1000)
-#define SPCSCCROCLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d934 + (sprite) * 0x1000)
+#define SPCSCYGOCLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d92c + (sprite) * 0x1000)
+#define SPCSCCBOCLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d930 + (sprite) * 0x1000)
+#define SPCSCCROCLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d934 + (sprite) * 0x1000)
 #define  SPCSC_OMAX(x)		((x) << 16) /* u10 */
 #define  SPCSC_OMIN(x)		((x) << 0) /* u10 */
 
@@ -5346,7 +5402,7 @@
 #define _PLANE_CTL_2(pipe)	_PIPE(pipe, _PLANE_CTL_2_A, _PLANE_CTL_2_B)
 #define _PLANE_CTL_3(pipe)	_PIPE(pipe, _PLANE_CTL_3_A, _PLANE_CTL_3_B)
 #define PLANE_CTL(pipe, plane)	\
-	_PLANE(plane, _PLANE_CTL_1(pipe), _PLANE_CTL_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_CTL_1(pipe), _PLANE_CTL_2(pipe))
 
 #define _PLANE_STRIDE_1_B			0x71188
 #define _PLANE_STRIDE_2_B			0x71288
@@ -5358,7 +5414,7 @@
 #define _PLANE_STRIDE_3(pipe)	\
 	_PIPE(pipe, _PLANE_STRIDE_3_A, _PLANE_STRIDE_3_B)
 #define PLANE_STRIDE(pipe, plane)	\
-	_PLANE(plane, _PLANE_STRIDE_1(pipe), _PLANE_STRIDE_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_STRIDE_1(pipe), _PLANE_STRIDE_2(pipe))
 
 #define _PLANE_POS_1_B				0x7118c
 #define _PLANE_POS_2_B				0x7128c
@@ -5367,7 +5423,7 @@
 #define _PLANE_POS_2(pipe)	_PIPE(pipe, _PLANE_POS_2_A, _PLANE_POS_2_B)
 #define _PLANE_POS_3(pipe)	_PIPE(pipe, _PLANE_POS_3_A, _PLANE_POS_3_B)
 #define PLANE_POS(pipe, plane)	\
-	_PLANE(plane, _PLANE_POS_1(pipe), _PLANE_POS_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_POS_1(pipe), _PLANE_POS_2(pipe))
 
 #define _PLANE_SIZE_1_B				0x71190
 #define _PLANE_SIZE_2_B				0x71290
@@ -5376,7 +5432,7 @@
 #define _PLANE_SIZE_2(pipe)	_PIPE(pipe, _PLANE_SIZE_2_A, _PLANE_SIZE_2_B)
 #define _PLANE_SIZE_3(pipe)	_PIPE(pipe, _PLANE_SIZE_3_A, _PLANE_SIZE_3_B)
 #define PLANE_SIZE(pipe, plane)	\
-	_PLANE(plane, _PLANE_SIZE_1(pipe), _PLANE_SIZE_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_SIZE_1(pipe), _PLANE_SIZE_2(pipe))
 
 #define _PLANE_SURF_1_B				0x7119c
 #define _PLANE_SURF_2_B				0x7129c
@@ -5385,35 +5441,35 @@
 #define _PLANE_SURF_2(pipe)	_PIPE(pipe, _PLANE_SURF_2_A, _PLANE_SURF_2_B)
 #define _PLANE_SURF_3(pipe)	_PIPE(pipe, _PLANE_SURF_3_A, _PLANE_SURF_3_B)
 #define PLANE_SURF(pipe, plane)	\
-	_PLANE(plane, _PLANE_SURF_1(pipe), _PLANE_SURF_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_SURF_1(pipe), _PLANE_SURF_2(pipe))
 
 #define _PLANE_OFFSET_1_B			0x711a4
 #define _PLANE_OFFSET_2_B			0x712a4
 #define _PLANE_OFFSET_1(pipe) _PIPE(pipe, _PLANE_OFFSET_1_A, _PLANE_OFFSET_1_B)
 #define _PLANE_OFFSET_2(pipe) _PIPE(pipe, _PLANE_OFFSET_2_A, _PLANE_OFFSET_2_B)
 #define PLANE_OFFSET(pipe, plane)	\
-	_PLANE(plane, _PLANE_OFFSET_1(pipe), _PLANE_OFFSET_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_OFFSET_1(pipe), _PLANE_OFFSET_2(pipe))
 
 #define _PLANE_KEYVAL_1_B			0x71194
 #define _PLANE_KEYVAL_2_B			0x71294
 #define _PLANE_KEYVAL_1(pipe) _PIPE(pipe, _PLANE_KEYVAL_1_A, _PLANE_KEYVAL_1_B)
 #define _PLANE_KEYVAL_2(pipe) _PIPE(pipe, _PLANE_KEYVAL_2_A, _PLANE_KEYVAL_2_B)
 #define PLANE_KEYVAL(pipe, plane)	\
-	_PLANE(plane, _PLANE_KEYVAL_1(pipe), _PLANE_KEYVAL_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_KEYVAL_1(pipe), _PLANE_KEYVAL_2(pipe))
 
 #define _PLANE_KEYMSK_1_B			0x71198
 #define _PLANE_KEYMSK_2_B			0x71298
 #define _PLANE_KEYMSK_1(pipe) _PIPE(pipe, _PLANE_KEYMSK_1_A, _PLANE_KEYMSK_1_B)
 #define _PLANE_KEYMSK_2(pipe) _PIPE(pipe, _PLANE_KEYMSK_2_A, _PLANE_KEYMSK_2_B)
 #define PLANE_KEYMSK(pipe, plane)	\
-	_PLANE(plane, _PLANE_KEYMSK_1(pipe), _PLANE_KEYMSK_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_KEYMSK_1(pipe), _PLANE_KEYMSK_2(pipe))
 
 #define _PLANE_KEYMAX_1_B			0x711a0
 #define _PLANE_KEYMAX_2_B			0x712a0
 #define _PLANE_KEYMAX_1(pipe) _PIPE(pipe, _PLANE_KEYMAX_1_A, _PLANE_KEYMAX_1_B)
 #define _PLANE_KEYMAX_2(pipe) _PIPE(pipe, _PLANE_KEYMAX_2_A, _PLANE_KEYMAX_2_B)
 #define PLANE_KEYMAX(pipe, plane)	\
-	_PLANE(plane, _PLANE_KEYMAX_1(pipe), _PLANE_KEYMAX_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_KEYMAX_1(pipe), _PLANE_KEYMAX_2(pipe))
 
 #define _PLANE_BUF_CFG_1_B			0x7127c
 #define _PLANE_BUF_CFG_2_B			0x7137c
@@ -5422,7 +5478,7 @@
 #define _PLANE_BUF_CFG_2(pipe)	\
 	_PIPE(pipe, _PLANE_BUF_CFG_2_A, _PLANE_BUF_CFG_2_B)
 #define PLANE_BUF_CFG(pipe, plane)	\
-	_PLANE(plane, _PLANE_BUF_CFG_1(pipe), _PLANE_BUF_CFG_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_BUF_CFG_1(pipe), _PLANE_BUF_CFG_2(pipe))
 
 #define _PLANE_NV12_BUF_CFG_1_B		0x71278
 #define _PLANE_NV12_BUF_CFG_2_B		0x71378
@@ -5431,26 +5487,26 @@
 #define _PLANE_NV12_BUF_CFG_2(pipe)	\
 	_PIPE(pipe, _PLANE_NV12_BUF_CFG_2_A, _PLANE_NV12_BUF_CFG_2_B)
 #define PLANE_NV12_BUF_CFG(pipe, plane)	\
-	_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe))
 
 /* SKL new cursor registers */
 #define _CUR_BUF_CFG_A				0x7017c
 #define _CUR_BUF_CFG_B				0x7117c
-#define CUR_BUF_CFG(pipe)	_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B)
+#define CUR_BUF_CFG(pipe)	_MMIO_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B)
 
 /* VBIOS regs */
-#define VGACNTRL		0x71400
+#define VGACNTRL		_MMIO(0x71400)
 # define VGA_DISP_DISABLE			(1 << 31)
 # define VGA_2X_MODE				(1 << 30)
 # define VGA_PIPE_B_SELECT			(1 << 29)
 
-#define VLV_VGACNTRL		(VLV_DISPLAY_BASE + 0x71400)
+#define VLV_VGACNTRL		_MMIO(VLV_DISPLAY_BASE + 0x71400)
 
 /* Ironlake */
 
-#define CPU_VGACNTRL	0x41000
+#define CPU_VGACNTRL	_MMIO(0x41000)
 
-#define DIGITAL_PORT_HOTPLUG_CNTRL	0x44030
+#define DIGITAL_PORT_HOTPLUG_CNTRL	_MMIO(0x44030)
 #define  DIGITAL_PORTA_HOTPLUG_ENABLE		(1 << 4)
 #define  DIGITAL_PORTA_PULSE_DURATION_2ms	(0 << 2) /* pre-HSW */
 #define  DIGITAL_PORTA_PULSE_DURATION_4_5ms	(1 << 2) /* pre-HSW */
@@ -5463,26 +5519,26 @@
 #define  DIGITAL_PORTA_HOTPLUG_LONG_DETECT	(2 << 0)
 
 /* refresh rate hardware control */
-#define RR_HW_CTL       0x45300
+#define RR_HW_CTL       _MMIO(0x45300)
 #define  RR_HW_LOW_POWER_FRAMES_MASK    0xff
 #define  RR_HW_HIGH_POWER_FRAMES_MASK   0xff00
 
-#define FDI_PLL_BIOS_0  0x46000
+#define FDI_PLL_BIOS_0  _MMIO(0x46000)
 #define  FDI_PLL_FB_CLOCK_MASK  0xff
-#define FDI_PLL_BIOS_1  0x46004
-#define FDI_PLL_BIOS_2  0x46008
-#define DISPLAY_PORT_PLL_BIOS_0         0x4600c
-#define DISPLAY_PORT_PLL_BIOS_1         0x46010
-#define DISPLAY_PORT_PLL_BIOS_2         0x46014
+#define FDI_PLL_BIOS_1  _MMIO(0x46004)
+#define FDI_PLL_BIOS_2  _MMIO(0x46008)
+#define DISPLAY_PORT_PLL_BIOS_0         _MMIO(0x4600c)
+#define DISPLAY_PORT_PLL_BIOS_1         _MMIO(0x46010)
+#define DISPLAY_PORT_PLL_BIOS_2         _MMIO(0x46014)
 
-#define PCH_3DCGDIS0		0x46020
+#define PCH_3DCGDIS0		_MMIO(0x46020)
 # define MARIUNIT_CLOCK_GATE_DISABLE		(1 << 18)
 # define SVSMUNIT_CLOCK_GATE_DISABLE		(1 << 1)
 
-#define PCH_3DCGDIS1		0x46024
+#define PCH_3DCGDIS1		_MMIO(0x46024)
 # define VFMUNIT_CLOCK_GATE_DISABLE		(1 << 11)
 
-#define FDI_PLL_FREQ_CTL        0x46030
+#define FDI_PLL_FREQ_CTL        _MMIO(0x46030)
 #define  FDI_PLL_FREQ_CHANGE_REQUEST    (1<<24)
 #define  FDI_PLL_FREQ_LOCK_LIMIT_MASK   0xfff00
 #define  FDI_PLL_FREQ_DISABLE_COUNT_LIMIT_MASK  0xff
@@ -5519,14 +5575,14 @@
 #define _PIPEB_LINK_M2		0x61048
 #define _PIPEB_LINK_N2		0x6104c
 
-#define PIPE_DATA_M1(tran) _TRANSCODER2(tran, _PIPEA_DATA_M1)
-#define PIPE_DATA_N1(tran) _TRANSCODER2(tran, _PIPEA_DATA_N1)
-#define PIPE_DATA_M2(tran) _TRANSCODER2(tran, _PIPEA_DATA_M2)
-#define PIPE_DATA_N2(tran) _TRANSCODER2(tran, _PIPEA_DATA_N2)
-#define PIPE_LINK_M1(tran) _TRANSCODER2(tran, _PIPEA_LINK_M1)
-#define PIPE_LINK_N1(tran) _TRANSCODER2(tran, _PIPEA_LINK_N1)
-#define PIPE_LINK_M2(tran) _TRANSCODER2(tran, _PIPEA_LINK_M2)
-#define PIPE_LINK_N2(tran) _TRANSCODER2(tran, _PIPEA_LINK_N2)
+#define PIPE_DATA_M1(tran) _MMIO_TRANS2(tran, _PIPEA_DATA_M1)
+#define PIPE_DATA_N1(tran) _MMIO_TRANS2(tran, _PIPEA_DATA_N1)
+#define PIPE_DATA_M2(tran) _MMIO_TRANS2(tran, _PIPEA_DATA_M2)
+#define PIPE_DATA_N2(tran) _MMIO_TRANS2(tran, _PIPEA_DATA_N2)
+#define PIPE_LINK_M1(tran) _MMIO_TRANS2(tran, _PIPEA_LINK_M1)
+#define PIPE_LINK_N1(tran) _MMIO_TRANS2(tran, _PIPEA_LINK_N1)
+#define PIPE_LINK_M2(tran) _MMIO_TRANS2(tran, _PIPEA_LINK_M2)
+#define PIPE_LINK_N2(tran) _MMIO_TRANS2(tran, _PIPEA_LINK_N2)
 
 /* CPU panel fitter */
 /* IVB+ has 3 fitters, 0 is 7x5 capable, the other two only 3x3 */
@@ -5549,11 +5605,11 @@
 #define _PFA_HSCALE		0x68090
 #define _PFB_HSCALE		0x68890
 
-#define PF_CTL(pipe)		_PIPE(pipe, _PFA_CTL_1, _PFB_CTL_1)
-#define PF_WIN_SZ(pipe)		_PIPE(pipe, _PFA_WIN_SZ, _PFB_WIN_SZ)
-#define PF_WIN_POS(pipe)	_PIPE(pipe, _PFA_WIN_POS, _PFB_WIN_POS)
-#define PF_VSCALE(pipe)		_PIPE(pipe, _PFA_VSCALE, _PFB_VSCALE)
-#define PF_HSCALE(pipe)		_PIPE(pipe, _PFA_HSCALE, _PFB_HSCALE)
+#define PF_CTL(pipe)		_MMIO_PIPE(pipe, _PFA_CTL_1, _PFB_CTL_1)
+#define PF_WIN_SZ(pipe)		_MMIO_PIPE(pipe, _PFA_WIN_SZ, _PFB_WIN_SZ)
+#define PF_WIN_POS(pipe)	_MMIO_PIPE(pipe, _PFA_WIN_POS, _PFB_WIN_POS)
+#define PF_VSCALE(pipe)		_MMIO_PIPE(pipe, _PFA_VSCALE, _PFB_VSCALE)
+#define PF_HSCALE(pipe)		_MMIO_PIPE(pipe, _PFA_HSCALE, _PFB_HSCALE)
 
 #define _PSA_CTL		0x68180
 #define _PSB_CTL		0x68980
@@ -5563,9 +5619,9 @@
 #define _PSA_WIN_POS		0x68170
 #define _PSB_WIN_POS		0x68970
 
-#define PS_CTL(pipe)		_PIPE(pipe, _PSA_CTL, _PSB_CTL)
-#define PS_WIN_SZ(pipe)		_PIPE(pipe, _PSA_WIN_SZ, _PSB_WIN_SZ)
-#define PS_WIN_POS(pipe)	_PIPE(pipe, _PSA_WIN_POS, _PSB_WIN_POS)
+#define PS_CTL(pipe)		_MMIO_PIPE(pipe, _PSA_CTL, _PSB_CTL)
+#define PS_WIN_SZ(pipe)		_MMIO_PIPE(pipe, _PSA_WIN_SZ, _PSB_WIN_SZ)
+#define PS_WIN_POS(pipe)	_MMIO_PIPE(pipe, _PSA_WIN_POS, _PSB_WIN_POS)
 
 /*
  * Skylake scalers
@@ -5654,48 +5710,63 @@
 #define _PS_ECC_STAT_1C     0x691D0
 
 #define _ID(id, a, b) ((a) + (id)*((b)-(a)))
-#define SKL_PS_CTRL(pipe, id) _PIPE(pipe,        \
+#define SKL_PS_CTRL(pipe, id) _MMIO_PIPE(pipe,        \
 			_ID(id, _PS_1A_CTRL, _PS_2A_CTRL),       \
 			_ID(id, _PS_1B_CTRL, _PS_2B_CTRL))
-#define SKL_PS_PWR_GATE(pipe, id) _PIPE(pipe,    \
+#define SKL_PS_PWR_GATE(pipe, id) _MMIO_PIPE(pipe,    \
 			_ID(id, _PS_PWR_GATE_1A, _PS_PWR_GATE_2A), \
 			_ID(id, _PS_PWR_GATE_1B, _PS_PWR_GATE_2B))
-#define SKL_PS_WIN_POS(pipe, id) _PIPE(pipe,     \
+#define SKL_PS_WIN_POS(pipe, id) _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_WIN_POS_1A, _PS_WIN_POS_2A), \
 			_ID(id, _PS_WIN_POS_1B, _PS_WIN_POS_2B))
-#define SKL_PS_WIN_SZ(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_WIN_SZ(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_WIN_SZ_1A, _PS_WIN_SZ_2A),   \
 			_ID(id, _PS_WIN_SZ_1B, _PS_WIN_SZ_2B))
-#define SKL_PS_VSCALE(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_VSCALE(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_VSCALE_1A, _PS_VSCALE_2A),   \
 			_ID(id, _PS_VSCALE_1B, _PS_VSCALE_2B))
-#define SKL_PS_HSCALE(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_HSCALE(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_HSCALE_1A, _PS_HSCALE_2A),   \
 			_ID(id, _PS_HSCALE_1B, _PS_HSCALE_2B))
-#define SKL_PS_VPHASE(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_VPHASE(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_VPHASE_1A, _PS_VPHASE_2A),   \
 			_ID(id, _PS_VPHASE_1B, _PS_VPHASE_2B))
-#define SKL_PS_HPHASE(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_HPHASE(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_HPHASE_1A, _PS_HPHASE_2A),   \
 			_ID(id, _PS_HPHASE_1B, _PS_HPHASE_2B))
-#define SKL_PS_ECC_STAT(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_ECC_STAT(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_ECC_STAT_1A, _PS_ECC_STAT_2A),   \
-			_ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B)
+			_ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B))
 
 /* legacy palette */
 #define _LGC_PALETTE_A           0x4a000
 #define _LGC_PALETTE_B           0x4a800
-#define LGC_PALETTE(pipe, i) (_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4)
+#define LGC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4)
 
 #define _GAMMA_MODE_A		0x4a480
 #define _GAMMA_MODE_B		0x4ac80
-#define GAMMA_MODE(pipe) _PIPE(pipe, _GAMMA_MODE_A, _GAMMA_MODE_B)
+#define GAMMA_MODE(pipe) _MMIO_PIPE(pipe, _GAMMA_MODE_A, _GAMMA_MODE_B)
 #define GAMMA_MODE_MODE_MASK	(3 << 0)
 #define GAMMA_MODE_MODE_8BIT	(0 << 0)
 #define GAMMA_MODE_MODE_10BIT	(1 << 0)
 #define GAMMA_MODE_MODE_12BIT	(2 << 0)
 #define GAMMA_MODE_MODE_SPLIT	(3 << 0)
 
+/* DMC/CSR */
+#define CSR_PROGRAM(i)		_MMIO(0x80000 + (i) * 4)
+#define CSR_SSP_BASE_ADDR_GEN9	0x00002FC0
+#define CSR_HTP_ADDR_SKL	0x00500034
+#define CSR_SSP_BASE		_MMIO(0x8F074)
+#define CSR_HTP_SKL		_MMIO(0x8F004)
+#define CSR_LAST_WRITE		_MMIO(0x8F034)
+#define CSR_LAST_WRITE_VALUE	0xc003b400
+/* MMIO address range for CSR program (0x80000 - 0x82FFF) */
+#define CSR_MMIO_START_RANGE	0x80000
+#define CSR_MMIO_END_RANGE	0x8FFFF
+#define SKL_CSR_DC3_DC5_COUNT	_MMIO(0x80030)
+#define SKL_CSR_DC5_DC6_COUNT	_MMIO(0x8002C)
+#define BXT_CSR_DC3_DC5_COUNT	_MMIO(0x80038)
+
 /* interrupts */
 #define DE_MASTER_IRQ_CONTROL   (1 << 31)
 #define DE_SPRITEB_FLIP_DONE    (1 << 29)
@@ -5747,20 +5818,20 @@
 #define DE_PIPEA_VBLANK_IVB		(1<<0)
 #define DE_PIPE_VBLANK_IVB(pipe)	(1 << ((pipe) * 5))
 
-#define VLV_MASTER_IER			0x4400c /* Gunit master IER */
+#define VLV_MASTER_IER			_MMIO(0x4400c) /* Gunit master IER */
 #define   MASTER_INTERRUPT_ENABLE	(1<<31)
 
-#define DEISR   0x44000
-#define DEIMR   0x44004
-#define DEIIR   0x44008
-#define DEIER   0x4400c
+#define DEISR   _MMIO(0x44000)
+#define DEIMR   _MMIO(0x44004)
+#define DEIIR   _MMIO(0x44008)
+#define DEIER   _MMIO(0x4400c)
 
-#define GTISR   0x44010
-#define GTIMR   0x44014
-#define GTIIR   0x44018
-#define GTIER   0x4401c
+#define GTISR   _MMIO(0x44010)
+#define GTIMR   _MMIO(0x44014)
+#define GTIIR   _MMIO(0x44018)
+#define GTIER   _MMIO(0x4401c)
 
-#define GEN8_MASTER_IRQ			0x44200
+#define GEN8_MASTER_IRQ			_MMIO(0x44200)
 #define  GEN8_MASTER_IRQ_CONTROL	(1<<31)
 #define  GEN8_PCU_IRQ			(1<<30)
 #define  GEN8_DE_PCH_IRQ		(1<<23)
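(With the display engine interrupt registers above — DEISR/DEIMR/DEIIR/DEIER and friends — now typed, the driver's I915_READ/I915_WRITE accessors take an i915_reg_t, so a stray raw offset at a call site should fail to compile instead of silently touching the wrong register. A hedged usage sketch; the call site itself is made up for illustration, only the register and bit names come from the header:

/* Illustrative only: flip the master enable bit in the DE interrupt enable reg. */
static void example_toggle_de_master_irq(struct drm_i915_private *dev_priv, bool enable)
{
	u32 deier = I915_READ(DEIER);

	if (enable)
		deier |= DE_MASTER_IRQ_CONTROL;
	else
		deier &= ~DE_MASTER_IRQ_CONTROL;

	I915_WRITE(DEIER, deier);
	POSTING_READ(DEIER);	/* flush the write before returning */
}

Note that I915_READ(0x4400c) would no longer build here, since 0x4400c is not an i915_reg_t; that compile-time check is the payoff of the conversion.)
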
@@ -5777,10 +5848,10 @@
 #define  GEN8_GT_BCS_IRQ		(1<<1)
 #define  GEN8_GT_RCS_IRQ		(1<<0)
 
-#define GEN8_GT_ISR(which) (0x44300 + (0x10 * (which)))
-#define GEN8_GT_IMR(which) (0x44304 + (0x10 * (which)))
-#define GEN8_GT_IIR(which) (0x44308 + (0x10 * (which)))
-#define GEN8_GT_IER(which) (0x4430c + (0x10 * (which)))
+#define GEN8_GT_ISR(which) _MMIO(0x44300 + (0x10 * (which)))
+#define GEN8_GT_IMR(which) _MMIO(0x44304 + (0x10 * (which)))
+#define GEN8_GT_IIR(which) _MMIO(0x44308 + (0x10 * (which)))
+#define GEN8_GT_IER(which) _MMIO(0x4430c + (0x10 * (which)))
 
 #define GEN8_RCS_IRQ_SHIFT 0
 #define GEN8_BCS_IRQ_SHIFT 16
@@ -5789,10 +5860,10 @@
 #define GEN8_VECS_IRQ_SHIFT 0
 #define GEN8_WD_IRQ_SHIFT 16
 
-#define GEN8_DE_PIPE_ISR(pipe) (0x44400 + (0x10 * (pipe)))
-#define GEN8_DE_PIPE_IMR(pipe) (0x44404 + (0x10 * (pipe)))
-#define GEN8_DE_PIPE_IIR(pipe) (0x44408 + (0x10 * (pipe)))
-#define GEN8_DE_PIPE_IER(pipe) (0x4440c + (0x10 * (pipe)))
+#define GEN8_DE_PIPE_ISR(pipe) _MMIO(0x44400 + (0x10 * (pipe)))
+#define GEN8_DE_PIPE_IMR(pipe) _MMIO(0x44404 + (0x10 * (pipe)))
+#define GEN8_DE_PIPE_IIR(pipe) _MMIO(0x44408 + (0x10 * (pipe)))
+#define GEN8_DE_PIPE_IER(pipe) _MMIO(0x4440c + (0x10 * (pipe)))
 #define  GEN8_PIPE_FIFO_UNDERRUN	(1 << 31)
 #define  GEN8_PIPE_CDCLK_CRC_ERROR	(1 << 29)
 #define  GEN8_PIPE_CDCLK_CRC_DONE	(1 << 28)
@@ -5825,10 +5896,10 @@
 	 GEN9_PIPE_PLANE2_FAULT | \
 	 GEN9_PIPE_PLANE1_FAULT)
 
-#define GEN8_DE_PORT_ISR 0x44440
-#define GEN8_DE_PORT_IMR 0x44444
-#define GEN8_DE_PORT_IIR 0x44448
-#define GEN8_DE_PORT_IER 0x4444c
+#define GEN8_DE_PORT_ISR _MMIO(0x44440)
+#define GEN8_DE_PORT_IMR _MMIO(0x44444)
+#define GEN8_DE_PORT_IIR _MMIO(0x44448)
+#define GEN8_DE_PORT_IER _MMIO(0x4444c)
 #define  GEN9_AUX_CHANNEL_D		(1 << 27)
 #define  GEN9_AUX_CHANNEL_C		(1 << 26)
 #define  GEN9_AUX_CHANNEL_B		(1 << 25)
@@ -5842,23 +5913,23 @@
 #define  BXT_DE_PORT_GMBUS		(1 << 1)
 #define  GEN8_AUX_CHANNEL_A		(1 << 0)
 
-#define GEN8_DE_MISC_ISR 0x44460
-#define GEN8_DE_MISC_IMR 0x44464
-#define GEN8_DE_MISC_IIR 0x44468
-#define GEN8_DE_MISC_IER 0x4446c
+#define GEN8_DE_MISC_ISR _MMIO(0x44460)
+#define GEN8_DE_MISC_IMR _MMIO(0x44464)
+#define GEN8_DE_MISC_IIR _MMIO(0x44468)
+#define GEN8_DE_MISC_IER _MMIO(0x4446c)
 #define  GEN8_DE_MISC_GSE		(1 << 27)
 
-#define GEN8_PCU_ISR 0x444e0
-#define GEN8_PCU_IMR 0x444e4
-#define GEN8_PCU_IIR 0x444e8
-#define GEN8_PCU_IER 0x444ec
+#define GEN8_PCU_ISR _MMIO(0x444e0)
+#define GEN8_PCU_IMR _MMIO(0x444e4)
+#define GEN8_PCU_IIR _MMIO(0x444e8)
+#define GEN8_PCU_IER _MMIO(0x444ec)
 
-#define ILK_DISPLAY_CHICKEN2	0x42004
+#define ILK_DISPLAY_CHICKEN2	_MMIO(0x42004)
 /* Required on all Ironlake and Sandybridge according to the B-Spec. */
 #define  ILK_ELPIN_409_SELECT	(1 << 25)
 #define  ILK_DPARB_GATE	(1<<22)
 #define  ILK_VSDPFD_FULL	(1<<21)
-#define FUSE_STRAP			0x42014
+#define FUSE_STRAP			_MMIO(0x42014)
 #define  ILK_INTERNAL_GRAPHICS_DISABLE	(1 << 31)
 #define  ILK_INTERNAL_DISPLAY_DISABLE	(1 << 30)
 #define  ILK_DISPLAY_DEBUG_DISABLE	(1 << 29)
@@ -5867,18 +5938,18 @@
 #define  HSW_CDCLK_LIMIT		(1 << 24)
 #define  ILK_DESKTOP			(1 << 23)
 
-#define ILK_DSPCLK_GATE_D			0x42020
+#define ILK_DSPCLK_GATE_D			_MMIO(0x42020)
 #define   ILK_VRHUNIT_CLOCK_GATE_DISABLE	(1 << 28)
 #define   ILK_DPFCUNIT_CLOCK_GATE_DISABLE	(1 << 9)
 #define   ILK_DPFCRUNIT_CLOCK_GATE_DISABLE	(1 << 8)
 #define   ILK_DPFDUNIT_CLOCK_GATE_ENABLE	(1 << 7)
 #define   ILK_DPARBUNIT_CLOCK_GATE_ENABLE	(1 << 5)
 
-#define IVB_CHICKEN3	0x4200c
+#define IVB_CHICKEN3	_MMIO(0x4200c)
 # define CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE	(1 << 5)
 # define CHICKEN3_DGMG_DONE_FIX_DISABLE		(1 << 2)
 
-#define CHICKEN_PAR1_1		0x42080
+#define CHICKEN_PAR1_1		_MMIO(0x42080)
 #define  DPA_MASK_VBLANK_SRD	(1 << 15)
 #define  FORCE_ARB_IDLE_PLANES	(1 << 14)
 
@@ -5886,70 +5957,70 @@
 #define _CHICKEN_PIPESL_1_B	0x420b4
 #define  HSW_FBCQ_DIS			(1 << 22)
 #define  BDW_DPRS_MASK_VBLANK_SRD	(1 << 0)
-#define CHICKEN_PIPESL_1(pipe) _PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B)
+#define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B)
 
-#define DISP_ARB_CTL	0x45000
+#define DISP_ARB_CTL	_MMIO(0x45000)
 #define  DISP_TILE_SURFACE_SWIZZLING	(1<<13)
 #define  DISP_FBC_WM_DIS		(1<<15)
-#define DISP_ARB_CTL2	0x45004
+#define DISP_ARB_CTL2	_MMIO(0x45004)
 #define  DISP_DATA_PARTITION_5_6	(1<<6)
-#define DBUF_CTL	0x45008
+#define DBUF_CTL	_MMIO(0x45008)
 #define  DBUF_POWER_REQUEST		(1<<31)
 #define  DBUF_POWER_STATE		(1<<30)
-#define GEN7_MSG_CTL	0x45010
+#define GEN7_MSG_CTL	_MMIO(0x45010)
 #define  WAIT_FOR_PCH_RESET_ACK		(1<<1)
 #define  WAIT_FOR_PCH_FLR_ACK		(1<<0)
-#define HSW_NDE_RSTWRN_OPT	0x46408
+#define HSW_NDE_RSTWRN_OPT	_MMIO(0x46408)
 #define  RESET_PCH_HANDSHAKE_ENABLE	(1<<4)
 
-#define SKL_DFSM			0x51000
+#define SKL_DFSM			_MMIO(0x51000)
 #define SKL_DFSM_CDCLK_LIMIT_MASK	(3 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_675	(0 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_540	(1 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_450	(2 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_337_5	(3 << 23)
 
-#define FF_SLICE_CS_CHICKEN2			0x20e4
+#define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
 #define  GEN9_TSG_BARRIER_ACK_DISABLE		(1<<8)
 
 /* GEN7 chicken */
-#define GEN7_COMMON_SLICE_CHICKEN1		0x7010
+#define GEN7_COMMON_SLICE_CHICKEN1		_MMIO(0x7010)
 # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC	((1<<10) | (1<<26))
 # define GEN9_RHWO_OPTIMIZATION_DISABLE		(1<<14)
-#define COMMON_SLICE_CHICKEN2			0x7014
+#define COMMON_SLICE_CHICKEN2			_MMIO(0x7014)
 # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE	(1<<0)
 
-#define HIZ_CHICKEN					0x7018
+#define HIZ_CHICKEN					_MMIO(0x7018)
 # define CHV_HZ_8X8_MODE_IN_1X				(1<<15)
 # define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE	(1<<3)
 
-#define GEN9_SLICE_COMMON_ECO_CHICKEN0		0x7308
+#define GEN9_SLICE_COMMON_ECO_CHICKEN0		_MMIO(0x7308)
 #define  DISABLE_PIXEL_MASK_CAMMING		(1<<14)
 
-#define GEN7_L3SQCREG1				0xB010
+#define GEN7_L3SQCREG1				_MMIO(0xB010)
 #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
 
-#define GEN8_L3SQCREG1				0xB100
+#define GEN8_L3SQCREG1				_MMIO(0xB100)
 #define  BDW_WA_L3SQCREG1_DEFAULT		0x784000
 
-#define GEN7_L3CNTLREG1				0xB01C
+#define GEN7_L3CNTLREG1				_MMIO(0xB01C)
 #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47FF8C
 #define  GEN7_L3AGDIS				(1<<19)
-#define GEN7_L3CNTLREG2				0xB020
-#define GEN7_L3CNTLREG3				0xB024
+#define GEN7_L3CNTLREG2				_MMIO(0xB020)
+#define GEN7_L3CNTLREG3				_MMIO(0xB024)
 
-#define GEN7_L3_CHICKEN_MODE_REGISTER		0xB030
+#define GEN7_L3_CHICKEN_MODE_REGISTER		_MMIO(0xB030)
 #define  GEN7_WA_L3_CHICKEN_MODE				0x20000000
 
-#define GEN7_L3SQCREG4				0xb034
+#define GEN7_L3SQCREG4				_MMIO(0xb034)
 #define  L3SQ_URB_READ_CAM_MATCH_DISABLE	(1<<27)
 
-#define GEN8_L3SQCREG4				0xb118
+#define GEN8_L3SQCREG4				_MMIO(0xb118)
 #define  GEN8_LQSC_RO_PERF_DIS			(1<<27)
 #define  GEN8_LQSC_FLUSH_COHERENT_LINES		(1<<21)
 
 /* GEN8 chicken */
-#define HDC_CHICKEN0				0x7300
+#define HDC_CHICKEN0				_MMIO(0x7300)
 #define  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE	(1<<15)
 #define  HDC_FENCE_DEST_SLM_DISABLE		(1<<14)
 #define  HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1<<11)
@@ -5958,17 +6029,17 @@
 #define  HDC_BARRIER_PERFORMANCE_DISABLE	(1<<10)
 
 /* GEN9 chicken */
-#define SLICE_ECO_CHICKEN0			0x7308
+#define SLICE_ECO_CHICKEN0			_MMIO(0x7308)
 #define   PIXEL_MASK_CAMMING_DISABLE		(1 << 14)
 
 /* WaCatErrorRejectionIssue */
-#define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
+#define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		_MMIO(0x9030)
 #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1<<11)
 
-#define HSW_SCRATCH1				0xb038
+#define HSW_SCRATCH1				_MMIO(0xb038)
 #define  HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE	(1<<27)
 
-#define BDW_SCRATCH1					0xb11c
+#define BDW_SCRATCH1					_MMIO(0xb11c)
 #define  GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE	(1<<2)
 
 /* PCH */
@@ -6062,12 +6133,12 @@
 				 SDE_FDI_RXB_CPT | \
 				 SDE_FDI_RXA_CPT)
 
-#define SDEISR  0xc4000
-#define SDEIMR  0xc4004
-#define SDEIIR  0xc4008
-#define SDEIER  0xc400c
+#define SDEISR  _MMIO(0xc4000)
+#define SDEIMR  _MMIO(0xc4004)
+#define SDEIIR  _MMIO(0xc4008)
+#define SDEIER  _MMIO(0xc400c)
 
-#define SERR_INT			0xc4040
+#define SERR_INT			_MMIO(0xc4040)
 #define  SERR_INT_POISON		(1<<31)
 #define  SERR_INT_TRANS_C_FIFO_UNDERRUN	(1<<6)
 #define  SERR_INT_TRANS_B_FIFO_UNDERRUN	(1<<3)
@@ -6075,7 +6146,7 @@
 #define  SERR_INT_TRANS_FIFO_UNDERRUN(pipe)	(1<<((pipe)*3))
 
 /* digital port hotplug */
-#define PCH_PORT_HOTPLUG		0xc4030	/* SHOTPLUG_CTL */
+#define PCH_PORT_HOTPLUG		_MMIO(0xc4030)	/* SHOTPLUG_CTL */
 #define  PORTA_HOTPLUG_ENABLE		(1 << 28) /* LPT:LP+ & BXT */
 #define  PORTA_HOTPLUG_STATUS_MASK	(3 << 24) /* SPT+ & BXT */
 #define  PORTA_HOTPLUG_NO_DETECT	(0 << 24) /* SPT+ & BXT */
@@ -6112,42 +6183,42 @@
 #define  PORTB_HOTPLUG_SHORT_DETECT	(1 << 0)
 #define  PORTB_HOTPLUG_LONG_DETECT	(2 << 0)
 
-#define PCH_PORT_HOTPLUG2		0xc403C	/* SHOTPLUG_CTL2 SPT+ */
+#define PCH_PORT_HOTPLUG2		_MMIO(0xc403C)	/* SHOTPLUG_CTL2 SPT+ */
 #define  PORTE_HOTPLUG_ENABLE		(1 << 4)
 #define  PORTE_HOTPLUG_STATUS_MASK	(3 << 0)
 #define  PORTE_HOTPLUG_NO_DETECT	(0 << 0)
 #define  PORTE_HOTPLUG_SHORT_DETECT	(1 << 0)
 #define  PORTE_HOTPLUG_LONG_DETECT	(2 << 0)
 
-#define PCH_GPIOA               0xc5010
-#define PCH_GPIOB               0xc5014
-#define PCH_GPIOC               0xc5018
-#define PCH_GPIOD               0xc501c
-#define PCH_GPIOE               0xc5020
-#define PCH_GPIOF               0xc5024
+#define PCH_GPIOA               _MMIO(0xc5010)
+#define PCH_GPIOB               _MMIO(0xc5014)
+#define PCH_GPIOC               _MMIO(0xc5018)
+#define PCH_GPIOD               _MMIO(0xc501c)
+#define PCH_GPIOE               _MMIO(0xc5020)
+#define PCH_GPIOF               _MMIO(0xc5024)
 
-#define PCH_GMBUS0		0xc5100
-#define PCH_GMBUS1		0xc5104
-#define PCH_GMBUS2		0xc5108
-#define PCH_GMBUS3		0xc510c
-#define PCH_GMBUS4		0xc5110
-#define PCH_GMBUS5		0xc5120
+#define PCH_GMBUS0		_MMIO(0xc5100)
+#define PCH_GMBUS1		_MMIO(0xc5104)
+#define PCH_GMBUS2		_MMIO(0xc5108)
+#define PCH_GMBUS3		_MMIO(0xc510c)
+#define PCH_GMBUS4		_MMIO(0xc5110)
+#define PCH_GMBUS5		_MMIO(0xc5120)
 
 #define _PCH_DPLL_A              0xc6014
 #define _PCH_DPLL_B              0xc6018
-#define PCH_DPLL(pll) (pll == 0 ? _PCH_DPLL_A : _PCH_DPLL_B)
+#define PCH_DPLL(pll) _MMIO(pll == 0 ? _PCH_DPLL_A : _PCH_DPLL_B)
 
 #define _PCH_FPA0                0xc6040
 #define  FP_CB_TUNE		(0x3<<22)
 #define _PCH_FPA1                0xc6044
 #define _PCH_FPB0                0xc6048
 #define _PCH_FPB1                0xc604c
-#define PCH_FP0(pll) (pll == 0 ? _PCH_FPA0 : _PCH_FPB0)
-#define PCH_FP1(pll) (pll == 0 ? _PCH_FPA1 : _PCH_FPB1)
+#define PCH_FP0(pll) _MMIO(pll == 0 ? _PCH_FPA0 : _PCH_FPB0)
+#define PCH_FP1(pll) _MMIO(pll == 0 ? _PCH_FPA1 : _PCH_FPB1)
 
-#define PCH_DPLL_TEST           0xc606c
+#define PCH_DPLL_TEST           _MMIO(0xc606c)
 
-#define PCH_DREF_CONTROL        0xC6200
+#define PCH_DREF_CONTROL        _MMIO(0xC6200)
 #define  DREF_CONTROL_MASK      0x7fc3
 #define  DREF_CPU_SOURCE_OUTPUT_DISABLE         (0<<13)
 #define  DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD      (2<<13)
@@ -6170,19 +6241,19 @@
 #define  DREF_SSC4_DISABLE                      (0)
 #define  DREF_SSC4_ENABLE                       (1)
 
-#define PCH_RAWCLK_FREQ         0xc6204
+#define PCH_RAWCLK_FREQ         _MMIO(0xc6204)
 #define  FDL_TP1_TIMER_SHIFT    12
 #define  FDL_TP1_TIMER_MASK     (3<<12)
 #define  FDL_TP2_TIMER_SHIFT    10
 #define  FDL_TP2_TIMER_MASK     (3<<10)
 #define  RAWCLK_FREQ_MASK       0x3ff
 
-#define PCH_DPLL_TMR_CFG        0xc6208
+#define PCH_DPLL_TMR_CFG        _MMIO(0xc6208)
 
-#define PCH_SSC4_PARMS          0xc6210
-#define PCH_SSC4_AUX_PARMS      0xc6214
+#define PCH_SSC4_PARMS          _MMIO(0xc6210)
+#define PCH_SSC4_AUX_PARMS      _MMIO(0xc6214)
 
-#define PCH_DPLL_SEL		0xc7000
+#define PCH_DPLL_SEL		_MMIO(0xc7000)
 #define	 TRANS_DPLLB_SEL(pipe)		(1 << ((pipe) * 4))
 #define	 TRANS_DPLLA_SEL(pipe)		0
 #define  TRANS_DPLL_ENABLE(pipe)	(1 << ((pipe) * 4 + 3))
@@ -6230,79 +6301,73 @@
 #define _VIDEO_DIP_DATA_B        0xe1208
 #define _VIDEO_DIP_GCP_B         0xe1210
 
-#define TVIDEO_DIP_CTL(pipe) _PIPE(pipe, _VIDEO_DIP_CTL_A, _VIDEO_DIP_CTL_B)
-#define TVIDEO_DIP_DATA(pipe) _PIPE(pipe, _VIDEO_DIP_DATA_A, _VIDEO_DIP_DATA_B)
-#define TVIDEO_DIP_GCP(pipe) _PIPE(pipe, _VIDEO_DIP_GCP_A, _VIDEO_DIP_GCP_B)
+#define TVIDEO_DIP_CTL(pipe) _MMIO_PIPE(pipe, _VIDEO_DIP_CTL_A, _VIDEO_DIP_CTL_B)
+#define TVIDEO_DIP_DATA(pipe) _MMIO_PIPE(pipe, _VIDEO_DIP_DATA_A, _VIDEO_DIP_DATA_B)
+#define TVIDEO_DIP_GCP(pipe) _MMIO_PIPE(pipe, _VIDEO_DIP_GCP_A, _VIDEO_DIP_GCP_B)
 
 /* Per-transcoder DIP controls (VLV) */
-#define VLV_VIDEO_DIP_CTL_A		(VLV_DISPLAY_BASE + 0x60200)
-#define VLV_VIDEO_DIP_DATA_A		(VLV_DISPLAY_BASE + 0x60208)
-#define VLV_VIDEO_DIP_GDCP_PAYLOAD_A	(VLV_DISPLAY_BASE + 0x60210)
+#define _VLV_VIDEO_DIP_CTL_A		(VLV_DISPLAY_BASE + 0x60200)
+#define _VLV_VIDEO_DIP_DATA_A		(VLV_DISPLAY_BASE + 0x60208)
+#define _VLV_VIDEO_DIP_GDCP_PAYLOAD_A	(VLV_DISPLAY_BASE + 0x60210)
 
-#define VLV_VIDEO_DIP_CTL_B		(VLV_DISPLAY_BASE + 0x61170)
-#define VLV_VIDEO_DIP_DATA_B		(VLV_DISPLAY_BASE + 0x61174)
-#define VLV_VIDEO_DIP_GDCP_PAYLOAD_B	(VLV_DISPLAY_BASE + 0x61178)
+#define _VLV_VIDEO_DIP_CTL_B		(VLV_DISPLAY_BASE + 0x61170)
+#define _VLV_VIDEO_DIP_DATA_B		(VLV_DISPLAY_BASE + 0x61174)
+#define _VLV_VIDEO_DIP_GDCP_PAYLOAD_B	(VLV_DISPLAY_BASE + 0x61178)
 
-#define CHV_VIDEO_DIP_CTL_C		(VLV_DISPLAY_BASE + 0x611f0)
-#define CHV_VIDEO_DIP_DATA_C		(VLV_DISPLAY_BASE + 0x611f4)
-#define CHV_VIDEO_DIP_GDCP_PAYLOAD_C	(VLV_DISPLAY_BASE + 0x611f8)
+#define _CHV_VIDEO_DIP_CTL_C		(VLV_DISPLAY_BASE + 0x611f0)
+#define _CHV_VIDEO_DIP_DATA_C		(VLV_DISPLAY_BASE + 0x611f4)
+#define _CHV_VIDEO_DIP_GDCP_PAYLOAD_C	(VLV_DISPLAY_BASE + 0x611f8)
 
 #define VLV_TVIDEO_DIP_CTL(pipe) \
-	_PIPE3((pipe), VLV_VIDEO_DIP_CTL_A, \
-	       VLV_VIDEO_DIP_CTL_B, CHV_VIDEO_DIP_CTL_C)
+	_MMIO_PIPE3((pipe), _VLV_VIDEO_DIP_CTL_A, \
+	       _VLV_VIDEO_DIP_CTL_B, _CHV_VIDEO_DIP_CTL_C)
 #define VLV_TVIDEO_DIP_DATA(pipe) \
-	_PIPE3((pipe), VLV_VIDEO_DIP_DATA_A, \
-	       VLV_VIDEO_DIP_DATA_B, CHV_VIDEO_DIP_DATA_C)
+	_MMIO_PIPE3((pipe), _VLV_VIDEO_DIP_DATA_A, \
+	       _VLV_VIDEO_DIP_DATA_B, _CHV_VIDEO_DIP_DATA_C)
 #define VLV_TVIDEO_DIP_GCP(pipe) \
-	_PIPE3((pipe), VLV_VIDEO_DIP_GDCP_PAYLOAD_A, \
-		VLV_VIDEO_DIP_GDCP_PAYLOAD_B, CHV_VIDEO_DIP_GDCP_PAYLOAD_C)
+	_MMIO_PIPE3((pipe), _VLV_VIDEO_DIP_GDCP_PAYLOAD_A, \
+		_VLV_VIDEO_DIP_GDCP_PAYLOAD_B, _CHV_VIDEO_DIP_GDCP_PAYLOAD_C)
 
 /* Haswell DIP controls */
-#define HSW_VIDEO_DIP_CTL_A		0x60200
-#define HSW_VIDEO_DIP_AVI_DATA_A	0x60220
-#define HSW_VIDEO_DIP_VS_DATA_A		0x60260
-#define HSW_VIDEO_DIP_SPD_DATA_A	0x602A0
-#define HSW_VIDEO_DIP_GMP_DATA_A	0x602E0
-#define HSW_VIDEO_DIP_VSC_DATA_A	0x60320
-#define HSW_VIDEO_DIP_AVI_ECC_A		0x60240
-#define HSW_VIDEO_DIP_VS_ECC_A		0x60280
-#define HSW_VIDEO_DIP_SPD_ECC_A		0x602C0
-#define HSW_VIDEO_DIP_GMP_ECC_A		0x60300
-#define HSW_VIDEO_DIP_VSC_ECC_A		0x60344
-#define HSW_VIDEO_DIP_GCP_A		0x60210
 
-#define HSW_VIDEO_DIP_CTL_B		0x61200
-#define HSW_VIDEO_DIP_AVI_DATA_B	0x61220
-#define HSW_VIDEO_DIP_VS_DATA_B		0x61260
-#define HSW_VIDEO_DIP_SPD_DATA_B	0x612A0
-#define HSW_VIDEO_DIP_GMP_DATA_B	0x612E0
-#define HSW_VIDEO_DIP_VSC_DATA_B	0x61320
-#define HSW_VIDEO_DIP_BVI_ECC_B		0x61240
-#define HSW_VIDEO_DIP_VS_ECC_B		0x61280
-#define HSW_VIDEO_DIP_SPD_ECC_B		0x612C0
-#define HSW_VIDEO_DIP_GMP_ECC_B		0x61300
-#define HSW_VIDEO_DIP_VSC_ECC_B		0x61344
-#define HSW_VIDEO_DIP_GCP_B		0x61210
+#define _HSW_VIDEO_DIP_CTL_A		0x60200
+#define _HSW_VIDEO_DIP_AVI_DATA_A	0x60220
+#define _HSW_VIDEO_DIP_VS_DATA_A	0x60260
+#define _HSW_VIDEO_DIP_SPD_DATA_A	0x602A0
+#define _HSW_VIDEO_DIP_GMP_DATA_A	0x602E0
+#define _HSW_VIDEO_DIP_VSC_DATA_A	0x60320
+#define _HSW_VIDEO_DIP_AVI_ECC_A	0x60240
+#define _HSW_VIDEO_DIP_VS_ECC_A		0x60280
+#define _HSW_VIDEO_DIP_SPD_ECC_A	0x602C0
+#define _HSW_VIDEO_DIP_GMP_ECC_A	0x60300
+#define _HSW_VIDEO_DIP_VSC_ECC_A	0x60344
+#define _HSW_VIDEO_DIP_GCP_A		0x60210
 
-#define HSW_TVIDEO_DIP_CTL(trans) \
-	 _TRANSCODER2(trans, HSW_VIDEO_DIP_CTL_A)
-#define HSW_TVIDEO_DIP_AVI_DATA(trans, i) \
-	(_TRANSCODER2(trans, HSW_VIDEO_DIP_AVI_DATA_A) + (i) * 4)
-#define HSW_TVIDEO_DIP_VS_DATA(trans, i) \
-	(_TRANSCODER2(trans, HSW_VIDEO_DIP_VS_DATA_A) + (i) * 4)
-#define HSW_TVIDEO_DIP_SPD_DATA(trans, i) \
-	(_TRANSCODER2(trans, HSW_VIDEO_DIP_SPD_DATA_A) + (i) * 4)
-#define HSW_TVIDEO_DIP_GCP(trans) \
-	_TRANSCODER2(trans, HSW_VIDEO_DIP_GCP_A)
-#define HSW_TVIDEO_DIP_VSC_DATA(trans, i) \
-	(_TRANSCODER2(trans, HSW_VIDEO_DIP_VSC_DATA_A) + (i) * 4)
+#define _HSW_VIDEO_DIP_CTL_B		0x61200
+#define _HSW_VIDEO_DIP_AVI_DATA_B	0x61220
+#define _HSW_VIDEO_DIP_VS_DATA_B	0x61260
+#define _HSW_VIDEO_DIP_SPD_DATA_B	0x612A0
+#define _HSW_VIDEO_DIP_GMP_DATA_B	0x612E0
+#define _HSW_VIDEO_DIP_VSC_DATA_B	0x61320
+#define _HSW_VIDEO_DIP_BVI_ECC_B	0x61240
+#define _HSW_VIDEO_DIP_VS_ECC_B		0x61280
+#define _HSW_VIDEO_DIP_SPD_ECC_B	0x612C0
+#define _HSW_VIDEO_DIP_GMP_ECC_B	0x61300
+#define _HSW_VIDEO_DIP_VSC_ECC_B	0x61344
+#define _HSW_VIDEO_DIP_GCP_B		0x61210
 
-#define HSW_STEREO_3D_CTL_A	0x70020
-#define   S3D_ENABLE		(1<<31)
-#define HSW_STEREO_3D_CTL_B	0x71020
+#define HSW_TVIDEO_DIP_CTL(trans)		_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_CTL_A)
+#define HSW_TVIDEO_DIP_AVI_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_AVI_DATA_A + (i) * 4)
+#define HSW_TVIDEO_DIP_VS_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_VS_DATA_A + (i) * 4)
+#define HSW_TVIDEO_DIP_SPD_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_SPD_DATA_A + (i) * 4)
+#define HSW_TVIDEO_DIP_GCP(trans)		_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_GCP_A)
+#define HSW_TVIDEO_DIP_VSC_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_VSC_DATA_A + (i) * 4)
 
-#define HSW_STEREO_3D_CTL(trans) \
-	_PIPE2(trans, HSW_STEREO_3D_CTL_A)
+#define _HSW_STEREO_3D_CTL_A		0x70020
+#define   S3D_ENABLE			(1<<31)
+#define _HSW_STEREO_3D_CTL_B		0x71020
+
+#define HSW_STEREO_3D_CTL(trans)	_MMIO_PIPE2(trans, _HSW_STEREO_3D_CTL_A)
 
 #define _PCH_TRANS_HTOTAL_B          0xe1000
 #define _PCH_TRANS_HBLANK_B          0xe1004
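
The hunks above and below all follow the same mechanical pattern: the raw per-pipe offsets keep their old values but gain a leading underscore, and the user-facing macro is rebuilt on top of an _MMIO_*() helper so it yields a typed register handle instead of a bare u32 offset. A minimal sketch of the plumbing this relies on, assuming the helper definitions introduced earlier in this patch keep the traditional i915 offset arithmetic (the exact upstream forms live near the top of i915_reg.h):

	/* Sketch only; mirrors definitions assumed from earlier in the patch. */
	typedef struct {
		uint32_t reg;
	} i915_reg_t;					/* opaque register handle */

	#define _MMIO(r)		((const i915_reg_t){ .reg = (r) })

	/* long-standing offset picker: base A plus a per-pipe stride */
	#define _PIPE(pipe, a, b)	((a) + (pipe) * ((b) - (a)))

	/* typed wrapper used throughout these hunks */
	#define _MMIO_PIPE(pipe, a, b)	_MMIO(_PIPE(pipe, a, b))

	/* e.g. FDI_TX_CTL(PIPE_B) -> _MMIO(0x60100 + 1 * (0x61100 - 0x60100))
	 *			    -> a handle for offset 0x61100
	 */

_MMIO_PIPE2/_MMIO_PIPE3/_MMIO_TRANS/_MMIO_TRANS2/_MMIO_PORT used below are the same idea layered over the corresponding _PIPE2/_PIPE3/_TRANSCODER/_TRANSCODER2/_PORT pickers.
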
@@ -6310,16 +6375,15 @@
 #define _PCH_TRANS_VTOTAL_B          0xe100c
 #define _PCH_TRANS_VBLANK_B          0xe1010
 #define _PCH_TRANS_VSYNC_B           0xe1014
-#define _PCH_TRANS_VSYNCSHIFT_B	 0xe1028
+#define _PCH_TRANS_VSYNCSHIFT_B 0xe1028
 
-#define PCH_TRANS_HTOTAL(pipe) _PIPE(pipe, _PCH_TRANS_HTOTAL_A, _PCH_TRANS_HTOTAL_B)
-#define PCH_TRANS_HBLANK(pipe) _PIPE(pipe, _PCH_TRANS_HBLANK_A, _PCH_TRANS_HBLANK_B)
-#define PCH_TRANS_HSYNC(pipe) _PIPE(pipe, _PCH_TRANS_HSYNC_A, _PCH_TRANS_HSYNC_B)
-#define PCH_TRANS_VTOTAL(pipe) _PIPE(pipe, _PCH_TRANS_VTOTAL_A, _PCH_TRANS_VTOTAL_B)
-#define PCH_TRANS_VBLANK(pipe) _PIPE(pipe, _PCH_TRANS_VBLANK_A, _PCH_TRANS_VBLANK_B)
-#define PCH_TRANS_VSYNC(pipe) _PIPE(pipe, _PCH_TRANS_VSYNC_A, _PCH_TRANS_VSYNC_B)
-#define PCH_TRANS_VSYNCSHIFT(pipe) _PIPE(pipe, _PCH_TRANS_VSYNCSHIFT_A, \
-					 _PCH_TRANS_VSYNCSHIFT_B)
+#define PCH_TRANS_HTOTAL(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_HTOTAL_A, _PCH_TRANS_HTOTAL_B)
+#define PCH_TRANS_HBLANK(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_HBLANK_A, _PCH_TRANS_HBLANK_B)
+#define PCH_TRANS_HSYNC(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_HSYNC_A, _PCH_TRANS_HSYNC_B)
+#define PCH_TRANS_VTOTAL(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_VTOTAL_A, _PCH_TRANS_VTOTAL_B)
+#define PCH_TRANS_VBLANK(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_VBLANK_A, _PCH_TRANS_VBLANK_B)
+#define PCH_TRANS_VSYNC(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_VSYNC_A, _PCH_TRANS_VSYNC_B)
+#define PCH_TRANS_VSYNCSHIFT(pipe)	_MMIO_PIPE(pipe, _PCH_TRANS_VSYNCSHIFT_A, _PCH_TRANS_VSYNCSHIFT_B)
 
 #define _PCH_TRANSB_DATA_M1	0xe1030
 #define _PCH_TRANSB_DATA_N1	0xe1034
@@ -6330,19 +6394,19 @@
 #define _PCH_TRANSB_LINK_M2	0xe1048
 #define _PCH_TRANSB_LINK_N2	0xe104c
 
-#define PCH_TRANS_DATA_M1(pipe) _PIPE(pipe, _PCH_TRANSA_DATA_M1, _PCH_TRANSB_DATA_M1)
-#define PCH_TRANS_DATA_N1(pipe) _PIPE(pipe, _PCH_TRANSA_DATA_N1, _PCH_TRANSB_DATA_N1)
-#define PCH_TRANS_DATA_M2(pipe) _PIPE(pipe, _PCH_TRANSA_DATA_M2, _PCH_TRANSB_DATA_M2)
-#define PCH_TRANS_DATA_N2(pipe) _PIPE(pipe, _PCH_TRANSA_DATA_N2, _PCH_TRANSB_DATA_N2)
-#define PCH_TRANS_LINK_M1(pipe) _PIPE(pipe, _PCH_TRANSA_LINK_M1, _PCH_TRANSB_LINK_M1)
-#define PCH_TRANS_LINK_N1(pipe) _PIPE(pipe, _PCH_TRANSA_LINK_N1, _PCH_TRANSB_LINK_N1)
-#define PCH_TRANS_LINK_M2(pipe) _PIPE(pipe, _PCH_TRANSA_LINK_M2, _PCH_TRANSB_LINK_M2)
-#define PCH_TRANS_LINK_N2(pipe) _PIPE(pipe, _PCH_TRANSA_LINK_N2, _PCH_TRANSB_LINK_N2)
+#define PCH_TRANS_DATA_M1(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_DATA_M1, _PCH_TRANSB_DATA_M1)
+#define PCH_TRANS_DATA_N1(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_DATA_N1, _PCH_TRANSB_DATA_N1)
+#define PCH_TRANS_DATA_M2(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_DATA_M2, _PCH_TRANSB_DATA_M2)
+#define PCH_TRANS_DATA_N2(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_DATA_N2, _PCH_TRANSB_DATA_N2)
+#define PCH_TRANS_LINK_M1(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_LINK_M1, _PCH_TRANSB_LINK_M1)
+#define PCH_TRANS_LINK_N1(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_LINK_N1, _PCH_TRANSB_LINK_N1)
+#define PCH_TRANS_LINK_M2(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_LINK_M2, _PCH_TRANSB_LINK_M2)
+#define PCH_TRANS_LINK_N2(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_LINK_N2, _PCH_TRANSB_LINK_N2)
 
 #define _PCH_TRANSACONF              0xf0008
 #define _PCH_TRANSBCONF              0xf1008
-#define PCH_TRANSCONF(pipe) _PIPE(pipe, _PCH_TRANSACONF, _PCH_TRANSBCONF)
-#define LPT_TRANSCONF		_PCH_TRANSACONF /* lpt has only one transcoder */
+#define PCH_TRANSCONF(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSACONF, _PCH_TRANSBCONF)
+#define LPT_TRANSCONF		PCH_TRANSCONF(PIPE_A) /* lpt has only one transcoder */
 #define  TRANS_DISABLE          (0<<31)
 #define  TRANS_ENABLE           (1<<31)
 #define  TRANS_STATE_MASK       (1<<30)
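
The LPT_TRANSCONF alias is worth a second look: it used to alias the raw _PCH_TRANSACONF offset, but with typed registers it has to expand to an i915_reg_t as well, hence the switch to PCH_TRANSCONF(PIPE_A). The payoff is that the mmio accessors now reject bare offsets at compile time. A hedged sketch, assuming I915_READ() keeps its usual form of picking up dev_priv from the enclosing scope and, after this series, accepting only i915_reg_t:

	static u32 lpt_transconf_sketch(struct drm_i915_private *dev_priv)
	{
		/* fine: LPT_TRANSCONF is now PCH_TRANSCONF(PIPE_A), an i915_reg_t */
		u32 val = I915_READ(LPT_TRANSCONF);

		/* val = I915_READ(0xf0008); would no longer compile: a raw
		 * offset is not implicitly convertible to i915_reg_t.
		 */
		return val & TRANS_STATE_MASK;
	}
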
@@ -6363,47 +6427,47 @@
 
 #define _TRANSA_CHICKEN1	 0xf0060
 #define _TRANSB_CHICKEN1	 0xf1060
-#define TRANS_CHICKEN1(pipe) _PIPE(pipe, _TRANSA_CHICKEN1, _TRANSB_CHICKEN1)
+#define TRANS_CHICKEN1(pipe)	_MMIO_PIPE(pipe, _TRANSA_CHICKEN1, _TRANSB_CHICKEN1)
 #define  TRANS_CHICKEN1_HDMIUNIT_GC_DISABLE	(1<<10)
 #define  TRANS_CHICKEN1_DP0UNIT_GC_DISABLE	(1<<4)
 #define _TRANSA_CHICKEN2	 0xf0064
 #define _TRANSB_CHICKEN2	 0xf1064
-#define TRANS_CHICKEN2(pipe) _PIPE(pipe, _TRANSA_CHICKEN2, _TRANSB_CHICKEN2)
+#define TRANS_CHICKEN2(pipe)	_MMIO_PIPE(pipe, _TRANSA_CHICKEN2, _TRANSB_CHICKEN2)
 #define  TRANS_CHICKEN2_TIMING_OVERRIDE			(1<<31)
 #define  TRANS_CHICKEN2_FDI_POLARITY_REVERSED		(1<<29)
 #define  TRANS_CHICKEN2_FRAME_START_DELAY_MASK		(3<<27)
 #define  TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER	(1<<26)
 #define  TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH	(1<<25)
 
-#define SOUTH_CHICKEN1		0xc2000
+#define SOUTH_CHICKEN1		_MMIO(0xc2000)
 #define  FDIA_PHASE_SYNC_SHIFT_OVR	19
 #define  FDIA_PHASE_SYNC_SHIFT_EN	18
 #define  FDI_PHASE_SYNC_OVR(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_OVR - ((pipe) * 2)))
 #define  FDI_PHASE_SYNC_EN(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_EN - ((pipe) * 2)))
 #define  FDI_BC_BIFURCATION_SELECT	(1 << 12)
 #define  SPT_PWM_GRANULARITY		(1<<0)
-#define SOUTH_CHICKEN2		0xc2004
+#define SOUTH_CHICKEN2		_MMIO(0xc2004)
 #define  FDI_MPHY_IOSFSB_RESET_STATUS	(1<<13)
 #define  FDI_MPHY_IOSFSB_RESET_CTL	(1<<12)
 #define  LPT_PWM_GRANULARITY		(1<<5)
 #define  DPLS_EDP_PPS_FIX_DIS		(1<<0)
 
-#define _FDI_RXA_CHICKEN         0xc200c
-#define _FDI_RXB_CHICKEN         0xc2010
+#define _FDI_RXA_CHICKEN        0xc200c
+#define _FDI_RXB_CHICKEN        0xc2010
 #define  FDI_RX_PHASE_SYNC_POINTER_OVR	(1<<1)
 #define  FDI_RX_PHASE_SYNC_POINTER_EN	(1<<0)
-#define FDI_RX_CHICKEN(pipe) _PIPE(pipe, _FDI_RXA_CHICKEN, _FDI_RXB_CHICKEN)
+#define FDI_RX_CHICKEN(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_CHICKEN, _FDI_RXB_CHICKEN)
 
-#define SOUTH_DSPCLK_GATE_D	0xc2020
+#define SOUTH_DSPCLK_GATE_D	_MMIO(0xc2020)
 #define  PCH_DPLUNIT_CLOCK_GATE_DISABLE (1<<30)
 #define  PCH_DPLSUNIT_CLOCK_GATE_DISABLE (1<<29)
 #define  PCH_CPUNIT_CLOCK_GATE_DISABLE (1<<14)
 #define  PCH_LP_PARTITION_LEVEL_DISABLE  (1<<12)
 
 /* CPU: FDI_TX */
-#define _FDI_TXA_CTL             0x60100
-#define _FDI_TXB_CTL             0x61100
-#define FDI_TX_CTL(pipe) _PIPE(pipe, _FDI_TXA_CTL, _FDI_TXB_CTL)
+#define _FDI_TXA_CTL            0x60100
+#define _FDI_TXB_CTL            0x61100
+#define FDI_TX_CTL(pipe)	_MMIO_PIPE(pipe, _FDI_TXA_CTL, _FDI_TXB_CTL)
 #define  FDI_TX_DISABLE         (0<<31)
 #define  FDI_TX_ENABLE          (1<<31)
 #define  FDI_LINK_TRAIN_PATTERN_1       (0<<28)
@@ -6453,7 +6517,7 @@
 /* FDI_RX, FDI_X is hard-wired to Transcoder_X */
 #define _FDI_RXA_CTL             0xf000c
 #define _FDI_RXB_CTL             0xf100c
-#define FDI_RX_CTL(pipe) _PIPE(pipe, _FDI_RXA_CTL, _FDI_RXB_CTL)
+#define FDI_RX_CTL(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_CTL, _FDI_RXB_CTL)
 #define  FDI_RX_ENABLE          (1<<31)
 /* train, dp width same as FDI_TX */
 #define  FDI_FS_ERRC_ENABLE		(1<<27)
@@ -6489,14 +6553,14 @@
 #define  FDI_RX_TP1_TO_TP2_48		(2<<20)
 #define  FDI_RX_TP1_TO_TP2_64		(3<<20)
 #define  FDI_RX_FDI_DELAY_90		(0x90<<0)
-#define FDI_RX_MISC(pipe) _PIPE(pipe, _FDI_RXA_MISC, _FDI_RXB_MISC)
+#define FDI_RX_MISC(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_MISC, _FDI_RXB_MISC)
 
-#define _FDI_RXA_TUSIZE1         0xf0030
-#define _FDI_RXA_TUSIZE2         0xf0038
-#define _FDI_RXB_TUSIZE1         0xf1030
-#define _FDI_RXB_TUSIZE2         0xf1038
-#define FDI_RX_TUSIZE1(pipe) _PIPE(pipe, _FDI_RXA_TUSIZE1, _FDI_RXB_TUSIZE1)
-#define FDI_RX_TUSIZE2(pipe) _PIPE(pipe, _FDI_RXA_TUSIZE2, _FDI_RXB_TUSIZE2)
+#define _FDI_RXA_TUSIZE1        0xf0030
+#define _FDI_RXA_TUSIZE2        0xf0038
+#define _FDI_RXB_TUSIZE1        0xf1030
+#define _FDI_RXB_TUSIZE2        0xf1038
+#define FDI_RX_TUSIZE1(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_TUSIZE1, _FDI_RXB_TUSIZE1)
+#define FDI_RX_TUSIZE2(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_TUSIZE2, _FDI_RXB_TUSIZE2)
 
 /* FDI_RX interrupt register format */
 #define FDI_RX_INTER_LANE_ALIGN         (1<<10)
@@ -6511,44 +6575,41 @@
 #define FDI_RX_CROSS_CLOCK_OVERFLOW     (1<<1)
 #define FDI_RX_SYMBOL_QUEUE_OVERFLOW    (1<<0)
 
-#define _FDI_RXA_IIR             0xf0014
-#define _FDI_RXA_IMR             0xf0018
-#define _FDI_RXB_IIR             0xf1014
-#define _FDI_RXB_IMR             0xf1018
-#define FDI_RX_IIR(pipe) _PIPE(pipe, _FDI_RXA_IIR, _FDI_RXB_IIR)
-#define FDI_RX_IMR(pipe) _PIPE(pipe, _FDI_RXA_IMR, _FDI_RXB_IMR)
+#define _FDI_RXA_IIR            0xf0014
+#define _FDI_RXA_IMR            0xf0018
+#define _FDI_RXB_IIR            0xf1014
+#define _FDI_RXB_IMR            0xf1018
+#define FDI_RX_IIR(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_IIR, _FDI_RXB_IIR)
+#define FDI_RX_IMR(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_IMR, _FDI_RXB_IMR)
 
-#define FDI_PLL_CTL_1           0xfe000
-#define FDI_PLL_CTL_2           0xfe004
+#define FDI_PLL_CTL_1           _MMIO(0xfe000)
+#define FDI_PLL_CTL_2           _MMIO(0xfe004)
 
-#define PCH_LVDS	0xe1180
+#define PCH_LVDS	_MMIO(0xe1180)
 #define  LVDS_DETECTED	(1 << 1)
 
 /* vlv has 2 sets of panel control regs. */
-#define PIPEA_PP_STATUS         (VLV_DISPLAY_BASE + 0x61200)
-#define PIPEA_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61204)
-#define PIPEA_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61208)
+#define _PIPEA_PP_STATUS         (VLV_DISPLAY_BASE + 0x61200)
+#define _PIPEA_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61204)
+#define _PIPEA_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61208)
 #define  PANEL_PORT_SELECT_VLV(port)	((port) << 30)
-#define PIPEA_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6120c)
-#define PIPEA_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61210)
+#define _PIPEA_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6120c)
+#define _PIPEA_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61210)
 
-#define PIPEB_PP_STATUS         (VLV_DISPLAY_BASE + 0x61300)
-#define PIPEB_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61304)
-#define PIPEB_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61308)
-#define PIPEB_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6130c)
-#define PIPEB_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61310)
+#define _PIPEB_PP_STATUS         (VLV_DISPLAY_BASE + 0x61300)
+#define _PIPEB_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61304)
+#define _PIPEB_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61308)
+#define _PIPEB_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6130c)
+#define _PIPEB_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61310)
 
-#define VLV_PIPE_PP_STATUS(pipe) _PIPE(pipe, PIPEA_PP_STATUS, PIPEB_PP_STATUS)
-#define VLV_PIPE_PP_CONTROL(pipe) _PIPE(pipe, PIPEA_PP_CONTROL, PIPEB_PP_CONTROL)
-#define VLV_PIPE_PP_ON_DELAYS(pipe) \
-		_PIPE(pipe, PIPEA_PP_ON_DELAYS, PIPEB_PP_ON_DELAYS)
-#define VLV_PIPE_PP_OFF_DELAYS(pipe) \
-		_PIPE(pipe, PIPEA_PP_OFF_DELAYS, PIPEB_PP_OFF_DELAYS)
-#define VLV_PIPE_PP_DIVISOR(pipe) \
-		_PIPE(pipe, PIPEA_PP_DIVISOR, PIPEB_PP_DIVISOR)
+#define VLV_PIPE_PP_STATUS(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_STATUS, _PIPEB_PP_STATUS)
+#define VLV_PIPE_PP_CONTROL(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_CONTROL, _PIPEB_PP_CONTROL)
+#define VLV_PIPE_PP_ON_DELAYS(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_ON_DELAYS, _PIPEB_PP_ON_DELAYS)
+#define VLV_PIPE_PP_OFF_DELAYS(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_OFF_DELAYS, _PIPEB_PP_OFF_DELAYS)
+#define VLV_PIPE_PP_DIVISOR(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_DIVISOR, _PIPEB_PP_DIVISOR)
 
-#define PCH_PP_STATUS		0xc7200
-#define PCH_PP_CONTROL		0xc7204
+#define _PCH_PP_STATUS		0xc7200
+#define _PCH_PP_CONTROL		0xc7204
 #define  PANEL_UNLOCK_REGS	(0xabcd << 16)
 #define  PANEL_UNLOCK_MASK	(0xffff << 16)
 #define  BXT_POWER_CYCLE_DELAY_MASK	(0x1f0)
@@ -6558,7 +6619,7 @@
 #define  PANEL_POWER_RESET	(1 << 1)
 #define  PANEL_POWER_OFF	(0 << 0)
 #define  PANEL_POWER_ON		(1 << 0)
-#define PCH_PP_ON_DELAYS	0xc7208
+#define _PCH_PP_ON_DELAYS	0xc7208
 #define  PANEL_PORT_SELECT_MASK	(3 << 30)
 #define  PANEL_PORT_SELECT_LVDS	(0 << 30)
 #define  PANEL_PORT_SELECT_DPA	(1 << 30)
@@ -6569,52 +6630,64 @@
 #define  PANEL_LIGHT_ON_DELAY_MASK	(0x1fff)
 #define  PANEL_LIGHT_ON_DELAY_SHIFT	0
 
-#define PCH_PP_OFF_DELAYS	0xc720c
+#define _PCH_PP_OFF_DELAYS		0xc720c
 #define  PANEL_POWER_DOWN_DELAY_MASK	(0x1fff0000)
 #define  PANEL_POWER_DOWN_DELAY_SHIFT	16
 #define  PANEL_LIGHT_OFF_DELAY_MASK	(0x1fff)
 #define  PANEL_LIGHT_OFF_DELAY_SHIFT	0
 
-#define PCH_PP_DIVISOR		0xc7210
+#define _PCH_PP_DIVISOR			0xc7210
 #define  PP_REFERENCE_DIVIDER_MASK	(0xffffff00)
 #define  PP_REFERENCE_DIVIDER_SHIFT	8
 #define  PANEL_POWER_CYCLE_DELAY_MASK	(0x1f)
 #define  PANEL_POWER_CYCLE_DELAY_SHIFT	0
 
+#define PCH_PP_STATUS			_MMIO(_PCH_PP_STATUS)
+#define PCH_PP_CONTROL			_MMIO(_PCH_PP_CONTROL)
+#define PCH_PP_ON_DELAYS		_MMIO(_PCH_PP_ON_DELAYS)
+#define PCH_PP_OFF_DELAYS		_MMIO(_PCH_PP_OFF_DELAYS)
+#define PCH_PP_DIVISOR			_MMIO(_PCH_PP_DIVISOR)
+
 /* BXT PPS changes - 2nd set of PPS registers */
 #define _BXT_PP_STATUS2 	0xc7300
 #define _BXT_PP_CONTROL2 	0xc7304
 #define _BXT_PP_ON_DELAYS2	0xc7308
 #define _BXT_PP_OFF_DELAYS2	0xc730c
 
-#define BXT_PP_STATUS(n)	_PIPE(n, PCH_PP_STATUS, _BXT_PP_STATUS2)
-#define BXT_PP_CONTROL(n)	_PIPE(n, PCH_PP_CONTROL, _BXT_PP_CONTROL2)
-#define BXT_PP_ON_DELAYS(n)	_PIPE(n, PCH_PP_ON_DELAYS, _BXT_PP_ON_DELAYS2)
-#define BXT_PP_OFF_DELAYS(n)	_PIPE(n, PCH_PP_OFF_DELAYS, _BXT_PP_OFF_DELAYS2)
+#define BXT_PP_STATUS(n)	_MMIO_PIPE(n, _PCH_PP_STATUS, _BXT_PP_STATUS2)
+#define BXT_PP_CONTROL(n)	_MMIO_PIPE(n, _PCH_PP_CONTROL, _BXT_PP_CONTROL2)
+#define BXT_PP_ON_DELAYS(n)	_MMIO_PIPE(n, _PCH_PP_ON_DELAYS, _BXT_PP_ON_DELAYS2)
+#define BXT_PP_OFF_DELAYS(n)	_MMIO_PIPE(n, _PCH_PP_OFF_DELAYS, _BXT_PP_OFF_DELAYS2)
 
-#define PCH_DP_B		0xe4100
-#define PCH_DPB_AUX_CH_CTL	0xe4110
-#define PCH_DPB_AUX_CH_DATA1	0xe4114
-#define PCH_DPB_AUX_CH_DATA2	0xe4118
-#define PCH_DPB_AUX_CH_DATA3	0xe411c
-#define PCH_DPB_AUX_CH_DATA4	0xe4120
-#define PCH_DPB_AUX_CH_DATA5	0xe4124
+#define _PCH_DP_B		0xe4100
+#define PCH_DP_B		_MMIO(_PCH_DP_B)
+#define _PCH_DPB_AUX_CH_CTL	0xe4110
+#define _PCH_DPB_AUX_CH_DATA1	0xe4114
+#define _PCH_DPB_AUX_CH_DATA2	0xe4118
+#define _PCH_DPB_AUX_CH_DATA3	0xe411c
+#define _PCH_DPB_AUX_CH_DATA4	0xe4120
+#define _PCH_DPB_AUX_CH_DATA5	0xe4124
 
-#define PCH_DP_C		0xe4200
-#define PCH_DPC_AUX_CH_CTL	0xe4210
-#define PCH_DPC_AUX_CH_DATA1	0xe4214
-#define PCH_DPC_AUX_CH_DATA2	0xe4218
-#define PCH_DPC_AUX_CH_DATA3	0xe421c
-#define PCH_DPC_AUX_CH_DATA4	0xe4220
-#define PCH_DPC_AUX_CH_DATA5	0xe4224
+#define _PCH_DP_C		0xe4200
+#define PCH_DP_C		_MMIO(_PCH_DP_C)
+#define _PCH_DPC_AUX_CH_CTL	0xe4210
+#define _PCH_DPC_AUX_CH_DATA1	0xe4214
+#define _PCH_DPC_AUX_CH_DATA2	0xe4218
+#define _PCH_DPC_AUX_CH_DATA3	0xe421c
+#define _PCH_DPC_AUX_CH_DATA4	0xe4220
+#define _PCH_DPC_AUX_CH_DATA5	0xe4224
 
-#define PCH_DP_D		0xe4300
-#define PCH_DPD_AUX_CH_CTL	0xe4310
-#define PCH_DPD_AUX_CH_DATA1	0xe4314
-#define PCH_DPD_AUX_CH_DATA2	0xe4318
-#define PCH_DPD_AUX_CH_DATA3	0xe431c
-#define PCH_DPD_AUX_CH_DATA4	0xe4320
-#define PCH_DPD_AUX_CH_DATA5	0xe4324
+#define _PCH_DP_D		0xe4300
+#define PCH_DP_D		_MMIO(_PCH_DP_D)
+#define _PCH_DPD_AUX_CH_CTL	0xe4310
+#define _PCH_DPD_AUX_CH_DATA1	0xe4314
+#define _PCH_DPD_AUX_CH_DATA2	0xe4318
+#define _PCH_DPD_AUX_CH_DATA3	0xe431c
+#define _PCH_DPD_AUX_CH_DATA4	0xe4320
+#define _PCH_DPD_AUX_CH_DATA5	0xe4324
+
+#define PCH_DP_AUX_CH_CTL(port)		_MMIO_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_CTL, _PCH_DPC_AUX_CH_CTL)
+#define PCH_DP_AUX_CH_DATA(port, i)	_MMIO(_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_DATA1, _PCH_DPC_AUX_CH_DATA1) + (i) * 4) /* 5 registers */
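
PCH_DP_AUX_CH_CTL()/PCH_DP_AUX_CH_DATA() fold the hand-written per-port B/C/D registers above into two parameterized lookups. A worked expansion, assuming _PORT(i, a, b) keeps its usual ((a) + (i) * ((b) - (a))) form:

	/* PCH_DP_AUX_CH_DATA(PORT_C, 2)
	 *   = _MMIO(_PORT(PORT_C - PORT_B, 0xe4114, 0xe4214) + 2 * 4)
	 *   = _MMIO(0xe4114 + 1 * 0x100 + 0x8)
	 *   = _MMIO(0xe421c)		-- the old PCH_DPC_AUX_CH_DATA3
	 */

Port D falls out of the same arithmetic because the AUX blocks sit 0x100 apart.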
 
 /* CPT */
 #define  PORT_TRANS_A_SEL_CPT	0
@@ -6627,10 +6700,10 @@
 #define  SDVO_PORT_TO_PIPE_CHV(val)	(((val) & (3<<24)) >> 24)
 #define  DP_PORT_TO_PIPE_CHV(val)	(((val) & (3<<16)) >> 16)
 
-#define TRANS_DP_CTL_A		0xe0300
-#define TRANS_DP_CTL_B		0xe1300
-#define TRANS_DP_CTL_C		0xe2300
-#define TRANS_DP_CTL(pipe)	_PIPE(pipe, TRANS_DP_CTL_A, TRANS_DP_CTL_B)
+#define _TRANS_DP_CTL_A		0xe0300
+#define _TRANS_DP_CTL_B		0xe1300
+#define _TRANS_DP_CTL_C		0xe2300
+#define TRANS_DP_CTL(pipe)	_MMIO_PIPE(pipe, _TRANS_DP_CTL_A, _TRANS_DP_CTL_B)
 #define  TRANS_DP_OUTPUT_ENABLE	(1<<31)
 #define  TRANS_DP_PORT_SEL_B	(0<<29)
 #define  TRANS_DP_PORT_SEL_C	(1<<29)
@@ -6683,40 +6756,40 @@
 
 #define  EDP_LINK_TRAIN_VOL_EMP_MASK_IVB	(0x3f<<22)
 
-#define  VLV_PMWGICZ				0x1300a4
+#define  VLV_PMWGICZ				_MMIO(0x1300a4)
 
-#define  FORCEWAKE				0xA18C
-#define  FORCEWAKE_VLV				0x1300b0
-#define  FORCEWAKE_ACK_VLV			0x1300b4
-#define  FORCEWAKE_MEDIA_VLV			0x1300b8
-#define  FORCEWAKE_ACK_MEDIA_VLV		0x1300bc
-#define  FORCEWAKE_ACK_HSW			0x130044
-#define  FORCEWAKE_ACK				0x130090
-#define  VLV_GTLC_WAKE_CTRL			0x130090
+#define  FORCEWAKE				_MMIO(0xA18C)
+#define  FORCEWAKE_VLV				_MMIO(0x1300b0)
+#define  FORCEWAKE_ACK_VLV			_MMIO(0x1300b4)
+#define  FORCEWAKE_MEDIA_VLV			_MMIO(0x1300b8)
+#define  FORCEWAKE_ACK_MEDIA_VLV		_MMIO(0x1300bc)
+#define  FORCEWAKE_ACK_HSW			_MMIO(0x130044)
+#define  FORCEWAKE_ACK				_MMIO(0x130090)
+#define  VLV_GTLC_WAKE_CTRL			_MMIO(0x130090)
 #define   VLV_GTLC_RENDER_CTX_EXISTS		(1 << 25)
 #define   VLV_GTLC_MEDIA_CTX_EXISTS		(1 << 24)
 #define   VLV_GTLC_ALLOWWAKEREQ			(1 << 0)
 
-#define  VLV_GTLC_PW_STATUS			0x130094
+#define  VLV_GTLC_PW_STATUS			_MMIO(0x130094)
 #define   VLV_GTLC_ALLOWWAKEACK			(1 << 0)
 #define   VLV_GTLC_ALLOWWAKEERR			(1 << 1)
 #define   VLV_GTLC_PW_MEDIA_STATUS_MASK		(1 << 5)
 #define   VLV_GTLC_PW_RENDER_STATUS_MASK	(1 << 7)
-#define  FORCEWAKE_MT				0xa188 /* multi-threaded */
-#define  FORCEWAKE_MEDIA_GEN9			0xa270
-#define  FORCEWAKE_RENDER_GEN9			0xa278
-#define  FORCEWAKE_BLITTER_GEN9			0xa188
-#define  FORCEWAKE_ACK_MEDIA_GEN9		0x0D88
-#define  FORCEWAKE_ACK_RENDER_GEN9		0x0D84
-#define  FORCEWAKE_ACK_BLITTER_GEN9		0x130044
+#define  FORCEWAKE_MT				_MMIO(0xa188) /* multi-threaded */
+#define  FORCEWAKE_MEDIA_GEN9			_MMIO(0xa270)
+#define  FORCEWAKE_RENDER_GEN9			_MMIO(0xa278)
+#define  FORCEWAKE_BLITTER_GEN9			_MMIO(0xa188)
+#define  FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0x0D88)
+#define  FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0x0D84)
+#define  FORCEWAKE_ACK_BLITTER_GEN9		_MMIO(0x130044)
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
-#define  FORCEWAKE_MT_ACK			0x130040
-#define  ECOBUS					0xa180
+#define  FORCEWAKE_MT_ACK			_MMIO(0x130040)
+#define  ECOBUS					_MMIO(0xa180)
 #define    FORCEWAKE_MT_ENABLE			(1<<5)
-#define  VLV_SPAREG2H				0xA194
+#define  VLV_SPAREG2H				_MMIO(0xA194)
 
-#define  GTFIFODBG				0x120000
+#define  GTFIFODBG				_MMIO(0x120000)
 #define    GT_FIFO_SBDROPERR			(1<<6)
 #define    GT_FIFO_BLOBDROPERR			(1<<5)
 #define    GT_FIFO_SB_READ_ABORTERR		(1<<4)
@@ -6725,23 +6798,23 @@
 #define    GT_FIFO_IAWRERR			(1<<1)
 #define    GT_FIFO_IARDERR			(1<<0)
 
-#define  GTFIFOCTL				0x120008
+#define  GTFIFOCTL				_MMIO(0x120008)
 #define    GT_FIFO_FREE_ENTRIES_MASK		0x7f
 #define    GT_FIFO_NUM_RESERVED_ENTRIES		20
 #define    GT_FIFO_CTL_BLOCK_ALL_POLICY_STALL	(1 << 12)
 #define    GT_FIFO_CTL_RC6_POLICY_STALL		(1 << 11)
 
-#define  HSW_IDICR				0x9008
+#define  HSW_IDICR				_MMIO(0x9008)
 #define    IDIHASHMSK(x)			(((x) & 0x3f) << 16)
-#define  HSW_EDRAM_PRESENT			0x120010
+#define  HSW_EDRAM_PRESENT			_MMIO(0x120010)
 #define    EDRAM_ENABLED			0x1
 
-#define GEN6_UCGCTL1				0x9400
+#define GEN6_UCGCTL1				_MMIO(0x9400)
 # define GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE		(1 << 16)
 # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
 
-#define GEN6_UCGCTL2				0x9404
+#define GEN6_UCGCTL2				_MMIO(0x9404)
 # define GEN6_VFUNIT_CLOCK_GATE_DISABLE			(1 << 31)
 # define GEN7_VDSUNIT_CLOCK_GATE_DISABLE		(1 << 30)
 # define GEN7_TDLUNIT_CLOCK_GATE_DISABLE		(1 << 22)
@@ -6749,30 +6822,30 @@
 # define GEN6_RCPBUNIT_CLOCK_GATE_DISABLE		(1 << 12)
 # define GEN6_RCCUNIT_CLOCK_GATE_DISABLE		(1 << 11)
 
-#define GEN6_UCGCTL3				0x9408
+#define GEN6_UCGCTL3				_MMIO(0x9408)
 
-#define GEN7_UCGCTL4				0x940c
+#define GEN7_UCGCTL4				_MMIO(0x940c)
 #define  GEN7_L3BANK2X_CLOCK_GATE_DISABLE	(1<<25)
 
-#define GEN6_RCGCTL1				0x9410
-#define GEN6_RCGCTL2				0x9414
-#define GEN6_RSTCTL				0x9420
+#define GEN6_RCGCTL1				_MMIO(0x9410)
+#define GEN6_RCGCTL2				_MMIO(0x9414)
+#define GEN6_RSTCTL				_MMIO(0x9420)
 
-#define GEN8_UCGCTL6				0x9430
+#define GEN8_UCGCTL6				_MMIO(0x9430)
 #define   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE	(1<<24)
 #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE	(1<<14)
 #define   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1<<28)
 
-#define GEN6_GFXPAUSE				0xA000
-#define GEN6_RPNSWREQ				0xA008
+#define GEN6_GFXPAUSE				_MMIO(0xA000)
+#define GEN6_RPNSWREQ				_MMIO(0xA008)
 #define   GEN6_TURBO_DISABLE			(1<<31)
 #define   GEN6_FREQUENCY(x)			((x)<<25)
 #define   HSW_FREQUENCY(x)			((x)<<24)
 #define   GEN9_FREQUENCY(x)			((x)<<23)
 #define   GEN6_OFFSET(x)			((x)<<19)
 #define   GEN6_AGGRESSIVE_TURBO			(0<<15)
-#define GEN6_RC_VIDEO_FREQ			0xA00C
-#define GEN6_RC_CONTROL				0xA090
+#define GEN6_RC_VIDEO_FREQ			_MMIO(0xA00C)
+#define GEN6_RC_CONTROL				_MMIO(0xA090)
 #define   GEN6_RC_CTL_RC6pp_ENABLE		(1<<16)
 #define   GEN6_RC_CTL_RC6p_ENABLE		(1<<17)
 #define   GEN6_RC_CTL_RC6_ENABLE		(1<<18)
@@ -6782,16 +6855,16 @@
 #define   GEN7_RC_CTL_TO_MODE			(1<<28)
 #define   GEN6_RC_CTL_EI_MODE(x)		((x)<<27)
 #define   GEN6_RC_CTL_HW_ENABLE			(1<<31)
-#define GEN6_RP_DOWN_TIMEOUT			0xA010
-#define GEN6_RP_INTERRUPT_LIMITS		0xA014
-#define GEN6_RPSTAT1				0xA01C
+#define GEN6_RP_DOWN_TIMEOUT			_MMIO(0xA010)
+#define GEN6_RP_INTERRUPT_LIMITS		_MMIO(0xA014)
+#define GEN6_RPSTAT1				_MMIO(0xA01C)
 #define   GEN6_CAGF_SHIFT			8
 #define   HSW_CAGF_SHIFT			7
 #define   GEN9_CAGF_SHIFT			23
 #define   GEN6_CAGF_MASK			(0x7f << GEN6_CAGF_SHIFT)
 #define   HSW_CAGF_MASK				(0x7f << HSW_CAGF_SHIFT)
 #define   GEN9_CAGF_MASK			(0x1ff << GEN9_CAGF_SHIFT)
-#define GEN6_RP_CONTROL				0xA024
+#define GEN6_RP_CONTROL				_MMIO(0xA024)
 #define   GEN6_RP_MEDIA_TURBO			(1<<11)
 #define   GEN6_RP_MEDIA_MODE_MASK		(3<<9)
 #define   GEN6_RP_MEDIA_HW_TURBO_MODE		(3<<9)
@@ -6805,53 +6878,53 @@
 #define   GEN6_RP_UP_BUSY_CONT			(0x4<<3)
 #define   GEN6_RP_DOWN_IDLE_AVG			(0x2<<0)
 #define   GEN6_RP_DOWN_IDLE_CONT		(0x1<<0)
-#define GEN6_RP_UP_THRESHOLD			0xA02C
-#define GEN6_RP_DOWN_THRESHOLD			0xA030
-#define GEN6_RP_CUR_UP_EI			0xA050
+#define GEN6_RP_UP_THRESHOLD			_MMIO(0xA02C)
+#define GEN6_RP_DOWN_THRESHOLD			_MMIO(0xA030)
+#define GEN6_RP_CUR_UP_EI			_MMIO(0xA050)
 #define   GEN6_CURICONT_MASK			0xffffff
-#define GEN6_RP_CUR_UP				0xA054
+#define GEN6_RP_CUR_UP				_MMIO(0xA054)
 #define   GEN6_CURBSYTAVG_MASK			0xffffff
-#define GEN6_RP_PREV_UP				0xA058
-#define GEN6_RP_CUR_DOWN_EI			0xA05C
+#define GEN6_RP_PREV_UP				_MMIO(0xA058)
+#define GEN6_RP_CUR_DOWN_EI			_MMIO(0xA05C)
 #define   GEN6_CURIAVG_MASK			0xffffff
-#define GEN6_RP_CUR_DOWN			0xA060
-#define GEN6_RP_PREV_DOWN			0xA064
-#define GEN6_RP_UP_EI				0xA068
-#define GEN6_RP_DOWN_EI				0xA06C
-#define GEN6_RP_IDLE_HYSTERSIS			0xA070
-#define GEN6_RPDEUHWTC				0xA080
-#define GEN6_RPDEUC				0xA084
-#define GEN6_RPDEUCSW				0xA088
-#define GEN6_RC_STATE				0xA094
-#define GEN6_RC1_WAKE_RATE_LIMIT		0xA098
-#define GEN6_RC6_WAKE_RATE_LIMIT		0xA09C
-#define GEN6_RC6pp_WAKE_RATE_LIMIT		0xA0A0
-#define GEN6_RC_EVALUATION_INTERVAL		0xA0A8
-#define GEN6_RC_IDLE_HYSTERSIS			0xA0AC
-#define GEN6_RC_SLEEP				0xA0B0
-#define GEN6_RCUBMABDTMR			0xA0B0
-#define GEN6_RC1e_THRESHOLD			0xA0B4
-#define GEN6_RC6_THRESHOLD			0xA0B8
-#define GEN6_RC6p_THRESHOLD			0xA0BC
-#define VLV_RCEDATA				0xA0BC
-#define GEN6_RC6pp_THRESHOLD			0xA0C0
-#define GEN6_PMINTRMSK				0xA168
+#define GEN6_RP_CUR_DOWN			_MMIO(0xA060)
+#define GEN6_RP_PREV_DOWN			_MMIO(0xA064)
+#define GEN6_RP_UP_EI				_MMIO(0xA068)
+#define GEN6_RP_DOWN_EI				_MMIO(0xA06C)
+#define GEN6_RP_IDLE_HYSTERSIS			_MMIO(0xA070)
+#define GEN6_RPDEUHWTC				_MMIO(0xA080)
+#define GEN6_RPDEUC				_MMIO(0xA084)
+#define GEN6_RPDEUCSW				_MMIO(0xA088)
+#define GEN6_RC_STATE				_MMIO(0xA094)
+#define GEN6_RC1_WAKE_RATE_LIMIT		_MMIO(0xA098)
+#define GEN6_RC6_WAKE_RATE_LIMIT		_MMIO(0xA09C)
+#define GEN6_RC6pp_WAKE_RATE_LIMIT		_MMIO(0xA0A0)
+#define GEN6_RC_EVALUATION_INTERVAL		_MMIO(0xA0A8)
+#define GEN6_RC_IDLE_HYSTERSIS			_MMIO(0xA0AC)
+#define GEN6_RC_SLEEP				_MMIO(0xA0B0)
+#define GEN6_RCUBMABDTMR			_MMIO(0xA0B0)
+#define GEN6_RC1e_THRESHOLD			_MMIO(0xA0B4)
+#define GEN6_RC6_THRESHOLD			_MMIO(0xA0B8)
+#define GEN6_RC6p_THRESHOLD			_MMIO(0xA0BC)
+#define VLV_RCEDATA				_MMIO(0xA0BC)
+#define GEN6_RC6pp_THRESHOLD			_MMIO(0xA0C0)
+#define GEN6_PMINTRMSK				_MMIO(0xA168)
 #define GEN8_PMINTR_REDIRECT_TO_NON_DISP	(1<<31)
-#define VLV_PWRDWNUPCTL				0xA294
-#define GEN9_MEDIA_PG_IDLE_HYSTERESIS		0xA0C4
-#define GEN9_RENDER_PG_IDLE_HYSTERESIS		0xA0C8
-#define GEN9_PG_ENABLE				0xA210
+#define VLV_PWRDWNUPCTL				_MMIO(0xA294)
+#define GEN9_MEDIA_PG_IDLE_HYSTERESIS		_MMIO(0xA0C4)
+#define GEN9_RENDER_PG_IDLE_HYSTERESIS		_MMIO(0xA0C8)
+#define GEN9_PG_ENABLE				_MMIO(0xA210)
 #define GEN9_RENDER_PG_ENABLE			(1<<0)
 #define GEN9_MEDIA_PG_ENABLE			(1<<1)
 
-#define VLV_CHICKEN_3				(VLV_DISPLAY_BASE + 0x7040C)
+#define VLV_CHICKEN_3				_MMIO(VLV_DISPLAY_BASE + 0x7040C)
 #define  PIXEL_OVERLAP_CNT_MASK			(3 << 30)
 #define  PIXEL_OVERLAP_CNT_SHIFT		30
 
-#define GEN6_PMISR				0x44020
-#define GEN6_PMIMR				0x44024 /* rps_lock */
-#define GEN6_PMIIR				0x44028
-#define GEN6_PMIER				0x4402C
+#define GEN6_PMISR				_MMIO(0x44020)
+#define GEN6_PMIMR				_MMIO(0x44024) /* rps_lock */
+#define GEN6_PMIIR				_MMIO(0x44028)
+#define GEN6_PMIER				_MMIO(0x4402C)
 #define  GEN6_PM_MBOX_EVENT			(1<<25)
 #define  GEN6_PM_THERMAL_EVENT			(1<<24)
 #define  GEN6_PM_RP_DOWN_TIMEOUT		(1<<6)
@@ -6863,30 +6936,30 @@
 						 GEN6_PM_RP_DOWN_THRESHOLD | \
 						 GEN6_PM_RP_DOWN_TIMEOUT)
 
-#define GEN7_GT_SCRATCH(i)			(0x4F100 + (i) * 4)
+#define GEN7_GT_SCRATCH(i)			_MMIO(0x4F100 + (i) * 4)
 #define GEN7_GT_SCRATCH_REG_NUM			8
 
-#define VLV_GTLC_SURVIVABILITY_REG              0x130098
+#define VLV_GTLC_SURVIVABILITY_REG              _MMIO(0x130098)
 #define VLV_GFX_CLK_STATUS_BIT			(1<<3)
 #define VLV_GFX_CLK_FORCE_ON_BIT		(1<<2)
 
-#define GEN6_GT_GFX_RC6_LOCKED			0x138104
-#define VLV_COUNTER_CONTROL			0x138104
+#define GEN6_GT_GFX_RC6_LOCKED			_MMIO(0x138104)
+#define VLV_COUNTER_CONTROL			_MMIO(0x138104)
 #define   VLV_COUNT_RANGE_HIGH			(1<<15)
 #define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
 #define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
 #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
-#define GEN6_GT_GFX_RC6				0x138108
-#define VLV_GT_RENDER_RC6			0x138108
-#define VLV_GT_MEDIA_RC6			0x13810C
+#define GEN6_GT_GFX_RC6				_MMIO(0x138108)
+#define VLV_GT_RENDER_RC6			_MMIO(0x138108)
+#define VLV_GT_MEDIA_RC6			_MMIO(0x13810C)
 
-#define GEN6_GT_GFX_RC6p			0x13810C
-#define GEN6_GT_GFX_RC6pp			0x138110
-#define VLV_RENDER_C0_COUNT			0x138118
-#define VLV_MEDIA_C0_COUNT			0x13811C
+#define GEN6_GT_GFX_RC6p			_MMIO(0x13810C)
+#define GEN6_GT_GFX_RC6pp			_MMIO(0x138110)
+#define VLV_RENDER_C0_COUNT			_MMIO(0x138118)
+#define VLV_MEDIA_C0_COUNT			_MMIO(0x13811C)
 
-#define GEN6_PCODE_MAILBOX			0x138124
+#define GEN6_PCODE_MAILBOX			_MMIO(0x138124)
 #define   GEN6_PCODE_READY			(1<<31)
 #define	  GEN6_PCODE_WRITE_RC6VIDS		0x4
 #define	  GEN6_PCODE_READ_RC6VIDS		0x5
@@ -6909,12 +6982,12 @@
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
 #define   DISPLAY_IPS_CONTROL			0x19
 #define	  HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL	0x1A
-#define GEN6_PCODE_DATA				0x138128
+#define GEN6_PCODE_DATA				_MMIO(0x138128)
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
 #define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT	16
-#define GEN6_PCODE_DATA1			0x13812C
+#define GEN6_PCODE_DATA1			_MMIO(0x13812C)
 
-#define GEN6_GT_CORE_STATUS		0x138060
+#define GEN6_GT_CORE_STATUS		_MMIO(0x138060)
 #define   GEN6_CORE_CPD_STATE_MASK	(7<<4)
 #define   GEN6_RCn_MASK			7
 #define   GEN6_RC0			0
@@ -6922,26 +6995,26 @@
 #define   GEN6_RC6			3
 #define   GEN6_RC7			4
 
-#define GEN8_GT_SLICE_INFO		0x138064
+#define GEN8_GT_SLICE_INFO		_MMIO(0x138064)
 #define   GEN8_LSLICESTAT_MASK		0x7
 
-#define CHV_POWER_SS0_SIG1		0xa720
-#define CHV_POWER_SS1_SIG1		0xa728
+#define CHV_POWER_SS0_SIG1		_MMIO(0xa720)
+#define CHV_POWER_SS1_SIG1		_MMIO(0xa728)
 #define   CHV_SS_PG_ENABLE		(1<<1)
 #define   CHV_EU08_PG_ENABLE		(1<<9)
 #define   CHV_EU19_PG_ENABLE		(1<<17)
 #define   CHV_EU210_PG_ENABLE		(1<<25)
 
-#define CHV_POWER_SS0_SIG2		0xa724
-#define CHV_POWER_SS1_SIG2		0xa72c
+#define CHV_POWER_SS0_SIG2		_MMIO(0xa724)
+#define CHV_POWER_SS1_SIG2		_MMIO(0xa72c)
 #define   CHV_EU311_PG_ENABLE		(1<<1)
 
-#define GEN9_SLICE_PGCTL_ACK(slice)	(0x804c + (slice)*0x4)
+#define GEN9_SLICE_PGCTL_ACK(slice)	_MMIO(0x804c + (slice)*0x4)
 #define   GEN9_PGCTL_SLICE_ACK		(1 << 0)
 #define   GEN9_PGCTL_SS_ACK(subslice)	(1 << (2 + (subslice)*2))
 
-#define GEN9_SS01_EU_PGCTL_ACK(slice)	(0x805c + (slice)*0x8)
-#define GEN9_SS23_EU_PGCTL_ACK(slice)	(0x8060 + (slice)*0x8)
+#define GEN9_SS01_EU_PGCTL_ACK(slice)	_MMIO(0x805c + (slice)*0x8)
+#define GEN9_SS23_EU_PGCTL_ACK(slice)	_MMIO(0x8060 + (slice)*0x8)
 #define   GEN9_PGCTL_SSA_EU08_ACK	(1 << 0)
 #define   GEN9_PGCTL_SSA_EU19_ACK	(1 << 2)
 #define   GEN9_PGCTL_SSA_EU210_ACK	(1 << 4)
@@ -6951,18 +7024,17 @@
 #define   GEN9_PGCTL_SSB_EU210_ACK	(1 << 12)
 #define   GEN9_PGCTL_SSB_EU311_ACK	(1 << 14)
 
-#define GEN7_MISCCPCTL			(0x9424)
+#define GEN7_MISCCPCTL				_MMIO(0x9424)
 #define   GEN7_DOP_CLOCK_GATE_ENABLE		(1<<0)
 #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE	(1<<2)
 #define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE	(1<<4)
 #define   GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE     (1<<6)
 
-#define GEN8_GARBCNTL                   0xB004
+#define GEN8_GARBCNTL                   _MMIO(0xB004)
 #define   GEN9_GAPS_TSV_CREDIT_DISABLE  (1<<7)
 
 /* IVYBRIDGE DPF */
-#define GEN7_L3CDERRST1			0xB008 /* L3CD Error Status 1 */
-#define HSW_L3CDERRST11			0xB208 /* L3CD Error Status register 1 slice 1 */
+#define GEN7_L3CDERRST1(slice)		_MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */
 #define   GEN7_L3CDERRST1_ROW_MASK	(0x7ff<<14)
 #define   GEN7_PARITY_ERROR_VALID	(1<<13)
 #define   GEN7_L3CDERRST1_BANK_MASK	(3<<11)
@@ -6975,119 +7047,102 @@
 		((reg & GEN7_L3CDERRST1_SUBBANK_MASK) >> 8)
 #define   GEN7_L3CDERRST1_ENABLE	(1<<7)
 
-#define GEN7_L3LOG_BASE			0xB070
-#define HSW_L3LOG_BASE_SLICE1		0xB270
+#define GEN7_L3LOG(slice, i)		_MMIO(0xB070 + (slice) * 0x200 + (i) * 4)
 #define GEN7_L3LOG_SIZE			0x80
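
GEN7_L3CDERRST1() and GEN7_L3LOG() replace the separate HSW slice-1 defines with a 0x200-per-slice stride, so callers now pass the slice instead of picking between two names; the old constants fall out of the new macros:

	/* GEN7_L3CDERRST1(1) = _MMIO(0xB008 + 1 * 0x200) = _MMIO(0xB208)
	 *			-- the old HSW_L3CDERRST11
	 * GEN7_L3LOG(1, 0)   = _MMIO(0xB070 + 1 * 0x200 + 0 * 4) = _MMIO(0xB270)
	 *			-- the old HSW_L3LOG_BASE_SLICE1
	 */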
 
-#define GEN7_HALF_SLICE_CHICKEN1	0xe100 /* IVB GT1 + VLV */
-#define GEN7_HALF_SLICE_CHICKEN1_GT2	0xf100
+#define GEN7_HALF_SLICE_CHICKEN1	_MMIO(0xe100) /* IVB GT1 + VLV */
+#define GEN7_HALF_SLICE_CHICKEN1_GT2	_MMIO(0xf100)
 #define   GEN7_MAX_PS_THREAD_DEP		(8<<12)
 #define   GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE	(1<<10)
 #define   GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE	(1<<4)
 #define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE	(1<<3)
 
-#define GEN9_HALF_SLICE_CHICKEN5	0xe188
+#define GEN9_HALF_SLICE_CHICKEN5	_MMIO(0xe188)
 #define   GEN9_DG_MIRROR_FIX_ENABLE	(1<<5)
 #define   GEN9_CCS_TLB_PREFETCH_ENABLE	(1<<3)
 
-#define GEN8_ROW_CHICKEN		0xe4f0
+#define GEN8_ROW_CHICKEN		_MMIO(0xe4f0)
 #define   PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE	(1<<8)
 #define   STALL_DOP_GATING_DISABLE		(1<<5)
 
-#define GEN7_ROW_CHICKEN2		0xe4f4
-#define GEN7_ROW_CHICKEN2_GT2		0xf4f4
+#define GEN7_ROW_CHICKEN2		_MMIO(0xe4f4)
+#define GEN7_ROW_CHICKEN2_GT2		_MMIO(0xf4f4)
 #define   DOP_CLOCK_GATING_DISABLE	(1<<0)
 
-#define HSW_ROW_CHICKEN3		0xe49c
+#define HSW_ROW_CHICKEN3		_MMIO(0xe49c)
 #define  HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE    (1 << 6)
 
-#define HALF_SLICE_CHICKEN2		0xe180
+#define HALF_SLICE_CHICKEN2		_MMIO(0xe180)
 #define   GEN8_ST_PO_DISABLE		(1<<13)
 
-#define HALF_SLICE_CHICKEN3		0xe184
+#define HALF_SLICE_CHICKEN3		_MMIO(0xe184)
 #define   HSW_SAMPLE_C_PERFORMANCE	(1<<9)
 #define   GEN8_CENTROID_PIXEL_OPT_DIS	(1<<8)
 #define   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC	(1<<5)
 #define   GEN8_SAMPLER_POWER_BYPASS_DIS	(1<<1)
 
-#define GEN9_HALF_SLICE_CHICKEN7	0xe194
+#define GEN9_HALF_SLICE_CHICKEN7	_MMIO(0xe194)
 #define   GEN9_ENABLE_YV12_BUGFIX	(1<<4)
 
 /* Audio */
-#define G4X_AUD_VID_DID			(dev_priv->info.display_mmio_offset + 0x62020)
+#define G4X_AUD_VID_DID			_MMIO(dev_priv->info.display_mmio_offset + 0x62020)
 #define   INTEL_AUDIO_DEVCL		0x808629FB
 #define   INTEL_AUDIO_DEVBLC		0x80862801
 #define   INTEL_AUDIO_DEVCTG		0x80862802
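
G4X_AUD_VID_DID shows that _MMIO() is not limited to compile-time constants: the display_mmio_offset term is read from dev_priv at the point of use, and the typed handle is built on the spot (the MIPI registers further down do the same with dev_priv->mipi_mmio_base). A small hedged sketch, assuming the compound-literal form of _MMIO():

	static u32 g4x_audio_vid_did_sketch(struct drm_i915_private *dev_priv)
	{
		/* dev_priv->info.display_mmio_offset is evaluated here, at the
		 * point of use; _MMIO() then wraps the resulting offset.
		 */
		return I915_READ(G4X_AUD_VID_DID);
	}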
 
-#define G4X_AUD_CNTL_ST			0x620B4
+#define G4X_AUD_CNTL_ST			_MMIO(0x620B4)
 #define   G4X_ELDV_DEVCL_DEVBLC		(1 << 13)
 #define   G4X_ELDV_DEVCTG		(1 << 14)
 #define   G4X_ELD_ADDR_MASK		(0xf << 5)
 #define   G4X_ELD_ACK			(1 << 4)
-#define G4X_HDMIW_HDMIEDID		0x6210C
+#define G4X_HDMIW_HDMIEDID		_MMIO(0x6210C)
 
 #define _IBX_HDMIW_HDMIEDID_A		0xE2050
 #define _IBX_HDMIW_HDMIEDID_B		0xE2150
-#define IBX_HDMIW_HDMIEDID(pipe) _PIPE(pipe, \
-					_IBX_HDMIW_HDMIEDID_A, \
-					_IBX_HDMIW_HDMIEDID_B)
+#define IBX_HDMIW_HDMIEDID(pipe)	_MMIO_PIPE(pipe, _IBX_HDMIW_HDMIEDID_A, \
+						  _IBX_HDMIW_HDMIEDID_B)
 #define _IBX_AUD_CNTL_ST_A		0xE20B4
 #define _IBX_AUD_CNTL_ST_B		0xE21B4
-#define IBX_AUD_CNTL_ST(pipe) _PIPE(pipe, \
-					_IBX_AUD_CNTL_ST_A, \
-					_IBX_AUD_CNTL_ST_B)
+#define IBX_AUD_CNTL_ST(pipe)		_MMIO_PIPE(pipe, _IBX_AUD_CNTL_ST_A, \
+						  _IBX_AUD_CNTL_ST_B)
 #define   IBX_ELD_BUFFER_SIZE_MASK	(0x1f << 10)
 #define   IBX_ELD_ADDRESS_MASK		(0x1f << 5)
 #define   IBX_ELD_ACK			(1 << 4)
-#define IBX_AUD_CNTL_ST2		0xE20C0
+#define IBX_AUD_CNTL_ST2		_MMIO(0xE20C0)
 #define   IBX_CP_READY(port)		((1 << 1) << (((port) - 1) * 4))
 #define   IBX_ELD_VALID(port)		((1 << 0) << (((port) - 1) * 4))
 
 #define _CPT_HDMIW_HDMIEDID_A		0xE5050
 #define _CPT_HDMIW_HDMIEDID_B		0xE5150
-#define CPT_HDMIW_HDMIEDID(pipe) _PIPE(pipe, \
-					_CPT_HDMIW_HDMIEDID_A, \
-					_CPT_HDMIW_HDMIEDID_B)
+#define CPT_HDMIW_HDMIEDID(pipe)	_MMIO_PIPE(pipe, _CPT_HDMIW_HDMIEDID_A, _CPT_HDMIW_HDMIEDID_B)
 #define _CPT_AUD_CNTL_ST_A		0xE50B4
 #define _CPT_AUD_CNTL_ST_B		0xE51B4
-#define CPT_AUD_CNTL_ST(pipe) _PIPE(pipe, \
-					_CPT_AUD_CNTL_ST_A, \
-					_CPT_AUD_CNTL_ST_B)
-#define CPT_AUD_CNTRL_ST2		0xE50C0
+#define CPT_AUD_CNTL_ST(pipe)		_MMIO_PIPE(pipe, _CPT_AUD_CNTL_ST_A, _CPT_AUD_CNTL_ST_B)
+#define CPT_AUD_CNTRL_ST2		_MMIO(0xE50C0)
 
 #define _VLV_HDMIW_HDMIEDID_A		(VLV_DISPLAY_BASE + 0x62050)
 #define _VLV_HDMIW_HDMIEDID_B		(VLV_DISPLAY_BASE + 0x62150)
-#define VLV_HDMIW_HDMIEDID(pipe) _PIPE(pipe, \
-					_VLV_HDMIW_HDMIEDID_A, \
-					_VLV_HDMIW_HDMIEDID_B)
+#define VLV_HDMIW_HDMIEDID(pipe)	_MMIO_PIPE(pipe, _VLV_HDMIW_HDMIEDID_A, _VLV_HDMIW_HDMIEDID_B)
 #define _VLV_AUD_CNTL_ST_A		(VLV_DISPLAY_BASE + 0x620B4)
 #define _VLV_AUD_CNTL_ST_B		(VLV_DISPLAY_BASE + 0x621B4)
-#define VLV_AUD_CNTL_ST(pipe) _PIPE(pipe, \
-					_VLV_AUD_CNTL_ST_A, \
-					_VLV_AUD_CNTL_ST_B)
-#define VLV_AUD_CNTL_ST2		(VLV_DISPLAY_BASE + 0x620C0)
+#define VLV_AUD_CNTL_ST(pipe)		_MMIO_PIPE(pipe, _VLV_AUD_CNTL_ST_A, _VLV_AUD_CNTL_ST_B)
+#define VLV_AUD_CNTL_ST2		_MMIO(VLV_DISPLAY_BASE + 0x620C0)
 
 /* These are the 4 32-bit write offset registers for each stream
  * output buffer.  It determines the offset from the
  * 3DSTATE_SO_BUFFERs that the next streamed vertex output goes to.
  */
-#define GEN7_SO_WRITE_OFFSET(n)		(0x5280 + (n) * 4)
+#define GEN7_SO_WRITE_OFFSET(n)		_MMIO(0x5280 + (n) * 4)
 
 #define _IBX_AUD_CONFIG_A		0xe2000
 #define _IBX_AUD_CONFIG_B		0xe2100
-#define IBX_AUD_CFG(pipe) _PIPE(pipe, \
-					_IBX_AUD_CONFIG_A, \
-					_IBX_AUD_CONFIG_B)
+#define IBX_AUD_CFG(pipe)		_MMIO_PIPE(pipe, _IBX_AUD_CONFIG_A, _IBX_AUD_CONFIG_B)
 #define _CPT_AUD_CONFIG_A		0xe5000
 #define _CPT_AUD_CONFIG_B		0xe5100
-#define CPT_AUD_CFG(pipe) _PIPE(pipe, \
-					_CPT_AUD_CONFIG_A, \
-					_CPT_AUD_CONFIG_B)
+#define CPT_AUD_CFG(pipe)		_MMIO_PIPE(pipe, _CPT_AUD_CONFIG_A, _CPT_AUD_CONFIG_B)
 #define _VLV_AUD_CONFIG_A		(VLV_DISPLAY_BASE + 0x62000)
 #define _VLV_AUD_CONFIG_B		(VLV_DISPLAY_BASE + 0x62100)
-#define VLV_AUD_CFG(pipe) _PIPE(pipe, \
-					_VLV_AUD_CONFIG_A, \
-					_VLV_AUD_CONFIG_B)
+#define VLV_AUD_CFG(pipe)		_MMIO_PIPE(pipe, _VLV_AUD_CONFIG_A, _VLV_AUD_CONFIG_B)
 
 #define   AUD_CONFIG_N_VALUE_INDEX		(1 << 29)
 #define   AUD_CONFIG_N_PROG_ENABLE		(1 << 28)
@@ -7112,72 +7167,62 @@
 /* HSW Audio */
 #define _HSW_AUD_CONFIG_A		0x65000
 #define _HSW_AUD_CONFIG_B		0x65100
-#define HSW_AUD_CFG(pipe) _PIPE(pipe, \
-					_HSW_AUD_CONFIG_A, \
-					_HSW_AUD_CONFIG_B)
+#define HSW_AUD_CFG(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_CONFIG_A, _HSW_AUD_CONFIG_B)
 
 #define _HSW_AUD_MISC_CTRL_A		0x65010
 #define _HSW_AUD_MISC_CTRL_B		0x65110
-#define HSW_AUD_MISC_CTRL(pipe) _PIPE(pipe, \
-					_HSW_AUD_MISC_CTRL_A, \
-					_HSW_AUD_MISC_CTRL_B)
+#define HSW_AUD_MISC_CTRL(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_MISC_CTRL_A, _HSW_AUD_MISC_CTRL_B)
 
 #define _HSW_AUD_DIP_ELD_CTRL_ST_A	0x650b4
 #define _HSW_AUD_DIP_ELD_CTRL_ST_B	0x651b4
-#define HSW_AUD_DIP_ELD_CTRL(pipe) _PIPE(pipe, \
-					_HSW_AUD_DIP_ELD_CTRL_ST_A, \
-					_HSW_AUD_DIP_ELD_CTRL_ST_B)
+#define HSW_AUD_DIP_ELD_CTRL(pipe)	_MMIO_PIPE(pipe, _HSW_AUD_DIP_ELD_CTRL_ST_A, _HSW_AUD_DIP_ELD_CTRL_ST_B)
 
 /* Audio Digital Converter */
 #define _HSW_AUD_DIG_CNVT_1		0x65080
 #define _HSW_AUD_DIG_CNVT_2		0x65180
-#define AUD_DIG_CNVT(pipe) _PIPE(pipe, \
-					_HSW_AUD_DIG_CNVT_1, \
-					_HSW_AUD_DIG_CNVT_2)
+#define AUD_DIG_CNVT(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_DIG_CNVT_1, _HSW_AUD_DIG_CNVT_2)
 #define DIP_PORT_SEL_MASK		0x3
 
 #define _HSW_AUD_EDID_DATA_A		0x65050
 #define _HSW_AUD_EDID_DATA_B		0x65150
-#define HSW_AUD_EDID_DATA(pipe) _PIPE(pipe, \
-					_HSW_AUD_EDID_DATA_A, \
-					_HSW_AUD_EDID_DATA_B)
+#define HSW_AUD_EDID_DATA(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_EDID_DATA_A, _HSW_AUD_EDID_DATA_B)
 
-#define HSW_AUD_PIPE_CONV_CFG		0x6507c
-#define HSW_AUD_PIN_ELD_CP_VLD		0x650c0
+#define HSW_AUD_PIPE_CONV_CFG		_MMIO(0x6507c)
+#define HSW_AUD_PIN_ELD_CP_VLD		_MMIO(0x650c0)
 #define   AUDIO_INACTIVE(trans)		((1 << 3) << ((trans) * 4))
 #define   AUDIO_OUTPUT_ENABLE(trans)	((1 << 2) << ((trans) * 4))
 #define   AUDIO_CP_READY(trans)		((1 << 1) << ((trans) * 4))
 #define   AUDIO_ELD_VALID(trans)	((1 << 0) << ((trans) * 4))
 
-#define HSW_AUD_CHICKENBIT			0x65f10
+#define HSW_AUD_CHICKENBIT			_MMIO(0x65f10)
 #define   SKL_AUD_CODEC_WAKE_SIGNAL		(1 << 15)
 
 /* HSW Power Wells */
-#define HSW_PWR_WELL_BIOS			0x45400 /* CTL1 */
-#define HSW_PWR_WELL_DRIVER			0x45404 /* CTL2 */
-#define HSW_PWR_WELL_KVMR			0x45408 /* CTL3 */
-#define HSW_PWR_WELL_DEBUG			0x4540C /* CTL4 */
+#define HSW_PWR_WELL_BIOS			_MMIO(0x45400) /* CTL1 */
+#define HSW_PWR_WELL_DRIVER			_MMIO(0x45404) /* CTL2 */
+#define HSW_PWR_WELL_KVMR			_MMIO(0x45408) /* CTL3 */
+#define HSW_PWR_WELL_DEBUG			_MMIO(0x4540C) /* CTL4 */
 #define   HSW_PWR_WELL_ENABLE_REQUEST		(1<<31)
 #define   HSW_PWR_WELL_STATE_ENABLED		(1<<30)
-#define HSW_PWR_WELL_CTL5			0x45410
+#define HSW_PWR_WELL_CTL5			_MMIO(0x45410)
 #define   HSW_PWR_WELL_ENABLE_SINGLE_STEP	(1<<31)
 #define   HSW_PWR_WELL_PWR_GATE_OVERRIDE	(1<<20)
 #define   HSW_PWR_WELL_FORCE_ON			(1<<19)
-#define HSW_PWR_WELL_CTL6			0x45414
+#define HSW_PWR_WELL_CTL6			_MMIO(0x45414)
 
 /* SKL Fuse Status */
-#define SKL_FUSE_STATUS				0x42000
+#define SKL_FUSE_STATUS				_MMIO(0x42000)
 #define  SKL_FUSE_DOWNLOAD_STATUS              (1<<31)
 #define  SKL_FUSE_PG0_DIST_STATUS              (1<<27)
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
 
 /* Per-pipe DDI Function Control */
-#define TRANS_DDI_FUNC_CTL_A		0x60400
-#define TRANS_DDI_FUNC_CTL_B		0x61400
-#define TRANS_DDI_FUNC_CTL_C		0x62400
-#define TRANS_DDI_FUNC_CTL_EDP		0x6F400
-#define TRANS_DDI_FUNC_CTL(tran) _TRANSCODER2(tran, TRANS_DDI_FUNC_CTL_A)
+#define _TRANS_DDI_FUNC_CTL_A		0x60400
+#define _TRANS_DDI_FUNC_CTL_B		0x61400
+#define _TRANS_DDI_FUNC_CTL_C		0x62400
+#define _TRANS_DDI_FUNC_CTL_EDP		0x6F400
+#define TRANS_DDI_FUNC_CTL(tran) _MMIO_TRANS2(tran, _TRANS_DDI_FUNC_CTL_A)
 
 #define  TRANS_DDI_FUNC_ENABLE		(1<<31)
 /* Those bits are ignored by pipe EDP since it can only connect to DDI A */
@@ -7207,9 +7252,9 @@
 #define  TRANS_DDI_BFI_ENABLE		(1<<4)
 
 /* DisplayPort Transport Control */
-#define DP_TP_CTL_A			0x64040
-#define DP_TP_CTL_B			0x64140
-#define DP_TP_CTL(port) _PORT(port, DP_TP_CTL_A, DP_TP_CTL_B)
+#define _DP_TP_CTL_A			0x64040
+#define _DP_TP_CTL_B			0x64140
+#define DP_TP_CTL(port) _MMIO_PORT(port, _DP_TP_CTL_A, _DP_TP_CTL_B)
 #define  DP_TP_CTL_ENABLE			(1<<31)
 #define  DP_TP_CTL_MODE_SST			(0<<27)
 #define  DP_TP_CTL_MODE_MST			(1<<27)
@@ -7225,9 +7270,9 @@
 #define  DP_TP_CTL_SCRAMBLE_DISABLE		(1<<7)
 
 /* DisplayPort Transport Status */
-#define DP_TP_STATUS_A			0x64044
-#define DP_TP_STATUS_B			0x64144
-#define DP_TP_STATUS(port) _PORT(port, DP_TP_STATUS_A, DP_TP_STATUS_B)
+#define _DP_TP_STATUS_A			0x64044
+#define _DP_TP_STATUS_B			0x64144
+#define DP_TP_STATUS(port) _MMIO_PORT(port, _DP_TP_STATUS_A, _DP_TP_STATUS_B)
 #define  DP_TP_STATUS_IDLE_DONE			(1<<25)
 #define  DP_TP_STATUS_ACT_SENT			(1<<24)
 #define  DP_TP_STATUS_MODE_STATUS_MST		(1<<23)
@@ -7237,9 +7282,9 @@
 #define  DP_TP_STATUS_PAYLOAD_MAPPING_VC0	(3 << 0)
 
 /* DDI Buffer Control */
-#define DDI_BUF_CTL_A				0x64000
-#define DDI_BUF_CTL_B				0x64100
-#define DDI_BUF_CTL(port) _PORT(port, DDI_BUF_CTL_A, DDI_BUF_CTL_B)
+#define _DDI_BUF_CTL_A				0x64000
+#define _DDI_BUF_CTL_B				0x64100
+#define DDI_BUF_CTL(port) _MMIO_PORT(port, _DDI_BUF_CTL_A, _DDI_BUF_CTL_B)
 #define  DDI_BUF_CTL_ENABLE			(1<<31)
 #define  DDI_BUF_TRANS_SELECT(n)	((n) << 24)
 #define  DDI_BUF_EMP_MASK			(0xf<<24)
@@ -7252,17 +7297,17 @@
 #define  DDI_INIT_DISPLAY_DETECTED		(1<<0)
 
 /* DDI Buffer Translations */
-#define DDI_BUF_TRANS_A				0x64E00
-#define DDI_BUF_TRANS_B				0x64E60
-#define DDI_BUF_TRANS_LO(port, i) (_PORT(port, DDI_BUF_TRANS_A, DDI_BUF_TRANS_B) + (i) * 8)
-#define DDI_BUF_TRANS_HI(port, i) (_PORT(port, DDI_BUF_TRANS_A, DDI_BUF_TRANS_B) + (i) * 8 + 4)
+#define _DDI_BUF_TRANS_A		0x64E00
+#define _DDI_BUF_TRANS_B		0x64E60
+#define DDI_BUF_TRANS_LO(port, i)	_MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8)
+#define DDI_BUF_TRANS_HI(port, i)	_MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8 + 4)
 
 /* Sideband Interface (SBI) is programmed indirectly, via
  * SBI_ADDR, which contains the register offset; and SBI_DATA,
  * which contains the payload */
-#define SBI_ADDR			0xC6000
-#define SBI_DATA			0xC6004
-#define SBI_CTL_STAT			0xC6008
+#define SBI_ADDR			_MMIO(0xC6000)
+#define SBI_DATA			_MMIO(0xC6004)
+#define SBI_CTL_STAT			_MMIO(0xC6008)
 #define  SBI_CTL_DEST_ICLK		(0x0<<16)
 #define  SBI_CTL_DEST_MPHY		(0x1<<16)
 #define  SBI_CTL_OP_IORD		(0x2<<8)
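
The comment above describes the indirect SBI access scheme; with the three registers now typed, a read looks roughly like the sketch below. This is a loose illustration of the pattern, not the driver's helper: wait_for() and an SBI_BUSY status bit are assumed from elsewhere in the driver, and real code additionally checks the response bits and warns on timeout.

	static u32 sbi_mphy_read_sketch(struct drm_i915_private *dev_priv,
					u16 sbi_offset)
	{
		if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0, 100))
			return 0;

		I915_WRITE(SBI_ADDR, (u32)sbi_offset << 16);
		I915_WRITE(SBI_CTL_STAT,
			   SBI_CTL_DEST_MPHY | SBI_CTL_OP_IORD | SBI_BUSY);

		if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0, 100))
			return 0;

		return I915_READ(SBI_DATA);
	}
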
@@ -7293,12 +7338,12 @@
 #define   SBI_GEN0_CFG_BUFFENABLE_DISABLE	(1<<0)
 
 /* LPT PIXCLK_GATE */
-#define PIXCLK_GATE			0xC6020
+#define PIXCLK_GATE			_MMIO(0xC6020)
 #define  PIXCLK_GATE_UNGATE		(1<<0)
 #define  PIXCLK_GATE_GATE		(0<<0)
 
 /* SPLL */
-#define SPLL_CTL			0x46020
+#define SPLL_CTL			_MMIO(0x46020)
 #define  SPLL_PLL_ENABLE		(1<<31)
 #define  SPLL_PLL_SSC			(1<<28)
 #define  SPLL_PLL_NON_SSC		(2<<28)
@@ -7310,9 +7355,9 @@
 #define  SPLL_PLL_FREQ_MASK		(3<<26)
 
 /* WRPLL */
-#define WRPLL_CTL1			0x46040
-#define WRPLL_CTL2			0x46060
-#define WRPLL_CTL(pll)			(pll == 0 ? WRPLL_CTL1 : WRPLL_CTL2)
+#define _WRPLL_CTL1			0x46040
+#define _WRPLL_CTL2			0x46060
+#define WRPLL_CTL(pll)			_MMIO_PIPE(pll, _WRPLL_CTL1, _WRPLL_CTL2)
 #define  WRPLL_PLL_ENABLE		(1<<31)
 #define  WRPLL_PLL_SSC			(1<<28)
 #define  WRPLL_PLL_NON_SSC		(2<<28)
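
WRPLL_CTL() also drops the explicit ternary: since the two control registers sit 0x20 apart, _MMIO_PIPE() can select them with the usual stride arithmetic:

	/* WRPLL_CTL(0) -> _MMIO(0x46040 + 0 * (0x46060 - 0x46040)) = _MMIO(0x46040)
	 * WRPLL_CTL(1) -> _MMIO(0x46040 + 1 * (0x46060 - 0x46040)) = _MMIO(0x46060)
	 * i.e. the same pair the old (pll == 0 ? ... : ...) selected.
	 */
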
@@ -7329,9 +7374,9 @@
 #define  WRPLL_DIVIDER_FB_MASK		(0xff<<16)
 
 /* Port clock selection */
-#define PORT_CLK_SEL_A			0x46100
-#define PORT_CLK_SEL_B			0x46104
-#define PORT_CLK_SEL(port) _PORT(port, PORT_CLK_SEL_A, PORT_CLK_SEL_B)
+#define _PORT_CLK_SEL_A			0x46100
+#define _PORT_CLK_SEL_B			0x46104
+#define PORT_CLK_SEL(port) _MMIO_PORT(port, _PORT_CLK_SEL_A, _PORT_CLK_SEL_B)
 #define  PORT_CLK_SEL_LCPLL_2700	(0<<29)
 #define  PORT_CLK_SEL_LCPLL_1350	(1<<29)
 #define  PORT_CLK_SEL_LCPLL_810		(2<<29)
@@ -7343,18 +7388,18 @@
 #define  PORT_CLK_SEL_MASK		(7<<29)
 
 /* Transcoder clock selection */
-#define TRANS_CLK_SEL_A			0x46140
-#define TRANS_CLK_SEL_B			0x46144
-#define TRANS_CLK_SEL(tran) _TRANSCODER(tran, TRANS_CLK_SEL_A, TRANS_CLK_SEL_B)
+#define _TRANS_CLK_SEL_A		0x46140
+#define _TRANS_CLK_SEL_B		0x46144
+#define TRANS_CLK_SEL(tran) _MMIO_TRANS(tran, _TRANS_CLK_SEL_A, _TRANS_CLK_SEL_B)
 /* For each transcoder, we need to select the corresponding port clock */
 #define  TRANS_CLK_SEL_DISABLED		(0x0<<29)
 #define  TRANS_CLK_SEL_PORT(x)		(((x)+1)<<29)
 
-#define TRANSA_MSA_MISC			0x60410
-#define TRANSB_MSA_MISC			0x61410
-#define TRANSC_MSA_MISC			0x62410
-#define TRANS_EDP_MSA_MISC		0x6f410
-#define TRANS_MSA_MISC(tran) _TRANSCODER2(tran, TRANSA_MSA_MISC)
+#define _TRANSA_MSA_MISC		0x60410
+#define _TRANSB_MSA_MISC		0x61410
+#define _TRANSC_MSA_MISC		0x62410
+#define _TRANS_EDP_MSA_MISC		0x6f410
+#define TRANS_MSA_MISC(tran) _MMIO_TRANS2(tran, _TRANSA_MSA_MISC)
 
 #define  TRANS_MSA_SYNC_CLK		(1<<0)
 #define  TRANS_MSA_6_BPC		(0<<5)
@@ -7364,7 +7409,7 @@
 #define  TRANS_MSA_16_BPC		(4<<5)
 
 /* LCPLL Control */
-#define LCPLL_CTL			0x130040
+#define LCPLL_CTL			_MMIO(0x130040)
 #define  LCPLL_PLL_DISABLE		(1<<31)
 #define  LCPLL_PLL_LOCK			(1<<30)
 #define  LCPLL_CLK_FREQ_MASK		(3<<26)
@@ -7384,7 +7429,7 @@
  */
 
 /* CDCLK_CTL */
-#define CDCLK_CTL			0x46000
+#define CDCLK_CTL			_MMIO(0x46000)
 #define  CDCLK_FREQ_SEL_MASK		(3<<26)
 #define  CDCLK_FREQ_450_432		(0<<26)
 #define  CDCLK_FREQ_540			(1<<26)
@@ -7400,12 +7445,12 @@
 #define  BXT_CDCLK_SSA_PRECHARGE_ENABLE	(1<<16)
 
 /* LCPLL_CTL */
-#define LCPLL1_CTL		0x46010
-#define LCPLL2_CTL		0x46014
+#define LCPLL1_CTL		_MMIO(0x46010)
+#define LCPLL2_CTL		_MMIO(0x46014)
 #define  LCPLL_PLL_ENABLE	(1<<31)
 
 /* DPLL control1 */
-#define DPLL_CTRL1		0x6C058
+#define DPLL_CTRL1		_MMIO(0x6C058)
 #define  DPLL_CTRL1_HDMI_MODE(id)		(1<<((id)*6+5))
 #define  DPLL_CTRL1_SSC(id)			(1<<((id)*6+4))
 #define  DPLL_CTRL1_LINK_RATE_MASK(id)		(7<<((id)*6+1))
@@ -7420,7 +7465,7 @@
 #define  DPLL_CTRL1_LINK_RATE_2160		5
 
 /* DPLL control2 */
-#define DPLL_CTRL2				0x6C05C
+#define DPLL_CTRL2				_MMIO(0x6C05C)
 #define  DPLL_CTRL2_DDI_CLK_OFF(port)		(1<<((port)+15))
 #define  DPLL_CTRL2_DDI_CLK_SEL_MASK(port)	(3<<((port)*3+1))
 #define  DPLL_CTRL2_DDI_CLK_SEL_SHIFT(port)    ((port)*3+1)
@@ -7428,21 +7473,21 @@
 #define  DPLL_CTRL2_DDI_SEL_OVERRIDE(port)     (1<<((port)*3))
 
 /* DPLL Status */
-#define DPLL_STATUS	0x6C060
+#define DPLL_STATUS	_MMIO(0x6C060)
 #define  DPLL_LOCK(id) (1<<((id)*8))
 
 /* DPLL cfg */
-#define DPLL1_CFGCR1	0x6C040
-#define DPLL2_CFGCR1	0x6C048
-#define DPLL3_CFGCR1	0x6C050
+#define _DPLL1_CFGCR1	0x6C040
+#define _DPLL2_CFGCR1	0x6C048
+#define _DPLL3_CFGCR1	0x6C050
 #define  DPLL_CFGCR1_FREQ_ENABLE	(1<<31)
 #define  DPLL_CFGCR1_DCO_FRACTION_MASK	(0x7fff<<9)
 #define  DPLL_CFGCR1_DCO_FRACTION(x)	((x)<<9)
 #define  DPLL_CFGCR1_DCO_INTEGER_MASK	(0x1ff)
 
-#define DPLL1_CFGCR2	0x6C044
-#define DPLL2_CFGCR2	0x6C04C
-#define DPLL3_CFGCR2	0x6C054
+#define _DPLL1_CFGCR2	0x6C044
+#define _DPLL2_CFGCR2	0x6C04C
+#define _DPLL3_CFGCR2	0x6C054
 #define  DPLL_CFGCR2_QDIV_RATIO_MASK	(0xff<<8)
 #define  DPLL_CFGCR2_QDIV_RATIO(x)	((x)<<8)
 #define  DPLL_CFGCR2_QDIV_MODE(x)	((x)<<7)
@@ -7460,58 +7505,58 @@
 #define  DPLL_CFGCR2_PDIV_7 (4<<2)
 #define  DPLL_CFGCR2_CENTRAL_FREQ_MASK	(3)
 
-#define DPLL_CFGCR1(id) (DPLL1_CFGCR1 + ((id) - SKL_DPLL1) * 8)
-#define DPLL_CFGCR2(id) (DPLL1_CFGCR2 + ((id) - SKL_DPLL1) * 8)
+#define DPLL_CFGCR1(id)	_MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR1, _DPLL2_CFGCR1)
+#define DPLL_CFGCR2(id)	_MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR2, _DPLL2_CFGCR2)
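
The CFGCR macros must keep the 8-byte-per-DPLL stride of the old (DPLL1_CFGCR1 + ((id) - SKL_DPLL1) * 8) form, which is why each pairs the DPLL1 and DPLL2 registers of the same CFGCR; assuming the usual a + pipe*(b - a) arithmetic in _MMIO_PIPE():

	/* DPLL_CFGCR1(SKL_DPLL2) = _MMIO(0x6C040 + 1 * (0x6C048 - 0x6C040))
	 *			   = _MMIO(0x6C048)	-- _DPLL2_CFGCR1
	 * DPLL_CFGCR2(SKL_DPLL3) = _MMIO(0x6C044 + 2 * (0x6C04C - 0x6C044))
	 *			   = _MMIO(0x6C054)	-- _DPLL3_CFGCR2
	 * Mixing CFGCR1 with a CFGCR2 register would give a 0xC stride and land
	 * between the real registers for DPLL2/3.
	 */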
 
 /* BXT display engine PLL */
-#define BXT_DE_PLL_CTL			0x6d000
+#define BXT_DE_PLL_CTL			_MMIO(0x6d000)
 #define   BXT_DE_PLL_RATIO(x)		(x)	/* {60,65,100} * 19.2MHz */
 #define   BXT_DE_PLL_RATIO_MASK		0xff
 
-#define BXT_DE_PLL_ENABLE		0x46070
+#define BXT_DE_PLL_ENABLE		_MMIO(0x46070)
 #define   BXT_DE_PLL_PLL_ENABLE		(1 << 31)
 #define   BXT_DE_PLL_LOCK		(1 << 30)
 
 /* GEN9 DC */
-#define DC_STATE_EN			0x45504
+#define DC_STATE_EN			_MMIO(0x45504)
+#define  DC_STATE_DISABLE		0
 #define  DC_STATE_EN_UPTO_DC5		(1<<0)
 #define  DC_STATE_EN_DC9		(1<<3)
 #define  DC_STATE_EN_UPTO_DC6		(2<<0)
 #define  DC_STATE_EN_UPTO_DC5_DC6_MASK   0x3
 
-#define  DC_STATE_DEBUG                  0x45520
+#define  DC_STATE_DEBUG                  _MMIO(0x45520)
 #define  DC_STATE_DEBUG_MASK_MEMORY_UP	(1<<1)
 
 /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register,
  * since on HSW we can't write to it using I915_WRITE. */
-#define D_COMP_HSW			(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
-#define D_COMP_BDW			0x138144
+#define D_COMP_HSW			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
+#define D_COMP_BDW			_MMIO(0x138144)
 #define  D_COMP_RCOMP_IN_PROGRESS	(1<<9)
 #define  D_COMP_COMP_FORCE		(1<<8)
 #define  D_COMP_COMP_DISABLE		(1<<0)
 
 /* Pipe WM_LINETIME - watermark line time */
-#define PIPE_WM_LINETIME_A		0x45270
-#define PIPE_WM_LINETIME_B		0x45274
-#define PIPE_WM_LINETIME(pipe) _PIPE(pipe, PIPE_WM_LINETIME_A, \
-					   PIPE_WM_LINETIME_B)
+#define _PIPE_WM_LINETIME_A		0x45270
+#define _PIPE_WM_LINETIME_B		0x45274
+#define PIPE_WM_LINETIME(pipe) _MMIO_PIPE(pipe, _PIPE_WM_LINETIME_A, _PIPE_WM_LINETIME_B)
 #define   PIPE_WM_LINETIME_MASK			(0x1ff)
 #define   PIPE_WM_LINETIME_TIME(x)		((x))
 #define   PIPE_WM_LINETIME_IPS_LINETIME_MASK	(0x1ff<<16)
 #define   PIPE_WM_LINETIME_IPS_LINETIME(x)	((x)<<16)
 
 /* SFUSE_STRAP */
-#define SFUSE_STRAP			0xc2014
+#define SFUSE_STRAP			_MMIO(0xc2014)
 #define  SFUSE_STRAP_FUSE_LOCK		(1<<13)
 #define  SFUSE_STRAP_DISPLAY_DISABLED	(1<<7)
 #define  SFUSE_STRAP_DDIB_DETECTED	(1<<2)
 #define  SFUSE_STRAP_DDIC_DETECTED	(1<<1)
 #define  SFUSE_STRAP_DDID_DETECTED	(1<<0)
 
-#define WM_MISC				0x45260
+#define WM_MISC				_MMIO(0x45260)
 #define  WM_MISC_DATA_PARTITION_5_6	(1 << 0)
 
-#define WM_DBG				0x45280
+#define WM_DBG				_MMIO(0x45280)
 #define  WM_DBG_DISALLOW_MULTIPLE_LP	(1<<0)
 #define  WM_DBG_DISALLOW_MAXFIFO	(1<<1)
 #define  WM_DBG_DISALLOW_SPRITE		(1<<2)
@@ -7548,28 +7593,29 @@
 #define _PIPE_B_CSC_POSTOFF_ME	0x49144
 #define _PIPE_B_CSC_POSTOFF_LO	0x49148
 
-#define PIPE_CSC_COEFF_RY_GY(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_RY_GY, _PIPE_B_CSC_COEFF_RY_GY)
-#define PIPE_CSC_COEFF_BY(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_BY, _PIPE_B_CSC_COEFF_BY)
-#define PIPE_CSC_COEFF_RU_GU(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_RU_GU, _PIPE_B_CSC_COEFF_RU_GU)
-#define PIPE_CSC_COEFF_BU(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_BU, _PIPE_B_CSC_COEFF_BU)
-#define PIPE_CSC_COEFF_RV_GV(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_RV_GV, _PIPE_B_CSC_COEFF_RV_GV)
-#define PIPE_CSC_COEFF_BV(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_BV, _PIPE_B_CSC_COEFF_BV)
-#define PIPE_CSC_MODE(pipe) _PIPE(pipe, _PIPE_A_CSC_MODE, _PIPE_B_CSC_MODE)
-#define PIPE_CSC_PREOFF_HI(pipe) _PIPE(pipe, _PIPE_A_CSC_PREOFF_HI, _PIPE_B_CSC_PREOFF_HI)
-#define PIPE_CSC_PREOFF_ME(pipe) _PIPE(pipe, _PIPE_A_CSC_PREOFF_ME, _PIPE_B_CSC_PREOFF_ME)
-#define PIPE_CSC_PREOFF_LO(pipe) _PIPE(pipe, _PIPE_A_CSC_PREOFF_LO, _PIPE_B_CSC_PREOFF_LO)
-#define PIPE_CSC_POSTOFF_HI(pipe) _PIPE(pipe, _PIPE_A_CSC_POSTOFF_HI, _PIPE_B_CSC_POSTOFF_HI)
-#define PIPE_CSC_POSTOFF_ME(pipe) _PIPE(pipe, _PIPE_A_CSC_POSTOFF_ME, _PIPE_B_CSC_POSTOFF_ME)
-#define PIPE_CSC_POSTOFF_LO(pipe) _PIPE(pipe, _PIPE_A_CSC_POSTOFF_LO, _PIPE_B_CSC_POSTOFF_LO)
+#define PIPE_CSC_COEFF_RY_GY(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_RY_GY, _PIPE_B_CSC_COEFF_RY_GY)
+#define PIPE_CSC_COEFF_BY(pipe)		_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_BY, _PIPE_B_CSC_COEFF_BY)
+#define PIPE_CSC_COEFF_RU_GU(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_RU_GU, _PIPE_B_CSC_COEFF_RU_GU)
+#define PIPE_CSC_COEFF_BU(pipe)		_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_BU, _PIPE_B_CSC_COEFF_BU)
+#define PIPE_CSC_COEFF_RV_GV(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_RV_GV, _PIPE_B_CSC_COEFF_RV_GV)
+#define PIPE_CSC_COEFF_BV(pipe)		_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_BV, _PIPE_B_CSC_COEFF_BV)
+#define PIPE_CSC_MODE(pipe)		_MMIO_PIPE(pipe, _PIPE_A_CSC_MODE, _PIPE_B_CSC_MODE)
+#define PIPE_CSC_PREOFF_HI(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_PREOFF_HI, _PIPE_B_CSC_PREOFF_HI)
+#define PIPE_CSC_PREOFF_ME(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_PREOFF_ME, _PIPE_B_CSC_PREOFF_ME)
+#define PIPE_CSC_PREOFF_LO(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_PREOFF_LO, _PIPE_B_CSC_PREOFF_LO)
+#define PIPE_CSC_POSTOFF_HI(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_HI, _PIPE_B_CSC_POSTOFF_HI)
+#define PIPE_CSC_POSTOFF_ME(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_ME, _PIPE_B_CSC_POSTOFF_ME)
+#define PIPE_CSC_POSTOFF_LO(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_LO, _PIPE_B_CSC_POSTOFF_LO)
 
 /* MIPI DSI registers */
 
 #define _MIPI_PORT(port, a, c)	_PORT3(port, a, 0, c)	/* ports A and C only */
+#define _MMIO_MIPI(port, a, c)	_MMIO(_MIPI_PORT(port, a, c))
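
_MMIO_MIPI() composes the existing A/C port picker with the typed wrapper, so every MIPI_*(port) macro below converts by swapping _MIPI_PORT for _MMIO_MIPI, e.g.:

	/* MIPI_PORT_CTRL(PORT_A) -> _MMIO(_MIPI_PORT(PORT_A, _MIPIA_PORT_CTRL,
	 *					       _MIPIC_PORT_CTRL))
	 *			  -> a handle for VLV_DISPLAY_BASE + 0x61190
	 */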
 
 /* BXT MIPI clock controls */
 #define BXT_MAX_VAR_OUTPUT_KHZ			39500
 
-#define BXT_MIPI_CLOCK_CTL			0x46090
+#define BXT_MIPI_CLOCK_CTL			_MMIO(0x46090)
 #define  BXT_MIPI1_DIV_SHIFT			26
 #define  BXT_MIPI2_DIV_SHIFT			10
 #define  BXT_MIPI_DIV_SHIFT(port)		\
@@ -7631,20 +7677,20 @@
 /* BXT MIPI mode configure */
 #define  _BXT_MIPIA_TRANS_HACTIVE			0x6B0F8
 #define  _BXT_MIPIC_TRANS_HACTIVE			0x6B8F8
-#define  BXT_MIPI_TRANS_HACTIVE(tc)	_MIPI_PORT(tc, \
+#define  BXT_MIPI_TRANS_HACTIVE(tc)	_MMIO_MIPI(tc, \
 		_BXT_MIPIA_TRANS_HACTIVE, _BXT_MIPIC_TRANS_HACTIVE)
 
 #define  _BXT_MIPIA_TRANS_VACTIVE			0x6B0FC
 #define  _BXT_MIPIC_TRANS_VACTIVE			0x6B8FC
-#define  BXT_MIPI_TRANS_VACTIVE(tc)	_MIPI_PORT(tc, \
+#define  BXT_MIPI_TRANS_VACTIVE(tc)	_MMIO_MIPI(tc, \
 		_BXT_MIPIA_TRANS_VACTIVE, _BXT_MIPIC_TRANS_VACTIVE)
 
 #define  _BXT_MIPIA_TRANS_VTOTAL			0x6B100
 #define  _BXT_MIPIC_TRANS_VTOTAL			0x6B900
-#define  BXT_MIPI_TRANS_VTOTAL(tc)	_MIPI_PORT(tc, \
+#define  BXT_MIPI_TRANS_VTOTAL(tc)	_MMIO_MIPI(tc, \
 		_BXT_MIPIA_TRANS_VTOTAL, _BXT_MIPIC_TRANS_VTOTAL)
 
-#define BXT_DSI_PLL_CTL			0x161000
+#define BXT_DSI_PLL_CTL			_MMIO(0x161000)
 #define  BXT_DSI_PLL_PVD_RATIO_SHIFT	16
 #define  BXT_DSI_PLL_PVD_RATIO_MASK	(3 << BXT_DSI_PLL_PVD_RATIO_SHIFT)
 #define  BXT_DSI_PLL_PVD_RATIO_1	(1 << BXT_DSI_PLL_PVD_RATIO_SHIFT)
@@ -7662,19 +7708,18 @@
 #define BXT_DSI_PLL_RATIO_MASK		0xFF
 #define BXT_REF_CLOCK_KHZ		19500
 
-#define BXT_DSI_PLL_ENABLE		0x46080
+#define BXT_DSI_PLL_ENABLE		_MMIO(0x46080)
 #define  BXT_DSI_PLL_DO_ENABLE		(1 << 31)
 #define  BXT_DSI_PLL_LOCKED		(1 << 30)
 
 #define _MIPIA_PORT_CTRL			(VLV_DISPLAY_BASE + 0x61190)
 #define _MIPIC_PORT_CTRL			(VLV_DISPLAY_BASE + 0x61700)
-#define MIPI_PORT_CTRL(port)	_MIPI_PORT(port, _MIPIA_PORT_CTRL, _MIPIC_PORT_CTRL)
+#define MIPI_PORT_CTRL(port)	_MMIO_MIPI(port, _MIPIA_PORT_CTRL, _MIPIC_PORT_CTRL)
 
  /* BXT port control */
 #define _BXT_MIPIA_PORT_CTRL				0x6B0C0
 #define _BXT_MIPIC_PORT_CTRL				0x6B8C0
-#define BXT_MIPI_PORT_CTRL(tc)	_MIPI_PORT(tc, _BXT_MIPIA_PORT_CTRL, \
-						_BXT_MIPIC_PORT_CTRL)
+#define BXT_MIPI_PORT_CTRL(tc)	_MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL)
 
 #define  DPI_ENABLE					(1 << 31) /* A + C */
 #define  MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT		27
@@ -7718,8 +7763,7 @@
 
 #define _MIPIA_TEARING_CTRL			(VLV_DISPLAY_BASE + 0x61194)
 #define _MIPIC_TEARING_CTRL			(VLV_DISPLAY_BASE + 0x61704)
-#define MIPI_TEARING_CTRL(port)			_MIPI_PORT(port, \
-				_MIPIA_TEARING_CTRL, _MIPIC_TEARING_CTRL)
+#define MIPI_TEARING_CTRL(port)			_MMIO_MIPI(port, _MIPIA_TEARING_CTRL, _MIPIC_TEARING_CTRL)
 #define  TEARING_EFFECT_DELAY_SHIFT			0
 #define  TEARING_EFFECT_DELAY_MASK			(0xffff << 0)
 
@@ -7730,8 +7774,7 @@
 
 #define _MIPIA_DEVICE_READY		(dev_priv->mipi_mmio_base + 0xb000)
 #define _MIPIC_DEVICE_READY		(dev_priv->mipi_mmio_base + 0xb800)
-#define MIPI_DEVICE_READY(port)		_MIPI_PORT(port, _MIPIA_DEVICE_READY, \
-						_MIPIC_DEVICE_READY)
+#define MIPI_DEVICE_READY(port)		_MMIO_MIPI(port, _MIPIA_DEVICE_READY, _MIPIC_DEVICE_READY)
 #define  BUS_POSSESSION					(1 << 3) /* set to give bus to receiver */
 #define  ULPS_STATE_MASK				(3 << 1)
 #define  ULPS_STATE_ENTER				(2 << 1)
@@ -7741,12 +7784,10 @@
 
 #define _MIPIA_INTR_STAT		(dev_priv->mipi_mmio_base + 0xb004)
 #define _MIPIC_INTR_STAT		(dev_priv->mipi_mmio_base + 0xb804)
-#define MIPI_INTR_STAT(port)		_MIPI_PORT(port, _MIPIA_INTR_STAT, \
-					_MIPIC_INTR_STAT)
+#define MIPI_INTR_STAT(port)		_MMIO_MIPI(port, _MIPIA_INTR_STAT, _MIPIC_INTR_STAT)
 #define _MIPIA_INTR_EN			(dev_priv->mipi_mmio_base + 0xb008)
 #define _MIPIC_INTR_EN			(dev_priv->mipi_mmio_base + 0xb808)
-#define MIPI_INTR_EN(port)		_MIPI_PORT(port, _MIPIA_INTR_EN, \
-					_MIPIC_INTR_EN)
+#define MIPI_INTR_EN(port)		_MMIO_MIPI(port, _MIPIA_INTR_EN, _MIPIC_INTR_EN)
 #define  TEARING_EFFECT					(1 << 31)
 #define  SPL_PKT_SENT_INTERRUPT				(1 << 30)
 #define  GEN_READ_DATA_AVAIL				(1 << 29)
@@ -7782,8 +7823,7 @@
 
 #define _MIPIA_DSI_FUNC_PRG		(dev_priv->mipi_mmio_base + 0xb00c)
 #define _MIPIC_DSI_FUNC_PRG		(dev_priv->mipi_mmio_base + 0xb80c)
-#define MIPI_DSI_FUNC_PRG(port)		_MIPI_PORT(port, _MIPIA_DSI_FUNC_PRG, \
-						_MIPIC_DSI_FUNC_PRG)
+#define MIPI_DSI_FUNC_PRG(port)		_MMIO_MIPI(port, _MIPIA_DSI_FUNC_PRG, _MIPIC_DSI_FUNC_PRG)
 #define  CMD_MODE_DATA_WIDTH_MASK			(7 << 13)
 #define  CMD_MODE_NOT_SUPPORTED				(0 << 13)
 #define  CMD_MODE_DATA_WIDTH_16_BIT			(1 << 13)
@@ -7806,32 +7846,27 @@
 
 #define _MIPIA_HS_TX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb010)
 #define _MIPIC_HS_TX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb810)
-#define MIPI_HS_TX_TIMEOUT(port)	_MIPI_PORT(port, _MIPIA_HS_TX_TIMEOUT, \
-					_MIPIC_HS_TX_TIMEOUT)
+#define MIPI_HS_TX_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_HS_TX_TIMEOUT, _MIPIC_HS_TX_TIMEOUT)
 #define  HIGH_SPEED_TX_TIMEOUT_COUNTER_MASK		0xffffff
 
 #define _MIPIA_LP_RX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb014)
 #define _MIPIC_LP_RX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb814)
-#define MIPI_LP_RX_TIMEOUT(port)	_MIPI_PORT(port, _MIPIA_LP_RX_TIMEOUT, \
-					_MIPIC_LP_RX_TIMEOUT)
+#define MIPI_LP_RX_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_LP_RX_TIMEOUT, _MIPIC_LP_RX_TIMEOUT)
 #define  LOW_POWER_RX_TIMEOUT_COUNTER_MASK		0xffffff
 
 #define _MIPIA_TURN_AROUND_TIMEOUT	(dev_priv->mipi_mmio_base + 0xb018)
 #define _MIPIC_TURN_AROUND_TIMEOUT	(dev_priv->mipi_mmio_base + 0xb818)
-#define MIPI_TURN_AROUND_TIMEOUT(port)	_MIPI_PORT(port, \
-			_MIPIA_TURN_AROUND_TIMEOUT, _MIPIC_TURN_AROUND_TIMEOUT)
+#define MIPI_TURN_AROUND_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_TURN_AROUND_TIMEOUT, _MIPIC_TURN_AROUND_TIMEOUT)
 #define  TURN_AROUND_TIMEOUT_MASK			0x3f
 
 #define _MIPIA_DEVICE_RESET_TIMER	(dev_priv->mipi_mmio_base + 0xb01c)
 #define _MIPIC_DEVICE_RESET_TIMER	(dev_priv->mipi_mmio_base + 0xb81c)
-#define MIPI_DEVICE_RESET_TIMER(port)	_MIPI_PORT(port, \
-			_MIPIA_DEVICE_RESET_TIMER, _MIPIC_DEVICE_RESET_TIMER)
+#define MIPI_DEVICE_RESET_TIMER(port)	_MMIO_MIPI(port, _MIPIA_DEVICE_RESET_TIMER, _MIPIC_DEVICE_RESET_TIMER)
 #define  DEVICE_RESET_TIMER_MASK			0xffff
 
 #define _MIPIA_DPI_RESOLUTION		(dev_priv->mipi_mmio_base + 0xb020)
 #define _MIPIC_DPI_RESOLUTION		(dev_priv->mipi_mmio_base + 0xb820)
-#define MIPI_DPI_RESOLUTION(port)	_MIPI_PORT(port, _MIPIA_DPI_RESOLUTION, \
-					_MIPIC_DPI_RESOLUTION)
+#define MIPI_DPI_RESOLUTION(port)	_MMIO_MIPI(port, _MIPIA_DPI_RESOLUTION, _MIPIC_DPI_RESOLUTION)
 #define  VERTICAL_ADDRESS_SHIFT				16
 #define  VERTICAL_ADDRESS_MASK				(0xffff << 16)
 #define  HORIZONTAL_ADDRESS_SHIFT			0
@@ -7839,8 +7874,7 @@
 
 #define _MIPIA_DBI_FIFO_THROTTLE	(dev_priv->mipi_mmio_base + 0xb024)
 #define _MIPIC_DBI_FIFO_THROTTLE	(dev_priv->mipi_mmio_base + 0xb824)
-#define MIPI_DBI_FIFO_THROTTLE(port)	_MIPI_PORT(port, \
-			_MIPIA_DBI_FIFO_THROTTLE, _MIPIC_DBI_FIFO_THROTTLE)
+#define MIPI_DBI_FIFO_THROTTLE(port)	_MMIO_MIPI(port, _MIPIA_DBI_FIFO_THROTTLE, _MIPIC_DBI_FIFO_THROTTLE)
 #define  DBI_FIFO_EMPTY_HALF				(0 << 0)
 #define  DBI_FIFO_EMPTY_QUARTER				(1 << 0)
 #define  DBI_FIFO_EMPTY_7_LOCATIONS			(2 << 0)
@@ -7848,50 +7882,41 @@
 /* regs below are bits 15:0 */
 #define _MIPIA_HSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb028)
 #define _MIPIC_HSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb828)
-#define MIPI_HSYNC_PADDING_COUNT(port)	_MIPI_PORT(port, \
-			_MIPIA_HSYNC_PADDING_COUNT, _MIPIC_HSYNC_PADDING_COUNT)
+#define MIPI_HSYNC_PADDING_COUNT(port)	_MMIO_MIPI(port, _MIPIA_HSYNC_PADDING_COUNT, _MIPIC_HSYNC_PADDING_COUNT)
 
 #define _MIPIA_HBP_COUNT		(dev_priv->mipi_mmio_base + 0xb02c)
 #define _MIPIC_HBP_COUNT		(dev_priv->mipi_mmio_base + 0xb82c)
-#define MIPI_HBP_COUNT(port)		_MIPI_PORT(port, _MIPIA_HBP_COUNT, \
-					_MIPIC_HBP_COUNT)
+#define MIPI_HBP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_HBP_COUNT, _MIPIC_HBP_COUNT)
 
 #define _MIPIA_HFP_COUNT		(dev_priv->mipi_mmio_base + 0xb030)
 #define _MIPIC_HFP_COUNT		(dev_priv->mipi_mmio_base + 0xb830)
-#define MIPI_HFP_COUNT(port)		_MIPI_PORT(port, _MIPIA_HFP_COUNT, \
-					_MIPIC_HFP_COUNT)
+#define MIPI_HFP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_HFP_COUNT, _MIPIC_HFP_COUNT)
 
 #define _MIPIA_HACTIVE_AREA_COUNT	(dev_priv->mipi_mmio_base + 0xb034)
 #define _MIPIC_HACTIVE_AREA_COUNT	(dev_priv->mipi_mmio_base + 0xb834)
-#define MIPI_HACTIVE_AREA_COUNT(port)	_MIPI_PORT(port, \
-			_MIPIA_HACTIVE_AREA_COUNT, _MIPIC_HACTIVE_AREA_COUNT)
+#define MIPI_HACTIVE_AREA_COUNT(port)	_MMIO_MIPI(port, _MIPIA_HACTIVE_AREA_COUNT, _MIPIC_HACTIVE_AREA_COUNT)
 
 #define _MIPIA_VSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb038)
 #define _MIPIC_VSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb838)
-#define MIPI_VSYNC_PADDING_COUNT(port)	_MIPI_PORT(port, \
-			_MIPIA_VSYNC_PADDING_COUNT, _MIPIC_VSYNC_PADDING_COUNT)
+#define MIPI_VSYNC_PADDING_COUNT(port)	_MMIO_MIPI(port, _MIPIA_VSYNC_PADDING_COUNT, _MIPIC_VSYNC_PADDING_COUNT)
 
 #define _MIPIA_VBP_COUNT		(dev_priv->mipi_mmio_base + 0xb03c)
 #define _MIPIC_VBP_COUNT		(dev_priv->mipi_mmio_base + 0xb83c)
-#define MIPI_VBP_COUNT(port)		_MIPI_PORT(port, _MIPIA_VBP_COUNT, \
-					_MIPIC_VBP_COUNT)
+#define MIPI_VBP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_VBP_COUNT, _MIPIC_VBP_COUNT)
 
 #define _MIPIA_VFP_COUNT		(dev_priv->mipi_mmio_base + 0xb040)
 #define _MIPIC_VFP_COUNT		(dev_priv->mipi_mmio_base + 0xb840)
-#define MIPI_VFP_COUNT(port)		_MIPI_PORT(port, _MIPIA_VFP_COUNT, \
-					_MIPIC_VFP_COUNT)
+#define MIPI_VFP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_VFP_COUNT, _MIPIC_VFP_COUNT)
 
 #define _MIPIA_HIGH_LOW_SWITCH_COUNT	(dev_priv->mipi_mmio_base + 0xb044)
 #define _MIPIC_HIGH_LOW_SWITCH_COUNT	(dev_priv->mipi_mmio_base + 0xb844)
-#define MIPI_HIGH_LOW_SWITCH_COUNT(port)	_MIPI_PORT(port,	\
-		_MIPIA_HIGH_LOW_SWITCH_COUNT, _MIPIC_HIGH_LOW_SWITCH_COUNT)
+#define MIPI_HIGH_LOW_SWITCH_COUNT(port)	_MMIO_MIPI(port, _MIPIA_HIGH_LOW_SWITCH_COUNT, _MIPIC_HIGH_LOW_SWITCH_COUNT)
 
 /* regs above are bits 15:0 */
 
 #define _MIPIA_DPI_CONTROL		(dev_priv->mipi_mmio_base + 0xb048)
 #define _MIPIC_DPI_CONTROL		(dev_priv->mipi_mmio_base + 0xb848)
-#define MIPI_DPI_CONTROL(port)		_MIPI_PORT(port, _MIPIA_DPI_CONTROL, \
-					_MIPIC_DPI_CONTROL)
+#define MIPI_DPI_CONTROL(port)		_MMIO_MIPI(port, _MIPIA_DPI_CONTROL, _MIPIC_DPI_CONTROL)
 #define  DPI_LP_MODE					(1 << 6)
 #define  BACKLIGHT_OFF					(1 << 5)
 #define  BACKLIGHT_ON					(1 << 4)
@@ -7902,29 +7927,26 @@
 
 #define _MIPIA_DPI_DATA			(dev_priv->mipi_mmio_base + 0xb04c)
 #define _MIPIC_DPI_DATA			(dev_priv->mipi_mmio_base + 0xb84c)
-#define MIPI_DPI_DATA(port)		_MIPI_PORT(port, _MIPIA_DPI_DATA, \
-					_MIPIC_DPI_DATA)
+#define MIPI_DPI_DATA(port)		_MMIO_MIPI(port, _MIPIA_DPI_DATA, _MIPIC_DPI_DATA)
 #define  COMMAND_BYTE_SHIFT				0
 #define  COMMAND_BYTE_MASK				(0x3f << 0)
 
 #define _MIPIA_INIT_COUNT		(dev_priv->mipi_mmio_base + 0xb050)
 #define _MIPIC_INIT_COUNT		(dev_priv->mipi_mmio_base + 0xb850)
-#define MIPI_INIT_COUNT(port)		_MIPI_PORT(port, _MIPIA_INIT_COUNT, \
-					_MIPIC_INIT_COUNT)
+#define MIPI_INIT_COUNT(port)		_MMIO_MIPI(port, _MIPIA_INIT_COUNT, _MIPIC_INIT_COUNT)
 #define  MASTER_INIT_TIMER_SHIFT			0
 #define  MASTER_INIT_TIMER_MASK				(0xffff << 0)
 
 #define _MIPIA_MAX_RETURN_PKT_SIZE	(dev_priv->mipi_mmio_base + 0xb054)
 #define _MIPIC_MAX_RETURN_PKT_SIZE	(dev_priv->mipi_mmio_base + 0xb854)
-#define MIPI_MAX_RETURN_PKT_SIZE(port)	_MIPI_PORT(port, \
+#define MIPI_MAX_RETURN_PKT_SIZE(port)	_MMIO_MIPI(port, \
 			_MIPIA_MAX_RETURN_PKT_SIZE, _MIPIC_MAX_RETURN_PKT_SIZE)
 #define  MAX_RETURN_PKT_SIZE_SHIFT			0
 #define  MAX_RETURN_PKT_SIZE_MASK			(0x3ff << 0)
 
 #define _MIPIA_VIDEO_MODE_FORMAT	(dev_priv->mipi_mmio_base + 0xb058)
 #define _MIPIC_VIDEO_MODE_FORMAT	(dev_priv->mipi_mmio_base + 0xb858)
-#define MIPI_VIDEO_MODE_FORMAT(port)	_MIPI_PORT(port, \
-			_MIPIA_VIDEO_MODE_FORMAT, _MIPIC_VIDEO_MODE_FORMAT)
+#define MIPI_VIDEO_MODE_FORMAT(port)	_MMIO_MIPI(port, _MIPIA_VIDEO_MODE_FORMAT, _MIPIC_VIDEO_MODE_FORMAT)
 #define  RANDOM_DPI_DISPLAY_RESOLUTION			(1 << 4)
 #define  DISABLE_VIDEO_BTA				(1 << 3)
 #define  IP_TG_CONFIG					(1 << 2)
@@ -7934,8 +7956,7 @@
 
 #define _MIPIA_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb05c)
 #define _MIPIC_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb85c)
-#define MIPI_EOT_DISABLE(port)		_MIPI_PORT(port, _MIPIA_EOT_DISABLE, \
-					_MIPIC_EOT_DISABLE)
+#define MIPI_EOT_DISABLE(port)		_MMIO_MIPI(port, _MIPIA_EOT_DISABLE, _MIPIC_EOT_DISABLE)
 #define  LP_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 7)
 #define  HS_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 6)
 #define  LOW_CONTENTION_RECOVERY_DISABLE		(1 << 5)
@@ -7947,31 +7968,26 @@
 
 #define _MIPIA_LP_BYTECLK		(dev_priv->mipi_mmio_base + 0xb060)
 #define _MIPIC_LP_BYTECLK		(dev_priv->mipi_mmio_base + 0xb860)
-#define MIPI_LP_BYTECLK(port)		_MIPI_PORT(port, _MIPIA_LP_BYTECLK, \
-					_MIPIC_LP_BYTECLK)
+#define MIPI_LP_BYTECLK(port)		_MMIO_MIPI(port, _MIPIA_LP_BYTECLK, _MIPIC_LP_BYTECLK)
 #define  LP_BYTECLK_SHIFT				0
 #define  LP_BYTECLK_MASK				(0xffff << 0)
 
 /* bits 31:0 */
 #define _MIPIA_LP_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb064)
 #define _MIPIC_LP_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb864)
-#define MIPI_LP_GEN_DATA(port)		_MIPI_PORT(port, _MIPIA_LP_GEN_DATA, \
-					_MIPIC_LP_GEN_DATA)
+#define MIPI_LP_GEN_DATA(port)		_MMIO_MIPI(port, _MIPIA_LP_GEN_DATA, _MIPIC_LP_GEN_DATA)
 
 /* bits 31:0 */
 #define _MIPIA_HS_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb068)
 #define _MIPIC_HS_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb868)
-#define MIPI_HS_GEN_DATA(port)		_MIPI_PORT(port, _MIPIA_HS_GEN_DATA, \
-					_MIPIC_HS_GEN_DATA)
+#define MIPI_HS_GEN_DATA(port)		_MMIO_MIPI(port, _MIPIA_HS_GEN_DATA, _MIPIC_HS_GEN_DATA)
 
 #define _MIPIA_LP_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb06c)
 #define _MIPIC_LP_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb86c)
-#define MIPI_LP_GEN_CTRL(port)		_MIPI_PORT(port, _MIPIA_LP_GEN_CTRL, \
-					_MIPIC_LP_GEN_CTRL)
+#define MIPI_LP_GEN_CTRL(port)		_MMIO_MIPI(port, _MIPIA_LP_GEN_CTRL, _MIPIC_LP_GEN_CTRL)
 #define _MIPIA_HS_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb070)
 #define _MIPIC_HS_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb870)
-#define MIPI_HS_GEN_CTRL(port)		_MIPI_PORT(port, _MIPIA_HS_GEN_CTRL, \
-					_MIPIC_HS_GEN_CTRL)
+#define MIPI_HS_GEN_CTRL(port)		_MMIO_MIPI(port, _MIPIA_HS_GEN_CTRL, _MIPIC_HS_GEN_CTRL)
 #define  LONG_PACKET_WORD_COUNT_SHIFT			8
 #define  LONG_PACKET_WORD_COUNT_MASK			(0xffff << 8)
 #define  SHORT_PACKET_PARAM_SHIFT			8
@@ -7984,8 +8000,7 @@
 
 #define _MIPIA_GEN_FIFO_STAT		(dev_priv->mipi_mmio_base + 0xb074)
 #define _MIPIC_GEN_FIFO_STAT		(dev_priv->mipi_mmio_base + 0xb874)
-#define MIPI_GEN_FIFO_STAT(port)	_MIPI_PORT(port, _MIPIA_GEN_FIFO_STAT, \
-					_MIPIC_GEN_FIFO_STAT)
+#define MIPI_GEN_FIFO_STAT(port)	_MMIO_MIPI(port, _MIPIA_GEN_FIFO_STAT, _MIPIC_GEN_FIFO_STAT)
 #define  DPI_FIFO_EMPTY					(1 << 28)
 #define  DBI_FIFO_EMPTY					(1 << 27)
 #define  LP_CTRL_FIFO_EMPTY				(1 << 26)
@@ -8003,16 +8018,14 @@
 
 #define _MIPIA_HS_LS_DBI_ENABLE		(dev_priv->mipi_mmio_base + 0xb078)
 #define _MIPIC_HS_LS_DBI_ENABLE		(dev_priv->mipi_mmio_base + 0xb878)
-#define MIPI_HS_LP_DBI_ENABLE(port)	_MIPI_PORT(port, \
-			_MIPIA_HS_LS_DBI_ENABLE, _MIPIC_HS_LS_DBI_ENABLE)
+#define MIPI_HS_LP_DBI_ENABLE(port)	_MMIO_MIPI(port, _MIPIA_HS_LS_DBI_ENABLE, _MIPIC_HS_LS_DBI_ENABLE)
 #define  DBI_HS_LP_MODE_MASK				(1 << 0)
 #define  DBI_LP_MODE					(1 << 0)
 #define  DBI_HS_MODE					(0 << 0)
 
 #define _MIPIA_DPHY_PARAM		(dev_priv->mipi_mmio_base + 0xb080)
 #define _MIPIC_DPHY_PARAM		(dev_priv->mipi_mmio_base + 0xb880)
-#define MIPI_DPHY_PARAM(port)		_MIPI_PORT(port, _MIPIA_DPHY_PARAM, \
-					_MIPIC_DPHY_PARAM)
+#define MIPI_DPHY_PARAM(port)		_MMIO_MIPI(port, _MIPIA_DPHY_PARAM, _MIPIC_DPHY_PARAM)
 #define  EXIT_ZERO_COUNT_SHIFT				24
 #define  EXIT_ZERO_COUNT_MASK				(0x3f << 24)
 #define  TRAIL_COUNT_SHIFT				16
@@ -8025,15 +8038,11 @@
 /* bits 31:0 */
 #define _MIPIA_DBI_BW_CTRL		(dev_priv->mipi_mmio_base + 0xb084)
 #define _MIPIC_DBI_BW_CTRL		(dev_priv->mipi_mmio_base + 0xb884)
-#define MIPI_DBI_BW_CTRL(port)		_MIPI_PORT(port, _MIPIA_DBI_BW_CTRL, \
-					_MIPIC_DBI_BW_CTRL)
+#define MIPI_DBI_BW_CTRL(port)		_MMIO_MIPI(port, _MIPIA_DBI_BW_CTRL, _MIPIC_DBI_BW_CTRL)
 
-#define _MIPIA_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base \
-							+ 0xb088)
-#define _MIPIC_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base \
-							+ 0xb888)
-#define MIPI_CLK_LANE_SWITCH_TIME_CNT(port)	_MIPI_PORT(port, \
-	_MIPIA_CLK_LANE_SWITCH_TIME_CNT, _MIPIC_CLK_LANE_SWITCH_TIME_CNT)
+#define _MIPIA_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base + 0xb088)
+#define _MIPIC_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base + 0xb888)
+#define MIPI_CLK_LANE_SWITCH_TIME_CNT(port)	_MMIO_MIPI(port, _MIPIA_CLK_LANE_SWITCH_TIME_CNT, _MIPIC_CLK_LANE_SWITCH_TIME_CNT)
 #define  LP_HS_SSW_CNT_SHIFT				16
 #define  LP_HS_SSW_CNT_MASK				(0xffff << 16)
 #define  HS_LP_PWR_SW_CNT_SHIFT				0
@@ -8041,19 +8050,16 @@
 
 #define _MIPIA_STOP_STATE_STALL		(dev_priv->mipi_mmio_base + 0xb08c)
 #define _MIPIC_STOP_STATE_STALL		(dev_priv->mipi_mmio_base + 0xb88c)
-#define MIPI_STOP_STATE_STALL(port)	_MIPI_PORT(port, \
-			_MIPIA_STOP_STATE_STALL, _MIPIC_STOP_STATE_STALL)
+#define MIPI_STOP_STATE_STALL(port)	_MMIO_MIPI(port, _MIPIA_STOP_STATE_STALL, _MIPIC_STOP_STATE_STALL)
 #define  STOP_STATE_STALL_COUNTER_SHIFT			0
 #define  STOP_STATE_STALL_COUNTER_MASK			(0xff << 0)
 
 #define _MIPIA_INTR_STAT_REG_1		(dev_priv->mipi_mmio_base + 0xb090)
 #define _MIPIC_INTR_STAT_REG_1		(dev_priv->mipi_mmio_base + 0xb890)
-#define MIPI_INTR_STAT_REG_1(port)	_MIPI_PORT(port, \
-				_MIPIA_INTR_STAT_REG_1, _MIPIC_INTR_STAT_REG_1)
+#define MIPI_INTR_STAT_REG_1(port)	_MMIO_MIPI(port, _MIPIA_INTR_STAT_REG_1, _MIPIC_INTR_STAT_REG_1)
 #define _MIPIA_INTR_EN_REG_1		(dev_priv->mipi_mmio_base + 0xb094)
 #define _MIPIC_INTR_EN_REG_1		(dev_priv->mipi_mmio_base + 0xb894)
-#define MIPI_INTR_EN_REG_1(port)	_MIPI_PORT(port, _MIPIA_INTR_EN_REG_1, \
-					_MIPIC_INTR_EN_REG_1)
+#define MIPI_INTR_EN_REG_1(port)	_MMIO_MIPI(port, _MIPIA_INTR_EN_REG_1, _MIPIC_INTR_EN_REG_1)
 #define  RX_CONTENTION_DETECTED				(1 << 0)
 
 /* XXX: only pipe A ?!? */
@@ -8073,8 +8079,7 @@
 
 #define _MIPIA_CTRL			(dev_priv->mipi_mmio_base + 0xb104)
 #define _MIPIC_CTRL			(dev_priv->mipi_mmio_base + 0xb904)
-#define MIPI_CTRL(port)			_MIPI_PORT(port, _MIPIA_CTRL, \
-					_MIPIC_CTRL)
+#define MIPI_CTRL(port)			_MMIO_MIPI(port, _MIPIA_CTRL, _MIPIC_CTRL)
 #define  ESCAPE_CLOCK_DIVIDER_SHIFT			5 /* A only */
 #define  ESCAPE_CLOCK_DIVIDER_MASK			(3 << 5)
 #define  ESCAPE_CLOCK_DIVIDER_1				(0 << 5)
@@ -8093,23 +8098,20 @@
 
 #define _MIPIA_DATA_ADDRESS		(dev_priv->mipi_mmio_base + 0xb108)
 #define _MIPIC_DATA_ADDRESS		(dev_priv->mipi_mmio_base + 0xb908)
-#define MIPI_DATA_ADDRESS(port)		_MIPI_PORT(port, _MIPIA_DATA_ADDRESS, \
-					_MIPIC_DATA_ADDRESS)
+#define MIPI_DATA_ADDRESS(port)		_MMIO_MIPI(port, _MIPIA_DATA_ADDRESS, _MIPIC_DATA_ADDRESS)
 #define  DATA_MEM_ADDRESS_SHIFT				5
 #define  DATA_MEM_ADDRESS_MASK				(0x7ffffff << 5)
 #define  DATA_VALID					(1 << 0)
 
 #define _MIPIA_DATA_LENGTH		(dev_priv->mipi_mmio_base + 0xb10c)
 #define _MIPIC_DATA_LENGTH		(dev_priv->mipi_mmio_base + 0xb90c)
-#define MIPI_DATA_LENGTH(port)		_MIPI_PORT(port, _MIPIA_DATA_LENGTH, \
-					_MIPIC_DATA_LENGTH)
+#define MIPI_DATA_LENGTH(port)		_MMIO_MIPI(port, _MIPIA_DATA_LENGTH, _MIPIC_DATA_LENGTH)
 #define  DATA_LENGTH_SHIFT				0
 #define  DATA_LENGTH_MASK				(0xfffff << 0)
 
 #define _MIPIA_COMMAND_ADDRESS		(dev_priv->mipi_mmio_base + 0xb110)
 #define _MIPIC_COMMAND_ADDRESS		(dev_priv->mipi_mmio_base + 0xb910)
-#define MIPI_COMMAND_ADDRESS(port)	_MIPI_PORT(port, \
-				_MIPIA_COMMAND_ADDRESS, _MIPIC_COMMAND_ADDRESS)
+#define MIPI_COMMAND_ADDRESS(port)	_MMIO_MIPI(port, _MIPIA_COMMAND_ADDRESS, _MIPIC_COMMAND_ADDRESS)
 #define  COMMAND_MEM_ADDRESS_SHIFT			5
 #define  COMMAND_MEM_ADDRESS_MASK			(0x7ffffff << 5)
 #define  AUTO_PWG_ENABLE				(1 << 2)
@@ -8118,21 +8120,17 @@
 
 #define _MIPIA_COMMAND_LENGTH		(dev_priv->mipi_mmio_base + 0xb114)
 #define _MIPIC_COMMAND_LENGTH		(dev_priv->mipi_mmio_base + 0xb914)
-#define MIPI_COMMAND_LENGTH(port)	_MIPI_PORT(port, _MIPIA_COMMAND_LENGTH, \
-					_MIPIC_COMMAND_LENGTH)
+#define MIPI_COMMAND_LENGTH(port)	_MMIO_MIPI(port, _MIPIA_COMMAND_LENGTH, _MIPIC_COMMAND_LENGTH)
 #define  COMMAND_LENGTH_SHIFT(n)			(8 * (n)) /* n: 0...3 */
 #define  COMMAND_LENGTH_MASK(n)				(0xff << (8 * (n)))
 
 #define _MIPIA_READ_DATA_RETURN0	(dev_priv->mipi_mmio_base + 0xb118)
 #define _MIPIC_READ_DATA_RETURN0	(dev_priv->mipi_mmio_base + 0xb918)
-#define MIPI_READ_DATA_RETURN(port, n) \
-	(_MIPI_PORT(port, _MIPIA_READ_DATA_RETURN0, _MIPIC_READ_DATA_RETURN0) \
-					+ 4 * (n)) /* n: 0...7 */
+#define MIPI_READ_DATA_RETURN(port, n) _MMIO(_MIPI_PORT(port, _MIPIA_READ_DATA_RETURN0, _MIPIC_READ_DATA_RETURN0) + 4 * (n)) /* n: 0...7 */
 
 #define _MIPIA_READ_DATA_VALID		(dev_priv->mipi_mmio_base + 0xb138)
 #define _MIPIC_READ_DATA_VALID		(dev_priv->mipi_mmio_base + 0xb938)
-#define MIPI_READ_DATA_VALID(port)	_MIPI_PORT(port, \
-				_MIPIA_READ_DATA_VALID, _MIPIC_READ_DATA_VALID)
+#define MIPI_READ_DATA_VALID(port)	_MMIO_MIPI(port, _MIPIA_READ_DATA_VALID, _MIPIC_READ_DATA_VALID)
 #define  READ_DATA_VALID(n)				(1 << (n))
 
 /* For UMS only (deprecated): */
@@ -8140,12 +8138,12 @@
 #define _PALETTE_B (dev_priv->info.display_mmio_offset + 0xa800)
 
 /* MOCS (Memory Object Control State) registers */
-#define GEN9_LNCFCMOCS0		0xb020	/* L3 Cache Control base */
+#define GEN9_LNCFCMOCS(i)	_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
 
-#define GEN9_GFX_MOCS_0		0xc800	/* Graphics MOCS base register*/
-#define GEN9_MFX0_MOCS_0	0xc900	/* Media 0 MOCS base register*/
-#define GEN9_MFX1_MOCS_0	0xca00	/* Media 1 MOCS base register*/
-#define GEN9_VEBOX_MOCS_0	0xcb00	/* Video MOCS base register*/
-#define GEN9_BLT_MOCS_0		0xcc00	/* Blitter MOCS base register*/
+#define GEN9_GFX_MOCS(i)	_MMIO(0xc800 + (i) * 4)	/* Graphics MOCS registers */
+#define GEN9_MFX0_MOCS(i)	_MMIO(0xc900 + (i) * 4)	/* Media 0 MOCS registers */
+#define GEN9_MFX1_MOCS(i)	_MMIO(0xca00 + (i) * 4)	/* Media 1 MOCS registers */
+#define GEN9_VEBOX_MOCS(i)	_MMIO(0xcb00 + (i) * 4)	/* Video MOCS registers */
+#define GEN9_BLT_MOCS(i)	_MMIO(0xcc00 + (i) * 4)	/* Blitter MOCS registers */
 
 #endif /* _I915_REG_H_ */
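
Editor's illustration (not part of the patch): with the GEN9 MOCS defines turned into indexed, typed macros, a MOCS programming loop can index by table entry rather than open-coding base + i * 4. The sketch below is self-contained; only GEN9_GFX_MOCS() mirrors the definition above, while the table layout, the values and the write helper are hypothetical stand-ins for the driver's real code.

#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t reg; } i915_reg_t;
#define _MMIO(r)		((i915_reg_t){ .reg = (r) })
#define GEN9_GFX_MOCS(i)	_MMIO(0xc800 + (i) * 4)	/* as in the hunk above */

struct mocs_entry { uint32_t control_value; };		/* hypothetical layout */

static void write_reg(i915_reg_t reg, uint32_t val)	/* stand-in for I915_WRITE */
{
	printf("write 0x%05x = 0x%08x\n", (unsigned int)reg.reg, (unsigned int)val);
}

int main(void)
{
	const struct mocs_entry table[] = { { 0x9 }, { 0x38 }, { 0x3b } };	/* arbitrary values */
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		write_reg(GEN9_GFX_MOCS(i), table[i].control_value);
	return 0;
}
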
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 50ce9ce..f929c61 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -35,7 +35,8 @@
 #define dev_to_drm_minor(d) dev_get_drvdata((d))
 
 #ifdef CONFIG_PM
-static u32 calc_residency(struct drm_device *dev, const u32 reg)
+static u32 calc_residency(struct drm_device *dev,
+			  i915_reg_t reg)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u64 raw_time; /* 32b value may overflow during fixed point math */
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 04fe849..52b2d40 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -664,7 +664,7 @@
 );
 
 TRACE_EVENT_CONDITION(i915_reg_rw,
-	TP_PROTO(bool write, u32 reg, u64 val, int len, bool trace),
+	TP_PROTO(bool write, i915_reg_t reg, u64 val, int len, bool trace),
 
 	TP_ARGS(write, reg, val, len, trace),
 
@@ -679,7 +679,7 @@
 
 	TP_fast_assign(
 		__entry->val = (u64)val;
-		__entry->reg = reg;
+		__entry->reg = i915_mmio_reg_offset(reg);
 		__entry->write = write;
 		__entry->len = len;
 		),
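
For the tracepoint hunk above: the ring-buffer field stays a raw u32, so the typed register is unwrapped with i915_mmio_reg_offset() at assign time. Those accessors are defined in i915_reg.h, outside this diff; the sketch below shows their presumed shape (treat the bodies as assumptions). i915_mmio_reg_valid() is the same helper intel_ddi.c uses further down to test whether hdmi_reg was ever set.

/* Sketch of the accessor helpers - not copied from the tree. */
#include <stdint.h>
#include <stdbool.h>

typedef struct { uint32_t reg; } i915_reg_t;

static inline uint32_t i915_mmio_reg_offset(i915_reg_t reg)
{
	return reg.reg;
}

static inline bool i915_mmio_reg_equal(i915_reg_t a, i915_reg_t b)
{
	return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b);
}

/* An all-zero register doubles as "not present". */
static inline bool i915_mmio_reg_valid(i915_reg_t reg)
{
	return i915_mmio_reg_offset(reg) != 0;
}
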
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 5eee75b..dea7429 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -69,13 +69,13 @@
 	if (!IS_HASWELL(dev))
 		return;
 
-	magic = readq(dev_priv->regs + vgtif_reg(magic));
+	magic = __raw_i915_read64(dev_priv, vgtif_reg(magic));
 	if (magic != VGT_MAGIC)
 		return;
 
 	version = INTEL_VGT_IF_VERSION_ENCODE(
-		readw(dev_priv->regs + vgtif_reg(version_major)),
-		readw(dev_priv->regs + vgtif_reg(version_minor)));
+		__raw_i915_read16(dev_priv, vgtif_reg(version_major)),
+		__raw_i915_read16(dev_priv, vgtif_reg(version_minor)));
 	if (version != INTEL_VGT_IF_VERSION) {
 		DRM_INFO("VGT interface version mismatch!\n");
 		return;
diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h
index 21c97f4..3c83b47 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.h
+++ b/drivers/gpu/drm/i915/i915_vgpu.h
@@ -92,14 +92,10 @@
 	uint32_t g2v_notify;
 	uint32_t rsv6[7];
 
-	uint32_t pdp0_lo;
-	uint32_t pdp0_hi;
-	uint32_t pdp1_lo;
-	uint32_t pdp1_hi;
-	uint32_t pdp2_lo;
-	uint32_t pdp2_hi;
-	uint32_t pdp3_lo;
-	uint32_t pdp3_hi;
+	struct {
+		uint32_t lo;
+		uint32_t hi;
+	} pdp[4];
 
 	uint32_t execlist_context_descriptor_lo;
 	uint32_t execlist_context_descriptor_hi;
@@ -108,7 +104,7 @@
 } __packed;
 
 #define vgtif_reg(x) \
-	(VGT_PVINFO_PAGE + (long)&((struct vgt_if *)NULL)->x)
+	_MMIO((VGT_PVINFO_PAGE + (long)&((struct vgt_if *)NULL)->x))
 
 /* vGPU display status to be used by the host side */
 #define VGT_DRV_DISPLAY_NOT_READY 0
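
A note on the vgt_if change above (illustration, not part of the patch): collapsing the eight pdpN_lo/hi fields into pdp[4] means the PDP notification writes can become a loop over vgtif_reg(pdp[i].lo) / vgtif_reg(pdp[i].hi). The standalone sketch below mirrors the driver's own null-pointer offset idiom with a trimmed struct; the VGT_PVINFO_PAGE value and the page-directory addresses are assumed for the example.

#include <stdint.h>
#include <stdio.h>

struct vgt_if_sketch {			/* trimmed to the fields used here */
	struct {
		uint32_t lo;
		uint32_t hi;
	} pdp[4];
};

#define VGT_PVINFO_PAGE	0x78000		/* value assumed for this sketch */
#define vgtif_off(x) \
	(VGT_PVINFO_PAGE + (long)&((struct vgt_if_sketch *)NULL)->x)

int main(void)
{
	uint64_t pd_daddr[4] = { 0x100000, 0x200000, 0x300000, 0x400000 };
	int i;

	for (i = 0; i < 4; i++) {
		printf("write 0x%lx = 0x%08x\n",
		       (unsigned long)vgtif_off(pdp[i].lo),
		       (unsigned int)(pd_daddr[i] & 0xffffffff));
		printf("write 0x%lx = 0x%08x\n",
		       (unsigned long)vgtif_off(pdp[i].hi),
		       (unsigned int)(pd_daddr[i] >> 32));
	}
	return 0;
}
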
diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
index f1975f2..643f342 100644
--- a/drivers/gpu/drm/i915/intel_atomic.c
+++ b/drivers/gpu/drm/i915/intel_atomic.c
@@ -94,6 +94,7 @@
 	__drm_atomic_helper_crtc_duplicate_state(crtc, &crtc_state->base);
 
 	crtc_state->update_pipe = false;
+	crtc_state->disable_lp_wm = false;
 
 	return &crtc_state->base;
 }
@@ -205,8 +206,6 @@
 				 * but since this plane is unchanged just do the
 				 * minimum required validation.
 				 */
-				if (plane->type == DRM_PLANE_TYPE_PRIMARY)
-					intel_crtc->atomic.wait_for_flips = true;
 				crtc_state->base.planes_changed = true;
 			}
 
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index a119806..c6bb0fc 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -84,6 +84,7 @@
 	state = &intel_state->base;
 
 	__drm_atomic_helper_plane_duplicate_state(plane, state);
+	intel_state->wait_req = NULL;
 
 	return state;
 }
@@ -100,6 +101,7 @@
 intel_plane_destroy_state(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
+	WARN_ON(state && to_intel_plane_state(state)->wait_req);
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 4dccd9b..9aa83e7 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -161,9 +161,9 @@
 }
 
 static bool intel_eld_uptodate(struct drm_connector *connector,
-			       int reg_eldv, uint32_t bits_eldv,
-			       int reg_elda, uint32_t bits_elda,
-			       int reg_edid)
+			       i915_reg_t reg_eldv, uint32_t bits_eldv,
+			       i915_reg_t reg_elda, uint32_t bits_elda,
+			       i915_reg_t reg_edid)
 {
 	struct drm_i915_private *dev_priv = connector->dev->dev_private;
 	uint8_t *eld = connector->eld;
@@ -364,8 +364,7 @@
 	enum port port = intel_dig_port->port;
 	enum pipe pipe = intel_crtc->pipe;
 	uint32_t tmp, eldv;
-	int aud_config;
-	int aud_cntrl_st2;
+	i915_reg_t aud_config, aud_cntrl_st2;
 
 	DRM_DEBUG_KMS("Disable audio codec on port %c, pipe %c\n",
 		      port_name(port), pipe_name(pipe));
@@ -416,10 +415,7 @@
 	uint32_t eldv;
 	uint32_t tmp;
 	int len, i;
-	int hdmiw_hdmiedid;
-	int aud_config;
-	int aud_cntl_st;
-	int aud_cntrl_st2;
+	i915_reg_t hdmiw_hdmiedid, aud_config, aud_cntl_st, aud_cntrl_st2;
 
 	DRM_DEBUG_KMS("Enable audio codec on port %c, pipe %c, %u bytes ELD\n",
 		      port_name(port), pipe_name(pipe), drm_eld_size(eld));
@@ -591,7 +587,7 @@
 	struct drm_i915_private *dev_priv = dev_to_i915(dev);
 	u32 tmp;
 
-	if (!IS_SKYLAKE(dev_priv))
+	if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
 		return;
 
 	/*
@@ -642,10 +638,11 @@
 	u32 tmp;
 	int n;
 
-	/* HSW, BDW SKL need this fix */
+	/* HSW, BDW, SKL, KBL need this fix */
 	if (!IS_SKYLAKE(dev_priv) &&
-		!IS_BROADWELL(dev_priv) &&
-		!IS_HASWELL(dev_priv))
+	    !IS_KABYLAKE(dev_priv) &&
+	    !IS_BROADWELL(dev_priv) &&
+	    !IS_HASWELL(dev_priv))
 		return 0;
 
 	mutex_lock(&dev_priv->av_mutex);
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 6a2c76e..27b3e61 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -50,7 +50,7 @@
 	 * encoder's enable/disable callbacks */
 	struct intel_connector *connector;
 	bool force_hotplug_required;
-	u32 adpa_reg;
+	i915_reg_t adpa_reg;
 };
 
 static struct intel_crt *intel_encoder_to_crt(struct intel_encoder *encoder)
@@ -480,12 +480,8 @@
 	uint32_t vsample;
 	uint32_t vblank, vblank_start, vblank_end;
 	uint32_t dsl;
-	uint32_t bclrpat_reg;
-	uint32_t vtotal_reg;
-	uint32_t vblank_reg;
-	uint32_t vsync_reg;
-	uint32_t pipeconf_reg;
-	uint32_t pipe_dsl_reg;
+	i915_reg_t bclrpat_reg, vtotal_reg,
+		vblank_reg, vsync_reg, pipeconf_reg, pipe_dsl_reg;
 	uint8_t	st00;
 	enum drm_connector_status status;
 
@@ -518,7 +514,7 @@
		/* Wait for next Vblank to substitute
 		 * border color for Color info */
 		intel_wait_for_vblank(dev, pipe);
-		st00 = I915_READ8(VGA_MSR_WRITE);
+		st00 = I915_READ8(_VGA_MSR_WRITE);
 		status = ((st00 & (1 << 4)) != 0) ?
 			connector_status_connected :
 			connector_status_disconnected;
@@ -563,7 +559,7 @@
 		do {
 			count++;
 			/* Read the ST00 VGA status register */
-			st00 = I915_READ8(VGA_MSR_WRITE);
+			st00 = I915_READ8(_VGA_MSR_WRITE);
 			if (st00 & (1 << 4))
 				detect++;
 		} while ((I915_READ(pipe_dsl_reg) == dsl));
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 9e530a7..6c6a669 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -47,21 +47,10 @@
 MODULE_FIRMWARE(I915_CSR_SKL);
 MODULE_FIRMWARE(I915_CSR_BXT);
 
-/*
-* SKL CSR registers for DC5 and DC6
-*/
-#define CSR_PROGRAM(i)			(0x80000 + (i) * 4)
-#define CSR_SSP_BASE_ADDR_GEN9		0x00002FC0
-#define CSR_HTP_ADDR_SKL		0x00500034
-#define CSR_SSP_BASE			0x8F074
-#define CSR_HTP_SKL			0x8F004
-#define CSR_LAST_WRITE			0x8F034
-#define CSR_LAST_WRITE_VALUE		0xc003b400
-/* MMIO address range for CSR program (0x80000 - 0x82FFF) */
+#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 23)
+
 #define CSR_MAX_FW_SIZE			0x2FFF
 #define CSR_DEFAULT_FW_OFFSET		0xFFFFFFFF
-#define CSR_MMIO_START_RANGE	0x80000
-#define CSR_MMIO_END_RANGE		0x8FFFF
 
 struct intel_css_header {
 	/* 0x09 for DMC */
@@ -178,166 +167,134 @@
 };
 
 static const struct stepping_info skl_stepping_info[] = {
-		{'A', '0'}, {'B', '0'}, {'C', '0'},
-		{'D', '0'}, {'E', '0'}, {'F', '0'},
-		{'G', '0'}, {'H', '0'}, {'I', '0'}
+	{'A', '0'}, {'B', '0'}, {'C', '0'},
+	{'D', '0'}, {'E', '0'}, {'F', '0'},
+	{'G', '0'}, {'H', '0'}, {'I', '0'}
 };
 
-static struct stepping_info bxt_stepping_info[] = {
+static const struct stepping_info bxt_stepping_info[] = {
 	{'A', '0'}, {'A', '1'}, {'A', '2'},
 	{'B', '0'}, {'B', '1'}, {'B', '2'}
 };
 
-static char intel_get_stepping(struct drm_device *dev)
+static const struct stepping_info *intel_get_stepping_info(struct drm_device *dev)
 {
-	if (IS_SKYLAKE(dev) && (dev->pdev->revision <
-			ARRAY_SIZE(skl_stepping_info)))
-		return skl_stepping_info[dev->pdev->revision].stepping;
-	else if (IS_BROXTON(dev) && (dev->pdev->revision <
-				ARRAY_SIZE(bxt_stepping_info)))
-		return bxt_stepping_info[dev->pdev->revision].stepping;
-	else
-		return -ENODATA;
-}
+	const struct stepping_info *si;
+	unsigned int size;
 
-static char intel_get_substepping(struct drm_device *dev)
-{
-	if (IS_SKYLAKE(dev) && (dev->pdev->revision <
-			ARRAY_SIZE(skl_stepping_info)))
-		return skl_stepping_info[dev->pdev->revision].substepping;
-	else if (IS_BROXTON(dev) && (dev->pdev->revision <
-			ARRAY_SIZE(bxt_stepping_info)))
-		return bxt_stepping_info[dev->pdev->revision].substepping;
-	else
-		return -ENODATA;
-}
+	if (IS_SKYLAKE(dev)) {
+		size = ARRAY_SIZE(skl_stepping_info);
+		si = skl_stepping_info;
+	} else if (IS_BROXTON(dev)) {
+		size = ARRAY_SIZE(bxt_stepping_info);
+		si = bxt_stepping_info;
+	} else {
+		return NULL;
+	}
 
-/**
- * intel_csr_load_status_get() - to get firmware loading status.
- * @dev_priv: i915 device.
- *
- * This function helps to get the firmware loading status.
- *
- * Return: Firmware loading status.
- */
-enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv)
-{
-	enum csr_state state;
+	if (INTEL_REVID(dev) < size)
+		return si + INTEL_REVID(dev);
 
-	mutex_lock(&dev_priv->csr_lock);
-	state = dev_priv->csr.state;
-	mutex_unlock(&dev_priv->csr_lock);
-
-	return state;
-}
-
-/**
- * intel_csr_load_status_set() - help to set firmware loading status.
- * @dev_priv: i915 device.
- * @state: enumeration of firmware loading status.
- *
- * Set the firmware loading status.
- */
-void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
-			enum csr_state state)
-{
-	mutex_lock(&dev_priv->csr_lock);
-	dev_priv->csr.state = state;
-	mutex_unlock(&dev_priv->csr_lock);
+	return NULL;
 }
 
 /**
  * intel_csr_load_program() - write the firmware from memory to register.
- * @dev: drm device.
+ * @dev_priv: i915 drm device.
  *
 * CSR firmware is read from a .bin file and kept in internal memory once.
 * Every time the display comes back from a low power state, this function is called to
  * copy the firmware from internal memory to registers.
  */
-void intel_csr_load_program(struct drm_device *dev)
+void intel_csr_load_program(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 *payload = dev_priv->csr.dmc_payload;
 	uint32_t i, fw_size;
 
-	if (!IS_GEN9(dev)) {
+	if (!IS_GEN9(dev_priv)) {
 		DRM_ERROR("No CSR support available for this platform\n");
 		return;
 	}
 
-	/*
-	 * FIXME: Firmware gets lost on S3/S4, but not when entering system
-	 * standby or suspend-to-idle (which is just like forced runtime pm).
-	 * Unfortunately the ACPI subsystem doesn't yet give us a way to
-	 * differentiate this, hence figure it out with this hack.
-	 */
-	if (I915_READ(CSR_PROGRAM(0)))
+	if (!dev_priv->csr.dmc_payload) {
+		DRM_ERROR("Tried to program CSR with empty payload\n");
 		return;
+	}
 
-	mutex_lock(&dev_priv->csr_lock);
 	fw_size = dev_priv->csr.dmc_fw_size;
 	for (i = 0; i < fw_size; i++)
 		I915_WRITE(CSR_PROGRAM(i), payload[i]);
 
 	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
 		I915_WRITE(dev_priv->csr.mmioaddr[i],
-			dev_priv->csr.mmiodata[i]);
+			   dev_priv->csr.mmiodata[i]);
 	}
-
-	dev_priv->csr.state = FW_LOADED;
-	mutex_unlock(&dev_priv->csr_lock);
 }
 
-static void finish_csr_load(const struct firmware *fw, void *context)
+static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
+			      const struct firmware *fw)
 {
-	struct drm_i915_private *dev_priv = context;
 	struct drm_device *dev = dev_priv->dev;
 	struct intel_css_header *css_header;
 	struct intel_package_header *package_header;
 	struct intel_dmc_header *dmc_header;
 	struct intel_csr *csr = &dev_priv->csr;
-	char stepping = intel_get_stepping(dev);
-	char substepping = intel_get_substepping(dev);
+	const struct stepping_info *stepping_info = intel_get_stepping_info(dev);
+	char stepping, substepping;
 	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
 	uint32_t i;
 	uint32_t *dmc_payload;
-	bool fw_loaded = false;
 
-	if (!fw) {
-		i915_firmware_load_error_print(csr->fw_path, 0);
-		goto out;
-	}
+	if (!fw)
+		return NULL;
 
-	if ((stepping == -ENODATA) || (substepping == -ENODATA)) {
+	if (!stepping_info) {
 		DRM_ERROR("Unknown stepping info, firmware loading failed\n");
-		goto out;
+		return NULL;
 	}
 
+	stepping = stepping_info->stepping;
+	substepping = stepping_info->substepping;
+
 	/* Extract CSS Header information*/
 	css_header = (struct intel_css_header *)fw->data;
 	if (sizeof(struct intel_css_header) !=
-		(css_header->header_len * 4)) {
+	    (css_header->header_len * 4)) {
 		DRM_ERROR("Firmware has wrong CSS header length %u bytes\n",
-			(css_header->header_len * 4));
-		goto out;
+			  (css_header->header_len * 4));
+		return NULL;
 	}
+
+	csr->version = css_header->version;
+
+	if (IS_SKYLAKE(dev) && csr->version < SKL_CSR_VERSION_REQUIRED) {
+		DRM_INFO("Refusing to load old Skylake DMC firmware v%u.%u,"
+			 " please upgrade to v%u.%u or later"
+			 " [https://01.org/linuxgraphics/intel-linux-graphics-firmwares].\n",
+			 CSR_VERSION_MAJOR(csr->version),
+			 CSR_VERSION_MINOR(csr->version),
+			 CSR_VERSION_MAJOR(SKL_CSR_VERSION_REQUIRED),
+			 CSR_VERSION_MINOR(SKL_CSR_VERSION_REQUIRED));
+		return NULL;
+	}
+
 	readcount += sizeof(struct intel_css_header);
 
 	/* Extract Package Header information*/
 	package_header = (struct intel_package_header *)
-					&fw->data[readcount];
+		&fw->data[readcount];
 	if (sizeof(struct intel_package_header) !=
-		(package_header->header_len * 4)) {
+	    (package_header->header_len * 4)) {
 		DRM_ERROR("Firmware has wrong package header length %u bytes\n",
-			(package_header->header_len * 4));
-		goto out;
+			  (package_header->header_len * 4));
+		return NULL;
 	}
 	readcount += sizeof(struct intel_package_header);
 
 	/* Search for dmc_offset to find firmware binary. */
 	for (i = 0; i < package_header->num_entries; i++) {
 		if (package_header->fw_info[i].substepping == '*' &&
-			stepping == package_header->fw_info[i].stepping) {
+		    stepping == package_header->fw_info[i].stepping) {
 			dmc_offset = package_header->fw_info[i].offset;
 			break;
 		} else if (stepping == package_header->fw_info[i].stepping &&
@@ -345,12 +302,12 @@
 			dmc_offset = package_header->fw_info[i].offset;
 			break;
 		} else if (package_header->fw_info[i].stepping == '*' &&
-			package_header->fw_info[i].substepping == '*')
+			   package_header->fw_info[i].substepping == '*')
 			dmc_offset = package_header->fw_info[i].offset;
 	}
 	if (dmc_offset == CSR_DEFAULT_FW_OFFSET) {
 		DRM_ERROR("Firmware not supported for %c stepping\n", stepping);
-		goto out;
+		return NULL;
 	}
 	readcount += dmc_offset;
 
@@ -358,26 +315,26 @@
 	dmc_header = (struct intel_dmc_header *)&fw->data[readcount];
 	if (sizeof(struct intel_dmc_header) != (dmc_header->header_len)) {
 		DRM_ERROR("Firmware has wrong dmc header length %u bytes\n",
-				(dmc_header->header_len));
-		goto out;
+			  (dmc_header->header_len));
+		return NULL;
 	}
 	readcount += sizeof(struct intel_dmc_header);
 
 	/* Cache the dmc header info. */
 	if (dmc_header->mmio_count > ARRAY_SIZE(csr->mmioaddr)) {
 		DRM_ERROR("Firmware has wrong mmio count %u\n",
-						dmc_header->mmio_count);
-		goto out;
+			  dmc_header->mmio_count);
+		return NULL;
 	}
 	csr->mmio_count = dmc_header->mmio_count;
 	for (i = 0; i < dmc_header->mmio_count; i++) {
 		if (dmc_header->mmioaddr[i] < CSR_MMIO_START_RANGE ||
-			dmc_header->mmioaddr[i] > CSR_MMIO_END_RANGE) {
+		    dmc_header->mmioaddr[i] > CSR_MMIO_END_RANGE) {
 			DRM_ERROR(" Firmware has wrong mmio address 0x%x\n",
-						dmc_header->mmioaddr[i]);
-			goto out;
+				  dmc_header->mmioaddr[i]);
+			return NULL;
 		}
-		csr->mmioaddr[i] = dmc_header->mmioaddr[i];
+		csr->mmioaddr[i] = _MMIO(dmc_header->mmioaddr[i]);
 		csr->mmiodata[i] = dmc_header->mmiodata[i];
 	}
 
@@ -385,56 +342,80 @@
 	nbytes = dmc_header->fw_size * 4;
 	if (nbytes > CSR_MAX_FW_SIZE) {
 		DRM_ERROR("CSR firmware too big (%u) bytes\n", nbytes);
-		goto out;
+		return NULL;
 	}
 	csr->dmc_fw_size = dmc_header->fw_size;
 
-	csr->dmc_payload = kmalloc(nbytes, GFP_KERNEL);
-	if (!csr->dmc_payload) {
+	dmc_payload = kmalloc(nbytes, GFP_KERNEL);
+	if (!dmc_payload) {
 		DRM_ERROR("Memory allocation failed for dmc payload\n");
-		goto out;
+		return NULL;
 	}
 
-	dmc_payload = csr->dmc_payload;
 	memcpy(dmc_payload, &fw->data[readcount], nbytes);
 
-	/* load csr program during system boot, as needed for DC states */
-	intel_csr_load_program(dev);
-	fw_loaded = true;
+	return dmc_payload;
+}
 
-	DRM_DEBUG_KMS("Finished loading %s\n", dev_priv->csr.fw_path);
+static void csr_load_work_fn(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv;
+	struct intel_csr *csr;
+	const struct firmware *fw;
+	int ret;
+
+	dev_priv = container_of(work, typeof(*dev_priv), csr.work);
+	csr = &dev_priv->csr;
+
+	ret = request_firmware(&fw, dev_priv->csr.fw_path,
+			       &dev_priv->dev->pdev->dev);
+	if (!fw)
+		goto out;
+
+	dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw);
+	if (!dev_priv->csr.dmc_payload)
+		goto out;
+
+	/* load csr program during system boot, as needed for DC states */
+	intel_csr_load_program(dev_priv);
+
 out:
-	if (fw_loaded)
-		intel_runtime_pm_put(dev_priv);
-	else
-		intel_csr_load_status_set(dev_priv, FW_FAILED);
+	if (dev_priv->csr.dmc_payload) {
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+
+		DRM_INFO("Finished loading %s (v%u.%u)\n",
+			 dev_priv->csr.fw_path,
+			 CSR_VERSION_MAJOR(csr->version),
+			 CSR_VERSION_MINOR(csr->version));
+	} else {
+		DRM_ERROR("Failed to load DMC firmware, disabling rpm\n");
+	}
 
 	release_firmware(fw);
 }
 
 /**
  * intel_csr_ucode_init() - initialize the firmware loading.
- * @dev: drm device.
+ * @dev_priv: i915 drm device.
  *
  * This function is called at the time of loading the display driver to read
 * firmware from a .bin file and copy it into internal memory.
  */
-void intel_csr_ucode_init(struct drm_device *dev)
+void intel_csr_ucode_init(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_csr *csr = &dev_priv->csr;
-	int ret;
 
-	if (!HAS_CSR(dev))
+	INIT_WORK(&dev_priv->csr.work, csr_load_work_fn);
+
+	if (!HAS_CSR(dev_priv))
 		return;
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev_priv))
 		csr->fw_path = I915_CSR_SKL;
 	else if (IS_BROXTON(dev_priv))
 		csr->fw_path = I915_CSR_BXT;
 	else {
 		DRM_ERROR("Unexpected: no known CSR firmware for platform\n");
-		intel_csr_load_status_set(dev_priv, FW_FAILED);
 		return;
 	}
 
@@ -444,43 +425,24 @@
 	 * Obtain a runtime pm reference, until CSR is loaded,
 	 * to avoid entering runtime-suspend.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
-	/* CSR supported for platform, load firmware */
-	ret = request_firmware_nowait(THIS_MODULE, true, csr->fw_path,
-				&dev_priv->dev->pdev->dev,
-				GFP_KERNEL, dev_priv,
-				finish_csr_load);
-	if (ret) {
-		i915_firmware_load_error_print(csr->fw_path, ret);
-		intel_csr_load_status_set(dev_priv, FW_FAILED);
-	}
+	schedule_work(&dev_priv->csr.work);
 }
 
 /**
  * intel_csr_ucode_fini() - unload the CSR firmware.
- * @dev: drm device.
+ * @dev_priv: i915 drm device.
  *
 * Firmware unloading includes freeing the internal memory and resetting the
  * firmware loading status.
  */
-void intel_csr_ucode_fini(struct drm_device *dev)
+void intel_csr_ucode_fini(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (!HAS_CSR(dev))
+	if (!HAS_CSR(dev_priv))
 		return;
 
-	intel_csr_load_status_set(dev_priv, FW_FAILED);
-	kfree(dev_priv->csr.dmc_payload);
-}
+	flush_work(&dev_priv->csr.work);
 
-void assert_csr_loaded(struct drm_i915_private *dev_priv)
-{
-	WARN_ONCE(intel_csr_load_status_get(dev_priv) != FW_LOADED,
-		  "CSR is not loaded.\n");
-	WARN_ONCE(!I915_READ(CSR_PROGRAM(0)),
-		  "CSR program storage start is NULL\n");
-	WARN_ONCE(!I915_READ(CSR_SSP_BASE), "CSR SSP Base Not fine\n");
-	WARN_ONCE(!I915_READ(CSR_HTP_SKL), "CSR HTP Not fine\n");
+	kfree(dev_priv->csr.dmc_payload);
 }
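
A note on the version gate introduced above: SKL_CSR_VERSION_REQUIRED and the CSR_VERSION_MAJOR/MINOR decoding in the log messages rely on version-packing macros that live outside this hunk. The sketch below shows the packing they presumably implement; the exact bit layout is an assumption, only the 1.23 requirement is taken from the patch.

#include <stdint.h>
#include <stdio.h>

#define CSR_VERSION(major, minor)	((major) << 16 | (minor))	/* assumed layout */
#define CSR_VERSION_MAJOR(version)	((version) >> 16)
#define CSR_VERSION_MINOR(version)	((version) & 0xffff)

#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 23)

int main(void)
{
	uint32_t fw_version = CSR_VERSION(1, 19);	/* arbitrary example firmware */
	uint32_t required = SKL_CSR_VERSION_REQUIRED;

	if (fw_version < required)
		printf("refusing DMC v%u.%u, need v%u.%u or later\n",
		       CSR_VERSION_MAJOR(fw_version), CSR_VERSION_MINOR(fw_version),
		       CSR_VERSION_MAJOR(required), CSR_VERSION_MINOR(required));
	return 0;
}
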
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index a6752a6..76ce7c2 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -133,12 +133,12 @@
 	{ 0x00002016, 0x000000A0, 0x0 },
 	{ 0x00005012, 0x0000009B, 0x0 },
 	{ 0x00007011, 0x00000088, 0x0 },
-	{ 0x00009010, 0x000000C7, 0x0 },
+	{ 0x80009010, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 	{ 0x00002016, 0x0000009B, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
-	{ 0x00007011, 0x000000C7, 0x0 },
+	{ 0x80007011, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 	{ 0x00002016, 0x000000DF, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 };
 
 /* Skylake U */
@@ -146,12 +146,12 @@
 	{ 0x0000201B, 0x000000A2, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
 	{ 0x00007011, 0x00000087, 0x0 },
-	{ 0x80009010, 0x000000C7, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80009010, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 	{ 0x0000201B, 0x0000009D, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
-	{ 0x00007011, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80007011, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 	{ 0x00002016, 0x00000088, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 };
 
 /* Skylake Y */
@@ -159,12 +159,12 @@
 	{ 0x00000018, 0x000000A2, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
 	{ 0x00007011, 0x00000087, 0x0 },
-	{ 0x80009010, 0x000000C7, 0x3 },	/* Uses I_boost level 0x3 */
+	{ 0x80009010, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
 	{ 0x00000018, 0x0000009D, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
-	{ 0x00007011, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
+	{ 0x80007011, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
 	{ 0x00000018, 0x00000088, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
 };
 
 /*
@@ -345,7 +345,7 @@
 static bool
 intel_dig_port_supports_hdmi(const struct intel_digital_port *intel_dig_port)
 {
-	return intel_dig_port->hdmi.hdmi_reg;
+	return i915_mmio_reg_valid(intel_dig_port->hdmi.hdmi_reg);
 }
 
 static const struct ddi_buf_trans *skl_get_buf_trans_dp(struct drm_device *dev,
@@ -448,7 +448,7 @@
 		bxt_ddi_vswing_sequence(dev, hdmi_level, port,
 					INTEL_OUTPUT_HDMI);
 		return;
-	} else if (IS_SKYLAKE(dev)) {
+	} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		ddi_translations_fdi = NULL;
 		ddi_translations_dp =
 				skl_get_buf_trans_dp(dev, &n_dp_entries);
@@ -576,7 +576,7 @@
 static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv,
 				    enum port port)
 {
-	uint32_t reg = DDI_BUF_CTL(port);
+	i915_reg_t reg = DDI_BUF_CTL(port);
 	int i;
 
 	for (i = 0; i < 16; i++) {
@@ -931,7 +931,8 @@
 	/* Otherwise a < c && b >= d, do nothing */
 }
 
-static int hsw_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv, int reg)
+static int hsw_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv,
+				   i915_reg_t reg)
 {
 	int refclk = LC_FREQ;
 	int n, p, r;
@@ -967,7 +968,7 @@
 static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv,
 			       uint32_t dpll)
 {
-	uint32_t cfgcr1_reg, cfgcr2_reg;
+	i915_reg_t cfgcr1_reg, cfgcr2_reg;
 	uint32_t cfgcr1_val, cfgcr2_val;
 	uint32_t p0, p1, p2, dco_freq;
 
@@ -1112,10 +1113,10 @@
 		link_clock = 270000;
 		break;
 	case PORT_CLK_SEL_WRPLL1:
-		link_clock = hsw_ddi_calc_wrpll_link(dev_priv, WRPLL_CTL1);
+		link_clock = hsw_ddi_calc_wrpll_link(dev_priv, WRPLL_CTL(0));
 		break;
 	case PORT_CLK_SEL_WRPLL2:
-		link_clock = hsw_ddi_calc_wrpll_link(dev_priv, WRPLL_CTL2);
+		link_clock = hsw_ddi_calc_wrpll_link(dev_priv, WRPLL_CTL(1));
 		break;
 	case PORT_CLK_SEL_SPLL:
 		pll = I915_READ(SPLL_CTL) & SPLL_PLL_FREQ_MASK;
@@ -1184,7 +1185,7 @@
 
 	if (INTEL_INFO(dev)->gen <= 8)
 		hsw_ddi_clock_get(encoder, pipe_config);
-	else if (IS_SKYLAKE(dev))
+	else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		skl_ddi_clock_get(encoder, pipe_config);
 	else if (IS_BROXTON(dev))
 		bxt_ddi_clock_get(encoder, pipe_config);
@@ -1780,7 +1781,7 @@
 	struct intel_encoder *intel_encoder =
 		intel_ddi_get_crtc_new_encoder(crtc_state);
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		return skl_ddi_pll_select(intel_crtc, crtc_state,
 					  intel_encoder);
 	else if (IS_BROXTON(dev))
@@ -1942,7 +1943,7 @@
 void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
 				       enum transcoder cpu_transcoder)
 {
-	uint32_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder);
+	i915_reg_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder);
 	uint32_t val = I915_READ(reg);
 
 	val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK | TRANS_DDI_DP_VC_PAYLOAD_ALLOC);
@@ -2097,21 +2098,21 @@
 			iboost = dp_iboost;
 		} else {
 			ddi_translations = skl_get_buf_trans_dp(dev, &n_entries);
-			iboost = ddi_translations[port].i_boost;
+			iboost = ddi_translations[level].i_boost;
 		}
 	} else if (type == INTEL_OUTPUT_EDP) {
 		if (dp_iboost) {
 			iboost = dp_iboost;
 		} else {
 			ddi_translations = skl_get_buf_trans_edp(dev, &n_entries);
-			iboost = ddi_translations[port].i_boost;
+			iboost = ddi_translations[level].i_boost;
 		}
 	} else if (type == INTEL_OUTPUT_HDMI) {
 		if (hdmi_iboost) {
 			iboost = hdmi_iboost;
 		} else {
 			ddi_translations = skl_get_buf_trans_hdmi(dev, &n_entries);
-			iboost = ddi_translations[port].i_boost;
+			iboost = ddi_translations[level].i_boost;
 		}
 	} else {
 		return;
@@ -2263,7 +2264,7 @@
 
 	level = translate_signal_level(signal_levels);
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		skl_ddi_set_iboost(dev, level, port, encoder->type);
 	else if (IS_BROXTON(dev))
 		bxt_ddi_vswing_sequence(dev, level, port, encoder->type);
@@ -2271,6 +2272,50 @@
 	return DDI_BUF_TRANS_SELECT(level);
 }
 
+void intel_ddi_clk_select(struct intel_encoder *encoder,
+			  const struct intel_crtc_state *pipe_config)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	enum port port = intel_ddi_get_encoder_port(encoder);
+
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+		uint32_t dpll = pipe_config->ddi_pll_sel;
+		uint32_t val;
+
+		/*
+		 * DPLL0 is used for eDP and is the only "private" DPLL (as
+		 * opposed to shared) on SKL
+		 */
+		if (encoder->type == INTEL_OUTPUT_EDP) {
+			WARN_ON(dpll != SKL_DPLL0);
+
+			val = I915_READ(DPLL_CTRL1);
+
+			val &= ~(DPLL_CTRL1_HDMI_MODE(dpll) |
+				 DPLL_CTRL1_SSC(dpll) |
+				 DPLL_CTRL1_LINK_RATE_MASK(dpll));
+			val |= pipe_config->dpll_hw_state.ctrl1 << (dpll * 6);
+
+			I915_WRITE(DPLL_CTRL1, val);
+			POSTING_READ(DPLL_CTRL1);
+		}
+
+		/* DDI -> PLL mapping  */
+		val = I915_READ(DPLL_CTRL2);
+
+		val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) |
+			DPLL_CTRL2_DDI_CLK_SEL_MASK(port));
+		val |= (DPLL_CTRL2_DDI_CLK_SEL(dpll, port) |
+			DPLL_CTRL2_DDI_SEL_OVERRIDE(port));
+
+		I915_WRITE(DPLL_CTRL2, val);
+
+	} else if (INTEL_INFO(dev_priv)->gen < 9) {
+		WARN_ON(pipe_config->ddi_pll_sel == PORT_CLK_SEL_NONE);
+		I915_WRITE(PORT_CLK_SEL(port), pipe_config->ddi_pll_sel);
+	}
+}
+
 static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
 {
 	struct drm_encoder *encoder = &intel_encoder->base;
@@ -2286,42 +2331,7 @@
 		intel_edp_panel_on(intel_dp);
 	}
 
-	if (IS_SKYLAKE(dev)) {
-		uint32_t dpll = crtc->config->ddi_pll_sel;
-		uint32_t val;
-
-		/*
-		 * DPLL0 is used for eDP and is the only "private" DPLL (as
-		 * opposed to shared) on SKL
-		 */
-		if (type == INTEL_OUTPUT_EDP) {
-			WARN_ON(dpll != SKL_DPLL0);
-
-			val = I915_READ(DPLL_CTRL1);
-
-			val &= ~(DPLL_CTRL1_HDMI_MODE(dpll) |
-				 DPLL_CTRL1_SSC(dpll) |
-				 DPLL_CTRL1_LINK_RATE_MASK(dpll));
-			val |= crtc->config->dpll_hw_state.ctrl1 << (dpll * 6);
-
-			I915_WRITE(DPLL_CTRL1, val);
-			POSTING_READ(DPLL_CTRL1);
-		}
-
-		/* DDI -> PLL mapping  */
-		val = I915_READ(DPLL_CTRL2);
-
-		val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) |
-			DPLL_CTRL2_DDI_CLK_SEL_MASK(port));
-		val |= (DPLL_CTRL2_DDI_CLK_SEL(dpll, port) |
-			DPLL_CTRL2_DDI_SEL_OVERRIDE(port));
-
-		I915_WRITE(DPLL_CTRL2, val);
-
-	} else if (INTEL_INFO(dev)->gen < 9) {
-		WARN_ON(crtc->config->ddi_pll_sel == PORT_CLK_SEL_NONE);
-		I915_WRITE(PORT_CLK_SEL(port), crtc->config->ddi_pll_sel);
-	}
+	intel_ddi_clk_select(intel_encoder, crtc->config);
 
 	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
@@ -2381,7 +2391,7 @@
 		intel_edp_panel_off(intel_dp);
 	}
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) |
 					DPLL_CTRL2_DDI_CLK_OFF(port)));
 	else if (INTEL_INFO(dev)->gen < 9)
@@ -2553,7 +2563,7 @@
 };
 
 struct skl_dpll_regs {
-	u32 ctl, cfgcr1, cfgcr2;
+	i915_reg_t ctl, cfgcr1, cfgcr2;
 };
 
 /* this array is indexed by the *shared* pll id */
@@ -2566,13 +2576,13 @@
 	},
 	{
 		/* DPLL 2 */
-		.ctl = WRPLL_CTL1,
+		.ctl = WRPLL_CTL(0),
 		.cfgcr1 = DPLL_CFGCR1(SKL_DPLL2),
 		.cfgcr2 = DPLL_CFGCR2(SKL_DPLL2),
 	},
 	{
 		/* DPLL 3 */
-		.ctl = WRPLL_CTL2,
+		.ctl = WRPLL_CTL(1),
 		.cfgcr1 = DPLL_CFGCR1(SKL_DPLL3),
 		.cfgcr2 = DPLL_CFGCR2(SKL_DPLL3),
 	},
@@ -2992,22 +3002,22 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t val = I915_READ(LCPLL_CTL);
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		skl_shared_dplls_init(dev_priv);
 	else if (IS_BROXTON(dev))
 		bxt_shared_dplls_init(dev_priv);
 	else
 		hsw_shared_dplls_init(dev_priv);
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		int cdclk_freq;
 
 		cdclk_freq = dev_priv->display.get_display_clock_speed(dev);
 		dev_priv->skl_boot_cdclk = cdclk_freq;
+		if (skl_sanitize_cdclk(dev_priv))
+			DRM_DEBUG_KMS("Sanitized cdclk programmed by pre-os\n");
 		if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE))
 			DRM_ERROR("LCPLL1 is disabled\n");
-		else
-			intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
 	} else if (IS_BROXTON(dev)) {
 		broxton_init_cdclk(dev);
 		broxton_ddi_phy_init(dev);
@@ -3026,11 +3036,11 @@
 	}
 }
 
-void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder)
+void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
-	struct intel_dp *intel_dp = &intel_dig_port->dp;
-	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct drm_i915_private *dev_priv =
+		to_i915(intel_dig_port->base.base.dev);
 	enum port port = intel_dig_port->port;
 	uint32_t val;
 	bool wait = false;
@@ -3289,6 +3299,20 @@
 					  (DDI_BUF_PORT_REVERSAL |
 					   DDI_A_4_LANES);
 
+	/*
+	 * Bspec says that DDI_A_4_LANES is the only supported configuration
+	 * for Broxton.  Yet some BIOS fail to set this bit on port A if eDP
+	 * wasn't lit up at boot.  Force this bit on in our internal
+	 * configuration so that we use the proper lane count for our
+	 * calculations.
+	 */
+	if (IS_BROXTON(dev) && port == PORT_A) {
+		if (!(intel_dig_port->saved_port_bits & DDI_A_4_LANES)) {
+			DRM_DEBUG_KMS("BXT BIOS forgot to set DDI_A_4_LANES for port A; fixing\n");
+			intel_dig_port->saved_port_bits |= DDI_A_4_LANES;
+		}
+	}
+
 	intel_encoder->type = INTEL_OUTPUT_UNKNOWN;
 	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 	intel_encoder->cloneable = 0;
@@ -3302,8 +3326,7 @@
 		 * On BXT A0/A1, sw needs to activate DDIA HPD logic and
 		 * interrupts to check the external panel connection.
 		 */
-		if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0)
-					 && port == PORT_B)
+		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1) && port == PORT_B)
 			dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port;
 		else
 			dev_priv->hotplug.irq_port[port] = intel_dig_port;
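
On the hotplug hunk just above: the open-coded IS_BROXTON() && INTEL_REVID() < BXT_REVID_B0 check becomes IS_BXT_REVID(dev, 0, BXT_REVID_A1). That range helper is defined in i915_drv.h rather than in this diff; below is a minimal self-contained sketch of its presumed shape, with stand-in platform accessors and revision values chosen only for illustration.

#include <stdbool.h>
#include <stdio.h>

struct fake_i915 { bool is_broxton; int revid; };	/* stand-in, not the driver's struct */

#define IS_BROXTON(p)	((p)->is_broxton)
#define INTEL_REVID(p)	((p)->revid)
#define BXT_REVID_A0	0
#define BXT_REVID_A1	1

#define IS_REVID(p, since, until) \
	(INTEL_REVID(p) >= (since) && INTEL_REVID(p) <= (until))
#define IS_BXT_REVID(p, since, until) \
	(IS_BROXTON(p) && IS_REVID(p, since, until))

int main(void)
{
	struct fake_i915 bxt_a0 = { .is_broxton = true, .revid = BXT_REVID_A0 };
	struct fake_i915 bxt_later = { .is_broxton = true, .revid = 4 };	/* some later rev */

	printf("A0 needs DDIA HPD workaround: %d\n",
	       IS_BXT_REVID(&bxt_a0, 0, BXT_REVID_A1));
	printf("later stepping needs it: %d\n",
	       IS_BXT_REVID(&bxt_later, 0, BXT_REVID_A1));
	return 0;
}
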
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index bea7f3ae..9228ec0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1095,7 +1095,7 @@
 static bool pipe_dsl_stopped(struct drm_device *dev, enum pipe pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg = PIPEDSL(pipe);
+	i915_reg_t reg = PIPEDSL(pipe);
 	u32 line1, line2;
 	u32 line_mask;
 
@@ -1135,7 +1135,7 @@
 	enum pipe pipe = crtc->pipe;
 
 	if (INTEL_INFO(dev)->gen >= 4) {
-		int reg = PIPECONF(cpu_transcoder);
+		i915_reg_t reg = PIPECONF(cpu_transcoder);
 
 		/* Wait for the Pipe State to go off */
 		if (wait_for((I915_READ(reg) & I965_PIPECONF_ACTIVE) == 0,
@@ -1285,7 +1285,7 @@
 			   enum pipe pipe)
 {
 	struct drm_device *dev = dev_priv->dev;
-	int pp_reg;
+	i915_reg_t pp_reg;
 	u32 val;
 	enum pipe panel_pipe = PIPE_A;
 	bool locked = true;
@@ -1480,8 +1480,7 @@
 		return false;
 
 	if (HAS_PCH_CPT(dev_priv->dev)) {
-		u32	trans_dp_ctl_reg = TRANS_DP_CTL(pipe);
-		u32	trans_dp_ctl = I915_READ(trans_dp_ctl_reg);
+		u32 trans_dp_ctl = I915_READ(TRANS_DP_CTL(pipe));
 		if ((trans_dp_ctl & TRANS_DP_PORT_SEL_MASK) != port_sel)
 			return false;
 	} else if (IS_CHERRYVIEW(dev_priv->dev)) {
@@ -1545,12 +1544,13 @@
 }
 
 static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv,
-				   enum pipe pipe, int reg, u32 port_sel)
+				   enum pipe pipe, i915_reg_t reg,
+				   u32 port_sel)
 {
 	u32 val = I915_READ(reg);
 	I915_STATE_WARN(dp_pipe_enabled(dev_priv, pipe, port_sel, val),
 	     "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n",
-	     reg, pipe_name(pipe));
+	     i915_mmio_reg_offset(reg), pipe_name(pipe));
 
 	I915_STATE_WARN(HAS_PCH_IBX(dev_priv->dev) && (val & DP_PORT_EN) == 0
 	     && (val & DP_PIPEB_SELECT),
@@ -1558,12 +1558,12 @@
 }
 
 static void assert_pch_hdmi_disabled(struct drm_i915_private *dev_priv,
-				     enum pipe pipe, int reg)
+				     enum pipe pipe, i915_reg_t reg)
 {
 	u32 val = I915_READ(reg);
 	I915_STATE_WARN(hdmi_pipe_enabled(dev_priv, pipe, val),
 	     "PCH HDMI (0x%08x) enabled on transcoder %c, should be disabled\n",
-	     reg, pipe_name(pipe));
+	     i915_mmio_reg_offset(reg), pipe_name(pipe));
 
 	I915_STATE_WARN(HAS_PCH_IBX(dev_priv->dev) && (val & SDVO_ENABLE) == 0
 	     && (val & SDVO_PIPE_B_SELECT),
@@ -1599,7 +1599,7 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int reg = DPLL(crtc->pipe);
+	i915_reg_t reg = DPLL(crtc->pipe);
 	u32 dpll = pipe_config->dpll_hw_state.dpll;
 
 	assert_pipe_disabled(dev_priv, crtc->pipe);
@@ -1688,7 +1688,7 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int reg = DPLL(crtc->pipe);
+	i915_reg_t reg = DPLL(crtc->pipe);
 	u32 dpll = crtc->config->dpll_hw_state.dpll;
 
 	assert_pipe_disabled(dev_priv, crtc->pipe);
@@ -1837,7 +1837,7 @@
 			 unsigned int expected_mask)
 {
 	u32 port_mask;
-	int dpll_reg;
+	i915_reg_t dpll_reg;
 
 	switch (dport->port) {
 	case PORT_B:
@@ -1962,7 +1962,8 @@
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	uint32_t reg, val, pipeconf_val;
+	i915_reg_t reg;
+	uint32_t val, pipeconf_val;
 
 	/* PCH only available on ILK+ */
 	BUG_ON(!HAS_PCH_SPLIT(dev));
@@ -2051,7 +2052,8 @@
 					    enum pipe pipe)
 {
 	struct drm_device *dev = dev_priv->dev;
-	uint32_t reg, val;
+	i915_reg_t reg;
+	uint32_t val;
 
 	/* FDI relies on the transcoder */
 	assert_fdi_tx_disabled(dev_priv, pipe);
@@ -2068,7 +2070,7 @@
 	if (wait_for((I915_READ(reg) & TRANS_STATE_ENABLE) == 0, 50))
 		DRM_ERROR("failed to disable transcoder %c\n", pipe_name(pipe));
 
-	if (!HAS_PCH_IBX(dev)) {
+	if (HAS_PCH_CPT(dev)) {
 		/* Workaround: Clear the timing override chicken bit again. */
 		reg = TRANS_CHICKEN2(pipe);
 		val = I915_READ(reg);
@@ -2106,10 +2108,9 @@
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum pipe pipe = crtc->pipe;
-	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
-								      pipe);
+	enum transcoder cpu_transcoder = crtc->config->cpu_transcoder;
 	enum pipe pch_transcoder;
-	int reg;
+	i915_reg_t reg;
 	u32 val;
 
 	DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe));
@@ -2170,7 +2171,7 @@
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	enum transcoder cpu_transcoder = crtc->config->cpu_transcoder;
 	enum pipe pipe = crtc->pipe;
-	int reg;
+	i915_reg_t reg;
 	u32 val;
 
 	DRM_DEBUG_KMS("disabling pipe %c\n", pipe_name(pipe));
@@ -2269,20 +2270,20 @@
 					       fb_format_modifier, 0));
 }
 
-static int
+static void
 intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, struct drm_framebuffer *fb,
 			const struct drm_plane_state *plane_state)
 {
-	struct intel_rotation_info *info = &view->rotation_info;
+	struct intel_rotation_info *info = &view->params.rotation_info;
 	unsigned int tile_height, tile_pitch;
 
 	*view = i915_ggtt_view_normal;
 
 	if (!plane_state)
-		return 0;
+		return;
 
 	if (!intel_rotation_90_or_270(plane_state->rotation))
-		return 0;
+		return;
 
 	*view = i915_ggtt_view_rotated;
 
@@ -2309,8 +2310,6 @@
 		info->size_uv = info->width_pages_uv * info->height_pages_uv *
 				PAGE_SIZE;
 	}
-
-	return 0;
 }
 
 static unsigned int intel_linear_alignment(struct drm_i915_private *dev_priv)
@@ -2329,9 +2328,7 @@
 int
 intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 			   struct drm_framebuffer *fb,
-			   const struct drm_plane_state *plane_state,
-			   struct intel_engine_cs *pipelined,
-			   struct drm_i915_gem_request **pipelined_request)
+			   const struct drm_plane_state *plane_state)
 {
 	struct drm_device *dev = fb->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2366,9 +2363,7 @@
 		return -EINVAL;
 	}
 
-	ret = intel_fill_fb_ggtt_view(&view, fb, plane_state);
-	if (ret)
-		return ret;
+	intel_fill_fb_ggtt_view(&view, fb, plane_state);
 
 	/* Note that the w/a also requires 64 PTE of padding following the
 	 * bo. We currently fill all unused PTE with the shadow page and so
@@ -2387,11 +2382,10 @@
 	 */
 	intel_runtime_pm_get(dev_priv);
 
-	dev_priv->mm.interruptible = false;
-	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined,
-						   pipelined_request, &view);
+	ret = i915_gem_object_pin_to_display_plane(obj, alignment,
+						   &view);
 	if (ret)
-		goto err_interruptible;
+		goto err_pm;
 
 	/* Install a fence for tiled scan-out. Pre-i965 always needs a
 	 * fence, whereas 965+ only requires a fence if using
@@ -2417,14 +2411,12 @@
 		i915_gem_object_pin_fence(obj);
 	}
 
-	dev_priv->mm.interruptible = true;
 	intel_runtime_pm_put(dev_priv);
 	return 0;
 
 err_unpin:
 	i915_gem_object_unpin_from_display_plane(obj, &view);
-err_interruptible:
-	dev_priv->mm.interruptible = true;
+err_pm:
 	intel_runtime_pm_put(dev_priv);
 	return ret;
 }
@@ -2434,12 +2426,10 @@
 {
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct i915_ggtt_view view;
-	int ret;
 
 	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
 
-	ret = intel_fill_fb_ggtt_view(&view, fb, plane_state);
-	WARN_ONCE(ret, "Couldn't get view from plane state!");
+	intel_fill_fb_ggtt_view(&view, fb, plane_state);
 
 	if (view.type == I915_GGTT_VIEW_NORMAL)
 		i915_gem_object_unpin_fence(obj);
@@ -2680,7 +2670,7 @@
 	int plane = intel_crtc->plane;
 	unsigned long linear_offset;
 	u32 dspcntr;
-	u32 reg = DSPCNTR(plane);
+	i915_reg_t reg = DSPCNTR(plane);
 	int pixel_size;
 
 	if (!visible || !fb) {
@@ -2810,7 +2800,7 @@
 	int plane = intel_crtc->plane;
 	unsigned long linear_offset;
 	u32 dspcntr;
-	u32 reg = DSPCNTR(plane);
+	i915_reg_t reg = DSPCNTR(plane);
 	int pixel_size;
 
 	if (!visible || !fb) {
@@ -2935,30 +2925,32 @@
 	}
 }
 
-unsigned long intel_plane_obj_offset(struct intel_plane *intel_plane,
-				     struct drm_i915_gem_object *obj,
-				     unsigned int plane)
+u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
+			   struct drm_i915_gem_object *obj,
+			   unsigned int plane)
 {
-	const struct i915_ggtt_view *view = &i915_ggtt_view_normal;
+	struct i915_ggtt_view view;
 	struct i915_vma *vma;
-	unsigned char *offset;
+	u64 offset;
 
-	if (intel_rotation_90_or_270(intel_plane->base.state->rotation))
-		view = &i915_ggtt_view_rotated;
+	intel_fill_fb_ggtt_view(&view, intel_plane->base.fb,
+				intel_plane->base.state);
 
-	vma = i915_gem_obj_to_ggtt_view(obj, view);
+	vma = i915_gem_obj_to_ggtt_view(obj, &view);
 	if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
-		view->type))
+		view.type))
 		return -1;
 
-	offset = (unsigned char *)vma->node.start;
+	offset = vma->node.start;
 
 	if (plane == 1) {
-		offset += vma->ggtt_view.rotation_info.uv_start_page *
+		offset += vma->ggtt_view.params.rotation_info.uv_start_page *
 			  PAGE_SIZE;
 	}
 
-	return (unsigned long)offset;
+	WARN_ON(upper_32_bits(offset));
+
+	return lower_32_bits(offset);
 }
 
 static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id)
@@ -3084,7 +3076,7 @@
 	u32 tile_height, plane_offset, plane_size;
 	unsigned int rotation;
 	int x_offset, y_offset;
-	unsigned long surf_addr;
+	u32 surf_addr;
 	struct intel_crtc_state *crtc_state = intel_crtc->config;
 	struct intel_plane_state *plane_state;
 	int src_x = 0, src_y = 0, src_w = 0, src_h = 0;
@@ -3212,10 +3204,9 @@
 		struct intel_plane_state *plane_state;
 
 		drm_modeset_lock_crtc(crtc, &plane->base);
-
 		plane_state = to_intel_plane_state(plane->base.state);
 
-		if (plane_state->base.fb)
+		if (crtc->state->active && plane_state->base.fb)
 			plane->commit_plane(&plane->base, plane_state);
 
 		drm_modeset_unlock_crtc(crtc);
@@ -3291,32 +3282,6 @@
 	drm_modeset_unlock_all(dev);
 }
 
-static void
-intel_finish_fb(struct drm_framebuffer *old_fb)
-{
-	struct drm_i915_gem_object *obj = intel_fb_obj(old_fb);
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	bool was_interruptible = dev_priv->mm.interruptible;
-	int ret;
-
-	/* Big Hammer, we also need to ensure that any pending
-	 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
-	 * current scanout is retired before unpinning the old
-	 * framebuffer. Note that we rely on userspace rendering
-	 * into the buffer attached to the pipe they are waiting
-	 * on. If not, userspace generates a GPU hang with IPEHR
-	 * point to the MI_WAIT_FOR_EVENT.
-	 *
-	 * This should only fail upon a hung GPU, in which case we
-	 * can safely continue.
-	 */
-	dev_priv->mm.interruptible = false;
-	ret = i915_gem_object_wait_rendering(obj, true);
-	dev_priv->mm.interruptible = was_interruptible;
-
-	WARN_ON(ret);
-}
-
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -3386,7 +3351,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+	i915_reg_t reg;
+	u32 temp;
 
 	/* enable normal train */
 	reg = FDI_TX_CTL(pipe);
@@ -3428,7 +3394,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp, tries;
+	i915_reg_t reg;
+	u32 temp, tries;
 
 	/* FDI needs bits from pipe first */
 	assert_pipe_enabled(dev_priv, pipe);
@@ -3528,7 +3495,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp, i, retry;
+	i915_reg_t reg;
+	u32 temp, i, retry;
 
 	/* Train 1: unmask FDI RX Interrupt symbol_lock and bit_lock bit
 	   for train result */
@@ -3660,7 +3628,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp, i, j;
+	i915_reg_t reg;
+	u32 temp, i, j;
 
 	/* Train 1: unmask FDI RX Interrupt symbol_lock and bit_lock bit
 	   for train result */
@@ -3777,8 +3746,8 @@
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
-
+	i915_reg_t reg;
+	u32 temp;
 
 	/* enable PCH FDI RX PLL, wait warmup plus DMI latency */
 	reg = FDI_RX_CTL(pipe);
@@ -3814,7 +3783,8 @@
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+	i915_reg_t reg;
+	u32 temp;
 
 	/* Switch from PCDclk to Rawclk */
 	reg = FDI_RX_CTL(pipe);
@@ -3844,7 +3814,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+	i915_reg_t reg;
+	u32 temp;
 
 	/* disable CPU FDI tx and PCH FDI rx */
 	reg = FDI_TX_CTL(pipe);
@@ -3937,15 +3908,23 @@
 				 work->pending_flip_obj);
 }
 
-void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
+static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	long ret;
 
 	WARN_ON(waitqueue_active(&dev_priv->pending_flip_queue));
-	if (WARN_ON(wait_event_timeout(dev_priv->pending_flip_queue,
-				       !intel_crtc_has_pending_flip(crtc),
-				       60*HZ) == 0)) {
+
+	ret = wait_event_interruptible_timeout(
+					dev_priv->pending_flip_queue,
+					!intel_crtc_has_pending_flip(crtc),
+					60*HZ);
+
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
 		spin_lock_irq(&dev->event_lock);
@@ -3956,11 +3935,7 @@
 		spin_unlock_irq(&dev->event_lock);
 	}
 
-	if (crtc->primary->fb) {
-		mutex_lock(&dev->struct_mutex);
-		intel_finish_fb(crtc->primary->fb);
-		mutex_unlock(&dev->struct_mutex);
-	}
+	return 0;
 }
 
 /* Program iCLKIP clock to the desired frequency */
@@ -4120,6 +4095,22 @@
 	}
 }
 
+/* Return which DP Port should be selected for Transcoder DP control */
+static enum port
+intel_trans_dp_port_sel(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct intel_encoder *encoder;
+
+	for_each_encoder_on_crtc(dev, crtc, encoder) {
+		if (encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
+		    encoder->type == INTEL_OUTPUT_EDP)
+			return enc_to_dig_port(&encoder->base)->port;
+	}
+
+	return -1;
+}
+
 /*
  * Enable PCH resources required for PCH ports:
  *   - PCH PLLs
@@ -4134,7 +4125,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+	u32 temp;
 
 	assert_pch_transcoder_disabled(dev_priv, pipe);
 
@@ -4181,8 +4172,10 @@
 
 	/* For PCH DP, enable TRANS_DP_CTL */
 	if (HAS_PCH_CPT(dev) && intel_crtc->config->has_dp_encoder) {
+		const struct drm_display_mode *adjusted_mode =
+			&intel_crtc->config->base.adjusted_mode;
 		u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5;
-		reg = TRANS_DP_CTL(pipe);
+		i915_reg_t reg = TRANS_DP_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~(TRANS_DP_PORT_SEL_MASK |
 			  TRANS_DP_SYNC_MASK |
@@ -4190,19 +4183,19 @@
 		temp |= TRANS_DP_OUTPUT_ENABLE;
 		temp |= bpc << 9; /* same format but at 11:9 */
 
-		if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC)
+		if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
 			temp |= TRANS_DP_HSYNC_ACTIVE_HIGH;
-		if (crtc->mode.flags & DRM_MODE_FLAG_PVSYNC)
+		if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
 			temp |= TRANS_DP_VSYNC_ACTIVE_HIGH;
 
 		switch (intel_trans_dp_port_sel(crtc)) {
-		case PCH_DP_B:
+		case PORT_B:
 			temp |= TRANS_DP_PORT_SEL_B;
 			break;
-		case PCH_DP_C:
+		case PORT_C:
 			temp |= TRANS_DP_PORT_SEL_C;
 			break;
-		case PCH_DP_D:
+		case PORT_D:
 			temp |= TRANS_DP_PORT_SEL_D;
 			break;
 		default:
@@ -4342,7 +4335,7 @@
 static void cpt_verify_modeset(struct drm_device *dev, int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int dslreg = PIPEDSL(pipe);
+	i915_reg_t dslreg = PIPEDSL(pipe);
 	u32 temp;
 
 	temp = I915_READ(dslreg);
@@ -4652,7 +4645,7 @@
 	}
 
 	for (i = 0; i < 256; i++) {
-		u32 palreg;
+		i915_reg_t palreg;
 
 		if (HAS_GMCH_DISPLAY(dev))
 			palreg = PALETTE(pipe, i);
@@ -4731,9 +4724,9 @@
 	if (IS_GEN2(dev))
 		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
 
-	/* Underruns don't raise interrupts, so check manually. */
-	if (HAS_GMCH_DISPLAY(dev))
-		i9xx_check_fifo_underruns(dev_priv);
+	/* Underruns don't always raise interrupts, so check manually. */
+	intel_check_cpu_fifo_underruns(dev_priv);
+	intel_check_pch_fifo_underruns(dev_priv);
 }
 
 /**
@@ -4792,7 +4785,6 @@
 	struct intel_crtc_atomic_commit *atomic = &crtc->atomic;
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_plane *plane;
 
 	if (atomic->wait_vblank)
 		intel_wait_for_vblank(dev, crtc->pipe);
@@ -4811,10 +4803,6 @@
 	if (atomic->post_enable_primary)
 		intel_post_enable_primary(&crtc->base);
 
-	drm_for_each_plane_mask(plane, dev, atomic->update_sprite_watermarks)
-		intel_update_sprite_watermarks(plane, &crtc->base,
-					       0, 0, 0, false, false);
-
 	memset(atomic, 0, sizeof(*atomic));
 }
 
@@ -4823,20 +4811,6 @@
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc_atomic_commit *atomic = &crtc->atomic;
-	struct drm_plane *p;
-
-	/* Track fb's for any planes being disabled */
-	drm_for_each_plane_mask(p, dev, atomic->disabled_planes) {
-		struct intel_plane *plane = to_intel_plane(p);
-
-		mutex_lock(&dev->struct_mutex);
-		i915_gem_track_fb(intel_fb_obj(plane->base.fb), NULL,
-				  plane->frontbuffer_bit);
-		mutex_unlock(&dev->struct_mutex);
-	}
-
-	if (atomic->wait_for_flips)
-		intel_crtc_wait_for_pending_flips(&crtc->base);
 
 	if (atomic->disable_fbc)
 		intel_fbc_disable_crtc(crtc);
@@ -4885,6 +4859,9 @@
 		return;
 
 	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
+
+	if (intel_crtc->config->has_pch_encoder)
 		intel_prepare_shared_dpll(intel_crtc);
 
 	if (intel_crtc->config->has_dp_encoder)
@@ -4902,7 +4879,6 @@
 	intel_crtc->active = true;
 
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
-	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->pre_enable)
@@ -4940,6 +4916,11 @@
 
 	if (HAS_PCH_CPT(dev))
 		cpt_verify_modeset(dev, intel_crtc->pipe);
+
+	/* Must wait for vblank to avoid spurious PCH FIFO underruns */
+	if (intel_crtc->config->has_pch_encoder)
+		intel_wait_for_vblank(dev, pipe);
+	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
 }
 
 /* IPS only exists on ULT machines and is tied to pipe A. */
@@ -4962,6 +4943,10 @@
 	if (WARN_ON(intel_crtc->active))
 		return;
 
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
+						      false);
+
 	if (intel_crtc_to_shared_dpll(intel_crtc))
 		intel_enable_shared_dpll(intel_crtc);
 
@@ -4994,11 +4979,8 @@
 			encoder->pre_enable(encoder);
 	}
 
-	if (intel_crtc->config->has_pch_encoder) {
-		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
-						      true);
+	if (intel_crtc->config->has_pch_encoder)
 		dev_priv->display.fdi_link_train(crtc);
-	}
 
 	if (!is_dsi)
 		intel_ddi_enable_pipe_clock(intel_crtc);
@@ -5035,6 +5017,10 @@
 		intel_opregion_notify_encoder(encoder, true);
 	}
 
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
+						      true);
+
 	/* If we change the relative order between pipe/planes enabling, we need
 	 * to change the workaround. */
 	hsw_workaround_pipe = pipe_config->hsw_workaround_pipe;
@@ -5066,7 +5052,9 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->disable(encoder);
@@ -5074,9 +5062,6 @@
 	drm_crtc_vblank_off(crtc);
 	assert_vblank_disabled(crtc);
 
-	if (intel_crtc->config->has_pch_encoder)
-		intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
-
 	intel_disable_pipe(intel_crtc);
 
 	ironlake_pfit_disable(intel_crtc, false);
@@ -5092,6 +5077,9 @@
 		ironlake_disable_pch_transcoder(dev_priv, pipe);
 
 		if (HAS_PCH_CPT(dev)) {
+			i915_reg_t reg;
+			u32 temp;
+
 			/* disable TRANS_DP_CTL */
 			reg = TRANS_DP_CTL(pipe);
 			temp = I915_READ(reg);
@@ -5108,6 +5096,8 @@
 
 		ironlake_fdi_pll_disable(intel_crtc);
 	}
+
+	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
 }
 
 static void haswell_crtc_disable(struct drm_crtc *crtc)
@@ -5119,6 +5109,10 @@
 	enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
 	bool is_dsi = intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI);
 
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
+						      false);
+
 	for_each_encoder_on_crtc(dev, crtc, encoder) {
 		intel_opregion_notify_encoder(encoder, false);
 		encoder->disable(encoder);
@@ -5127,9 +5121,6 @@
 	drm_crtc_vblank_off(crtc);
 	assert_vblank_disabled(crtc);
 
-	if (intel_crtc->config->has_pch_encoder)
-		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
-						      false);
 	intel_disable_pipe(intel_crtc);
 
 	if (intel_crtc->config->dp_encoder_is_mst)
@@ -5154,6 +5145,10 @@
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->post_disable)
 			encoder->post_disable(encoder);
+
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
+						      true);
 }
 
 static void i9xx_pfit_enable(struct intel_crtc *crtc)
@@ -5184,21 +5179,41 @@
 {
 	switch (port) {
 	case PORT_A:
-		return POWER_DOMAIN_PORT_DDI_A_4_LANES;
+		return POWER_DOMAIN_PORT_DDI_A_LANES;
 	case PORT_B:
-		return POWER_DOMAIN_PORT_DDI_B_4_LANES;
+		return POWER_DOMAIN_PORT_DDI_B_LANES;
 	case PORT_C:
-		return POWER_DOMAIN_PORT_DDI_C_4_LANES;
+		return POWER_DOMAIN_PORT_DDI_C_LANES;
 	case PORT_D:
-		return POWER_DOMAIN_PORT_DDI_D_4_LANES;
+		return POWER_DOMAIN_PORT_DDI_D_LANES;
 	case PORT_E:
-		return POWER_DOMAIN_PORT_DDI_E_2_LANES;
+		return POWER_DOMAIN_PORT_DDI_E_LANES;
 	default:
-		WARN_ON_ONCE(1);
+		MISSING_CASE(port);
 		return POWER_DOMAIN_PORT_OTHER;
 	}
 }
 
+static enum intel_display_power_domain port_to_aux_power_domain(enum port port)
+{
+	switch (port) {
+	case PORT_A:
+		return POWER_DOMAIN_AUX_A;
+	case PORT_B:
+		return POWER_DOMAIN_AUX_B;
+	case PORT_C:
+		return POWER_DOMAIN_AUX_C;
+	case PORT_D:
+		return POWER_DOMAIN_AUX_D;
+	case PORT_E:
+		/* FIXME: Check VBT for actual wiring of PORT E */
+		return POWER_DOMAIN_AUX_D;
+	default:
+		MISSING_CASE(port);
+		return POWER_DOMAIN_AUX_A;
+	}
+}
+
 #define for_each_power_domain(domain, mask)				\
 	for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++)	\
 		if ((1 << (domain)) & (mask))
@@ -5230,6 +5245,36 @@
 	}
 }
 
+enum intel_display_power_domain
+intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder)
+{
+	struct drm_device *dev = intel_encoder->base.dev;
+	struct intel_digital_port *intel_dig_port;
+
+	switch (intel_encoder->type) {
+	case INTEL_OUTPUT_UNKNOWN:
+	case INTEL_OUTPUT_HDMI:
+		/*
+		 * Only DDI platforms should ever use these output types.
+		 * We can get here after the HDMI detect code has already set
+		 * the type of the shared encoder. Since we can't be sure
+		 * what's the status of the given connectors, play safe and
+		 * run the DP detection too.
+		 */
+		WARN_ON_ONCE(!HAS_DDI(dev));
+	case INTEL_OUTPUT_DISPLAYPORT:
+	case INTEL_OUTPUT_EDP:
+		intel_dig_port = enc_to_dig_port(&intel_encoder->base);
+		return port_to_aux_power_domain(intel_dig_port->port);
+	case INTEL_OUTPUT_DP_MST:
+		intel_dig_port = enc_to_mst(&intel_encoder->base)->primary;
+		return port_to_aux_power_domain(intel_dig_port->port);
+	default:
+		MISSING_CASE(intel_encoder->type);
+		return POWER_DOMAIN_AUX_A;
+	}
+}
+
 static unsigned long get_crtc_power_domains(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -5237,13 +5282,11 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	enum pipe pipe = intel_crtc->pipe;
 	unsigned long mask;
-	enum transcoder transcoder;
+	enum transcoder transcoder = intel_crtc->config->cpu_transcoder;
 
 	if (!crtc->state->active)
 		return 0;
 
-	transcoder = intel_pipe_to_cpu_transcoder(dev->dev_private, pipe);
-
 	mask = BIT(POWER_DOMAIN_PIPE(pipe));
 	mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder));
 	if (intel_crtc->config->pch_pfit.enabled ||
@@ -5330,7 +5373,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK;
 
 		if (limit == SKL_DFSM_CDCLK_LIMIT_675)
@@ -5747,32 +5790,16 @@
 	if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
 		DRM_ERROR("DBuf power disable timeout\n");
 
-	/*
-	 * DMC assumes ownership of LCPLL and will get confused if we touch it.
-	 */
-	if (dev_priv->csr.dmc_payload) {
-		/* disable DPLL0 */
-		I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) &
-					~LCPLL_PLL_ENABLE);
-		if (wait_for(!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK), 1))
-			DRM_ERROR("Couldn't disable DPLL0\n");
-	}
-
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	/* disable DPLL0 */
+	I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE);
+	if (wait_for(!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK), 1))
+		DRM_ERROR("Couldn't disable DPLL0\n");
 }
 
 void skl_init_cdclk(struct drm_i915_private *dev_priv)
 {
-	u32 val;
 	unsigned int required_vco;
 
-	/* enable PCH reset handshake */
-	val = I915_READ(HSW_NDE_RSTWRN_OPT);
-	I915_WRITE(HSW_NDE_RSTWRN_OPT, val | RESET_PCH_HANDSHAKE_ENABLE);
-
-	/* enable PG1 and Misc I/O */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
-
 	/* DPLL0 not enabled (happens on early BIOS versions) */
 	if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE)) {
 		/* enable DPLL0 */
@@ -5793,6 +5820,45 @@
 		DRM_ERROR("DBuf power enable timeout\n");
 }
 
+int skl_sanitize_cdclk(struct drm_i915_private *dev_priv)
+{
+	uint32_t lcpll1 = I915_READ(LCPLL1_CTL);
+	uint32_t cdctl = I915_READ(CDCLK_CTL);
+	int freq = dev_priv->skl_boot_cdclk;
+
+	/*
+	 * Check if the pre-os initialized the display.
+	 * There is a SWF18 scratchpad register defined which is set by the
+	 * pre-os and which OS drivers can use to check the status.
+	 */
+	if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0)
+		goto sanitize;
+
+	/* Is PLL enabled and locked? */
+	if (!((lcpll1 & LCPLL_PLL_ENABLE) && (lcpll1 & LCPLL_PLL_LOCK)))
+		goto sanitize;
+
+	/* DPLL okay; verify the cdclock
+	 *
+	 * In some instances the frequency selection is correct but the
+	 * decimal part is programmed wrong by the BIOS when the pre-os
+	 * does not enable the display. Verify that as well.
+	 */
+	if (cdctl == ((cdctl & CDCLK_FREQ_SEL_MASK) | skl_cdclk_decimal(freq)))
+		/* All well; nothing to sanitize */
+		return false;
+sanitize:
+	/*
+	 * For now initialize with the max cdclk until
+	 * we get dynamic cdclk support.
+	 */
+	dev_priv->skl_boot_cdclk = dev_priv->max_cdclk_freq;
+	skl_init_cdclk(dev_priv);
+
+	/* we did have to sanitize */
+	return true;
+}
+
 /* Adjust CDclk dividers to allow high res or save power if possible */
 static void valleyview_set_cdclk(struct drm_device *dev, int cdclk)
 {
@@ -6257,7 +6323,8 @@
 		return;
 
 	if (to_intel_plane_state(crtc->primary->state)->visible) {
-		intel_crtc_wait_for_pending_flips(crtc);
+		WARN_ON(intel_crtc->unpin_work);
+
 		intel_pre_disable_primary(crtc);
 	}
 
@@ -6575,6 +6642,15 @@
 		pipe_config_supports_ips(dev_priv, pipe_config);
 }
 
+static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc)
+{
+	const struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	/* GDG double wide on either pipe, otherwise pipe A only */
+	return INTEL_INFO(dev_priv)->gen < 4 &&
+		(crtc->pipe == PIPE_A || IS_I915G(dev_priv));
+}
+
 static int intel_crtc_compute_config(struct intel_crtc *crtc,
 				     struct intel_crtc_state *pipe_config)
 {
@@ -6584,23 +6660,24 @@
 
 	/* FIXME should check pixel clock limits on all platforms */
 	if (INTEL_INFO(dev)->gen < 4) {
-		int clock_limit = dev_priv->max_cdclk_freq;
+		int clock_limit = dev_priv->max_cdclk_freq * 9 / 10;
 
 		/*
-		 * Enable pixel doubling when the dot clock
+		 * Enable double wide mode when the dot clock
 		 * is > 90% of the (display) core speed.
-		 *
-		 * GDG double wide on either pipe,
-		 * otherwise pipe A only.
 		 */
-		if ((crtc->pipe == PIPE_A || IS_I915G(dev)) &&
-		    adjusted_mode->crtc_clock > clock_limit * 9 / 10) {
+		if (intel_crtc_supports_double_wide(crtc) &&
+		    adjusted_mode->crtc_clock > clock_limit) {
 			clock_limit *= 2;
 			pipe_config->double_wide = true;
 		}
 
-		if (adjusted_mode->crtc_clock > clock_limit * 9 / 10)
+		if (adjusted_mode->crtc_clock > clock_limit) {
+			DRM_DEBUG_KMS("requested pixel clock (%d kHz) too high (max: %d kHz, double wide: %s)\n",
+				      adjusted_mode->crtc_clock, clock_limit,
+				      yesno(pipe_config->double_wide));
 			return -EINVAL;
+		}
 	}
 
 	/*
@@ -7365,7 +7442,7 @@
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = crtc->pipe;
-	int dpll_reg = DPLL(crtc->pipe);
+	i915_reg_t dpll_reg = DPLL(crtc->pipe);
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
 	u32 loopfilter, tribuf_calcntr;
 	u32 bestn, bestm1, bestm2, bestp1, bestp2, bestm2_frac;
@@ -9283,8 +9360,8 @@
 
 	I915_STATE_WARN(I915_READ(HSW_PWR_WELL_DRIVER), "Power well on\n");
 	I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n");
-	I915_STATE_WARN(I915_READ(WRPLL_CTL1) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
-	I915_STATE_WARN(I915_READ(WRPLL_CTL2) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
+	I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
+	I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
 	I915_STATE_WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
 	I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
 	     "CPU PWM1 enabled\n");
@@ -9746,7 +9823,7 @@
 
 	port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT;
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		skylake_get_ddi_pll(dev_priv, port, pipe_config);
 	else if (IS_BROXTON(dev))
 		bxt_get_ddi_pll(dev_priv, port, pipe_config);
@@ -10092,20 +10169,17 @@
 	int ret;
 
 	intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
-	if (!intel_fb) {
-		drm_gem_object_unreference(&obj->base);
+	if (!intel_fb)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj);
 	if (ret)
 		goto err;
 
 	return &intel_fb->base;
-err:
-	drm_gem_object_unreference(&obj->base);
-	kfree(intel_fb);
 
+err:
+	kfree(intel_fb);
 	return ERR_PTR(ret);
 }
 
@@ -10145,6 +10219,7 @@
 				  struct drm_display_mode *mode,
 				  int depth, int bpp)
 {
+	struct drm_framebuffer *fb;
 	struct drm_i915_gem_object *obj;
 	struct drm_mode_fb_cmd2 mode_cmd = { 0 };
 
@@ -10159,7 +10234,11 @@
 								bpp);
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth);
 
-	return intel_framebuffer_create(dev, &mode_cmd, obj);
+	fb = intel_framebuffer_create(dev, &mode_cmd, obj);
+	if (IS_ERR(fb))
+		drm_gem_object_unreference_unlocked(&obj->base);
+
+	return fb;
 }
 
 static struct drm_framebuffer *
@@ -11062,7 +11141,7 @@
 	 */
 	if (ring->id == RCS) {
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit_reg(ring, DERRMR);
 		intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
 					DERRMR_PIPEB_PRI_FLIP_DONE |
 					DERRMR_PIPEC_PRI_FLIP_DONE));
@@ -11072,7 +11151,7 @@
 		else
 			intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
 					      MI_SRM_LRM_GLOBAL_GTT);
-		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit_reg(ring, DERRMR);
 		intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
 		if (IS_GEN8(dev)) {
 			intel_ring_emit(ring, 0);
@@ -11117,13 +11196,14 @@
 }
 
 static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
+			     unsigned int rotation,
 			     struct intel_unpin_work *work)
 {
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
 	const enum pipe pipe = intel_crtc->pipe;
-	u32 ctl, stride;
+	u32 ctl, stride, tile_height;
 
 	ctl = I915_READ(PLANE_CTL(pipe, 0));
 	ctl &= ~PLANE_CTL_TILED_MASK;
@@ -11147,9 +11227,16 @@
 	 * The stride is either expressed as a multiple of 64 bytes chunks for
 	 * linear buffers or in number of tiles for tiled buffers.
 	 */
-	stride = fb->pitches[0] /
-		 intel_fb_stride_alignment(dev, fb->modifier[0],
-					   fb->pixel_format);
+	if (intel_rotation_90_or_270(rotation)) {
+		/* stride = Surface height in tiles */
+		tile_height = intel_tile_height(dev, fb->pixel_format,
+						fb->modifier[0], 0);
+		stride = DIV_ROUND_UP(fb->height, tile_height);
+	} else {
+		stride = fb->pitches[0] /
+				intel_fb_stride_alignment(dev, fb->modifier[0],
+							  fb->pixel_format);
+	}
 
 	/*
 	 * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
@@ -11170,10 +11257,9 @@
 	struct intel_framebuffer *intel_fb =
 		to_intel_framebuffer(intel_crtc->base.primary->fb);
 	struct drm_i915_gem_object *obj = intel_fb->obj;
+	i915_reg_t reg = DSPCNTR(intel_crtc->plane);
 	u32 dspcntr;
-	u32 reg;
 
-	reg = DSPCNTR(intel_crtc->plane);
 	dspcntr = I915_READ(reg);
 
 	if (obj->tiling_mode != I915_TILING_NONE)
@@ -11207,7 +11293,7 @@
 	intel_pipe_update_start(crtc);
 
 	if (INTEL_INFO(mmio_flip->i915)->gen >= 9)
-		skl_do_mmio_flip(crtc, work);
+		skl_do_mmio_flip(crtc, mmio_flip->rotation, work);
 	else
 		/* use_mmio_flip() restricts MMIO flips to ilk+ */
 		ilk_do_mmio_flip(crtc, work);
@@ -11234,10 +11320,7 @@
 
 static int intel_queue_mmio_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
-				 uint32_t flags)
+				 struct drm_i915_gem_object *obj)
 {
 	struct intel_mmio_flip *mmio_flip;
 
@@ -11248,6 +11331,7 @@
 	mmio_flip->i915 = to_i915(dev);
 	mmio_flip->req = i915_gem_request_reference(obj->last_write_req);
 	mmio_flip->crtc = to_intel_crtc(crtc);
+	mmio_flip->rotation = crtc->primary->state->rotation;
 
 	INIT_WORK(&mmio_flip->work, intel_mmio_flip_work_func);
 	schedule_work(&mmio_flip->work);
@@ -11453,9 +11537,14 @@
 	 * synchronisation, so all we want here is to pin the framebuffer
 	 * into the display plane and skip any waits.
 	 */
+	if (!mmio_flip) {
+		ret = i915_gem_object_sync(obj, ring, &request);
+		if (ret)
+			goto cleanup_pending;
+	}
+
 	ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
-					 crtc->primary->state,
-					 mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring, &request);
+					 crtc->primary->state);
 	if (ret)
 		goto cleanup_pending;
 
@@ -11464,8 +11553,7 @@
 	work->gtt_offset += intel_crtc->dspaddr_offset;
 
 	if (mmio_flip) {
-		ret = intel_queue_mmio_flip(dev, crtc, fb, obj, ring,
-					    page_flip_flags);
+		ret = intel_queue_mmio_flip(dev, crtc, obj);
 		if (ret)
 			goto cleanup_unpin;
 
@@ -11579,18 +11667,32 @@
 static bool intel_wm_need_update(struct drm_plane *plane,
 				 struct drm_plane_state *state)
 {
-	/* Update watermarks on tiling changes. */
+	struct intel_plane_state *new = to_intel_plane_state(state);
+	struct intel_plane_state *cur = to_intel_plane_state(plane->state);
+
+	/* Update watermarks on tiling or size changes. */
 	if (!plane->state->fb || !state->fb ||
 	    plane->state->fb->modifier[0] != state->fb->modifier[0] ||
-	    plane->state->rotation != state->rotation)
-		return true;
-
-	if (plane->state->crtc_w != state->crtc_w)
+	    plane->state->rotation != state->rotation ||
+	    drm_rect_width(&new->src) != drm_rect_width(&cur->src) ||
+	    drm_rect_height(&new->src) != drm_rect_height(&cur->src) ||
+	    drm_rect_width(&new->dst) != drm_rect_width(&cur->dst) ||
+	    drm_rect_height(&new->dst) != drm_rect_height(&cur->dst))
 		return true;
 
 	return false;
 }
 
+static bool needs_scaling(struct intel_plane_state *state)
+{
+	int src_w = drm_rect_width(&state->src) >> 16;
+	int src_h = drm_rect_height(&state->src) >> 16;
+	int dst_w = drm_rect_width(&state->dst);
+	int dst_h = drm_rect_height(&state->dst);
+
+	return (src_w != dst_w || src_h != dst_h);
+}
+
 int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 				    struct drm_plane_state *plane_state)
 {
@@ -11606,7 +11708,6 @@
 	bool mode_changed = needs_modeset(crtc_state);
 	bool was_crtc_enabled = crtc->state->active;
 	bool is_crtc_enabled = crtc_state->active;
-
 	bool turn_off, turn_on, visible, was_visible;
 	struct drm_framebuffer *fb = plane_state->fb;
 
@@ -11619,14 +11720,6 @@
 			return ret;
 	}
 
-	/*
-	 * Disabling a plane is always okay; we just need to update
-	 * fb tracking in a special way since cleanup_fb() won't
-	 * get called by the plane helpers.
-	 */
-	if (old_plane_state->base.fb && !fb)
-		intel_crtc->atomic.disabled_planes |= 1 << i;
-
 	was_visible = old_plane_state->visible;
 	visible = to_intel_plane_state(plane_state)->visible;
 
@@ -11676,7 +11769,6 @@
 
 	switch (plane->type) {
 	case DRM_PLANE_TYPE_PRIMARY:
-		intel_crtc->atomic.wait_for_flips = true;
 		intel_crtc->atomic.pre_disable_primary = turn_off;
 		intel_crtc->atomic.post_enable_primary = turn_on;
 
@@ -11724,11 +11816,23 @@
 	case DRM_PLANE_TYPE_CURSOR:
 		break;
 	case DRM_PLANE_TYPE_OVERLAY:
-		if (turn_off && !mode_changed) {
+		/*
+		 * WaCxSRDisabledForSpriteScaling:ivb
+		 *
+		 * cstate->update_wm was already set above, so this flag will
+		 * take effect when we commit and program watermarks.
+		 */
+		if (IS_IVYBRIDGE(dev) &&
+		    needs_scaling(to_intel_plane_state(plane_state)) &&
+		    !needs_scaling(old_plane_state)) {
+			to_intel_crtc_state(crtc_state)->disable_lp_wm = true;
+		} else if (turn_off && !mode_changed) {
 			intel_crtc->atomic.wait_vblank = true;
 			intel_crtc->atomic.update_sprite_watermarks |=
 				1 << i;
 		}
+
+		break;
 	}
 	return 0;
 }
@@ -11813,6 +11917,12 @@
 	}
 
 	ret = 0;
+	if (dev_priv->display.compute_pipe_wm) {
+		ret = dev_priv->display.compute_pipe_wm(intel_crtc, state);
+		if (ret)
+			return ret;
+	}
+
 	if (INTEL_INFO(dev)->gen >= 9) {
 		if (mode_changed)
 			ret = skl_update_scaler_crtc(pipe_config);
@@ -12002,7 +12112,7 @@
 			      pipe_config->dpll_hw_state.pll9,
 			      pipe_config->dpll_hw_state.pll10,
 			      pipe_config->dpll_hw_state.pcsdw12);
-	} else if (IS_SKYLAKE(dev)) {
+	} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: "
 			      "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n",
 			      pipe_config->ddi_pll_sel,
@@ -12256,6 +12366,18 @@
 			crtc->hwmode = crtc->state->adjusted_mode;
 		else
 			crtc->hwmode.crtc_clock = 0;
+
+		/*
+		 * Update legacy state to satisfy fbc code. This can
+		 * be removed when fbc uses the atomic state.
+		 */
+		if (drm_atomic_get_existing_plane_state(state, crtc->primary)) {
+			struct drm_plane_state *plane_state = crtc->primary->state;
+
+			crtc->primary->fb = plane_state->fb;
+			crtc->x = plane_state->src_x >> 16;
+			crtc->y = plane_state->src_y >> 16;
+		}
 	}
 }
 
@@ -13020,6 +13142,45 @@
 	return 0;
 }
 
+/*
+ * Handle calculation of various watermark data at the end of the atomic check
+ * phase.  The code here should be run after the per-crtc and per-plane 'check'
+ * handlers to ensure that all derived state has been updated.
+ */
+static void calc_watermark_data(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *cstate;
+	struct drm_plane *plane;
+	struct drm_plane_state *pstate;
+
+	/*
+	 * Calculate watermark configuration details now that derived
+	 * plane/crtc state is all properly updated.
+	 */
+	drm_for_each_crtc(crtc, dev) {
+		cstate = drm_atomic_get_existing_crtc_state(state, crtc) ?:
+			crtc->state;
+
+		if (cstate->active)
+			intel_state->wm_config.num_pipes_active++;
+	}
+	drm_for_each_legacy_plane(plane, dev) {
+		pstate = drm_atomic_get_existing_plane_state(state, plane) ?:
+			plane->state;
+
+		if (!to_intel_plane_state(pstate)->visible)
+			continue;
+
+		intel_state->wm_config.sprites_enabled = true;
+		if (pstate->crtc_w != pstate->src_w >> 16 ||
+		    pstate->crtc_h != pstate->src_h >> 16)
+			intel_state->wm_config.sprites_scaled = true;
+	}
+}
+
 /**
  * intel_atomic_check - validate state object
  * @dev: drm device
@@ -13028,6 +13189,7 @@
 static int intel_atomic_check(struct drm_device *dev,
 			      struct drm_atomic_state *state)
 {
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *crtc_state;
 	int ret, i;
@@ -13095,10 +13257,81 @@
 		if (ret)
 			return ret;
 	} else
-		to_intel_atomic_state(state)->cdclk =
-			to_i915(state->dev)->cdclk_freq;
+		intel_state->cdclk = to_i915(state->dev)->cdclk_freq;
 
-	return drm_atomic_helper_check_planes(state->dev, state);
+	ret = drm_atomic_helper_check_planes(state->dev, state);
+	if (ret)
+		return ret;
+
+	calc_watermark_data(state);
+
+	return 0;
+}
+
+static int intel_atomic_prepare_commit(struct drm_device *dev,
+				       struct drm_atomic_state *state,
+				       bool async)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_plane_state *plane_state;
+	struct drm_crtc_state *crtc_state;
+	struct drm_plane *plane;
+	struct drm_crtc *crtc;
+	int i, ret;
+
+	if (async) {
+		DRM_DEBUG_KMS("i915 does not yet support async commit\n");
+		return -EINVAL;
+	}
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		ret = intel_crtc_wait_for_pending_flips(crtc);
+		if (ret)
+			return ret;
+
+		if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2)
+			flush_workqueue(dev_priv->wq);
+	}
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	ret = drm_atomic_helper_prepare_planes(dev, state);
+	if (!ret && !async && !i915_reset_in_progress(&dev_priv->gpu_error)) {
+		u32 reset_counter;
+
+		reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
+		mutex_unlock(&dev->struct_mutex);
+
+		for_each_plane_in_state(state, plane, plane_state, i) {
+			struct intel_plane_state *intel_plane_state =
+				to_intel_plane_state(plane_state);
+
+			if (!intel_plane_state->wait_req)
+				continue;
+
+			ret = __i915_wait_request(intel_plane_state->wait_req,
+						  reset_counter, true,
+						  NULL, NULL);
+
+			/* Swallow -EIO errors to allow updates during hw lockup. */
+			if (ret == -EIO)
+				ret = 0;
+
+			if (ret)
+				break;
+		}
+
+		if (!ret)
+			return 0;
+
+		mutex_lock(&dev->struct_mutex);
+		drm_atomic_helper_cleanup_planes(dev, state);
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
 }
 
 /**
@@ -13122,22 +13355,20 @@
 			       bool async)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_crtc *crtc;
 	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
 	int ret = 0;
 	int i;
 	bool any_ms = false;
 
-	if (async) {
-		DRM_DEBUG_KMS("i915 does not yet support async commit\n");
-		return -EINVAL;
+	ret = intel_atomic_prepare_commit(dev, state, async);
+	if (ret) {
+		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
+		return ret;
 	}
 
-	ret = drm_atomic_helper_prepare_planes(dev, state);
-	if (ret)
-		return ret;
-
 	drm_atomic_helper_swap_state(dev, state);
+	dev_priv->wm.config = to_intel_atomic_state(state)->wm_config;
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -13175,6 +13406,9 @@
 			to_intel_crtc_state(crtc->state)->update_pipe;
 		unsigned long put_domains = 0;
 
+		if (modeset)
+			intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
+
 		if (modeset && crtc->state->active) {
 			update_scanline_offset(to_intel_crtc(crtc));
 			dev_priv->display.crtc_enable(crtc);
@@ -13190,18 +13424,26 @@
 		if (!modeset)
 			intel_pre_plane_update(intel_crtc);
 
-		drm_atomic_helper_commit_planes_on_crtc(crtc_state);
+		if (crtc->state->active &&
+		    (crtc->state->planes_changed || update_pipe))
+			drm_atomic_helper_commit_planes_on_crtc(crtc_state);
 
 		if (put_domains)
 			modeset_put_power_domains(dev_priv, put_domains);
 
 		intel_post_plane_update(intel_crtc);
+
+		if (modeset)
+			intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
 	}
 
 	/* FIXME: add subpixel order */
 
 	drm_atomic_helper_wait_for_vblanks(dev, state);
+
+	mutex_lock(&dev->struct_mutex);
 	drm_atomic_helper_cleanup_planes(dev, state);
+	mutex_unlock(&dev->struct_mutex);
 
 	if (any_ms)
 		intel_modeset_check_state(dev, state);
@@ -13370,6 +13612,8 @@
  * bits.  Some older platforms need special physical address handling for
  * cursor planes.
  *
+ * Must be called with struct_mutex held.
+ *
  * Returns 0 on success, negative error code on failure.
  */
 int
@@ -13380,28 +13624,58 @@
 	struct drm_framebuffer *fb = new_state->fb;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->fb);
+	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
 	int ret = 0;
 
-	if (!obj)
+	if (!obj && !old_obj)
 		return 0;
 
-	mutex_lock(&dev->struct_mutex);
+	if (old_obj) {
+		struct drm_crtc_state *crtc_state =
+			drm_atomic_get_existing_crtc_state(new_state->state, plane->state->crtc);
 
-	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+		/* Big Hammer, we also need to ensure that any pending
+		 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
+		 * current scanout is retired before unpinning the old
+		 * framebuffer. Note that we rely on userspace rendering
+		 * into the buffer attached to the pipe they are waiting
+		 * on. If not, userspace generates a GPU hang with IPEHR
+		 * pointing to the MI_WAIT_FOR_EVENT.
+		 *
+		 * This should only fail upon a hung GPU, in which case we
+		 * can safely continue.
+		 */
+		if (needs_modeset(crtc_state))
+			ret = i915_gem_object_wait_rendering(old_obj, true);
+
+		/* Swallow -EIO errors to allow updates during hw lockup. */
+		if (ret && ret != -EIO)
+			return ret;
+	}
+
+	if (!obj) {
+		ret = 0;
+	} else if (plane->type == DRM_PLANE_TYPE_CURSOR &&
 	    INTEL_INFO(dev)->cursor_needs_physical) {
 		int align = IS_I830(dev) ? 16 * 1024 : 256;
 		ret = i915_gem_object_attach_phys(obj, align);
 		if (ret)
 			DRM_DEBUG_KMS("failed to attach phys object\n");
 	} else {
-		ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL);
+		ret = intel_pin_and_fence_fb_obj(plane, fb, new_state);
 	}
 
-	if (ret == 0)
-		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
+	if (ret == 0) {
+		if (obj) {
+			struct intel_plane_state *plane_state =
+				to_intel_plane_state(new_state);
 
-	mutex_unlock(&dev->struct_mutex);
+			i915_gem_request_assign(&plane_state->wait_req,
+						obj->last_write_req);
+		}
+
+		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
+	}
 
 	return ret;
 }
@@ -13412,23 +13686,35 @@
  * @fb: old framebuffer that was on plane
  *
  * Cleans up a framebuffer that has just been removed from a plane.
+ *
+ * Must be called with struct_mutex held.
  */
 void
 intel_cleanup_plane_fb(struct drm_plane *plane,
 		       const struct drm_plane_state *old_state)
 {
 	struct drm_device *dev = plane->dev;
-	struct drm_i915_gem_object *obj = intel_fb_obj(old_state->fb);
+	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct intel_plane_state *old_intel_state;
+	struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
+	struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
 
-	if (!obj)
+	old_intel_state = to_intel_plane_state(old_state);
+
+	if (!obj && !old_obj)
 		return;
 
-	if (plane->type != DRM_PLANE_TYPE_CURSOR ||
-	    !INTEL_INFO(dev)->cursor_needs_physical) {
-		mutex_lock(&dev->struct_mutex);
+	if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR ||
+	    !INTEL_INFO(dev)->cursor_needs_physical))
 		intel_unpin_fb_obj(old_state->fb, old_state);
-		mutex_unlock(&dev->struct_mutex);
-	}
+
+	/* prepare_fb aborted? */
+	if ((old_obj && (old_obj->frontbuffer_bits & intel_plane->frontbuffer_bit)) ||
+	    (obj && !(obj->frontbuffer_bits & intel_plane->frontbuffer_bit)))
+		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
+
+	i915_gem_request_assign(&old_intel_state->wait_req, NULL);
+
 }
 
 int
@@ -13447,7 +13733,7 @@
 	crtc_clock = crtc_state->base.adjusted_mode.crtc_clock;
 	cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk;
 
-	if (!crtc_clock || !cdclk)
+	if (WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock))
 		return DRM_PLANE_HELPER_NO_SCALING;
 
 	/*
@@ -13495,18 +13781,8 @@
 	struct drm_framebuffer *fb = state->base.fb;
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc;
-	struct drm_rect *src = &state->src;
 
 	crtc = crtc ? crtc : plane->crtc;
-	intel_crtc = to_intel_crtc(crtc);
-
-	plane->fb = fb;
-	crtc->x = src->x1 >> 16;
-	crtc->y = src->y1 >> 16;
-
-	if (!crtc->state->active)
-		return;
 
 	dev_priv->display.update_primary_plane(crtc, fb,
 					       state->src.x1 >> 16,
@@ -13536,8 +13812,7 @@
 		intel_update_watermarks(crtc);
 
 	/* Perform vblank evasion around commit operation */
-	if (crtc->state->active)
-		intel_pipe_update_start(intel_crtc);
+	intel_pipe_update_start(intel_crtc);
 
 	if (modeset)
 		return;
@@ -13553,8 +13828,7 @@
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-	if (crtc->state->active)
-		intel_pipe_update_end(intel_crtc);
+	intel_pipe_update_end(intel_crtc);
 }
 
 /**
@@ -13737,8 +14011,7 @@
 	intel_crtc->cursor_bo = obj;
 
 update:
-	if (crtc->state->active)
-		intel_crtc_update_cursor(crtc, state->visible);
+	intel_crtc_update_cursor(crtc, state->visible);
 }
 
 static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
@@ -14010,7 +14283,7 @@
 		 */
 		found = I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED;
 		/* WaIgnoreDDIAStrap: skl */
-		if (found || IS_SKYLAKE(dev))
+		if (found || IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 			intel_ddi_init(dev, PORT_A);
 
 		/* DDI B, C and D detection is indicated by the SFUSE_STRAP
@@ -14026,7 +14299,7 @@
 		/*
 		 * On SKL we don't have a way to detect DDI-E so we rely on VBT.
 		 */
-		if (IS_SKYLAKE(dev) &&
+		if ((IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) &&
 		    (dev_priv->vbt.ddi_port_info[PORT_E].supports_dp ||
 		     dev_priv->vbt.ddi_port_info[PORT_E].supports_dvi ||
 		     dev_priv->vbt.ddi_port_info[PORT_E].supports_hdmi))
@@ -14041,7 +14314,7 @@
 
 		if (I915_READ(PCH_HDMIB) & SDVO_DETECTED) {
 			/* PCH SDVOB multiplex with HDMIB */
-			found = intel_sdvo_init(dev, PCH_SDVOB, true);
+			found = intel_sdvo_init(dev, PCH_SDVOB, PORT_B);
 			if (!found)
 				intel_hdmi_init(dev, PCH_HDMIB, PORT_B);
 			if (!found && (I915_READ(PCH_DP_B) & DP_DETECTED))
@@ -14097,7 +14370,7 @@
 
 		if (I915_READ(GEN3_SDVOB) & SDVO_DETECTED) {
 			DRM_DEBUG_KMS("probing SDVOB\n");
-			found = intel_sdvo_init(dev, GEN3_SDVOB, true);
+			found = intel_sdvo_init(dev, GEN3_SDVOB, PORT_B);
 			if (!found && IS_G4X(dev)) {
 				DRM_DEBUG_KMS("probing HDMI on SDVOB\n");
 				intel_hdmi_init(dev, GEN4_HDMIB, PORT_B);
@@ -14111,7 +14384,7 @@
 
 		if (I915_READ(GEN3_SDVOB) & SDVO_DETECTED) {
 			DRM_DEBUG_KMS("probing SDVOC\n");
-			found = intel_sdvo_init(dev, GEN3_SDVOC, false);
+			found = intel_sdvo_init(dev, GEN3_SDVOC, PORT_C);
 		}
 
 		if (!found && (I915_READ(GEN3_SDVOC) & SDVO_DETECTED)) {
@@ -14379,6 +14652,7 @@
 			      struct drm_file *filp,
 			      const struct drm_mode_fb_cmd2 *user_mode_cmd)
 {
+	struct drm_framebuffer *fb;
 	struct drm_i915_gem_object *obj;
 	struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
 
@@ -14387,7 +14661,11 @@
 	if (&obj->base == NULL)
 		return ERR_PTR(-ENOENT);
 
-	return intel_framebuffer_create(dev, &mode_cmd, obj);
+	fb = intel_framebuffer_create(dev, &mode_cmd, obj);
+	if (IS_ERR(fb))
+		drm_gem_object_unreference_unlocked(&obj->base);
+
+	return fb;
 }
 
 #ifndef CONFIG_DRM_FBDEV_EMULATION
@@ -14472,7 +14750,7 @@
 	}
 
 	/* Returns the core display clock speed */
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		dev_priv->display.get_display_clock_speed =
 			skylake_get_display_clock_speed;
 	else if (IS_BROXTON(dev))
@@ -14761,7 +15039,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u8 sr1;
-	u32 vga_reg = i915_vgacntrl_reg(dev);
+	i915_reg_t vga_reg = i915_vgacntrl_reg(dev);
 
 	/* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */
 	vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
@@ -14877,9 +15155,6 @@
 	i915_disable_vga(dev);
 	intel_setup_outputs(dev);
 
-	/* Just in case the BIOS is doing something questionable. */
-	intel_fbc_disable(dev_priv);
-
 	drm_modeset_lock_all(dev);
 	intel_modeset_setup_hw_state(dev);
 	drm_modeset_unlock_all(dev);
@@ -14966,10 +15241,9 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg;
+	i915_reg_t reg = PIPECONF(crtc->config->cpu_transcoder);
 
 	/* Clear any frame start delays used for debugging left by the BIOS */
-	reg = PIPECONF(crtc->config->cpu_transcoder);
 	I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK);
 
 	/* restore vblank interrupts to correct state */
@@ -15123,7 +15397,7 @@
 void i915_redisable_vga_power_on(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 vga_reg = i915_vgacntrl_reg(dev);
+	i915_reg_t vga_reg = i915_vgacntrl_reg(dev);
 
 	if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) {
 		DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n");
@@ -15162,7 +15436,7 @@
 	struct intel_plane_state *plane_state =
 		to_intel_plane_state(primary->state);
 
-	plane_state->visible =
+	plane_state->visible = crtc->active &&
 		primary_get_hw_state(to_intel_plane(primary));
 
 	if (plane_state->visible)
@@ -15419,8 +15693,7 @@
 		mutex_lock(&dev->struct_mutex);
 		ret = intel_pin_and_fence_fb_obj(c->primary,
 						 c->primary->fb,
-						 c->primary->state,
-						 NULL, NULL);
+						 c->primary->state);
 		mutex_unlock(&dev->struct_mutex);
 		if (ret) {
 			DRM_ERROR("failed to pin boot fb on pipe %d\n",
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 09bdd94..bec443a 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -277,7 +277,7 @@
 	 * See vlv_power_sequencer_reset() why we need
 	 * a power domain reference here.
 	 */
-	power_domain = intel_display_port_power_domain(encoder);
+	power_domain = intel_display_port_aux_power_domain(encoder);
 	intel_display_power_get(dev_priv, power_domain);
 
 	mutex_lock(&dev_priv->pps_mutex);
@@ -293,7 +293,7 @@
 
 	mutex_unlock(&dev_priv->pps_mutex);
 
-	power_domain = intel_display_port_power_domain(encoder);
+	power_domain = intel_display_port_aux_power_domain(encoder);
 	intel_display_power_put(dev_priv, power_domain);
 }
 
@@ -541,7 +541,8 @@
 	}
 }
 
-static u32 _pp_ctrl_reg(struct intel_dp *intel_dp)
+static i915_reg_t
+_pp_ctrl_reg(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
@@ -553,7 +554,8 @@
 		return VLV_PIPE_PP_CONTROL(vlv_power_sequencer_pipe(intel_dp));
 }
 
-static u32 _pp_stat_reg(struct intel_dp *intel_dp)
+static i915_reg_t
+_pp_stat_reg(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
@@ -582,7 +584,7 @@
 
 	if (IS_VALLEYVIEW(dev)) {
 		enum pipe pipe = vlv_power_sequencer_pipe(intel_dp);
-		u32 pp_ctrl_reg, pp_div_reg;
+		i915_reg_t pp_ctrl_reg, pp_div_reg;
 		u32 pp_div;
 
 		pp_ctrl_reg = VLV_PIPE_PP_CONTROL(pipe);
@@ -652,7 +654,7 @@
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t ch_ctl = intel_dp->aux_ch_ctl_reg;
+	i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg;
 	uint32_t status;
 	bool done;
 
@@ -750,7 +752,7 @@
 	else
 		precharge = 5;
 
-	if (IS_BROADWELL(dev) && intel_dp->aux_ch_ctl_reg == DPA_AUX_CH_CTL)
+	if (IS_BROADWELL(dev) && intel_dig_port->port == PORT_A)
 		timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
 	else
 		timeout = DP_AUX_CH_CTL_TIME_OUT_400us;
@@ -789,8 +791,7 @@
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t ch_ctl = intel_dp->aux_ch_ctl_reg;
-	uint32_t ch_data = ch_ctl + 4;
+	i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg;
 	uint32_t aux_clock_divider;
 	int i, ret, recv_bytes;
 	uint32_t status;
@@ -816,8 +817,6 @@
 
 	intel_dp_check_edp(intel_dp);
 
-	intel_aux_display_runtime_get(dev_priv);
-
 	/* Try to wait for any previous AUX channel activity */
 	for (try = 0; try < 3; try++) {
 		status = I915_READ_NOTRACE(ch_ctl);
@@ -856,7 +855,7 @@
 		for (try = 0; try < 5; try++) {
 			/* Load the send data into the aux channel data registers */
 			for (i = 0; i < send_bytes; i += 4)
-				I915_WRITE(ch_data + i,
+				I915_WRITE(intel_dp->aux_ch_data_reg[i >> 2],
 					   intel_dp_pack_aux(send + i,
 							     send_bytes - i));
 
@@ -920,13 +919,12 @@
 		recv_bytes = recv_size;
 
 	for (i = 0; i < recv_bytes; i += 4)
-		intel_dp_unpack_aux(I915_READ(ch_data + i),
+		intel_dp_unpack_aux(I915_READ(intel_dp->aux_ch_data_reg[i >> 2]),
 				    recv + i, recv_bytes - i);
 
 	ret = recv_bytes;
 out:
 	pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
-	intel_aux_display_runtime_put(dev_priv);
 
 	if (vdd)
 		edp_panel_vdd_off(intel_dp, false);
@@ -1008,96 +1006,206 @@
 	return ret;
 }
 
-static void
-intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
+static i915_reg_t g4x_aux_ctl_reg(struct drm_i915_private *dev_priv,
+				       enum port port)
 {
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	enum port port = intel_dig_port->port;
-	struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port];
-	const char *name = NULL;
-	uint32_t porte_aux_ctl_reg = DPA_AUX_CH_CTL;
-	int ret;
-
-	/* On SKL we don't have Aux for port E so we rely on VBT to set
-	 * a proper alternate aux channel.
-	 */
-	if (IS_SKYLAKE(dev) && port == PORT_E) {
-		switch (info->alternate_aux_channel) {
-		case DP_AUX_B:
-			porte_aux_ctl_reg = DPB_AUX_CH_CTL;
-			break;
-		case DP_AUX_C:
-			porte_aux_ctl_reg = DPC_AUX_CH_CTL;
-			break;
-		case DP_AUX_D:
-			porte_aux_ctl_reg = DPD_AUX_CH_CTL;
-			break;
-		case DP_AUX_A:
-		default:
-			porte_aux_ctl_reg = DPA_AUX_CH_CTL;
-		}
+	switch (port) {
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return DP_AUX_CH_CTL(port);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_CTL(PORT_B);
 	}
+}
+
+static i915_reg_t g4x_aux_data_reg(struct drm_i915_private *dev_priv,
+					enum port port, int index)
+{
+	switch (port) {
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return DP_AUX_CH_DATA(port, index);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_DATA(PORT_B, index);
+	}
+}
+
+static i915_reg_t ilk_aux_ctl_reg(struct drm_i915_private *dev_priv,
+				       enum port port)
+{
+	switch (port) {
+	case PORT_A:
+		return DP_AUX_CH_CTL(port);
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return PCH_DP_AUX_CH_CTL(port);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_CTL(PORT_A);
+	}
+}
+
+static i915_reg_t ilk_aux_data_reg(struct drm_i915_private *dev_priv,
+					enum port port, int index)
+{
+	switch (port) {
+	case PORT_A:
+		return DP_AUX_CH_DATA(port, index);
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return PCH_DP_AUX_CH_DATA(port, index);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_DATA(PORT_A, index);
+	}
+}
+
+/*
+ * On SKL we don't have Aux for port E so we rely
+ * on VBT to set a proper alternate aux channel.
+ */
+static enum port skl_porte_aux_port(struct drm_i915_private *dev_priv)
+{
+	const struct ddi_vbt_port_info *info =
+		&dev_priv->vbt.ddi_port_info[PORT_E];
+
+	switch (info->alternate_aux_channel) {
+	case DP_AUX_A:
+		return PORT_A;
+	case DP_AUX_B:
+		return PORT_B;
+	case DP_AUX_C:
+		return PORT_C;
+	case DP_AUX_D:
+		return PORT_D;
+	default:
+		MISSING_CASE(info->alternate_aux_channel);
+		return PORT_A;
+	}
+}
+
+static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv,
+				       enum port port)
+{
+	if (port == PORT_E)
+		port = skl_porte_aux_port(dev_priv);
 
 	switch (port) {
 	case PORT_A:
-		intel_dp->aux_ch_ctl_reg = DPA_AUX_CH_CTL;
-		name = "DPDDC-A";
-		break;
 	case PORT_B:
-		intel_dp->aux_ch_ctl_reg = PCH_DPB_AUX_CH_CTL;
-		name = "DPDDC-B";
-		break;
 	case PORT_C:
-		intel_dp->aux_ch_ctl_reg = PCH_DPC_AUX_CH_CTL;
-		name = "DPDDC-C";
-		break;
 	case PORT_D:
-		intel_dp->aux_ch_ctl_reg = PCH_DPD_AUX_CH_CTL;
-		name = "DPDDC-D";
-		break;
-	case PORT_E:
-		intel_dp->aux_ch_ctl_reg = porte_aux_ctl_reg;
-		name = "DPDDC-E";
-		break;
+		return DP_AUX_CH_CTL(port);
 	default:
-		BUG();
+		MISSING_CASE(port);
+		return DP_AUX_CH_CTL(PORT_A);
 	}
+}
 
-	/*
-	 * The AUX_CTL register is usually DP_CTL + 0x10.
-	 *
-	 * On Haswell and Broadwell though:
-	 *   - Both port A DDI_BUF_CTL and DDI_AUX_CTL are on the CPU
-	 *   - Port B/C/D AUX channels are on the PCH, DDI_BUF_CTL on the CPU
-	 *
-	 * Skylake moves AUX_CTL back next to DDI_BUF_CTL, on the CPU.
-	 */
-	if (!IS_HASWELL(dev) && !IS_BROADWELL(dev) && port != PORT_E)
-		intel_dp->aux_ch_ctl_reg = intel_dp->output_reg + 0x10;
+static i915_reg_t skl_aux_data_reg(struct drm_i915_private *dev_priv,
+					enum port port, int index)
+{
+	if (port == PORT_E)
+		port = skl_porte_aux_port(dev_priv);
 
-	intel_dp->aux.name = name;
+	switch (port) {
+	case PORT_A:
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return DP_AUX_CH_DATA(port, index);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_DATA(PORT_A, index);
+	}
+}
+
+static i915_reg_t intel_aux_ctl_reg(struct drm_i915_private *dev_priv,
+					 enum port port)
+{
+	if (INTEL_INFO(dev_priv)->gen >= 9)
+		return skl_aux_ctl_reg(dev_priv, port);
+	else if (HAS_PCH_SPLIT(dev_priv))
+		return ilk_aux_ctl_reg(dev_priv, port);
+	else
+		return g4x_aux_ctl_reg(dev_priv, port);
+}
+
+static i915_reg_t intel_aux_data_reg(struct drm_i915_private *dev_priv,
+					  enum port port, int index)
+{
+	if (INTEL_INFO(dev_priv)->gen >= 9)
+		return skl_aux_data_reg(dev_priv, port, index);
+	else if (HAS_PCH_SPLIT(dev_priv))
+		return ilk_aux_data_reg(dev_priv, port, index);
+	else
+		return g4x_aux_data_reg(dev_priv, port, index);
+}
+
+static void intel_aux_reg_init(struct intel_dp *intel_dp)
+{
+	struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp));
+	enum port port = dp_to_dig_port(intel_dp)->port;
+	int i;
+
+	intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, port);
+	for (i = 0; i < ARRAY_SIZE(intel_dp->aux_ch_data_reg); i++)
+		intel_dp->aux_ch_data_reg[i] = intel_aux_data_reg(dev_priv, port, i);
+}
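The table built here replaces the old "ch_data + i" arithmetic in the transfer loop above: each AUX_CH_DATA register carries four bytes of the message, so byte offset i selects entry i >> 2. A minimal sketch of that mapping, using a hypothetical helper name that is not part of the patch:

	static inline int aux_data_reg_index(int byte_offset)
	{
		/* bytes 0-3 -> data reg 0, bytes 4-7 -> data reg 1, and so on */
		return byte_offset >> 2;
	}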
+
+static void
+intel_dp_aux_fini(struct intel_dp *intel_dp)
+{
+	drm_dp_aux_unregister(&intel_dp->aux);
+	kfree(intel_dp->aux.name);
+}
+
+static int
+intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	enum port port = intel_dig_port->port;
+	int ret;
+
+	intel_aux_reg_init(intel_dp);
+
+	intel_dp->aux.name = kasprintf(GFP_KERNEL, "DPDDC-%c", port_name(port));
+	if (!intel_dp->aux.name)
+		return -ENOMEM;
+
 	intel_dp->aux.dev = dev->dev;
 	intel_dp->aux.transfer = intel_dp_aux_transfer;
 
-	DRM_DEBUG_KMS("registering %s bus for %s\n", name,
+	DRM_DEBUG_KMS("registering %s bus for %s\n",
+		      intel_dp->aux.name,
 		      connector->base.kdev->kobj.name);
 
 	ret = drm_dp_aux_register(&intel_dp->aux);
 	if (ret < 0) {
 		DRM_ERROR("drm_dp_aux_register() for %s failed (%d)\n",
-			  name, ret);
-		return;
+			  intel_dp->aux.name, ret);
+		kfree(intel_dp->aux.name);
+		return ret;
 	}
 
 	ret = sysfs_create_link(&connector->base.kdev->kobj,
 				&intel_dp->aux.ddc.dev.kobj,
 				intel_dp->aux.ddc.dev.kobj.name);
 	if (ret < 0) {
-		DRM_ERROR("sysfs_create_link() for %s failed (%d)\n", name, ret);
-		drm_dp_aux_unregister(&intel_dp->aux);
+		DRM_ERROR("sysfs_create_link() for %s failed (%d)\n",
+			  intel_dp->aux.name, ret);
+		intel_dp_aux_fini(intel_dp);
+		return ret;
 	}
+
+	return 0;
 }
 
 static void
@@ -1189,10 +1297,13 @@
 	return (intel_dp_max_link_bw(intel_dp) >> 3) + 1;
 }
 
-static bool intel_dp_source_supports_hbr2(struct drm_device *dev)
+bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp)
 {
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
+
 	/* WaDisableHBR2:skl */
-	if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0)
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0))
 		return false;
 
 	if ((IS_HASWELL(dev) && !IS_HSW_ULX(dev)) || IS_BROADWELL(dev) ||
@@ -1203,14 +1314,16 @@
 }
 
 static int
-intel_dp_source_rates(struct drm_device *dev, const int **source_rates)
+intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
 {
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
 	int size;
 
 	if (IS_BROXTON(dev)) {
 		*source_rates = bxt_rates;
 		size = ARRAY_SIZE(bxt_rates);
-	} else if (IS_SKYLAKE(dev)) {
+	} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		*source_rates = skl_rates;
 		size = ARRAY_SIZE(skl_rates);
 	} else {
@@ -1219,7 +1332,7 @@
 	}
 
 	/* This depends on the fact that 5.4 is last value in the array */
-	if (!intel_dp_source_supports_hbr2(dev))
+	if (!intel_dp_source_supports_hbr2(intel_dp))
 		size--;
 
 	return size;
@@ -1284,12 +1397,11 @@
 static int intel_dp_common_rates(struct intel_dp *intel_dp,
 				 int *common_rates)
 {
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	const int *source_rates, *sink_rates;
 	int source_len, sink_len;
 
 	sink_len = intel_dp_sink_rates(intel_dp, &sink_rates);
-	source_len = intel_dp_source_rates(dev, &source_rates);
+	source_len = intel_dp_source_rates(intel_dp, &source_rates);
 
 	return intersect_rates(source_rates, source_len,
 			       sink_rates, sink_len,
@@ -1314,7 +1426,6 @@
 
 static void intel_dp_print_rates(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	const int *source_rates, *sink_rates;
 	int source_len, sink_len, common_len;
 	int common_rates[DP_MAX_SUPPORTED_RATES];
@@ -1323,7 +1434,7 @@
 	if ((drm_debug & DRM_UT_KMS) == 0)
 		return;
 
-	source_len = intel_dp_source_rates(dev, &source_rates);
+	source_len = intel_dp_source_rates(intel_dp, &source_rates);
 	snprintf_int_array(str, sizeof(str), source_rates, source_len);
 	DRM_DEBUG_KMS("source rates: %s\n", str);
 
@@ -1365,8 +1476,8 @@
 	return rate_to_index(rate, intel_dp->sink_rates);
 }
 
-static void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
-				  uint8_t *link_bw, uint8_t *rate_select)
+void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
+			   uint8_t *link_bw, uint8_t *rate_select)
 {
 	if (intel_dp->num_sink_rates) {
 		*link_bw = 0;
@@ -1426,7 +1537,7 @@
 				return ret;
 		}
 
-		if (!HAS_PCH_SPLIT(dev))
+		if (HAS_GMCH_DISPLAY(dev))
 			intel_gmch_panel_fitting(intel_crtc, pipe_config,
 						 intel_connector->panel.fitting_mode);
 		else
@@ -1530,7 +1641,7 @@
 				&pipe_config->dp_m2_n2);
 	}
 
-	if (IS_SKYLAKE(dev) && is_edp(intel_dp))
+	if ((IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) && is_edp(intel_dp))
 		skl_edp_set_pll_config(pipe_config);
 	else if (IS_BROXTON(dev))
 		/* handled in ddi */;
@@ -1542,37 +1653,6 @@
 	return true;
 }
 
-static void ironlake_set_pll_cpu_edp(struct intel_dp *intel_dp)
-{
-	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
-	struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc);
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 dpa_ctl;
-
-	DRM_DEBUG_KMS("eDP PLL enable for clock %d\n",
-		      crtc->config->port_clock);
-	dpa_ctl = I915_READ(DP_A);
-	dpa_ctl &= ~DP_PLL_FREQ_MASK;
-
-	if (crtc->config->port_clock == 162000) {
-		/* For a long time we've carried around a ILK-DevA w/a for the
-		 * 160MHz clock. If we're really unlucky, it's still required.
-		 */
-		DRM_DEBUG_KMS("160MHz cpu eDP clock, might need ilk devA w/a\n");
-		dpa_ctl |= DP_PLL_FREQ_160MHZ;
-		intel_dp->DP |= DP_PLL_FREQ_160MHZ;
-	} else {
-		dpa_ctl |= DP_PLL_FREQ_270MHZ;
-		intel_dp->DP |= DP_PLL_FREQ_270MHZ;
-	}
-
-	I915_WRITE(DP_A, dpa_ctl);
-
-	POSTING_READ(DP_A);
-	udelay(500);
-}
-
 void intel_dp_set_link_params(struct intel_dp *intel_dp,
 			      const struct intel_crtc_state *pipe_config)
 {
@@ -1617,9 +1697,6 @@
 	intel_dp->DP |= DP_VOLTAGE_0_4 | DP_PRE_EMPHASIS_0;
 	intel_dp->DP |= DP_PORT_WIDTH(crtc->config->lane_count);
 
-	if (crtc->config->has_audio)
-		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
-
 	/* Split out the IBX/CPU vs CPT settings */
 
 	if (IS_GEN7(dev) && port == PORT_A) {
@@ -1680,7 +1757,7 @@
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 pp_stat_reg, pp_ctrl_reg;
+	i915_reg_t pp_stat_reg, pp_ctrl_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -1770,7 +1847,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum intel_display_power_domain power_domain;
 	u32 pp;
-	u32 pp_stat_reg, pp_ctrl_reg;
+	i915_reg_t pp_stat_reg, pp_ctrl_reg;
 	bool need_to_disable = !intel_dp->want_panel_vdd;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
@@ -1784,7 +1861,7 @@
 	if (edp_have_panel_vdd(intel_dp))
 		return need_to_disable;
 
-	power_domain = intel_display_port_power_domain(intel_encoder);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_get(dev_priv, power_domain);
 
 	DRM_DEBUG_KMS("Turning eDP port %c VDD on\n",
@@ -1846,7 +1923,7 @@
 	struct intel_encoder *intel_encoder = &intel_dig_port->base;
 	enum intel_display_power_domain power_domain;
 	u32 pp;
-	u32 pp_stat_reg, pp_ctrl_reg;
+	i915_reg_t pp_stat_reg, pp_ctrl_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -1874,7 +1951,7 @@
 	if ((pp & POWER_TARGET_ON) == 0)
 		intel_dp->last_power_cycle = jiffies;
 
-	power_domain = intel_display_port_power_domain(intel_encoder);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_put(dev_priv, power_domain);
 }
 
@@ -1933,7 +2010,7 @@
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
-	u32 pp_ctrl_reg;
+	i915_reg_t pp_ctrl_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -1995,7 +2072,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum intel_display_power_domain power_domain;
 	u32 pp;
-	u32 pp_ctrl_reg;
+	i915_reg_t pp_ctrl_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -2025,7 +2102,7 @@
 	wait_panel_off(intel_dp);
 
 	/* We got a reference when we enabled the VDD. */
-	power_domain = intel_display_port_power_domain(intel_encoder);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_put(dev_priv, power_domain);
 }
 
@@ -2046,7 +2123,7 @@
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
-	u32 pp_ctrl_reg;
+	i915_reg_t pp_ctrl_reg;
 
 	/*
 	 * If we enable the backlight right away following a panel power
@@ -2087,7 +2164,7 @@
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
-	u32 pp_ctrl_reg;
+	i915_reg_t pp_ctrl_reg;
 
 	if (!is_edp(intel_dp))
 		return;
@@ -2146,27 +2223,61 @@
 		_intel_edp_backlight_off(intel_dp);
 }
 
+static const char *state_string(bool enabled)
+{
+	return enabled ? "on" : "off";
+}
+
+static void assert_dp_port(struct intel_dp *intel_dp, bool state)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+	bool cur_state = I915_READ(intel_dp->output_reg) & DP_PORT_EN;
+
+	I915_STATE_WARN(cur_state != state,
+			"DP port %c state assertion failure (expected %s, current %s)\n",
+			port_name(dig_port->port),
+			state_string(state), state_string(cur_state));
+}
+#define assert_dp_port_disabled(d) assert_dp_port((d), false)
+
+static void assert_edp_pll(struct drm_i915_private *dev_priv, bool state)
+{
+	bool cur_state = I915_READ(DP_A) & DP_PLL_ENABLE;
+
+	I915_STATE_WARN(cur_state != state,
+			"eDP PLL state assertion failure (expected %s, current %s)\n",
+			state_string(state), state_string(cur_state));
+}
+#define assert_edp_pll_enabled(d) assert_edp_pll((d), true)
+#define assert_edp_pll_disabled(d) assert_edp_pll((d), false)
+
 static void ironlake_edp_pll_on(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 dpa_ctl;
+	struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	assert_pipe_disabled(dev_priv,
-			     to_intel_crtc(crtc)->pipe);
+	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_dp_port_disabled(intel_dp);
+	assert_edp_pll_disabled(dev_priv);
 
-	DRM_DEBUG_KMS("\n");
-	dpa_ctl = I915_READ(DP_A);
-	WARN(dpa_ctl & DP_PLL_ENABLE, "dp pll on, should be off\n");
-	WARN(dpa_ctl & DP_PORT_EN, "dp port still on, should be off\n");
+	DRM_DEBUG_KMS("enabling eDP PLL for clock %d\n",
+		      crtc->config->port_clock);
 
-	/* We don't adjust intel_dp->DP while tearing down the link, to
-	 * facilitate link retraining (e.g. after hotplug). Hence clear all
-	 * enable bits here to ensure that we don't enable too much. */
-	intel_dp->DP &= ~(DP_PORT_EN | DP_AUDIO_OUTPUT_ENABLE);
+	intel_dp->DP &= ~DP_PLL_FREQ_MASK;
+
+	if (crtc->config->port_clock == 162000)
+		intel_dp->DP |= DP_PLL_FREQ_162MHZ;
+	else
+		intel_dp->DP |= DP_PLL_FREQ_270MHZ;
+
+	I915_WRITE(DP_A, intel_dp->DP);
+	POSTING_READ(DP_A);
+	udelay(500);
+
 	intel_dp->DP |= DP_PLL_ENABLE;
+
 	I915_WRITE(DP_A, intel_dp->DP);
 	POSTING_READ(DP_A);
 	udelay(200);
@@ -2175,24 +2286,18 @@
 static void ironlake_edp_pll_off(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 dpa_ctl;
+	struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	assert_pipe_disabled(dev_priv,
-			     to_intel_crtc(crtc)->pipe);
+	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_dp_port_disabled(intel_dp);
+	assert_edp_pll_enabled(dev_priv);
 
-	dpa_ctl = I915_READ(DP_A);
-	WARN((dpa_ctl & DP_PLL_ENABLE) == 0,
-	     "dp pll off, should be on\n");
-	WARN(dpa_ctl & DP_PORT_EN, "dp port still on, should be off\n");
+	DRM_DEBUG_KMS("disabling eDP PLL\n");
 
-	/* We can't rely on the value tracked for the DP register in
-	 * intel_dp->DP because link_down must not change that (otherwise link
-	 * re-training will fail. */
-	dpa_ctl &= ~DP_PLL_ENABLE;
-	I915_WRITE(DP_A, dpa_ctl);
+	intel_dp->DP &= ~DP_PLL_ENABLE;
+
+	I915_WRITE(DP_A, intel_dp->DP);
 	POSTING_READ(DP_A);
 	udelay(200);
 }
@@ -2261,7 +2366,7 @@
 		}
 
 		DRM_DEBUG_KMS("No pipe for dp port 0x%x found\n",
-			      intel_dp->output_reg);
+			      i915_mmio_reg_offset(intel_dp->output_reg));
 	} else if (IS_CHERRYVIEW(dev)) {
 		*pipe = DP_PORT_TO_PIPE_CHV(tmp);
 	} else {
@@ -2324,7 +2429,7 @@
 	intel_dp_get_m_n(crtc, pipe_config);
 
 	if (port == PORT_A) {
-		if ((I915_READ(DP_A) & DP_PLL_FREQ_MASK) == DP_PLL_FREQ_160MHZ)
+		if ((I915_READ(DP_A) & DP_PLL_FREQ_MASK) == DP_PLL_FREQ_162MHZ)
 			pipe_config->port_clock = 162000;
 		else
 			pipe_config->port_clock = 270000;
@@ -2389,6 +2494,8 @@
 	enum port port = dp_to_dig_port(intel_dp)->port;
 
 	intel_dp_link_down(intel_dp);
+
+	/* Only ilk+ has port A */
 	if (port == PORT_A)
 		ironlake_edp_pll_off(intel_dp);
 }
@@ -2548,6 +2655,8 @@
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *crtc =
+		to_intel_crtc(dp_to_dig_port(intel_dp)->base.base.crtc);
 
 	/* enable with pattern 1 (as per spec) */
 	_intel_dp_set_link_train(intel_dp, &intel_dp->DP,
@@ -2563,6 +2672,8 @@
 	 * fail when the power sequencer is freshly used for this port.
 	 */
 	intel_dp->DP |= DP_PORT_EN;
+	if (crtc->config->has_audio)
+		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
 
 	I915_WRITE(intel_dp->output_reg, intel_dp->DP);
 	POSTING_READ(intel_dp->output_reg);
@@ -2575,6 +2686,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
 	uint32_t dp_reg = I915_READ(intel_dp->output_reg);
+	enum port port = dp_to_dig_port(intel_dp)->port;
+	enum pipe pipe = crtc->pipe;
 
 	if (WARN_ON(dp_reg & DP_PORT_EN))
 		return;
@@ -2586,6 +2699,17 @@
 
 	intel_dp_enable_port(intel_dp);
 
+	if (port == PORT_A && IS_GEN5(dev_priv)) {
+		/*
+		 * Underrun reporting for the other pipe was disabled in
+		 * g4x_pre_enable_dp(). The eDP PLL and port have now been
+		 * enabled, so it's now safe to re-enable underrun reporting.
+		 */
+		intel_wait_for_vblank_if_active(dev_priv->dev, !pipe);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, !pipe, true);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, !pipe, true);
+	}
+
 	edp_panel_vdd_on(intel_dp);
 	edp_panel_on(intel_dp);
 	edp_panel_vdd_off(intel_dp, true);
@@ -2608,7 +2732,7 @@
 
 	if (crtc->config->has_audio) {
 		DRM_DEBUG_DRIVER("Enabling DP audio on pipe %c\n",
-				 pipe_name(crtc->pipe));
+				 pipe_name(pipe));
 		intel_audio_codec_enable(encoder);
 	}
 }
@@ -2631,16 +2755,29 @@
 
 static void g4x_pre_enable_dp(struct intel_encoder *encoder)
 {
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
+	enum port port = dp_to_dig_port(intel_dp)->port;
+	enum pipe pipe = to_intel_crtc(encoder->base.crtc)->pipe;
 
 	intel_dp_prepare(encoder);
 
-	/* Only ilk+ has port A */
-	if (dport->port == PORT_A) {
-		ironlake_set_pll_cpu_edp(intel_dp);
-		ironlake_edp_pll_on(intel_dp);
+	if (port == PORT_A && IS_GEN5(dev_priv)) {
+		/*
+		 * We get FIFO underruns on the other pipe when
+		 * enabling the CPU eDP PLL, and when enabling the CPU
+		 * eDP port. We could potentially avoid the PLL
+		 * underrun with a vblank wait just prior to enabling
+		 * the PLL, but that doesn't appear to help the port
+		 * enable case. Just sweep it all under the rug.
+		 */
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, !pipe, false);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, !pipe, false);
 	}
+
+	/* Only ilk+ has port A */
+	if (port == PORT_A)
+		ironlake_edp_pll_on(intel_dp);
 }
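The suppression added here pairs with the re-enable in intel_enable_dp() above: both sides flip the same two reporting flags for the other pipe. A minimal sketch of that pairing, with suppress_other_pipe_underruns() as a hypothetical wrapper that is not part of the patch:

	static void suppress_other_pipe_underruns(struct drm_i915_private *dev_priv,
						  enum pipe pipe, bool enable)
	{
		/* pipe drives the eDP; !pipe is the one that sees spurious underruns */
		intel_set_cpu_fifo_underrun_reporting(dev_priv, !pipe, enable);
		intel_set_pch_fifo_underrun_reporting(dev_priv, !pipe, enable);
	}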
 
 static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
@@ -2648,7 +2785,7 @@
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = intel_dig_port->base.base.dev->dev_private;
 	enum pipe pipe = intel_dp->pps_pipe;
-	int pp_on_reg = VLV_PIPE_PP_ON_DELAYS(pipe);
+	i915_reg_t pp_on_reg = VLV_PIPE_PP_ON_DELAYS(pipe);
 
 	edp_panel_vdd_off_sync(intel_dp);
 
@@ -3046,7 +3183,7 @@
  * Fetch AUX CH registers 0x202 - 0x207 which contain
  * link status information
  */
-static bool
+bool
 intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE])
 {
 	return intel_dp_dpcd_read_wake(&intel_dp->aux,
@@ -3056,7 +3193,7 @@
 }
 
 /* These are source-specific values. */
-static uint8_t
+uint8_t
 intel_dp_voltage_max(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
@@ -3079,7 +3216,7 @@
 		return DP_TRAIN_VOLTAGE_SWING_LEVEL_2;
 }
 
-static uint8_t
+uint8_t
 intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
@@ -3421,38 +3558,6 @@
 	return 0;
 }
 
-static void
-intel_get_adjust_train(struct intel_dp *intel_dp,
-		       const uint8_t link_status[DP_LINK_STATUS_SIZE])
-{
-	uint8_t v = 0;
-	uint8_t p = 0;
-	int lane;
-	uint8_t voltage_max;
-	uint8_t preemph_max;
-
-	for (lane = 0; lane < intel_dp->lane_count; lane++) {
-		uint8_t this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
-		uint8_t this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
-
-		if (this_v > v)
-			v = this_v;
-		if (this_p > p)
-			p = this_p;
-	}
-
-	voltage_max = intel_dp_voltage_max(intel_dp);
-	if (v >= voltage_max)
-		v = voltage_max | DP_TRAIN_MAX_SWING_REACHED;
-
-	preemph_max = intel_dp_pre_emphasis_max(intel_dp, v);
-	if (p >= preemph_max)
-		p = preemph_max | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
-
-	for (lane = 0; lane < 4; lane++)
-		intel_dp->train_set[lane] = v | p;
-}
-
 static uint32_t
 gen4_signal_levels(uint8_t train_set)
 {
@@ -3550,13 +3655,13 @@
 	}
 }
 
-/* Properly updates "DP" with the correct signal levels. */
-static void
-intel_dp_set_signal_levels(struct intel_dp *intel_dp, uint32_t *DP)
+void
+intel_dp_set_signal_levels(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	enum port port = intel_dig_port->port;
 	struct drm_device *dev = intel_dig_port->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	uint32_t signal_levels, mask = 0;
 	uint8_t train_set = intel_dp->train_set[0];
 
@@ -3591,74 +3696,27 @@
 		(train_set & DP_TRAIN_PRE_EMPHASIS_MASK) >>
 			DP_TRAIN_PRE_EMPHASIS_SHIFT);
 
-	*DP = (*DP & ~mask) | signal_levels;
+	intel_dp->DP = (intel_dp->DP & ~mask) | signal_levels;
+
+	I915_WRITE(intel_dp->output_reg, intel_dp->DP);
+	POSTING_READ(intel_dp->output_reg);
 }
 
-static bool
-intel_dp_set_link_train(struct intel_dp *intel_dp,
-			uint32_t *DP,
-			uint8_t dp_train_pat)
+void
+intel_dp_program_link_training_pattern(struct intel_dp *intel_dp,
+				       uint8_t dp_train_pat)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv =
 		to_i915(intel_dig_port->base.base.dev);
-	uint8_t buf[sizeof(intel_dp->train_set) + 1];
-	int ret, len;
 
-	_intel_dp_set_link_train(intel_dp, DP, dp_train_pat);
+	_intel_dp_set_link_train(intel_dp, &intel_dp->DP, dp_train_pat);
 
-	I915_WRITE(intel_dp->output_reg, *DP);
+	I915_WRITE(intel_dp->output_reg, intel_dp->DP);
 	POSTING_READ(intel_dp->output_reg);
-
-	buf[0] = dp_train_pat;
-	if ((dp_train_pat & DP_TRAINING_PATTERN_MASK) ==
-	    DP_TRAINING_PATTERN_DISABLE) {
-		/* don't write DP_TRAINING_LANEx_SET on disable */
-		len = 1;
-	} else {
-		/* DP_TRAINING_LANEx_SET follow DP_TRAINING_PATTERN_SET */
-		memcpy(buf + 1, intel_dp->train_set, intel_dp->lane_count);
-		len = intel_dp->lane_count + 1;
-	}
-
-	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_PATTERN_SET,
-				buf, len);
-
-	return ret == len;
 }
 
-static bool
-intel_dp_reset_link_train(struct intel_dp *intel_dp, uint32_t *DP,
-			uint8_t dp_train_pat)
-{
-	if (!intel_dp->train_set_valid)
-		memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
-	intel_dp_set_signal_levels(intel_dp, DP);
-	return intel_dp_set_link_train(intel_dp, DP, dp_train_pat);
-}
-
-static bool
-intel_dp_update_link_train(struct intel_dp *intel_dp, uint32_t *DP,
-			   const uint8_t link_status[DP_LINK_STATUS_SIZE])
-{
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	struct drm_i915_private *dev_priv =
-		to_i915(intel_dig_port->base.base.dev);
-	int ret;
-
-	intel_get_adjust_train(intel_dp, link_status);
-	intel_dp_set_signal_levels(intel_dp, DP);
-
-	I915_WRITE(intel_dp->output_reg, *DP);
-	POSTING_READ(intel_dp->output_reg);
-
-	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET,
-				intel_dp->train_set, intel_dp->lane_count);
-
-	return ret == intel_dp->lane_count;
-}
-
-static void intel_dp_set_idle_link_train(struct intel_dp *intel_dp)
+void intel_dp_set_idle_link_train(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
@@ -3689,232 +3747,6 @@
 		DRM_ERROR("Timed out waiting for DP idle patterns\n");
 }
 
-/* Enable corresponding port and start training pattern 1 */
-static void
-intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
-{
-	struct drm_encoder *encoder = &dp_to_dig_port(intel_dp)->base.base;
-	struct drm_device *dev = encoder->dev;
-	int i;
-	uint8_t voltage;
-	int voltage_tries, loop_tries;
-	uint32_t DP = intel_dp->DP;
-	uint8_t link_config[2];
-	uint8_t link_bw, rate_select;
-
-	if (HAS_DDI(dev))
-		intel_ddi_prepare_link_retrain(encoder);
-
-	intel_dp_compute_rate(intel_dp, intel_dp->link_rate,
-			      &link_bw, &rate_select);
-
-	/* Write the link configuration data */
-	link_config[0] = link_bw;
-	link_config[1] = intel_dp->lane_count;
-	if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
-		link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
-	drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
-	if (intel_dp->num_sink_rates)
-		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
-				  &rate_select, 1);
-
-	link_config[0] = 0;
-	link_config[1] = DP_SET_ANSI_8B10B;
-	drm_dp_dpcd_write(&intel_dp->aux, DP_DOWNSPREAD_CTRL, link_config, 2);
-
-	DP |= DP_PORT_EN;
-
-	/* clock recovery */
-	if (!intel_dp_reset_link_train(intel_dp, &DP,
-				       DP_TRAINING_PATTERN_1 |
-				       DP_LINK_SCRAMBLING_DISABLE)) {
-		DRM_ERROR("failed to enable link training\n");
-		return;
-	}
-
-	voltage = 0xff;
-	voltage_tries = 0;
-	loop_tries = 0;
-	for (;;) {
-		uint8_t link_status[DP_LINK_STATUS_SIZE];
-
-		drm_dp_link_train_clock_recovery_delay(intel_dp->dpcd);
-		if (!intel_dp_get_link_status(intel_dp, link_status)) {
-			DRM_ERROR("failed to get link status\n");
-			break;
-		}
-
-		if (drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) {
-			DRM_DEBUG_KMS("clock recovery OK\n");
-			break;
-		}
-
-		/*
-		 * if we used previously trained voltage and pre-emphasis values
-		 * and we don't get clock recovery, reset link training values
-		 */
-		if (intel_dp->train_set_valid) {
-			DRM_DEBUG_KMS("clock recovery not ok, reset");
-			/* clear the flag as we are not reusing train set */
-			intel_dp->train_set_valid = false;
-			if (!intel_dp_reset_link_train(intel_dp, &DP,
-						       DP_TRAINING_PATTERN_1 |
-						       DP_LINK_SCRAMBLING_DISABLE)) {
-				DRM_ERROR("failed to enable link training\n");
-				return;
-			}
-			continue;
-		}
-
-		/* Check to see if we've tried the max voltage */
-		for (i = 0; i < intel_dp->lane_count; i++)
-			if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
-				break;
-		if (i == intel_dp->lane_count) {
-			++loop_tries;
-			if (loop_tries == 5) {
-				DRM_ERROR("too many full retries, give up\n");
-				break;
-			}
-			intel_dp_reset_link_train(intel_dp, &DP,
-						  DP_TRAINING_PATTERN_1 |
-						  DP_LINK_SCRAMBLING_DISABLE);
-			voltage_tries = 0;
-			continue;
-		}
-
-		/* Check to see if we've tried the same voltage 5 times */
-		if ((intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == voltage) {
-			++voltage_tries;
-			if (voltage_tries == 5) {
-				DRM_ERROR("too many voltage retries, give up\n");
-				break;
-			}
-		} else
-			voltage_tries = 0;
-		voltage = intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
-
-		/* Update training set as requested by target */
-		if (!intel_dp_update_link_train(intel_dp, &DP, link_status)) {
-			DRM_ERROR("failed to update link training\n");
-			break;
-		}
-	}
-
-	intel_dp->DP = DP;
-}
-
-static void
-intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
-{
-	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
-	struct drm_device *dev = dig_port->base.base.dev;
-	bool channel_eq = false;
-	int tries, cr_tries;
-	uint32_t DP = intel_dp->DP;
-	uint32_t training_pattern = DP_TRAINING_PATTERN_2;
-
-	/*
-	 * Training Pattern 3 for HBR2 or 1.2 devices that support it.
-	 *
-	 * Intel platforms that support HBR2 also support TPS3. TPS3 support is
-	 * also mandatory for downstream devices that support HBR2.
-	 *
-	 * Due to WaDisableHBR2 SKL < B0 is the only exception where TPS3 is
-	 * supported but still not enabled.
-	 */
-	if (intel_dp_source_supports_hbr2(dev) &&
-	    drm_dp_tps3_supported(intel_dp->dpcd))
-		training_pattern = DP_TRAINING_PATTERN_3;
-	else if (intel_dp->link_rate == 540000)
-		DRM_ERROR("5.4 Gbps link rate without HBR2/TPS3 support\n");
-
-	/* channel equalization */
-	if (!intel_dp_set_link_train(intel_dp, &DP,
-				     training_pattern |
-				     DP_LINK_SCRAMBLING_DISABLE)) {
-		DRM_ERROR("failed to start channel equalization\n");
-		return;
-	}
-
-	tries = 0;
-	cr_tries = 0;
-	channel_eq = false;
-	for (;;) {
-		uint8_t link_status[DP_LINK_STATUS_SIZE];
-
-		if (cr_tries > 5) {
-			DRM_ERROR("failed to train DP, aborting\n");
-			break;
-		}
-
-		drm_dp_link_train_channel_eq_delay(intel_dp->dpcd);
-		if (!intel_dp_get_link_status(intel_dp, link_status)) {
-			DRM_ERROR("failed to get link status\n");
-			break;
-		}
-
-		/* Make sure clock is still ok */
-		if (!drm_dp_clock_recovery_ok(link_status,
-					      intel_dp->lane_count)) {
-			intel_dp->train_set_valid = false;
-			intel_dp_link_training_clock_recovery(intel_dp);
-			intel_dp_set_link_train(intel_dp, &DP,
-						training_pattern |
-						DP_LINK_SCRAMBLING_DISABLE);
-			cr_tries++;
-			continue;
-		}
-
-		if (drm_dp_channel_eq_ok(link_status,
-					 intel_dp->lane_count)) {
-			channel_eq = true;
-			break;
-		}
-
-		/* Try 5 times, then try clock recovery if that fails */
-		if (tries > 5) {
-			intel_dp->train_set_valid = false;
-			intel_dp_link_training_clock_recovery(intel_dp);
-			intel_dp_set_link_train(intel_dp, &DP,
-						training_pattern |
-						DP_LINK_SCRAMBLING_DISABLE);
-			tries = 0;
-			cr_tries++;
-			continue;
-		}
-
-		/* Update training set as requested by target */
-		if (!intel_dp_update_link_train(intel_dp, &DP, link_status)) {
-			DRM_ERROR("failed to update link training\n");
-			break;
-		}
-		++tries;
-	}
-
-	intel_dp_set_idle_link_train(intel_dp);
-
-	intel_dp->DP = DP;
-
-	if (channel_eq) {
-		intel_dp->train_set_valid = true;
-		DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
-	}
-}
-
-void intel_dp_stop_link_train(struct intel_dp *intel_dp)
-{
-	intel_dp_set_link_train(intel_dp, &intel_dp->DP,
-				DP_TRAINING_PATTERN_DISABLE);
-}
-
-void
-intel_dp_start_link_train(struct intel_dp *intel_dp)
-{
-	intel_dp_link_training_clock_recovery(intel_dp);
-	intel_dp_link_training_channel_equalization(intel_dp);
-}
-
 static void
 intel_dp_link_down(struct intel_dp *intel_dp)
 {
@@ -3957,6 +3789,13 @@
 	 * matching HDMI port to be enabled on transcoder A.
 	 */
 	if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B && port != PORT_A) {
+		/*
+		 * We get CPU/PCH FIFO underruns on the other pipe when
+		 * doing the workaround. Sweep them under the rug.
+		 */
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+
 		/* always enable with pattern 1 (as per spec) */
 		DP &= ~(DP_PIPEB_SELECT | DP_LINK_TRAIN_MASK);
 		DP |= DP_PORT_EN | DP_LINK_TRAIN_PAT_1;
@@ -3966,9 +3805,15 @@
 		DP &= ~DP_PORT_EN;
 		I915_WRITE(intel_dp->output_reg, DP);
 		POSTING_READ(intel_dp->output_reg);
+
+		intel_wait_for_vblank_if_active(dev_priv->dev, PIPE_A);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, true);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
 	}
 
 	msleep(intel_dp->panel_power_down_delay);
+
+	intel_dp->DP = DP;
 }
 
 static bool
@@ -4016,7 +3861,7 @@
 	}
 
 	DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n",
-		      yesno(intel_dp_source_supports_hbr2(dev)),
+		      yesno(intel_dp_source_supports_hbr2(intel_dp)),
 		      yesno(drm_dp_tps3_supported(intel_dp->dpcd)));
 
 	/* Intermediate frequency support */
@@ -4106,9 +3951,12 @@
 static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
 	u8 buf;
 	int ret = 0;
+	int count = 0;
+	int attempts = 10;
 
 	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
 		DRM_DEBUG_KMS("Sink CRC couldn't be stopped properly\n");
@@ -4123,7 +3971,22 @@
 		goto out;
 	}
 
-	intel_dp->sink_crc.started = false;
+	do {
+		intel_wait_for_vblank(dev, intel_crtc->pipe);
+
+		if (drm_dp_dpcd_readb(&intel_dp->aux,
+				      DP_TEST_SINK_MISC, &buf) < 0) {
+			ret = -EIO;
+			goto out;
+		}
+		count = buf & DP_TEST_COUNT_MASK;
+	} while (--attempts && count);
+
+	if (attempts == 0) {
+		DRM_ERROR("TIMEOUT: Sink CRC counter is not zeroed\n");
+		ret = -ETIMEDOUT;
+	}
+
  out:
 	hsw_enable_ips(intel_crtc);
 	return ret;
@@ -4132,27 +3995,26 @@
 static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
 	u8 buf;
 	int ret;
 
-	if (intel_dp->sink_crc.started) {
-		ret = intel_dp_sink_crc_stop(intel_dp);
-		if (ret)
-			return ret;
-	}
-
 	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0)
 		return -EIO;
 
 	if (!(buf & DP_TEST_CRC_SUPPORTED))
 		return -ENOTTY;
 
-	intel_dp->sink_crc.last_count = buf & DP_TEST_COUNT_MASK;
-
 	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0)
 		return -EIO;
 
+	if (buf & DP_TEST_SINK_START) {
+		ret = intel_dp_sink_crc_stop(intel_dp);
+		if (ret)
+			return ret;
+	}
+
 	hsw_disable_ips(intel_crtc);
 
 	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
@@ -4161,7 +4023,7 @@
 		return -EIO;
 	}
 
-	intel_dp->sink_crc.started = true;
+	intel_wait_for_vblank(dev, intel_crtc->pipe);
 	return 0;
 }
 
@@ -4173,7 +4035,6 @@
 	u8 buf;
 	int count, ret;
 	int attempts = 6;
-	bool old_equal_new;
 
 	ret = intel_dp_sink_crc_start(intel_dp);
 	if (ret)
@@ -4189,35 +4050,17 @@
 		}
 		count = buf & DP_TEST_COUNT_MASK;
 
-		/*
-		 * Count might be reset during the loop. In this case
-		 * last known count needs to be reset as well.
-		 */
-		if (count == 0)
-			intel_dp->sink_crc.last_count = 0;
-
-		if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
-			ret = -EIO;
-			goto stop;
-		}
-
-		old_equal_new = (count == intel_dp->sink_crc.last_count &&
-				 !memcmp(intel_dp->sink_crc.last_crc, crc,
-					 6 * sizeof(u8)));
-
-	} while (--attempts && (count == 0 || old_equal_new));
-
-	intel_dp->sink_crc.last_count = buf & DP_TEST_COUNT_MASK;
-	memcpy(intel_dp->sink_crc.last_crc, crc, 6 * sizeof(u8));
+	} while (--attempts && count == 0);
 
 	if (attempts == 0) {
-		if (old_equal_new) {
-			DRM_DEBUG_KMS("Unreliable Sink CRC counter: Current returned CRC is identical to the previous one\n");
-		} else {
-			DRM_ERROR("Panel is unable to calculate any CRC after 6 vblanks\n");
-			ret = -ETIMEDOUT;
-			goto stop;
-		}
+		DRM_ERROR("Panel is unable to calculate any CRC after 6 vblanks\n");
+		ret = -ETIMEDOUT;
+		goto stop;
+	}
+
+	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
+		ret = -EIO;
+		goto stop;
 	}
 
 stop:
@@ -4317,13 +4160,6 @@
 	uint8_t rxdata = 0;
 	int status = 0;
 
-	intel_dp->compliance_test_active = 0;
-	intel_dp->compliance_test_type = 0;
-	intel_dp->compliance_test_data = 0;
-
-	intel_dp->aux.i2c_nack_count = 0;
-	intel_dp->aux.i2c_defer_count = 0;
-
 	status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_REQUEST, &rxdata, 1);
 	if (status <= 0) {
 		DRM_DEBUG_KMS("Could not read test request from sink\n");
@@ -4439,6 +4275,14 @@
 
 	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
+	/*
+	 * Clearing compliance test variables to allow capturing
+	 * of values for the next automated test request.
+	 */
+	intel_dp->compliance_test_active = 0;
+	intel_dp->compliance_test_type = 0;
+	intel_dp->compliance_test_data = 0;
+
 	if (!intel_encoder->base.crtc)
 		return;
 
@@ -4469,7 +4313,9 @@
 			DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n");
 	}
 
-	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
+	/* if link training is requested we should perform it always */
+	if ((intel_dp->compliance_test_type == DP_TEST_LINK_TRAINING) ||
+		(!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count))) {
 		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
 			      intel_encoder->base.name);
 		intel_dp_start_link_train(intel_dp);
@@ -4687,41 +4533,6 @@
 		return g4x_digital_port_connected(dev_priv, port);
 }
 
-static enum drm_connector_status
-ironlake_dp_detect(struct intel_dp *intel_dp)
-{
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-
-	if (!intel_digital_port_connected(dev_priv, intel_dig_port))
-		return connector_status_disconnected;
-
-	return intel_dp_detect_dpcd(intel_dp);
-}
-
-static enum drm_connector_status
-g4x_dp_detect(struct intel_dp *intel_dp)
-{
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-
-	/* Can't disconnect eDP, but you can close the lid... */
-	if (is_edp(intel_dp)) {
-		enum drm_connector_status status;
-
-		status = intel_panel_detect(dev);
-		if (status == connector_status_unknown)
-			status = connector_status_connected;
-		return status;
-	}
-
-	if (!intel_digital_port_connected(dev->dev_private, intel_dig_port))
-		return connector_status_disconnected;
-
-	return intel_dp_detect_dpcd(intel_dp);
-}
-
 static struct edid *
 intel_dp_get_edid(struct intel_dp *intel_dp)
 {
@@ -4765,26 +4576,6 @@
 	intel_dp->has_audio = false;
 }
 
-static enum intel_display_power_domain
-intel_dp_power_get(struct intel_dp *dp)
-{
-	struct intel_encoder *encoder = &dp_to_dig_port(dp)->base;
-	enum intel_display_power_domain power_domain;
-
-	power_domain = intel_display_port_power_domain(encoder);
-	intel_display_power_get(to_i915(encoder->base.dev), power_domain);
-
-	return power_domain;
-}
-
-static void
-intel_dp_power_put(struct intel_dp *dp,
-		   enum intel_display_power_domain power_domain)
-{
-	struct intel_encoder *encoder = &dp_to_dig_port(dp)->base;
-	intel_display_power_put(to_i915(encoder->base.dev), power_domain);
-}
-
 static enum drm_connector_status
 intel_dp_detect(struct drm_connector *connector, bool force)
 {
@@ -4808,17 +4599,25 @@
 		return connector_status_disconnected;
 	}
 
-	power_domain = intel_dp_power_get(intel_dp);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
+	intel_display_power_get(to_i915(dev), power_domain);
 
 	/* Can't disconnect eDP, but you can close the lid... */
 	if (is_edp(intel_dp))
 		status = edp_detect(intel_dp);
-	else if (HAS_PCH_SPLIT(dev))
-		status = ironlake_dp_detect(intel_dp);
+	else if (intel_digital_port_connected(to_i915(dev),
+					      dp_to_dig_port(intel_dp)))
+		status = intel_dp_detect_dpcd(intel_dp);
 	else
-		status = g4x_dp_detect(intel_dp);
-	if (status != connector_status_connected)
+		status = connector_status_disconnected;
+
+	if (status != connector_status_connected) {
+		intel_dp->compliance_test_active = 0;
+		intel_dp->compliance_test_type = 0;
+		intel_dp->compliance_test_data = 0;
+
 		goto out;
+	}
 
 	intel_dp_probe_oui(intel_dp);
 
@@ -4832,6 +4631,14 @@
 		goto out;
 	}
 
+	/*
+	 * Clear the NACK and defer counts so that their exact values
+	 * during the EDID read are available, as required by Compliance
+	 * tests 4.2.2.4 and 4.2.2.5.
+	 */
+	intel_dp->aux.i2c_nack_count = 0;
+	intel_dp->aux.i2c_defer_count = 0;
+
 	intel_dp_set_edid(intel_dp);
 
 	if (intel_encoder->type != INTEL_OUTPUT_EDP)
@@ -4853,7 +4660,7 @@
 	}
 
 out:
-	intel_dp_power_put(intel_dp, power_domain);
+	intel_display_power_put(to_i915(dev), power_domain);
 	return status;
 }
 
@@ -4862,6 +4669,7 @@
 {
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
 	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base;
+	struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev);
 	enum intel_display_power_domain power_domain;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
@@ -4871,11 +4679,12 @@
 	if (connector->status != connector_status_connected)
 		return;
 
-	power_domain = intel_dp_power_get(intel_dp);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
+	intel_display_power_get(dev_priv, power_domain);
 
 	intel_dp_set_edid(intel_dp);
 
-	intel_dp_power_put(intel_dp, power_domain);
+	intel_display_power_put(dev_priv, power_domain);
 
 	if (intel_encoder->type != INTEL_OUTPUT_EDP)
 		intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
@@ -5034,7 +4843,7 @@
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 
-	drm_dp_aux_unregister(&intel_dp->aux);
+	intel_dp_aux_fini(intel_dp);
 	intel_dp_mst_encoder_cleanup(intel_dig_port);
 	if (is_edp(intel_dp)) {
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
@@ -5091,7 +4900,7 @@
 	 * indefinitely.
 	 */
 	DRM_DEBUG_KMS("VDD left on by BIOS, adjusting state tracking\n");
-	power_domain = intel_display_port_power_domain(&intel_dig_port->base);
+	power_domain = intel_display_port_aux_power_domain(&intel_dig_port->base);
 	intel_display_power_get(dev_priv, power_domain);
 
 	edp_panel_vdd_schedule_off(intel_dp);
@@ -5172,7 +4981,7 @@
 		      port_name(intel_dig_port->port),
 		      long_hpd ? "long" : "short");
 
-	power_domain = intel_display_port_power_domain(intel_encoder);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_get(dev_priv, power_domain);
 
 	if (long_hpd) {
@@ -5223,25 +5032,6 @@
 	return ret;
 }
 
-/* Return which DP Port should be selected for Transcoder DP control */
-int
-intel_trans_dp_port_sel(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct intel_encoder *intel_encoder;
-	struct intel_dp *intel_dp;
-
-	for_each_encoder_on_crtc(dev, crtc, intel_encoder) {
-		intel_dp = enc_to_intel_dp(&intel_encoder->base);
-
-		if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
-		    intel_encoder->type == INTEL_OUTPUT_EDP)
-			return intel_dp->output_reg;
-	}
-
-	return -1;
-}
-
 /* check the VBT to see whether the eDP is on another port */
 bool intel_dp_is_edp(struct drm_device *dev, enum port port)
 {
@@ -5313,7 +5103,7 @@
 	struct edp_power_seq cur, vbt, spec,
 		*final = &intel_dp->pps_delays;
 	u32 pp_on, pp_off, pp_div = 0, pp_ctl = 0;
-	int pp_ctrl_reg, pp_on_reg, pp_off_reg, pp_div_reg = 0;
+	i915_reg_t pp_ctrl_reg, pp_on_reg, pp_off_reg, pp_div_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -5435,7 +5225,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp_on, pp_off, pp_div, port_sel = 0;
 	int div = HAS_PCH_SPLIT(dev) ? intel_pch_rawclk(dev) : intel_hrawclk(dev);
-	int pp_on_reg, pp_off_reg, pp_div_reg = 0, pp_ctrl_reg;
+	i915_reg_t pp_on_reg, pp_off_reg, pp_div_reg, pp_ctrl_reg;
 	enum port port = dp_to_dig_port(intel_dp)->port;
 	const struct edp_power_seq *seq = &intel_dp->pps_delays;
 
@@ -5597,7 +5387,7 @@
 			DRM_ERROR("Unsupported refreshrate type\n");
 		}
 	} else if (INTEL_INFO(dev)->gen > 6) {
-		u32 reg = PIPECONF(intel_crtc->config->cpu_transcoder);
+		i915_reg_t reg = PIPECONF(intel_crtc->config->cpu_transcoder);
 		u32 val;
 
 		val = I915_READ(reg);
@@ -6015,7 +5805,7 @@
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum port port = intel_dig_port->port;
-	int type;
+	int type, ret;
 
 	intel_dp->pps_pipe = INVALID_PIPE;
 
@@ -6036,6 +5826,9 @@
 	else
 		intel_dp->get_aux_send_ctl = i9xx_get_aux_send_ctl;
 
+	if (HAS_DDI(dev))
+		intel_dp->prepare_link_retrain = intel_ddi_prepare_link_retrain;
+
 	/* Preserve the current hw state. */
 	intel_dp->DP = I915_READ(intel_dp->output_reg);
 	intel_dp->attached_connector = intel_connector;
@@ -6087,7 +5880,7 @@
 		break;
 	case PORT_B:
 		intel_encoder->hpd_pin = HPD_PORT_B;
-		if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0))
+		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 			intel_encoder->hpd_pin = HPD_PORT_A;
 		break;
 	case PORT_C:
@@ -6113,7 +5906,9 @@
 		pps_unlock(intel_dp);
 	}
 
-	intel_dp_aux_init(intel_dp, intel_connector);
+	ret = intel_dp_aux_init(intel_dp, intel_connector);
+	if (ret)
+		goto fail;
 
 	/* init MST on ports that can support it */
 	if (HAS_DP_MST(dev) &&
@@ -6122,20 +5917,9 @@
 					  intel_connector->base.base.id);
 
 	if (!intel_edp_init_connector(intel_dp, intel_connector)) {
-		drm_dp_aux_unregister(&intel_dp->aux);
-		if (is_edp(intel_dp)) {
-			cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-			/*
-			 * vdd might still be enabled do to the delayed vdd off.
-			 * Make sure vdd is actually turned off here.
-			 */
-			pps_lock(intel_dp);
-			edp_panel_vdd_off_sync(intel_dp);
-			pps_unlock(intel_dp);
-		}
-		drm_connector_unregister(connector);
-		drm_connector_cleanup(connector);
-		return false;
+		intel_dp_aux_fini(intel_dp);
+		intel_dp_mst_encoder_cleanup(intel_dig_port);
+		goto fail;
 	}
 
 	intel_dp_add_properties(intel_dp, connector);
@@ -6152,10 +5936,27 @@
 	i915_debugfs_connector_add(connector);
 
 	return true;
+
+fail:
+	if (is_edp(intel_dp)) {
+		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
+		/*
+		 * vdd might still be enabled due to the delayed vdd off.
+		 * Make sure vdd is actually turned off here.
+		 */
+		pps_lock(intel_dp);
+		edp_panel_vdd_off_sync(intel_dp);
+		pps_unlock(intel_dp);
+	}
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+
+	return false;
 }
 
 void
-intel_dp_init(struct drm_device *dev, int output_reg, enum port port)
+intel_dp_init(struct drm_device *dev,
+	      i915_reg_t output_reg, enum port port)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_digital_port *intel_dig_port;
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
new file mode 100644
index 0000000..8888793
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "intel_drv.h"
+
+static void
+intel_get_adjust_train(struct intel_dp *intel_dp,
+		       const uint8_t link_status[DP_LINK_STATUS_SIZE])
+{
+	uint8_t v = 0;
+	uint8_t p = 0;
+	int lane;
+	uint8_t voltage_max;
+	uint8_t preemph_max;
+
+	for (lane = 0; lane < intel_dp->lane_count; lane++) {
+		uint8_t this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
+		uint8_t this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
+
+		if (this_v > v)
+			v = this_v;
+		if (this_p > p)
+			p = this_p;
+	}
+
+	voltage_max = intel_dp_voltage_max(intel_dp);
+	if (v >= voltage_max)
+		v = voltage_max | DP_TRAIN_MAX_SWING_REACHED;
+
+	preemph_max = intel_dp_pre_emphasis_max(intel_dp, v);
+	if (p >= preemph_max)
+		p = preemph_max | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
+
+	for (lane = 0; lane < 4; lane++)
+		intel_dp->train_set[lane] = v | p;
+}
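A worked example of the per-lane adjust handling above (illustrative only):

	/*
	 * With 2 active lanes and sink requests lane0 = {swing 1, pre-emph 0},
	 * lane1 = {swing 2, pre-emph 1}, the maxima (2, 1) are taken, clamped
	 * against intel_dp_voltage_max()/intel_dp_pre_emphasis_max(), the
	 * MAX_*_REACHED flag is set whenever the cap was hit, and the combined
	 * value is written to all four train_set[] entries.
	 */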
+
+static bool
+intel_dp_set_link_train(struct intel_dp *intel_dp,
+			uint8_t dp_train_pat)
+{
+	uint8_t buf[sizeof(intel_dp->train_set) + 1];
+	int ret, len;
+
+	intel_dp_program_link_training_pattern(intel_dp, dp_train_pat);
+
+	buf[0] = dp_train_pat;
+	if ((dp_train_pat & DP_TRAINING_PATTERN_MASK) ==
+	    DP_TRAINING_PATTERN_DISABLE) {
+		/* don't write DP_TRAINING_LANEx_SET on disable */
+		len = 1;
+	} else {
+		/* DP_TRAINING_LANEx_SET follow DP_TRAINING_PATTERN_SET */
+		memcpy(buf + 1, intel_dp->train_set, intel_dp->lane_count);
+		len = intel_dp->lane_count + 1;
+	}
+
+	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_PATTERN_SET,
+				buf, len);
+
+	return ret == len;
+}
+
+static bool
+intel_dp_reset_link_train(struct intel_dp *intel_dp,
+			uint8_t dp_train_pat)
+{
+	if (!intel_dp->train_set_valid)
+		memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
+	intel_dp_set_signal_levels(intel_dp);
+	return intel_dp_set_link_train(intel_dp, dp_train_pat);
+}
+
+static bool
+intel_dp_update_link_train(struct intel_dp *intel_dp)
+{
+	int ret;
+
+	intel_dp_set_signal_levels(intel_dp);
+
+	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET,
+				intel_dp->train_set, intel_dp->lane_count);
+
+	return ret == intel_dp->lane_count;
+}
+
+/* Enable corresponding port and start training pattern 1 */
+static void
+intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
+{
+	int i;
+	uint8_t voltage;
+	int voltage_tries, loop_tries;
+	uint8_t link_config[2];
+	uint8_t link_bw, rate_select;
+
+	if (intel_dp->prepare_link_retrain)
+		intel_dp->prepare_link_retrain(intel_dp);
+
+	intel_dp_compute_rate(intel_dp, intel_dp->link_rate,
+			      &link_bw, &rate_select);
+
+	/* Write the link configuration data */
+	link_config[0] = link_bw;
+	link_config[1] = intel_dp->lane_count;
+	if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
+		link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+	drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
+	if (intel_dp->num_sink_rates)
+		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
+				  &rate_select, 1);
+
+	link_config[0] = 0;
+	link_config[1] = DP_SET_ANSI_8B10B;
+	drm_dp_dpcd_write(&intel_dp->aux, DP_DOWNSPREAD_CTRL, link_config, 2);
+
+	intel_dp->DP |= DP_PORT_EN;
+
+	/* clock recovery */
+	if (!intel_dp_reset_link_train(intel_dp,
+				       DP_TRAINING_PATTERN_1 |
+				       DP_LINK_SCRAMBLING_DISABLE)) {
+		DRM_ERROR("failed to enable link training\n");
+		return;
+	}
+
+	voltage = 0xff;
+	voltage_tries = 0;
+	loop_tries = 0;
+	for (;;) {
+		uint8_t link_status[DP_LINK_STATUS_SIZE];
+
+		drm_dp_link_train_clock_recovery_delay(intel_dp->dpcd);
+		if (!intel_dp_get_link_status(intel_dp, link_status)) {
+			DRM_ERROR("failed to get link status\n");
+			break;
+		}
+
+		if (drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) {
+			DRM_DEBUG_KMS("clock recovery OK\n");
+			break;
+		}
+
+		/*
+		 * if we used previously trained voltage and pre-emphasis values
+		 * and we don't get clock recovery, reset link training values
+		 */
+		if (intel_dp->train_set_valid) {
+			DRM_DEBUG_KMS("clock recovery not ok, reset");
+			/* clear the flag as we are not reusing train set */
+			intel_dp->train_set_valid = false;
+			if (!intel_dp_reset_link_train(intel_dp,
+						       DP_TRAINING_PATTERN_1 |
+						       DP_LINK_SCRAMBLING_DISABLE)) {
+				DRM_ERROR("failed to enable link training\n");
+				return;
+			}
+			continue;
+		}
+
+		/* Check to see if we've tried the max voltage */
+		for (i = 0; i < intel_dp->lane_count; i++)
+			if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
+				break;
+		if (i == intel_dp->lane_count) {
+			++loop_tries;
+			if (loop_tries == 5) {
+				DRM_ERROR("too many full retries, give up\n");
+				break;
+			}
+			intel_dp_reset_link_train(intel_dp,
+						  DP_TRAINING_PATTERN_1 |
+						  DP_LINK_SCRAMBLING_DISABLE);
+			voltage_tries = 0;
+			continue;
+		}
+
+		/* Check to see if we've tried the same voltage 5 times */
+		if ((intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == voltage) {
+			++voltage_tries;
+			if (voltage_tries == 5) {
+				DRM_ERROR("too many voltage retries, give up\n");
+				break;
+			}
+		} else
+			voltage_tries = 0;
+		voltage = intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
+
+		/* Update training set as requested by target */
+		intel_get_adjust_train(intel_dp, link_status);
+		if (!intel_dp_update_link_train(intel_dp)) {
+			DRM_ERROR("failed to update link training\n");
+			break;
+		}
+	}
+}
+
+static void
+intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
+{
+	bool channel_eq = false;
+	int tries, cr_tries;
+	uint32_t training_pattern = DP_TRAINING_PATTERN_2;
+
+	/*
+	 * Training Pattern 3 for HBR2 or 1.2 devices that support it.
+	 *
+	 * Intel platforms that support HBR2 also support TPS3. TPS3 support is
+	 * also mandatory for downstream devices that support HBR2.
+	 *
+	 * Due to WaDisableHBR2 SKL < B0 is the only exception where TPS3 is
+	 * supported but still not enabled.
+	 */
+	if (intel_dp_source_supports_hbr2(intel_dp) &&
+	    drm_dp_tps3_supported(intel_dp->dpcd))
+		training_pattern = DP_TRAINING_PATTERN_3;
+	else if (intel_dp->link_rate == 540000)
+		DRM_ERROR("5.4 Gbps link rate without HBR2/TPS3 support\n");
+
+	/* channel equalization */
+	if (!intel_dp_set_link_train(intel_dp,
+				     training_pattern |
+				     DP_LINK_SCRAMBLING_DISABLE)) {
+		DRM_ERROR("failed to start channel equalization\n");
+		return;
+	}
+
+	tries = 0;
+	cr_tries = 0;
+	channel_eq = false;
+	for (;;) {
+		uint8_t link_status[DP_LINK_STATUS_SIZE];
+
+		if (cr_tries > 5) {
+			DRM_ERROR("failed to train DP, aborting\n");
+			break;
+		}
+
+		drm_dp_link_train_channel_eq_delay(intel_dp->dpcd);
+		if (!intel_dp_get_link_status(intel_dp, link_status)) {
+			DRM_ERROR("failed to get link status\n");
+			break;
+		}
+
+		/* Make sure clock is still ok */
+		if (!drm_dp_clock_recovery_ok(link_status,
+					      intel_dp->lane_count)) {
+			intel_dp->train_set_valid = false;
+			intel_dp_link_training_clock_recovery(intel_dp);
+			intel_dp_set_link_train(intel_dp,
+						training_pattern |
+						DP_LINK_SCRAMBLING_DISABLE);
+			cr_tries++;
+			continue;
+		}
+
+		if (drm_dp_channel_eq_ok(link_status,
+					 intel_dp->lane_count)) {
+			channel_eq = true;
+			break;
+		}
+
+		/* Try 5 times, then try clock recovery if that fails */
+		if (tries > 5) {
+			intel_dp->train_set_valid = false;
+			intel_dp_link_training_clock_recovery(intel_dp);
+			intel_dp_set_link_train(intel_dp,
+						training_pattern |
+						DP_LINK_SCRAMBLING_DISABLE);
+			tries = 0;
+			cr_tries++;
+			continue;
+		}
+
+		/* Update training set as requested by target */
+		intel_get_adjust_train(intel_dp, link_status);
+		if (!intel_dp_update_link_train(intel_dp)) {
+			DRM_ERROR("failed to update link training\n");
+			break;
+		}
+		++tries;
+	}
+
+	intel_dp_set_idle_link_train(intel_dp);
+
+	if (channel_eq) {
+		intel_dp->train_set_valid = true;
+		DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
+	}
+}
+
+void intel_dp_stop_link_train(struct intel_dp *intel_dp)
+{
+	intel_dp_set_link_train(intel_dp,
+				DP_TRAINING_PATTERN_DISABLE);
+}
+
+void
+intel_dp_start_link_train(struct intel_dp *intel_dp)
+{
+	intel_dp_link_training_clock_recovery(intel_dp);
+	intel_dp_link_training_channel_equalization(intel_dp);
+}
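The new file only sequences the DPCD side of training; the register-level helpers stay in intel_dp.c behind the now-exported intel_dp_set_signal_levels(), intel_dp_program_link_training_pattern() and intel_dp_set_idle_link_train(), plus the prepare_link_retrain hook assigned earlier in this patch for DDI platforms. Callers keep the familiar two-step sequence, as in the intel_dp_mst.c hunk below (sketch only):

	intel_dp_start_link_train(intel_dp);	/* clock recovery + channel EQ */
	intel_dp_stop_link_train(intel_dp);	/* back to DP_TRAINING_PATTERN_DISABLE */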
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 0639275..8c4e7df 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -173,20 +173,14 @@
 	intel_mst->port = found->port;
 
 	if (intel_dp->active_mst_links == 0) {
-		enum port port = intel_ddi_get_encoder_port(encoder);
+		intel_ddi_clk_select(encoder, intel_crtc->config);
 
 		intel_dp_set_link_params(intel_dp, intel_crtc->config);
 
-		/* FIXME: add support for SKL */
-		if (INTEL_INFO(dev)->gen < 9)
-			I915_WRITE(PORT_CLK_SEL(port),
-				   intel_crtc->config->ddi_pll_sel);
-
 		intel_ddi_init_dp_buf_reg(&intel_dig_port->base);
 
 		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
 
-
 		intel_dp_start_link_train(intel_dp);
 		intel_dp_stop_link_train(intel_dp);
 	}
@@ -414,7 +408,10 @@
 {
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	drm_fb_helper_add_one_connector(&dev_priv->fbdev->helper, &connector->base);
+
+	if (dev_priv->fbdev)
+		drm_fb_helper_add_one_connector(&dev_priv->fbdev->helper,
+						&connector->base);
 #endif
 }
 
@@ -422,7 +419,10 @@
 {
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	drm_fb_helper_remove_one_connector(&dev_priv->fbdev->helper, &connector->base);
+
+	if (dev_priv->fbdev)
+		drm_fb_helper_remove_one_connector(&dev_priv->fbdev->helper,
+						   &connector->base);
 #endif
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index d792665..ab5c147 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -248,6 +248,7 @@
 	unsigned int cdclk;
 	bool dpll_set;
 	struct intel_shared_dpll_config shared_dpll[I915_NUM_PLLS];
+	struct intel_wm_config wm_config;
 };
 
 struct intel_plane_state {
@@ -278,6 +279,9 @@
 	int scaler_id;
 
 	struct drm_intel_sprite_colorkey ckey;
+
+	/* async flip related structures */
+	struct drm_i915_gem_request *wait_req;
 };
 
 struct intel_initial_plane_config {
@@ -332,6 +336,21 @@
 /* drm_mode->private_flags */
 #define I915_MODE_FLAG_INHERITED 1
 
+struct intel_pipe_wm {
+	struct intel_wm_level wm[5];
+	uint32_t linetime;
+	bool fbc_wm_enabled;
+	bool pipe_enabled;
+	bool sprites_enabled;
+	bool sprites_scaled;
+};
+
+struct skl_pipe_wm {
+	struct skl_wm_level wm[8];
+	struct skl_wm_level trans_wm;
+	uint32_t linetime;
+};
+
 struct intel_crtc_state {
 	struct drm_crtc_state base;
 
@@ -466,6 +485,20 @@
 
 	/* w/a for waiting 2 vblanks during crtc enable */
 	enum pipe hsw_workaround_pipe;
+
+	/* IVB sprite scaling w/a (WaCxSRDisabledForSpriteScaling:ivb) */
+	bool disable_lp_wm;
+
+	struct {
+		/*
+		 * optimal watermarks, programmed post-vblank when this state
+		 * is committed
+		 */
+		union {
+			struct intel_pipe_wm ilk;
+			struct skl_pipe_wm skl;
+		} optimal;
+	} wm;
 };
 
 struct vlv_wm_state {
@@ -477,26 +510,12 @@
 	bool cxsr;
 };
 
-struct intel_pipe_wm {
-	struct intel_wm_level wm[5];
-	uint32_t linetime;
-	bool fbc_wm_enabled;
-	bool pipe_enabled;
-	bool sprites_enabled;
-	bool sprites_scaled;
-};
-
 struct intel_mmio_flip {
 	struct work_struct work;
 	struct drm_i915_private *i915;
 	struct drm_i915_gem_request *req;
 	struct intel_crtc *crtc;
-};
-
-struct skl_pipe_wm {
-	struct skl_wm_level wm[8];
-	struct skl_wm_level trans_wm;
-	uint32_t linetime;
+	unsigned int rotation;
 };
 
 /*
@@ -507,13 +526,11 @@
  */
 struct intel_crtc_atomic_commit {
 	/* Sleepable operations to perform before commit */
-	bool wait_for_flips;
 	bool disable_fbc;
 	bool disable_ips;
 	bool disable_cxsr;
 	bool pre_disable_primary;
 	bool update_wm_pre, update_wm_post;
-	unsigned disabled_planes;
 
 	/* Sleepable operations to perform after commit */
 	unsigned fb_bits;
@@ -566,9 +583,10 @@
 	/* per-pipe watermark state */
 	struct {
 		/* watermarks currently being used  */
-		struct intel_pipe_wm active;
-		/* SKL wm values currently in use */
-		struct skl_pipe_wm skl_active;
+		union {
+			struct intel_pipe_wm ilk;
+			struct skl_pipe_wm skl;
+		} active;
 		/* allow CxSR on this pipe */
 		bool cxsr_allowed;
 	} wm;
@@ -676,7 +694,7 @@
 #define intel_fb_obj(x) (x ? to_intel_framebuffer(x)->obj : NULL)
 
 struct intel_hdmi {
-	u32 hdmi_reg;
+	i915_reg_t hdmi_reg;
 	int ddc_bus;
 	bool limited_color_range;
 	bool color_range_auto;
@@ -718,15 +736,10 @@
 	M2_N2
 };
 
-struct sink_crc {
-	bool started;
-	u8 last_crc[6];
-	int last_count;
-};
-
 struct intel_dp {
-	uint32_t output_reg;
-	uint32_t aux_ch_ctl_reg;
+	i915_reg_t output_reg;
+	i915_reg_t aux_ch_ctl_reg;
+	i915_reg_t aux_ch_data_reg[5];
 	uint32_t DP;
 	int link_rate;
 	uint8_t lane_count;
@@ -740,7 +753,6 @@
 	/* sink rates as reported by DP_SUPPORTED_LINK_RATES */
 	uint8_t num_sink_rates;
 	int sink_rates[DP_MAX_SUPPORTED_RATES];
-	struct sink_crc sink_crc;
 	struct drm_dp_aux aux;
 	uint8_t train_set[4];
 	int panel_power_up_delay;
@@ -782,6 +794,10 @@
 				     bool has_aux_irq,
 				     int send_bytes,
 				     uint32_t aux_clock_divider);
+
+	/* This is called before link training is started */
+	void (*prepare_link_retrain)(struct intel_dp *intel_dp);
+
 	bool train_set_valid;
 
 	/* Displayport compliance testing */
@@ -941,7 +957,8 @@
 					 enum pipe pipe);
 void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
 					 enum transcoder pch_transcoder);
-void i9xx_check_fifo_underruns(struct drm_i915_private *dev_priv);
+void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv);
+void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
 
 /* i915_irq.c */
 void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
@@ -972,6 +989,8 @@
 
 
 /* intel_ddi.c */
+void intel_ddi_clk_select(struct intel_encoder *encoder,
+			  const struct intel_crtc_state *pipe_config);
 void intel_prepare_ddi(struct drm_device *dev);
 void hsw_fdi_link_train(struct drm_crtc *crtc);
 void intel_ddi_init(struct drm_device *dev, enum port port);
@@ -986,7 +1005,7 @@
 bool intel_ddi_pll_select(struct intel_crtc *crtc,
 			  struct intel_crtc_state *crtc_state);
 void intel_ddi_set_pipe_settings(struct drm_crtc *crtc);
-void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder);
+void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp);
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
 void intel_ddi_fdi_disable(struct drm_crtc *crtc);
 void intel_ddi_get_config(struct intel_encoder *encoder,
@@ -1054,6 +1073,15 @@
 {
 	drm_wait_one_vblank(dev, pipe);
 }
+static inline void
+intel_wait_for_vblank_if_active(struct drm_device *dev, int pipe)
+{
+	const struct intel_crtc *crtc =
+		to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe));
+
+	if (crtc->active)
+		intel_wait_for_vblank(dev, pipe);
+}
 int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp);
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
 			 struct intel_digital_port *dport,
@@ -1067,9 +1095,7 @@
 				    struct drm_modeset_acquire_ctx *ctx);
 int intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 			       struct drm_framebuffer *fb,
-			       const struct drm_plane_state *plane_state,
-			       struct intel_engine_cs *pipelined,
-			       struct drm_i915_gem_request **pipelined_request);
+			       const struct drm_plane_state *plane_state);
 struct drm_framebuffer *
 __intel_framebuffer_create(struct drm_device *dev,
 			   struct drm_mode_fb_cmd2 *mode_cmd,
@@ -1150,7 +1176,10 @@
 void bxt_enable_dc9(struct drm_i915_private *dev_priv);
 void bxt_disable_dc9(struct drm_i915_private *dev_priv);
 void skl_init_cdclk(struct drm_i915_private *dev_priv);
+int skl_sanitize_cdclk(struct drm_i915_private *dev_priv);
 void skl_uninit_cdclk(struct drm_i915_private *dev_priv);
+void skl_enable_dc6(struct drm_i915_private *dev_priv);
+void skl_disable_dc6(struct drm_i915_private *dev_priv);
 void intel_dp_get_m_n(struct intel_crtc *crtc,
 		      struct intel_crtc_state *pipe_config);
 void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n);
@@ -1167,33 +1196,30 @@
 void hsw_disable_ips(struct intel_crtc *crtc);
 enum intel_display_power_domain
 intel_display_port_power_domain(struct intel_encoder *intel_encoder);
+enum intel_display_power_domain
+intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder);
 void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 				 struct intel_crtc_state *pipe_config);
-void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc);
 void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file);
 
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
 int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state);
 
-unsigned long intel_plane_obj_offset(struct intel_plane *intel_plane,
-				     struct drm_i915_gem_object *obj,
-				     unsigned int plane);
+u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
+			   struct drm_i915_gem_object *obj,
+			   unsigned int plane);
 
 u32 skl_plane_ctl_format(uint32_t pixel_format);
 u32 skl_plane_ctl_tiling(uint64_t fb_modifier);
 u32 skl_plane_ctl_rotation(unsigned int rotation);
 
 /* intel_csr.c */
-void intel_csr_ucode_init(struct drm_device *dev);
-enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv);
-void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
-					enum csr_state state);
-void intel_csr_load_program(struct drm_device *dev);
-void intel_csr_ucode_fini(struct drm_device *dev);
-void assert_csr_loaded(struct drm_i915_private *dev_priv);
+void intel_csr_ucode_init(struct drm_i915_private *);
+void intel_csr_load_program(struct drm_i915_private *);
+void intel_csr_ucode_fini(struct drm_i915_private *);
 
 /* intel_dp.c */
-void intel_dp_init(struct drm_device *dev, int output_reg, enum port port);
+void intel_dp_init(struct drm_device *dev, i915_reg_t output_reg, enum port port);
 bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 			     struct intel_connector *intel_connector);
 void intel_dp_set_link_params(struct intel_dp *intel_dp,
@@ -1231,6 +1257,22 @@
 					 struct intel_digital_port *port);
 void hsw_dp_set_ddi_pll_sel(struct intel_crtc_state *pipe_config);
 
+void
+intel_dp_program_link_training_pattern(struct intel_dp *intel_dp,
+				       uint8_t dp_train_pat);
+void
+intel_dp_set_signal_levels(struct intel_dp *intel_dp);
+void intel_dp_set_idle_link_train(struct intel_dp *intel_dp);
+uint8_t
+intel_dp_voltage_max(struct intel_dp *intel_dp);
+uint8_t
+intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing);
+void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
+			   uint8_t *link_bw, uint8_t *rate_select);
+bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp);
+bool
+intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE]);
+
 /* intel_dp_mst.c */
 int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id);
 void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port);
@@ -1245,7 +1287,7 @@
 /* legacy fbdev emulation in intel_fbdev.c */
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 extern int intel_fbdev_init(struct drm_device *dev);
-extern void intel_fbdev_initial_config(void *data, async_cookie_t cookie);
+extern void intel_fbdev_initial_config_async(struct drm_device *dev);
 extern void intel_fbdev_fini(struct drm_device *dev);
 extern void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous);
 extern void intel_fbdev_output_poll_changed(struct drm_device *dev);
@@ -1256,7 +1298,7 @@
 	return 0;
 }
 
-static inline void intel_fbdev_initial_config(void *data, async_cookie_t cookie)
+static inline void intel_fbdev_initial_config_async(struct drm_device *dev)
 {
 }
 
@@ -1284,11 +1326,10 @@
 			  enum fb_op_origin origin);
 void intel_fbc_flush(struct drm_i915_private *dev_priv,
 		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
-const char *intel_no_fbc_reason_str(enum no_fbc_reason reason);
 void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
 
 /* intel_hdmi.c */
-void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port);
+void intel_hdmi_init(struct drm_device *dev, i915_reg_t hdmi_reg, enum port port);
 void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 			       struct intel_connector *intel_connector);
 struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
@@ -1364,7 +1405,10 @@
 /* intel_runtime_pm.c */
 int intel_power_domains_init(struct drm_i915_private *);
 void intel_power_domains_fini(struct drm_i915_private *);
-void intel_power_domains_init_hw(struct drm_i915_private *dev_priv);
+void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
+void intel_power_domains_suspend(struct drm_i915_private *dev_priv);
+void skl_pw1_misc_io_init(struct drm_i915_private *dev_priv);
+void skl_pw1_misc_io_fini(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
 
 bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
@@ -1375,8 +1419,6 @@
 			     enum intel_display_power_domain domain);
 void intel_display_power_put(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain);
-void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
-void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
@@ -1394,12 +1436,6 @@
 void intel_suspend_hw(struct drm_device *dev);
 int ilk_wm_max_level(const struct drm_device *dev);
 void intel_update_watermarks(struct drm_crtc *crtc);
-void intel_update_sprite_watermarks(struct drm_plane *plane,
-				    struct drm_crtc *crtc,
-				    uint32_t sprite_width,
-				    uint32_t sprite_height,
-				    int pixel_size,
-				    bool enabled, bool scaled);
 void intel_init_pm(struct drm_device *dev);
 void intel_pm_setup(struct drm_device *dev);
 void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
@@ -1427,7 +1463,8 @@
 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config);
 
 /* intel_sdvo.c */
-bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob);
+bool intel_sdvo_init(struct drm_device *dev,
+		     i915_reg_t reg, enum port port);
 
 
 /* intel_sprite.c */
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 170ae6f..efb5a27 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -60,7 +60,8 @@
 		DRM_ERROR("DPI FIFOs are not empty\n");
 }
 
-static void write_data(struct drm_i915_private *dev_priv, u32 reg,
+static void write_data(struct drm_i915_private *dev_priv,
+		       i915_reg_t reg,
 		       const u8 *data, u32 len)
 {
 	u32 i, j;
@@ -75,7 +76,8 @@
 	}
 }
 
-static void read_data(struct drm_i915_private *dev_priv, u32 reg,
+static void read_data(struct drm_i915_private *dev_priv,
+		      i915_reg_t reg,
 		      u8 *data, u32 len)
 {
 	u32 i, j;
@@ -98,7 +100,8 @@
 	struct mipi_dsi_packet packet;
 	ssize_t ret;
 	const u8 *header, *data;
-	u32 data_reg, data_mask, ctrl_reg, ctrl_mask;
+	i915_reg_t data_reg, ctrl_reg;
+	u32 data_mask, ctrl_mask;
 
 	ret = mipi_dsi_create_packet(&packet, msg);
 	if (ret < 0)
@@ -377,10 +380,10 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	enum port port;
-	u32 temp;
-	u32 port_ctrl;
 
 	if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) {
+		u32 temp;
+
 		temp = I915_READ(VLV_CHICKEN_3);
 		temp &= ~PIXEL_OVERLAP_CNT_MASK |
 					intel_dsi->pixel_overlap <<
@@ -389,8 +392,9 @@
 	}
 
 	for_each_dsi_port(port, intel_dsi->ports) {
-		port_ctrl = IS_BROXTON(dev) ? BXT_MIPI_PORT_CTRL(port) :
-						MIPI_PORT_CTRL(port);
+		i915_reg_t port_ctrl = IS_BROXTON(dev) ?
+			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
+		u32 temp;
 
 		temp = I915_READ(port_ctrl);
 
@@ -416,13 +420,13 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	enum port port;
-	u32 temp;
-	u32 port_ctrl;
 
 	for_each_dsi_port(port, intel_dsi->ports) {
+		i915_reg_t port_ctrl = IS_BROXTON(dev) ?
+			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
+		u32 temp;
+
 		/* de-assert ip_tg_enable signal */
-		port_ctrl = IS_BROXTON(dev) ? BXT_MIPI_PORT_CTRL(port) :
-						MIPI_PORT_CTRL(port);
 		temp = I915_READ(port_ctrl);
 		I915_WRITE(port_ctrl, temp & ~DPI_ENABLE);
 		POSTING_READ(port_ctrl);
@@ -580,11 +584,13 @@
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	enum port port;
-	u32 val;
-	u32 port_ctrl = 0;
 
 	DRM_DEBUG_KMS("\n");
 	for_each_dsi_port(port, intel_dsi->ports) {
+		/* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */
+		i915_reg_t port_ctrl = IS_BROXTON(dev) ?
+			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A);
+		u32 val;
 
 		I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY |
 							ULPS_STATE_ENTER);
@@ -598,12 +604,6 @@
 							ULPS_STATE_ENTER);
 		usleep_range(2000, 2500);
 
-		if (IS_BROXTON(dev))
-			port_ctrl = BXT_MIPI_PORT_CTRL(port);
-		else if (IS_VALLEYVIEW(dev))
-			/* Common bit for both MIPI Port A & MIPI Port C */
-			port_ctrl = MIPI_PORT_CTRL(PORT_A);
-
 		/* Wait till Clock lanes are in LP-00 state for MIPI Port A
 		 * only. MIPI Port C has no similar bit for checking
 		 */
@@ -656,7 +656,6 @@
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	struct drm_device *dev = encoder->base.dev;
 	enum intel_display_power_domain power_domain;
-	u32 dpi_enabled, func, ctrl_reg;
 	enum port port;
 
 	DRM_DEBUG_KMS("\n");
@@ -667,9 +666,11 @@
 
 	/* XXX: this only works for one DSI output */
 	for_each_dsi_port(port, intel_dsi->ports) {
+		i915_reg_t ctrl_reg = IS_BROXTON(dev) ?
+			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
+		u32 dpi_enabled, func;
+
 		func = I915_READ(MIPI_DSI_FUNC_PRG(port));
-		ctrl_reg = IS_BROXTON(dev) ? BXT_MIPI_PORT_CTRL(port) :
-						MIPI_PORT_CTRL(port);
 		dpi_enabled = I915_READ(ctrl_reg) & DPI_ENABLE;
 
 		/* Due to some hardware limitations on BYT, MIPI Port C DPI
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 8492053..7161deb 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -44,6 +44,7 @@
 		.type = INTEL_DVO_CHIP_TMDS,
 		.name = "sil164",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = SIL164_ADDR,
 		.dev_ops = &sil164_ops,
 	},
@@ -51,6 +52,7 @@
 		.type = INTEL_DVO_CHIP_TMDS,
 		.name = "ch7xxx",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = CH7xxx_ADDR,
 		.dev_ops = &ch7xxx_ops,
 	},
@@ -58,6 +60,7 @@
 		.type = INTEL_DVO_CHIP_TMDS,
 		.name = "ch7xxx",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = 0x75, /* For some ch7010 */
 		.dev_ops = &ch7xxx_ops,
 	},
@@ -65,6 +68,7 @@
 		.type = INTEL_DVO_CHIP_LVDS,
 		.name = "ivch",
 		.dvo_reg = DVOA,
+		.dvo_srcdim_reg = DVOA_SRCDIM,
 		.slave_addr = 0x02, /* Might also be 0x44, 0x84, 0xc4 */
 		.dev_ops = &ivch_ops,
 	},
@@ -72,6 +76,7 @@
 		.type = INTEL_DVO_CHIP_TMDS,
 		.name = "tfp410",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = TFP410_ADDR,
 		.dev_ops = &tfp410_ops,
 	},
@@ -79,6 +84,7 @@
 		.type = INTEL_DVO_CHIP_LVDS,
 		.name = "ch7017",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = 0x75,
 		.gpio = GMBUS_PIN_DPB,
 		.dev_ops = &ch7017_ops,
@@ -87,6 +93,7 @@
 	        .type = INTEL_DVO_CHIP_TMDS,
 		.name = "ns2501",
 		.dvo_reg = DVOB,
+		.dvo_srcdim_reg = DVOB_SRCDIM,
 		.slave_addr = NS2501_ADDR,
 		.dev_ops = &ns2501_ops,
        }
@@ -171,7 +178,7 @@
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
-	u32 dvo_reg = intel_dvo->dev.dvo_reg;
+	i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg;
 	u32 temp = I915_READ(dvo_reg);
 
 	intel_dvo->dev.dev_ops->dpms(&intel_dvo->dev, false);
@@ -184,7 +191,7 @@
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
 	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
-	u32 dvo_reg = intel_dvo->dev.dvo_reg;
+	i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg;
 	u32 temp = I915_READ(dvo_reg);
 
 	intel_dvo->dev.dev_ops->mode_set(&intel_dvo->dev,
@@ -255,20 +262,8 @@
 	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
 	int pipe = crtc->pipe;
 	u32 dvo_val;
-	u32 dvo_reg = intel_dvo->dev.dvo_reg, dvo_srcdim_reg;
-
-	switch (dvo_reg) {
-	case DVOA:
-	default:
-		dvo_srcdim_reg = DVOA_SRCDIM;
-		break;
-	case DVOB:
-		dvo_srcdim_reg = DVOB_SRCDIM;
-		break;
-	case DVOC:
-		dvo_srcdim_reg = DVOC_SRCDIM;
-		break;
-	}
+	i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg;
+	i915_reg_t dvo_srcdim_reg = intel_dvo->dev.dvo_srcdim_reg;
 
 	/* Save the data order, since I don't know what it should be set to. */
 	dvo_val = I915_READ(dvo_reg) &
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index cf47352..11fc528 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -46,6 +46,11 @@
 	return dev_priv->fbc.enable_fbc != NULL;
 }
 
+static inline bool fbc_on_pipe_a_only(struct drm_i915_private *dev_priv)
+{
+	return IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8;
+}
+
 /*
  * On platforms where the CRTC's x:0/y:0 coordinates don't match the
  * frontbuffer's x:0/y:0 coordinates, we lie to the hardware about the plane's
@@ -182,7 +187,8 @@
 	return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
 }
 
-static void intel_fbc_nuke(struct drm_i915_private *dev_priv)
+/* This function forces a CFB recompression through the nuke operation. */
+static void intel_fbc_recompress(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(MSG_FBC_REND_STATE, FBC_REND_NUKE);
 	POSTING_READ(MSG_FBC_REND_STATE);
@@ -231,7 +237,7 @@
 		I915_WRITE(DPFC_CPU_FENCE_OFFSET, y_offset);
 	}
 
-	intel_fbc_nuke(dev_priv);
+	intel_fbc_recompress(dev_priv);
 
 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane));
 }
@@ -310,7 +316,7 @@
 		   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
 	I915_WRITE(DPFC_CPU_FENCE_OFFSET, get_crtc_fence_y_offset(crtc));
 
-	intel_fbc_nuke(dev_priv);
+	intel_fbc_recompress(dev_priv);
 
 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane));
 }
@@ -370,8 +376,6 @@
 	if (dev_priv->fbc.fbc_work == NULL)
 		return;
 
-	DRM_DEBUG_KMS("cancelling pending FBC enable\n");
-
 	/* Synchronisation is provided by struct_mutex and checking of
 	 * dev_priv->fbc.fbc_work, so we can perform the cancellation
 	 * entirely asynchronously.
@@ -432,7 +436,8 @@
 
 	intel_fbc_cancel_work(dev_priv);
 
-	dev_priv->fbc.disable_fbc(dev_priv);
+	if (dev_priv->fbc.enabled)
+		dev_priv->fbc.disable_fbc(dev_priv);
 	dev_priv->fbc.crtc = NULL;
 }
 
@@ -471,78 +476,45 @@
 	mutex_unlock(&dev_priv->fbc.lock);
 }
 
-const char *intel_no_fbc_reason_str(enum no_fbc_reason reason)
-{
-	switch (reason) {
-	case FBC_OK:
-		return "FBC enabled but currently disabled in hardware";
-	case FBC_UNSUPPORTED:
-		return "unsupported by this chipset";
-	case FBC_NO_OUTPUT:
-		return "no output";
-	case FBC_STOLEN_TOO_SMALL:
-		return "not enough stolen memory";
-	case FBC_UNSUPPORTED_MODE:
-		return "mode incompatible with compression";
-	case FBC_MODE_TOO_LARGE:
-		return "mode too large for compression";
-	case FBC_BAD_PLANE:
-		return "FBC unsupported on plane";
-	case FBC_NOT_TILED:
-		return "framebuffer not tiled or fenced";
-	case FBC_MULTIPLE_PIPES:
-		return "more than one pipe active";
-	case FBC_MODULE_PARAM:
-		return "disabled per module param";
-	case FBC_CHIP_DEFAULT:
-		return "disabled per chip default";
-	case FBC_ROTATION:
-		return "rotation unsupported";
-	case FBC_IN_DBG_MASTER:
-		return "Kernel debugger is active";
-	case FBC_BAD_STRIDE:
-		return "framebuffer stride not supported";
-	case FBC_PIXEL_RATE:
-		return "pixel rate is too big";
-	case FBC_PIXEL_FORMAT:
-		return "pixel format is invalid";
-	default:
-		MISSING_CASE(reason);
-		return "unknown reason";
-	}
-}
-
 static void set_no_fbc_reason(struct drm_i915_private *dev_priv,
-			      enum no_fbc_reason reason)
+			      const char *reason)
 {
 	if (dev_priv->fbc.no_fbc_reason == reason)
 		return;
 
 	dev_priv->fbc.no_fbc_reason = reason;
-	DRM_DEBUG_KMS("Disabling FBC: %s\n", intel_no_fbc_reason_str(reason));
+	DRM_DEBUG_KMS("Disabling FBC: %s\n", reason);
+}
+
+static bool crtc_is_valid(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+
+	if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A)
+		return false;
+
+	if (!intel_crtc_active(&crtc->base))
+		return false;
+
+	if (!to_intel_plane_state(crtc->base.primary->state)->visible)
+		return false;
+
+	return true;
 }
 
 static struct drm_crtc *intel_fbc_find_crtc(struct drm_i915_private *dev_priv)
 {
 	struct drm_crtc *crtc = NULL, *tmp_crtc;
 	enum pipe pipe;
-	bool pipe_a_only = false;
-
-	if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
-		pipe_a_only = true;
 
 	for_each_pipe(dev_priv, pipe) {
 		tmp_crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 
-		if (intel_crtc_active(tmp_crtc) &&
-		    to_intel_plane_state(tmp_crtc->primary->state)->visible)
+		if (crtc_is_valid(to_intel_crtc(tmp_crtc)))
 			crtc = tmp_crtc;
-
-		if (pipe_a_only)
-			break;
 	}
 
-	if (!crtc || crtc->primary->fb == NULL)
+	if (!crtc)
 		return NULL;
 
 	return crtc;
@@ -581,7 +553,8 @@
 	 * reserved range size, so it always assumes the maximum (8mb) is used.
 	 * If we enable FBC using a CFB on that memory range we'll get FIFO
 	 * underruns, even if that range is not reserved by the BIOS. */
-	if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
+	if (IS_BROADWELL(dev_priv) ||
+	    IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		end = dev_priv->gtt.stolen_size - 8 * 1024 * 1024;
 	else
 		end = dev_priv->gtt.stolen_usable_size;
@@ -734,6 +707,7 @@
 	if (INTEL_INFO(dev_priv)->gen >= 7)
 		lines = min(lines, 2048);
 
+	/* Hardware needs the full buffer stride, not just the active area. */
 	return lines * fb->pitches[0];
 }
 
@@ -832,84 +806,62 @@
  * __intel_fbc_update - enable/disable FBC as needed, unlocked
  * @dev_priv: i915 device instance
  *
- * Set up the framebuffer compression hardware at mode set time.  We
- * enable it if possible:
- *   - plane A only (on pre-965)
- *   - no pixel mulitply/line duplication
- *   - no alpha buffer discard
- *   - no dual wide
- *   - framebuffer <= max_hdisplay in width, max_vdisplay in height
- *
- * We can't assume that any compression will take place (worst case),
- * so the compressed buffer has to be the same size as the uncompressed
- * one.  It also must reside (along with the line length buffer) in
- * stolen memory.
- *
- * We need to enable/disable FBC on a global basis.
+ * This function completely reevaluates the status of FBC, then either
+ * enables it, disables it or leaves it in its current state.
  */
 static void __intel_fbc_update(struct drm_i915_private *dev_priv)
 {
-	struct drm_crtc *crtc = NULL;
-	struct intel_crtc *intel_crtc;
+	struct drm_crtc *drm_crtc = NULL;
+	struct intel_crtc *crtc;
 	struct drm_framebuffer *fb;
 	struct drm_i915_gem_object *obj;
 	const struct drm_display_mode *adjusted_mode;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
 
-	/* disable framebuffer compression in vGPU */
 	if (intel_vgpu_active(dev_priv->dev))
 		i915.enable_fbc = 0;
 
 	if (i915.enable_fbc < 0) {
-		set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT);
+		set_no_fbc_reason(dev_priv, "disabled per chip default");
 		goto out_disable;
 	}
 
 	if (!i915.enable_fbc) {
-		set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM);
+		set_no_fbc_reason(dev_priv, "disabled per module param");
 		goto out_disable;
 	}
 
-	/*
-	 * If FBC is already on, we just have to verify that we can
-	 * keep it that way...
-	 * Need to disable if:
-	 *   - more than one pipe is active
-	 *   - changing FBC params (stride, fence, mode)
-	 *   - new fb is too large to fit in compressed buffer
-	 *   - going to an unsupported config (interlace, pixel multiply, etc.)
-	 */
-	crtc = intel_fbc_find_crtc(dev_priv);
-	if (!crtc) {
-		set_no_fbc_reason(dev_priv, FBC_NO_OUTPUT);
+	drm_crtc = intel_fbc_find_crtc(dev_priv);
+	if (!drm_crtc) {
+		set_no_fbc_reason(dev_priv, "no output");
 		goto out_disable;
 	}
 
 	if (!multiple_pipes_ok(dev_priv)) {
-		set_no_fbc_reason(dev_priv, FBC_MULTIPLE_PIPES);
+		set_no_fbc_reason(dev_priv, "more than one pipe active");
 		goto out_disable;
 	}
 
-	intel_crtc = to_intel_crtc(crtc);
-	fb = crtc->primary->fb;
+	crtc = to_intel_crtc(drm_crtc);
+	fb = crtc->base.primary->fb;
 	obj = intel_fb_obj(fb);
-	adjusted_mode = &intel_crtc->config->base.adjusted_mode;
+	adjusted_mode = &crtc->config->base.adjusted_mode;
 
 	if ((adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) ||
 	    (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
-		set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE);
+		set_no_fbc_reason(dev_priv, "incompatible mode");
 		goto out_disable;
 	}
 
-	if (!intel_fbc_hw_tracking_covers_screen(intel_crtc)) {
-		set_no_fbc_reason(dev_priv, FBC_MODE_TOO_LARGE);
+	if (!intel_fbc_hw_tracking_covers_screen(crtc)) {
+		set_no_fbc_reason(dev_priv, "mode too large for compression");
 		goto out_disable;
 	}
 
 	if ((INTEL_INFO(dev_priv)->gen < 4 || HAS_DDI(dev_priv)) &&
-	    intel_crtc->plane != PLANE_A) {
-		set_no_fbc_reason(dev_priv, FBC_BAD_PLANE);
+	    crtc->plane != PLANE_A) {
+		set_no_fbc_reason(dev_priv, "FBC unsupported on plane");
 		goto out_disable;
 	}
 
@@ -918,41 +870,35 @@
 	 */
 	if (obj->tiling_mode != I915_TILING_X ||
 	    obj->fence_reg == I915_FENCE_REG_NONE) {
-		set_no_fbc_reason(dev_priv, FBC_NOT_TILED);
+		set_no_fbc_reason(dev_priv, "framebuffer not tiled or fenced");
 		goto out_disable;
 	}
 	if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) &&
-	    crtc->primary->state->rotation != BIT(DRM_ROTATE_0)) {
-		set_no_fbc_reason(dev_priv, FBC_ROTATION);
+	    crtc->base.primary->state->rotation != BIT(DRM_ROTATE_0)) {
+		set_no_fbc_reason(dev_priv, "rotation unsupported");
 		goto out_disable;
 	}
 
 	if (!stride_is_valid(dev_priv, fb->pitches[0])) {
-		set_no_fbc_reason(dev_priv, FBC_BAD_STRIDE);
+		set_no_fbc_reason(dev_priv, "framebuffer stride not supported");
 		goto out_disable;
 	}
 
 	if (!pixel_format_is_valid(fb)) {
-		set_no_fbc_reason(dev_priv, FBC_PIXEL_FORMAT);
-		goto out_disable;
-	}
-
-	/* If the kernel debugger is active, always disable compression */
-	if (in_dbg_master()) {
-		set_no_fbc_reason(dev_priv, FBC_IN_DBG_MASTER);
+		set_no_fbc_reason(dev_priv, "pixel format is invalid");
 		goto out_disable;
 	}
 
 	/* WaFbcExceedCdClockThreshold:hsw,bdw */
 	if ((IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) &&
-	    ilk_pipe_pixel_rate(intel_crtc->config) >=
+	    ilk_pipe_pixel_rate(crtc->config) >=
 	    dev_priv->cdclk_freq * 95 / 100) {
-		set_no_fbc_reason(dev_priv, FBC_PIXEL_RATE);
+		set_no_fbc_reason(dev_priv, "pixel rate is too big");
 		goto out_disable;
 	}
 
-	if (intel_fbc_setup_cfb(intel_crtc)) {
-		set_no_fbc_reason(dev_priv, FBC_STOLEN_TOO_SMALL);
+	if (intel_fbc_setup_cfb(crtc)) {
+		set_no_fbc_reason(dev_priv, "not enough stolen memory");
 		goto out_disable;
 	}
 
@@ -961,9 +907,9 @@
 	 * cannot be unpinned (and have its GTT offset and fence revoked)
 	 * without first being decoupled from the scanout and FBC disabled.
 	 */
-	if (dev_priv->fbc.crtc == intel_crtc &&
+	if (dev_priv->fbc.crtc == crtc &&
 	    dev_priv->fbc.fb_id == fb->base.id &&
-	    dev_priv->fbc.y == crtc->y)
+	    dev_priv->fbc.y == crtc->base.y)
 		return;
 
 	if (intel_fbc_enabled(dev_priv)) {
@@ -994,8 +940,8 @@
 		__intel_fbc_disable(dev_priv);
 	}
 
-	intel_fbc_schedule_enable(intel_crtc);
-	dev_priv->fbc.no_fbc_reason = FBC_OK;
+	intel_fbc_schedule_enable(crtc);
+	dev_priv->fbc.no_fbc_reason = "FBC enabled (not necessarily active)";
 	return;
 
 out_disable:
@@ -1085,10 +1031,10 @@
 	enum pipe pipe;
 
 	mutex_init(&dev_priv->fbc.lock);
+	dev_priv->fbc.enabled = false;
 
 	if (!HAS_FBC(dev_priv)) {
-		dev_priv->fbc.enabled = false;
-		dev_priv->fbc.no_fbc_reason = FBC_UNSUPPORTED;
+		dev_priv->fbc.no_fbc_reason = "unsupported by this chipset";
 		return;
 	}
 
@@ -1096,7 +1042,7 @@
 		dev_priv->fbc.possible_framebuffer_bits |=
 				INTEL_FRONTBUFFER_PRIMARY(pipe);
 
-		if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
+		if (fbc_on_pipe_a_only(dev_priv))
 			break;
 	}
 
@@ -1121,5 +1067,9 @@
 		I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT);
 	}
 
-	dev_priv->fbc.enabled = dev_priv->fbc.fbc_enabled(dev_priv);
+	/* We still don't have any sort of hardware state readout for FBC, so
+	 * if the BIOS enabled it, disable it here to keep the software state
+	 * in sync with the hardware. */
+	if (dev_priv->fbc.fbc_enabled(dev_priv))
+		dev_priv->fbc.disable_fbc(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 4fd5fdf..7ccde58 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -119,7 +119,7 @@
 {
 	struct intel_fbdev *ifbdev =
 		container_of(helper, struct intel_fbdev, helper);
-	struct drm_framebuffer *fb;
+	struct drm_framebuffer *fb = NULL;
 	struct drm_device *dev = helper->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_mode_fb_cmd2 mode_cmd = {};
@@ -138,6 +138,8 @@
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
 							  sizes->surface_depth);
 
+	mutex_lock(&dev->struct_mutex);
+
 	size = mode_cmd.pitches[0] * mode_cmd.height;
 	size = PAGE_ALIGN(size);
 
@@ -156,26 +158,28 @@
 
 	fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
 	if (IS_ERR(fb)) {
+		drm_gem_object_unreference(&obj->base);
 		ret = PTR_ERR(fb);
-		goto out_unref;
+		goto out;
 	}
 
 	/* Flush everything out, we'll be doing GTT only from now on */
-	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
+	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL);
 	if (ret) {
 		DRM_ERROR("failed to pin obj: %d\n", ret);
-		goto out_fb;
+		goto out;
 	}
 
+	mutex_unlock(&dev->struct_mutex);
+
 	ifbdev->fb = to_intel_framebuffer(fb);
 
 	return 0;
 
-out_fb:
-	drm_framebuffer_remove(fb);
-out_unref:
-	drm_gem_object_unreference(&obj->base);
 out:
+	mutex_unlock(&dev->struct_mutex);
+	if (!IS_ERR_OR_NULL(fb))
+		drm_framebuffer_unreference(fb);
 	return ret;
 }
 
@@ -193,8 +197,6 @@
 	int size, ret;
 	bool prealloc = false;
 
-	mutex_lock(&dev->struct_mutex);
-
 	if (intel_fb &&
 	    (sizes->fb_width > intel_fb->base.width ||
 	     sizes->fb_height > intel_fb->base.height)) {
@@ -209,7 +211,7 @@
 		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
 		ret = intelfb_alloc(helper, sizes);
 		if (ret)
-			goto out_unlock;
+			return ret;
 		intel_fb = ifbdev->fb;
 	} else {
 		DRM_DEBUG_KMS("re-using BIOS fb\n");
@@ -221,8 +223,11 @@
 	obj = intel_fb->obj;
 	size = obj->base.size;
 
+	mutex_lock(&dev->struct_mutex);
+
 	info = drm_fb_helper_alloc_fbi(helper);
 	if (IS_ERR(info)) {
+		DRM_ERROR("Failed to allocate fb_info\n");
 		ret = PTR_ERR(info);
 		goto out_unpin;
 	}
@@ -249,6 +254,7 @@
 		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
 			   size);
 	if (!info->screen_base) {
+		DRM_ERROR("Failed to remap framebuffer into virtual memory\n");
 		ret = -ENOSPC;
 		goto out_destroy_fbi;
 	}
@@ -281,8 +287,6 @@
 	drm_fb_helper_release_fbi(helper);
 out_unpin:
 	i915_gem_object_ggtt_unpin(obj);
-	drm_gem_object_unreference(&obj->base);
-out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
@@ -526,8 +530,10 @@
 
 	drm_fb_helper_fini(&ifbdev->helper);
 
-	drm_framebuffer_unregister_private(&ifbdev->fb->base);
-	drm_framebuffer_remove(&ifbdev->fb->base);
+	if (ifbdev->fb) {
+		drm_framebuffer_unregister_private(&ifbdev->fb->base);
+		drm_framebuffer_remove(&ifbdev->fb->base);
+	}
 }
 
 /*
@@ -702,13 +708,20 @@
 	return 0;
 }
 
-void intel_fbdev_initial_config(void *data, async_cookie_t cookie)
+static void intel_fbdev_initial_config(void *data, async_cookie_t cookie)
 {
 	struct drm_i915_private *dev_priv = data;
 	struct intel_fbdev *ifbdev = dev_priv->fbdev;
 
 	/* Due to peculiar init order wrt hpd handling this is separate. */
-	drm_fb_helper_initial_config(&ifbdev->helper, ifbdev->preferred_bpp);
+	if (drm_fb_helper_initial_config(&ifbdev->helper,
+					 ifbdev->preferred_bpp))
+		intel_fbdev_fini(dev_priv->dev);
+}
+
+void intel_fbdev_initial_config_async(struct drm_device *dev)
+{
+	async_schedule(intel_fbdev_initial_config, to_i915(dev));
 }
 
 void intel_fbdev_fini(struct drm_device *dev)
@@ -719,7 +732,8 @@
 
 	flush_work(&dev_priv->fbdev_suspend_work);
 
-	async_synchronize_full();
+	if (!current_is_async())
+		async_synchronize_full();
 	intel_fbdev_destroy(dev, dev_priv->fbdev);
 	kfree(dev_priv->fbdev);
 	dev_priv->fbdev = NULL;
diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c
index 54daa66..7ae182d 100644
--- a/drivers/gpu/drm/i915/intel_fifo_underrun.c
+++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c
@@ -84,38 +84,21 @@
 	return true;
 }
 
-/**
- * i9xx_check_fifo_underruns - check for fifo underruns
- * @dev_priv: i915 device instance
- *
- * This function checks for fifo underruns on GMCH platforms. This needs to be
- * done manually on modeset to make sure that we catch all underruns since they
- * do not generate an interrupt by themselves on these platforms.
- */
-void i9xx_check_fifo_underruns(struct drm_i915_private *dev_priv)
+static void i9xx_check_fifo_underruns(struct intel_crtc *crtc)
 {
-	struct intel_crtc *crtc;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	i915_reg_t reg = PIPESTAT(crtc->pipe);
+	u32 pipestat = I915_READ(reg) & 0xffff0000;
 
-	spin_lock_irq(&dev_priv->irq_lock);
+	assert_spin_locked(&dev_priv->irq_lock);
 
-	for_each_intel_crtc(dev_priv->dev, crtc) {
-		u32 reg = PIPESTAT(crtc->pipe);
-		u32 pipestat;
+	if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0)
+		return;
 
-		if (crtc->cpu_fifo_underrun_disabled)
-			continue;
+	I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS);
+	POSTING_READ(reg);
 
-		pipestat = I915_READ(reg) & 0xffff0000;
-		if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0)
-			continue;
-
-		I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS);
-		POSTING_READ(reg);
-
-		DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe));
-	}
-
-	spin_unlock_irq(&dev_priv->irq_lock);
+	DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe));
 }
 
 static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev,
@@ -123,7 +106,7 @@
 					     bool enable, bool old)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg = PIPESTAT(pipe);
+	i915_reg_t reg = PIPESTAT(pipe);
 	u32 pipestat = I915_READ(reg) & 0xffff0000;
 
 	assert_spin_locked(&dev_priv->irq_lock);
@@ -150,6 +133,23 @@
 		ironlake_disable_display_irq(dev_priv, bit);
 }
 
+static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+	uint32_t err_int = I915_READ(GEN7_ERR_INT);
+
+	assert_spin_locked(&dev_priv->irq_lock);
+
+	if ((err_int & ERR_INT_FIFO_UNDERRUN(pipe)) == 0)
+		return;
+
+	I915_WRITE(GEN7_ERR_INT, ERR_INT_FIFO_UNDERRUN(pipe));
+	POSTING_READ(GEN7_ERR_INT);
+
+	DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe));
+}
+
 static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev,
 						  enum pipe pipe,
 						  bool enable, bool old)
@@ -202,6 +202,24 @@
 		ibx_disable_display_interrupt(dev_priv, bit);
 }
 
+static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum transcoder pch_transcoder = (enum transcoder) crtc->pipe;
+	uint32_t serr_int = I915_READ(SERR_INT);
+
+	assert_spin_locked(&dev_priv->irq_lock);
+
+	if ((serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)) == 0)
+		return;
+
+	I915_WRITE(SERR_INT, SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder));
+	POSTING_READ(SERR_INT);
+
+	DRM_ERROR("pch fifo underrun on pch transcoder %c\n",
+		  transcoder_name(pch_transcoder));
+}
+
 static void cpt_set_fifo_underrun_reporting(struct drm_device *dev,
 					    enum transcoder pch_transcoder,
 					    bool enable, bool old)
@@ -375,3 +393,56 @@
 		DRM_ERROR("PCH transcoder %c FIFO underrun\n",
 			  transcoder_name(pch_transcoder));
 }
+
+/**
+ * intel_check_cpu_fifo_underruns - check for CPU fifo underruns immediately
+ * @dev_priv: i915 device instance
+ *
+ * Check for CPU fifo underruns immediately. Useful on IVB/HSW where the shared
+ * error interrupt may have been disabled, and so CPU fifo underruns won't
+ * necessarily raise an interrupt, and on GMCH platforms where underruns never
+ * raise an interrupt.
+ */
+void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+
+	for_each_intel_crtc(dev_priv->dev, crtc) {
+		if (crtc->cpu_fifo_underrun_disabled)
+			continue;
+
+		if (HAS_GMCH_DISPLAY(dev_priv))
+			i9xx_check_fifo_underruns(crtc);
+		else if (IS_GEN7(dev_priv))
+			ivybridge_check_fifo_underruns(crtc);
+	}
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+/**
+ * intel_check_pch_fifo_underruns - check for PCH fifo underruns immediately
+ * @dev_priv: i915 device instance
+ *
+ * Check for PCH fifo underruns immediately. Useful on CPT/PPT where the shared
+ * error interrupt may have been disabled, and so PCH fifo underruns won't
+ * necessarily raise an interrupt.
+ */
+void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+
+	for_each_intel_crtc(dev_priv->dev, crtc) {
+		if (crtc->pch_fifo_underrun_disabled)
+			continue;
+
+		if (HAS_PCH_CPT(dev_priv))
+			cpt_check_pch_fifo_underruns(crtc);
+	}
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
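
Both helpers above are meant to be paired with temporarily disabled underrun reporting: mask reporting around a sequence that is known to glitch the FIFO, then poll for anything that really happened before re-arming the interrupts. A hypothetical wrapper sketching one way such a pairing could look is below; the function name and the commented-out hardware poke are placeholders (actual call sites live elsewhere in the modeset code), and the sketch assumes the driver's internal headers.

/*
 * Hypothetical sketch (not driver code): mask underrun reporting around a
 * hardware sequence known to trigger spurious FIFO underruns, then report
 * anything that really happened before re-enabling the interrupts.
 */
static void do_glitchy_sequence_quietly(struct drm_i915_private *dev_priv,
					enum pipe pipe)
{
	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
	intel_set_pch_fifo_underrun_reporting(dev_priv, (enum transcoder) pipe,
					      false);

	/* ... poke the hardware here ... */

	intel_wait_for_vblank_if_active(dev_priv->dev, pipe);

	/* Pick up underruns that never raised an interrupt (GMCH, IVB/HSW). */
	intel_check_cpu_fifo_underruns(dev_priv);
	intel_check_pch_fifo_underruns(dev_priv);

	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
	intel_set_pch_fifo_underrun_reporting(dev_priv, (enum transcoder) pipe,
					      true);
}
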
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 081d5f6..5ba5866 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -76,11 +76,17 @@
 	uint16_t			guc_fw_minor_wanted;
 	uint16_t			guc_fw_major_found;
 	uint16_t			guc_fw_minor_found;
+
+	uint32_t header_size;
+	uint32_t header_offset;
+	uint32_t rsa_size;
+	uint32_t rsa_offset;
+	uint32_t ucode_size;
+	uint32_t ucode_offset;
 };
 
 struct intel_guc {
 	struct intel_guc_fw guc_fw;
-
 	uint32_t log_flags;
 	struct drm_i915_gem_object *log_obj;
 
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 593d2f5..40b2ea5 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -122,6 +122,78 @@
 
 #define GUC_CTL_MAX_DWORDS		(GUC_CTL_RSRVD + 1)
 
+/**
+ * DOC: GuC Firmware Layout
+ *
+ * The GuC firmware layout looks like this:
+ *
+ *     +-------------------------------+
+ *     |        guc_css_header         |
+ *     | contains major/minor version  |
+ *     +-------------------------------+
+ *     |             uCode             |
+ *     +-------------------------------+
+ *     |         RSA signature         |
+ *     +-------------------------------+
+ *     |          modulus key          |
+ *     +-------------------------------+
+ *     |          exponent val         |
+ *     +-------------------------------+
+ *
+ * The firmware may or may not contain the modulus key and exponent data. The
+ * header, uCode and RSA signature are mandatory components that the driver
+ * uses. The length of each component, in dwords, can be found in the header.
+ * If the modulus and exponent are not present in the firmware (i.e. a
+ * truncated image), their length values still appear in the header.
+ *
+ * The driver does some basic firmware size validation based on these rules:
+ *
+ * 1. The header, uCode and RSA signature are mandatory components.
+ * 2. All firmware components, if present, appear in the order shown in the
+ * layout above.
+ * 3. The length of each component, in dwords, can be found in the header.
+ * 4. The modulus and exponent keys are not required by the driver; when they
+ * are absent, the driver loads the truncated firmware image.
+ */
+
+struct guc_css_header {
+	uint32_t module_type;
+	/* header_size includes all non-uCode bits, including css_header, rsa
+	 * key, modulus key and exponent data. */
+	uint32_t header_size_dw;
+	uint32_t header_version;
+	uint32_t module_id;
+	uint32_t module_vendor;
+	union {
+		struct {
+			uint8_t day;
+			uint8_t month;
+			uint16_t year;
+		};
+		uint32_t date;
+	};
+	uint32_t size_dw; /* uCode plus header_size_dw */
+	uint32_t key_size_dw;
+	uint32_t modulus_size_dw;
+	uint32_t exponent_size_dw;
+	union {
+		struct {
+			uint8_t hour;
+			uint8_t min;
+			uint16_t sec;
+		};
+		uint32_t time;
+	};
+
+	char username[8];
+	char buildnumber[12];
+	uint32_t device_id;
+	uint32_t guc_sw_version;
+	uint32_t prod_preprod_fw;
+	uint32_t reserved[12];
+	uint32_t header_info;
+} __packed;
+
 struct guc_doorbell_info {
 	u32 db_status;
 	u32 cookie;
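
To make the offset arithmetic implied by the GuC Firmware Layout rules above concrete, here is a small standalone sketch that derives each component's offset and size from the dword counts in the CSS header. The struct below is a trimmed stand-in for guc_css_header, and the example numbers (64-dword key and modulus, 1-dword exponent, 32768 dwords of uCode) are assumptions for illustration, not values read from a real firmware image; the real parsing lives in intel_guc_loader.c further down.

#include <stdint.h>
#include <stdio.h>

/* Trimmed stand-in for struct guc_css_header: only the size fields
 * (all in dwords) that drive the layout arithmetic. */
struct css_sizes {
	uint32_t header_size_dw;	/* everything that is not uCode */
	uint32_t size_dw;		/* uCode plus header_size_dw */
	uint32_t key_size_dw;		/* RSA signature */
	uint32_t modulus_size_dw;
	uint32_t exponent_size_dw;
};

int main(void)
{
	/* Illustrative example numbers, not from a real blob. */
	struct css_sizes css = {
		.header_size_dw   = 161,	/* 32 (CSS) + 64 + 64 + 1 */
		.size_dw	  = 161 + 32768,
		.key_size_dw	  = 64,
		.modulus_size_dw  = 64,
		.exponent_size_dw = 1,
	};
	uint32_t header_offset = 0;
	uint32_t header_size = (css.header_size_dw - css.modulus_size_dw -
				css.key_size_dw - css.exponent_size_dw) * 4;
	uint32_t ucode_offset = header_offset + header_size;
	uint32_t ucode_size = (css.size_dw - css.header_size_dw) * 4;
	uint32_t rsa_offset = ucode_offset + ucode_size;
	uint32_t rsa_size = css.key_size_dw * 4;

	printf("header: %u bytes at offset %u\n", header_size, header_offset);
	printf("uCode:  %u bytes at offset %u\n", ucode_size, ucode_offset);
	printf("RSA:    %u bytes at offset %u\n", rsa_size, rsa_offset);
	return 0;
}
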
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 3541f76..550921f 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -31,7 +31,7 @@
 #include "intel_guc.h"
 
 /**
- * DOC: GuC
+ * DOC: GuC-specific firmware loader
  *
  * intel_guc:
  * Top level structure of guc. It handles firmware loading and manages client
@@ -208,16 +208,6 @@
 /*
  * Transfer the firmware image to RAM for execution by the microcontroller.
  *
- * GuC Firmware layout:
- * +-------------------------------+  ----
- * |          CSS header           |  128B
- * | contains major/minor version  |
- * +-------------------------------+  ----
- * |             uCode             |
- * +-------------------------------+  ----
- * |         RSA signature         |  256B
- * +-------------------------------+  ----
- *
  * Architecturally, the DMA engine is bidirectional, and can potentially even
  * transfer between GTT locations. This functionality is left out of the API
  * for now as there is no need for it.
@@ -225,33 +215,29 @@
  * Note that GuC needs the CSS header plus uKernel code to be copied by the
  * DMA engine in one operation, whereas the RSA signature is loaded via MMIO.
  */
-
-#define UOS_CSS_HEADER_OFFSET		0
-#define UOS_VER_MINOR_OFFSET		0x44
-#define UOS_VER_MAJOR_OFFSET		0x46
-#define UOS_CSS_HEADER_SIZE		0x80
-#define UOS_RSA_SIG_SIZE		0x100
-
 static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
 	struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj;
 	unsigned long offset;
 	struct sg_table *sg = fw_obj->pages;
-	u32 status, ucode_size, rsa[UOS_RSA_SIG_SIZE / sizeof(u32)];
+	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
 	int i, ret = 0;
 
-	/* uCode size, also is where RSA signature starts */
-	offset = ucode_size = guc_fw->guc_fw_size - UOS_RSA_SIG_SIZE;
-	I915_WRITE(DMA_COPY_SIZE, ucode_size);
+	/* where RSA signature starts */
+	offset = guc_fw->rsa_offset;
 
 	/* Copy RSA signature from the fw image to HW for verification */
-	sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, UOS_RSA_SIG_SIZE, offset);
-	for (i = 0; i < UOS_RSA_SIG_SIZE / sizeof(u32); i++)
+	sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa), offset);
+	for (i = 0; i < UOS_RSA_SCRATCH_MAX_COUNT; i++)
 		I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]);
 
+	/* The header plus uCode will be copied to WOPCM via DMA, excluding any
+	 * other components */
+	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
+
 	/* Set the source address for the new blob */
-	offset = i915_gem_obj_ggtt_offset(fw_obj);
+	offset = i915_gem_obj_ggtt_offset(fw_obj) + guc_fw->header_offset;
 	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
 	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
 
@@ -322,8 +308,8 @@
 	I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE);
 
 	/* WaDisableMinuteIaClockGating:skl,bxt */
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0)) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		I915_WRITE(GUC_SHIM_CONTROL, (I915_READ(GUC_SHIM_CONTROL) &
 					      ~GUC_ENABLE_MIA_CLOCK_GATING));
 	}
@@ -378,6 +364,9 @@
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
 	int err = 0;
 
+	if (!i915.enable_guc_submission)
+		return 0;
+
 	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
 		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
 		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
@@ -457,10 +446,8 @@
 {
 	struct drm_i915_gem_object *obj;
 	const struct firmware *fw;
-	const u8 *css_header;
-	const size_t minsize = UOS_CSS_HEADER_SIZE + UOS_RSA_SIG_SIZE;
-	const size_t maxsize = GUC_WOPCM_SIZE_VALUE + UOS_RSA_SIG_SIZE
-			- 0x8000; /* 32k reserved (8K stack + 24k context) */
+	struct guc_css_header *css;
+	size_t size;
 	int err;
 
 	DRM_DEBUG_DRIVER("before requesting firmware: GuC fw fetch status %s\n",
@@ -474,12 +461,52 @@
 
 	DRM_DEBUG_DRIVER("fetch GuC fw from %s succeeded, fw %p\n",
 		guc_fw->guc_fw_path, fw);
-	DRM_DEBUG_DRIVER("firmware file size %zu (minimum %zu, maximum %zu)\n",
-		fw->size, minsize, maxsize);
 
-	/* Check the size of the blob befoe examining buffer contents */
-	if (fw->size < minsize || fw->size > maxsize)
+	/* Check the size of the blob before examining buffer contents */
+	if (fw->size < sizeof(struct guc_css_header)) {
+		DRM_ERROR("Firmware header is missing\n");
 		goto fail;
+	}
+
+	css = (struct guc_css_header *)fw->data;
+
+	/* Firmware bits always start from header */
+	guc_fw->header_offset = 0;
+	guc_fw->header_size = (css->header_size_dw - css->modulus_size_dw -
+		css->key_size_dw - css->exponent_size_dw) * sizeof(u32);
+
+	if (guc_fw->header_size != sizeof(struct guc_css_header)) {
+		DRM_ERROR("CSS header definition mismatch\n");
+		goto fail;
+	}
+
+	/* then, uCode */
+	guc_fw->ucode_offset = guc_fw->header_offset + guc_fw->header_size;
+	guc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+	/* now RSA */
+	if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) {
+		DRM_ERROR("RSA key size is bad\n");
+		goto fail;
+	}
+	guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size;
+	guc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+	/* The blob must contain at least the header, uCode and RSA signature;
+	 * check it against the combined size of all three. */
+	size = guc_fw->header_size + guc_fw->ucode_size + guc_fw->rsa_size;
+	if (fw->size < size) {
+		DRM_ERROR("Missing firmware components\n");
+		goto fail;
+	}
+
+	/* Header and uCode will be loaded to WOPCM. Size of the two. */
+	size = guc_fw->header_size + guc_fw->ucode_size;
+
+	/* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */
+	if (size > GUC_WOPCM_SIZE_VALUE - 0x8000) {
+		DRM_ERROR("Firmware is too large to fit in WOPCM\n");
+		goto fail;
+	}
 
 	/*
 	 * The GuC firmware image has the version number embedded at a well-known
@@ -487,9 +514,8 @@
 	 * TWO bytes each (i.e. u16), although all pointers and offsets are defined
 	 * in terms of bytes (u8).
 	 */
-	css_header = fw->data + UOS_CSS_HEADER_OFFSET;
-	guc_fw->guc_fw_major_found = *(u16 *)(css_header + UOS_VER_MAJOR_OFFSET);
-	guc_fw->guc_fw_minor_found = *(u16 *)(css_header + UOS_VER_MINOR_OFFSET);
+	guc_fw->guc_fw_major_found = css->guc_sw_version >> 16;
+	guc_fw->guc_fw_minor_found = css->guc_sw_version & 0xFFFF;
 
 	if (guc_fw->guc_fw_major_found != guc_fw->guc_fw_major_wanted ||
 	    guc_fw->guc_fw_minor_found < guc_fw->guc_fw_minor_wanted) {
@@ -566,6 +592,9 @@
 		fw_path = "";	/* unknown device */
 	}
 
+	if (!i915.enable_guc_submission)
+		return;
+
 	guc_fw->guc_dev = dev;
 	guc_fw->guc_fw_path = fw_path;
 	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 9eafa19..bdd462e 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -113,10 +113,11 @@
 	}
 }
 
-static u32 hsw_dip_data_reg(struct drm_i915_private *dev_priv,
-			    enum transcoder cpu_transcoder,
-			    enum hdmi_infoframe_type type,
-			    int i)
+static i915_reg_t
+hsw_dip_data_reg(struct drm_i915_private *dev_priv,
+		 enum transcoder cpu_transcoder,
+		 enum hdmi_infoframe_type type,
+		 int i)
 {
 	switch (type) {
 	case HDMI_INFOFRAME_TYPE_AVI:
@@ -127,7 +128,7 @@
 		return HSW_TVIDEO_DIP_VS_DATA(cpu_transcoder, i);
 	default:
 		DRM_DEBUG_DRIVER("unknown info frame type %d\n", type);
-		return 0;
+		return INVALID_MMIO_REG;
 	}
 }
 
@@ -193,8 +194,9 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int i, reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
+	int i;
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
@@ -229,7 +231,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
-	int reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
 	if ((val & VIDEO_DIP_ENABLE) == 0)
@@ -251,8 +253,9 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int i, reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
+	int i;
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
@@ -289,8 +292,7 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
-	u32 val = I915_READ(reg);
+	u32 val = I915_READ(TVIDEO_DIP_CTL(intel_crtc->pipe));
 
 	if ((val & VIDEO_DIP_ENABLE) == 0)
 		return false;
@@ -308,8 +310,9 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int i, reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
+	int i;
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
@@ -344,8 +347,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
-	int reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
-	u32 val = I915_READ(reg);
+	u32 val = I915_READ(VLV_TVIDEO_DIP_CTL(intel_crtc->pipe));
 
 	if ((val & VIDEO_DIP_ENABLE) == 0)
 		return false;
@@ -367,13 +369,13 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
-	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder);
-	u32 data_reg;
+	i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder);
+	i915_reg_t data_reg;
 	int i;
 	u32 val = I915_READ(ctl_reg);
 
 	data_reg = hsw_dip_data_reg(dev_priv, cpu_transcoder, type, 0);
-	if (data_reg == 0)
+	if (!i915_mmio_reg_valid(data_reg))
 		return;
 
 	val &= ~hsw_infoframe_enable(type);
@@ -401,8 +403,7 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(intel_crtc->config->cpu_transcoder);
-	u32 val = I915_READ(ctl_reg);
+	u32 val = I915_READ(HSW_TVIDEO_DIP_CTL(intel_crtc->config->cpu_transcoder));
 
 	return val & (VIDEO_DIP_ENABLE_VSC_HSW | VIDEO_DIP_ENABLE_AVI_HSW |
 		      VIDEO_DIP_ENABLE_GCP_HSW | VIDEO_DIP_ENABLE_VS_HSW |
@@ -513,7 +514,7 @@
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
-	u32 reg = VIDEO_DIP_CTL;
+	i915_reg_t reg = VIDEO_DIP_CTL;
 	u32 val = I915_READ(reg);
 	u32 port = VIDEO_DIP_PORT(intel_dig_port->port);
 
@@ -633,7 +634,8 @@
 {
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
 	struct intel_crtc *crtc = to_intel_crtc(encoder->crtc);
-	u32 reg, val = 0;
+	i915_reg_t reg;
+	u32 val = 0;
 
 	if (HAS_DDI(dev_priv))
 		reg = HSW_TVIDEO_DIP_GCP(crtc->config->cpu_transcoder);
@@ -666,7 +668,7 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
-	u32 reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 	u32 port = VIDEO_DIP_PORT(intel_dig_port->port);
 
@@ -717,7 +719,7 @@
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
-	u32 reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
 	assert_hdmi_port_disabled(intel_hdmi);
@@ -760,7 +762,7 @@
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
-	u32 reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 	u32 port = VIDEO_DIP_PORT(intel_dig_port->port);
 
@@ -811,7 +813,7 @@
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
-	u32 reg = HSW_TVIDEO_DIP_CTL(intel_crtc->config->cpu_transcoder);
+	i915_reg_t reg = HSW_TVIDEO_DIP_CTL(intel_crtc->config->cpu_transcoder);
 	u32 val = I915_READ(reg);
 
 	assert_hdmi_port_disabled(intel_hdmi);
@@ -1108,6 +1110,13 @@
 	 * matching DP port to be enabled on transcoder A.
 	 */
 	if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B) {
+		/*
+		 * We get CPU/PCH FIFO underruns on the other pipe when
+		 * doing the workaround. Sweep them under the rug.
+		 */
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+
 		temp &= ~SDVO_PIPE_B_SELECT;
 		temp |= SDVO_ENABLE;
 		/*
@@ -1122,6 +1131,10 @@
 		temp &= ~SDVO_ENABLE;
 		I915_WRITE(intel_hdmi->hdmi_reg, temp);
 		POSTING_READ(intel_hdmi->hdmi_reg);
+
+		intel_wait_for_vblank_if_active(dev_priv->dev, PIPE_A);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, true);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
 	}
 
 	intel_hdmi->set_infoframes(&encoder->base, false, NULL);
@@ -1335,21 +1348,18 @@
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
-	struct intel_encoder *intel_encoder =
-		&hdmi_to_dig_port(intel_hdmi)->base;
-	enum intel_display_power_domain power_domain;
 	struct edid *edid = NULL;
 	bool connected = false;
 
-	power_domain = intel_display_port_power_domain(intel_encoder);
-	intel_display_power_get(dev_priv, power_domain);
+	if (force) {
+		intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 
-	if (force)
 		edid = drm_get_edid(connector,
 				    intel_gmbus_get_adapter(dev_priv,
 				    intel_hdmi->ddc_bus));
 
-	intel_display_power_put(dev_priv, power_domain);
+		intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	}
 
 	to_intel_connector(connector)->detect_edid = edid;
 	if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) {
@@ -1383,6 +1393,8 @@
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
 
+	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+
 	while (!live_status && --retry) {
 		live_status = intel_digital_port_connected(dev_priv,
 				hdmi_to_dig_port(intel_hdmi));
@@ -1402,6 +1414,8 @@
 	} else
 		status = connector_status_disconnected;
 
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+
 	return status;
 }
 
@@ -2039,7 +2053,7 @@
 		 * On BXT A0/A1, sw needs to activate DDIA HPD logic and
 		 * interrupts to check the external panel connection.
 		 */
-		if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0))
+		if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
 			intel_encoder->hpd_pin = HPD_PORT_A;
 		else
 			intel_encoder->hpd_pin = HPD_PORT_B;
@@ -2131,7 +2145,8 @@
 	}
 }
 
-void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port)
+void intel_hdmi_init(struct drm_device *dev,
+		     i915_reg_t hdmi_reg, enum port port)
 {
 	struct intel_digital_port *intel_dig_port;
 	struct intel_encoder *intel_encoder;
@@ -2202,7 +2217,7 @@
 
 	intel_dig_port->port = port;
 	intel_dig_port->hdmi.hdmi_reg = hdmi_reg;
-	intel_dig_port->dp.output_reg = 0;
+	intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 
 	intel_hdmi_init_connector(intel_dig_port, intel_connector);
 }
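The hunks above are part of the tree-wide switch from bare u32/int register offsets to the opaque i915_reg_t type, so a raw integer can no longer be passed where a register is expected and an unset register can be tested with i915_mmio_reg_valid(). Below is a minimal standalone sketch of how such a wrapper can be structured; only the names (i915_reg_t, _MMIO(), i915_mmio_reg_offset(), i915_mmio_reg_valid(), INVALID_MMIO_REG) come from the diff, while the struct layout, helper bodies and the example offset are assumptions for illustration.

  /* Userspace model of the typesafe MMIO register pattern.  The real
   * definitions live in i915_reg.h; this only assumes the wrapper is a
   * one-member struct so the compiler rejects raw integers where an
   * i915_reg_t is expected.
   */
  #include <stdint.h>
  #include <stdio.h>

  typedef struct {
  	uint32_t reg;			/* raw MMIO offset */
  } i915_reg_t;

  #define _MMIO(r)		((i915_reg_t){ .reg = (r) })
  #define INVALID_MMIO_REG	_MMIO(0)

  static inline uint32_t i915_mmio_reg_offset(i915_reg_t r)
  {
  	return r.reg;
  }

  static inline int i915_mmio_reg_valid(i915_reg_t r)
  {
  	return i915_mmio_reg_offset(r) != 0;
  }

  /* Example register definition, offset chosen for illustration. */
  #define VIDEO_DIP_CTL	_MMIO(0x61170)

  int main(void)
  {
  	i915_reg_t reg = VIDEO_DIP_CTL;

  	if (i915_mmio_reg_valid(reg))
  		printf("reading offset 0x%x\n",
  		       (unsigned)i915_mmio_reg_offset(reg));

  	/* I915_READ(0x61170) would no longer compile: an int is not an
  	 * i915_reg_t, which is the whole point of the conversion. */
  	return 0;
  }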
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 1369fc4..1110c83 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -36,7 +36,7 @@
 
 struct gmbus_pin {
 	const char *name;
-	int reg;
+	i915_reg_t reg;
 };
 
 /* Map gmbus pin pairs to names and registers. */
@@ -63,9 +63,9 @@
 };
 
 static const struct gmbus_pin gmbus_pins_bxt[] = {
-	[GMBUS_PIN_1_BXT] = { "dpb", PCH_GPIOB },
-	[GMBUS_PIN_2_BXT] = { "dpc", PCH_GPIOC },
-	[GMBUS_PIN_3_BXT] = { "misc", PCH_GPIOD },
+	[GMBUS_PIN_1_BXT] = { "dpb", GPIOB },
+	[GMBUS_PIN_2_BXT] = { "dpc", GPIOC },
+	[GMBUS_PIN_3_BXT] = { "misc", GPIOD },
 };
 
 /* pin is expected to be valid */
@@ -74,7 +74,7 @@
 {
 	if (IS_BROXTON(dev_priv))
 		return &gmbus_pins_bxt[pin];
-	else if (IS_SKYLAKE(dev_priv))
+	else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		return &gmbus_pins_skl[pin];
 	else if (IS_BROADWELL(dev_priv))
 		return &gmbus_pins_bdw[pin];
@@ -89,14 +89,15 @@
 
 	if (IS_BROXTON(dev_priv))
 		size = ARRAY_SIZE(gmbus_pins_bxt);
-	else if (IS_SKYLAKE(dev_priv))
+	else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		size = ARRAY_SIZE(gmbus_pins_skl);
 	else if (IS_BROADWELL(dev_priv))
 		size = ARRAY_SIZE(gmbus_pins_bdw);
 	else
 		size = ARRAY_SIZE(gmbus_pins);
 
-	return pin < size && get_gmbus_pin(dev_priv, pin)->reg;
+	return pin < size &&
+		i915_mmio_reg_valid(get_gmbus_pin(dev_priv, pin)->reg);
 }
 
 /* Intel GPIO access functions */
@@ -240,9 +241,8 @@
 
 	algo = &bus->bit_algo;
 
-	bus->gpio_reg = dev_priv->gpio_mmio_base +
-		get_gmbus_pin(dev_priv, pin)->reg;
-
+	bus->gpio_reg = _MMIO(dev_priv->gpio_mmio_base +
+			      i915_mmio_reg_offset(get_gmbus_pin(dev_priv, pin)->reg));
 	bus->adapter.algo_data = algo;
 	algo->setsda = set_data;
 	algo->setscl = set_clock;
@@ -483,7 +483,7 @@
 	int i = 0, inc, try = 0;
 	int ret = 0;
 
-	intel_aux_display_runtime_get(dev_priv);
+	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 	mutex_lock(&dev_priv->gmbus_mutex);
 
 	if (bus->force_bit) {
@@ -595,7 +595,9 @@
 
 out:
 	mutex_unlock(&dev_priv->gmbus_mutex);
-	intel_aux_display_runtime_put(dev_priv);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+
 	return ret;
 }
 
@@ -626,12 +628,13 @@
 
 	if (HAS_PCH_NOP(dev))
 		return 0;
-	else if (HAS_PCH_SPLIT(dev))
-		dev_priv->gpio_mmio_base = PCH_GPIOA - GPIOA;
-	else if (IS_VALLEYVIEW(dev))
+
+	if (IS_VALLEYVIEW(dev))
 		dev_priv->gpio_mmio_base = VLV_DISPLAY_BASE;
-	else
-		dev_priv->gpio_mmio_base = 0;
+	else if (!HAS_GMCH_DISPLAY(dev_priv))
+		dev_priv->gpio_mmio_base =
+			i915_mmio_reg_offset(PCH_GPIOA) -
+			i915_mmio_reg_offset(GPIOA);
 
 	mutex_init(&dev_priv->gmbus_mutex);
 	init_waitqueue_head(&dev_priv->gmbus_wait_queue);
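The gmbus rework above relies on two details worth spelling out: the pin tables are sparse, with unused slots carrying an invalid register so the validity helper can reject them, and the PCH gpio_mmio_base is now derived as i915_mmio_reg_offset(PCH_GPIOA) - i915_mmio_reg_offset(GPIOA) rather than kept as a hard-coded constant. A hedged sketch of the sparse-table lookup, with made-up pin numbers, names and offsets:

  /* Standalone model of the sparse pin-table lookup used above.
   * Pin numbers, names and offsets are illustrative only.
   */
  #include <stdint.h>
  #include <stdio.h>

  typedef struct { uint32_t reg; } i915_reg_t;
  #define _MMIO(r)	((i915_reg_t){ .reg = (r) })

  static inline int i915_mmio_reg_valid(i915_reg_t r) { return r.reg != 0; }

  struct gmbus_pin {
  	const char *name;
  	i915_reg_t reg;
  };

  /* Sparse table: slots 0 and 2 are intentionally left invalid. */
  static const struct gmbus_pin gmbus_pins_example[] = {
  	[1] = { "dpb", _MMIO(0x5014) },
  	[3] = { "dpc", _MMIO(0x5018) },
  };

  static int gmbus_pin_valid(unsigned int pin)
  {
  	return pin < sizeof(gmbus_pins_example) / sizeof(gmbus_pins_example[0]) &&
  	       i915_mmio_reg_valid(gmbus_pins_example[pin].reg);
  }

  int main(void)
  {
  	unsigned int pin;

  	for (pin = 0; pin < 4; pin++)
  		printf("pin %u: %s\n", pin,
  		       gmbus_pin_valid(pin) ? gmbus_pins_example[pin].name
  					    : "invalid");
  	return 0;
  }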
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 88e12bd..4ebafab 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -190,16 +190,21 @@
 #define GEN8_CTX_L3LLC_COHERENT (1<<5)
 #define GEN8_CTX_PRIVILEGE (1<<8)
 
-#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
+#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \
+	(reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
+	(reg_state)[(pos)+1] = (val); \
+} while (0)
+
+#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do {		\
 	const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n));	\
 	reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
 	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
-}
+} while (0)
 
-#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
+#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
 	reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
 	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
-}
+} while (0)
 
 enum {
 	ADVANCED_CONTEXT = 0,
@@ -284,8 +289,8 @@
 {
 	struct drm_device *dev = ring->dev;
 
-	return ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
-		(IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0)) &&
+	return (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+		IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
 	       (ring->id == VCS || ring->id == VCS2);
 }
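The hunk above (and several below) replaces open-coded "IS_SKYLAKE(dev) && INTEL_REVID(dev) <= ..." chains with IS_SKL_REVID()/IS_BXT_REVID(). Assuming these helpers combine the platform check with an inclusive [since, until] revision range (which is how the converted call sites read), a minimal model looks like this; the structure and revision values below are stand-ins rather than the i915_drv.h definitions:

  /* Hedged model of the revid-range helpers; the real macros live in
   * i915_drv.h and may differ in detail.
   */
  #include <stdint.h>
  #include <stdio.h>

  struct fake_device {
  	int is_skylake;
  	int is_broxton;
  	uint8_t revid;
  };

  #define INTEL_REVID(dev)	((dev)->revid)
  #define IS_SKYLAKE(dev)		((dev)->is_skylake)
  #define IS_BROXTON(dev)		((dev)->is_broxton)

  /* Inclusive [since, until] revision range, gated on the platform. */
  #define IS_SKL_REVID(dev, since, until) \
  	(IS_SKYLAKE(dev) && INTEL_REVID(dev) >= (since) && INTEL_REVID(dev) <= (until))
  #define IS_BXT_REVID(dev, since, until) \
  	(IS_BROXTON(dev) && INTEL_REVID(dev) >= (since) && INTEL_REVID(dev) <= (until))

  #define SKL_REVID_B0	0x1	/* stand-in value */

  int main(void)
  {
  	struct fake_device skl_b0 = { .is_skylake = 1, .revid = 0x1 };

  	/* Mirrors the workaround checks above: apply only up to B0. */
  	printf("needs WA: %d\n", IS_SKL_REVID(&skl_b0, 0, SKL_REVID_B0));
  	return 0;
  }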
 
@@ -921,7 +926,7 @@
 
 		intel_logical_ring_emit(ringbuf, MI_NOOP);
 		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
-		intel_logical_ring_emit(ringbuf, INSTPM);
+		intel_logical_ring_emit_reg(ringbuf, INSTPM);
 		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
 		intel_logical_ring_advance(ringbuf);
 
@@ -1096,7 +1101,7 @@
 
 	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
 	for (i = 0; i < w->count; i++) {
-		intel_logical_ring_emit(ringbuf, w->reg[i].addr);
+		intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr);
 		intel_logical_ring_emit(ringbuf, w->reg[i].value);
 	}
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
@@ -1120,6 +1125,8 @@
 		batch[__index] = (cmd);					\
 	} while (0)
 
+#define wa_ctx_emit_reg(batch, index, reg) \
+	wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg))
 
 /*
  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
@@ -1149,17 +1156,17 @@
 	 * this batch updates GEN8_L3SQCREG4 with default value we need to
 	 * set this bit here to retain the WA during flush.
 	 */
-	if (IS_SKYLAKE(ring->dev) && INTEL_REVID(ring->dev) <= SKL_REVID_E0)
+	if (IS_SKL_REVID(ring->dev, 0, SKL_REVID_E0))
 		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
 
 	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
 				   MI_SRM_LRM_GLOBAL_GTT));
-	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
+	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
 	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
 	wa_ctx_emit(batch, index, 0);
 
 	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
-	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
+	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
 	wa_ctx_emit(batch, index, l3sqc4_flush);
 
 	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
@@ -1172,7 +1179,7 @@
 
 	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
 				   MI_SRM_LRM_GLOBAL_GTT));
-	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
+	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
 	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
 	wa_ctx_emit(batch, index, 0);
 
@@ -1314,8 +1321,8 @@
 	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
 	/* WaDisableCtxRestoreArbitration:skl,bxt */
-	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
-	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
@@ -1340,18 +1347,18 @@
 	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
 	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
-	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_B0)) ||
-	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0))) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
-		wa_ctx_emit(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
+		wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
 		wa_ctx_emit(batch, index,
 			    _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
 		wa_ctx_emit(batch, index, MI_NOOP);
 	}
 
 	/* WaDisableCtxRestoreArbitration:skl,bxt */
-	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
-	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
 
 	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
@@ -1472,12 +1479,6 @@
 	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
 	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
 
-	if (ring->status_page.obj) {
-		I915_WRITE(RING_HWS_PGA(ring->mmio_base),
-			   (u32)ring->status_page.gfx_addr);
-		POSTING_READ(RING_HWS_PGA(ring->mmio_base));
-	}
-
 	I915_WRITE(RING_MODE_GEN7(ring),
 		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
 		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
@@ -1562,9 +1563,9 @@
 	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
 
-		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_UDW(ring, i));
+		intel_logical_ring_emit_reg(ringbuf, GEN8_RING_PDP_UDW(ring, i));
 		intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr));
-		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_LDW(ring, i));
+		intel_logical_ring_emit_reg(ringbuf, GEN8_RING_PDP_LDW(ring, i));
 		intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr));
 	}
 
@@ -1923,6 +1924,7 @@
 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	init_waitqueue_head(&ring->irq_queue);
 
+	INIT_LIST_HEAD(&ring->buffers);
 	INIT_LIST_HEAD(&ring->execlist_queue);
 	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
 	spin_lock_init(&ring->execlist_lock);
@@ -1972,7 +1974,7 @@
 		ring->init_hw = gen8_init_render_ring;
 	ring->init_context = gen8_init_rcs_context;
 	ring->cleanup = intel_fini_pipe_control;
-	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		ring->get_seqno = bxt_a_get_seqno;
 		ring->set_seqno = bxt_a_set_seqno;
 	} else {
@@ -2024,7 +2026,7 @@
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		ring->get_seqno = bxt_a_get_seqno;
 		ring->set_seqno = bxt_a_set_seqno;
 	} else {
@@ -2079,7 +2081,7 @@
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		ring->get_seqno = bxt_a_get_seqno;
 		ring->set_seqno = bxt_a_set_seqno;
 	} else {
@@ -2109,7 +2111,7 @@
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		ring->get_seqno = bxt_a_get_seqno;
 		ring->set_seqno = bxt_a_set_seqno;
 	} else {
@@ -2263,46 +2265,31 @@
 	 * only for the first context restore: on a subsequent save, the GPU will
 	 * recreate this batchbuffer with new values (including all the missing
 	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
-	if (ring->id == RCS)
-		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
-	else
-		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
-	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
-	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
-	reg_state[CTX_CONTEXT_CONTROL+1] =
-		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
-				   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				   CTX_CTRL_RS_CTX_ENABLE);
-	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
-	reg_state[CTX_RING_HEAD+1] = 0;
-	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
-	reg_state[CTX_RING_TAIL+1] = 0;
-	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
+	reg_state[CTX_LRI_HEADER_0] =
+		MI_LOAD_REGISTER_IMM(ring->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED;
+	ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(ring),
+		       _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
+					  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					  CTX_CTRL_RS_CTX_ENABLE));
+	ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(ring->mmio_base), 0);
 	/* Ring buffer start address is not known until the buffer is pinned.
 	 * It is written to the context image in execlists_update_context()
 	 */
-	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
-	reg_state[CTX_RING_BUFFER_CONTROL+1] =
-			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
-	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
-	reg_state[CTX_BB_HEAD_U+1] = 0;
-	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
-	reg_state[CTX_BB_HEAD_L+1] = 0;
-	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
-	reg_state[CTX_BB_STATE+1] = (1<<5);
-	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
-	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
-	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
-	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
-	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
-	reg_state[CTX_SECOND_BB_STATE+1] = 0;
+	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, RING_START(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, RING_CTL(ring->mmio_base),
+		       ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
+	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, RING_BBADDR_UDW(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, RING_BBADDR(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_BB_STATE, RING_BBSTATE(ring->mmio_base),
+		       RING_BB_PPGTT);
+	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE, RING_SBBSTATE(ring->mmio_base), 0);
 	if (ring->id == RCS) {
-		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
-		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
-		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
-		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
-		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
-		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
+		ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(ring->mmio_base), 0);
+		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(ring->mmio_base), 0);
+		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, RING_INDIRECT_CTX_OFFSET(ring->mmio_base), 0);
 		if (ring->wa_ctx.obj) {
 			struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
 			uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
@@ -2319,18 +2306,17 @@
 				0x01;
 		}
 	}
-	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
-	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
-	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
-	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
-	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
-	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
-	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
-	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
-	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
-	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
-	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
-	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
+	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+	ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(ring->mmio_base), 0);
+	/* PDP values will be assigned later if needed */
+	ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(ring, 3), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(ring, 3), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(ring, 2), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(ring, 2), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(ring, 1), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(ring, 1), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(ring, 0), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(ring, 0), 0);
 
 	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
 		/* 64b PPGTT (48bit canonical)
@@ -2352,8 +2338,8 @@
 
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
-		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
-		reg_state[CTX_R_PWR_CLK_STATE+1] = make_rpcs(dev);
+		ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+			       make_rpcs(dev));
 	}
 
 	kunmap_atomic(reg_state);
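The context-image rework above folds each open-coded pair of reg_state[POS] = reg; reg_state[POS+1] = val; stores into ASSIGN_CTX_REG(), keeping the (register offset, value) dwords adjacent and harder to update half-way. A small standalone sketch of that layout; the CTX_* positions and register offsets are invented for illustration, and only the do { } while (0) macro shape follows the patch:

  /* Model of the (offset, value) register-state layout maintained by
   * ASSIGN_CTX_REG().  Positions and offsets are invented.
   */
  #include <stdint.h>
  #include <stdio.h>

  typedef struct { uint32_t reg; } i915_reg_t;
  #define _MMIO(r)	((i915_reg_t){ .reg = (r) })
  static inline uint32_t i915_mmio_reg_offset(i915_reg_t r) { return r.reg; }

  #define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \
  	(reg_state)[(pos) + 0] = i915_mmio_reg_offset(reg); \
  	(reg_state)[(pos) + 1] = (val); \
  } while (0)

  /* Invented positions within the context image. */
  #define CTX_RING_HEAD	0
  #define CTX_RING_TAIL	2

  #define RING_HEAD(base)	_MMIO((base) + 0x34)
  #define RING_TAIL(base)	_MMIO((base) + 0x30)

  int main(void)
  {
  	uint32_t reg_state[4] = { 0 };
  	uint32_t mmio_base = 0x2000;	/* example engine mmio base */
  	int i;

  	ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(mmio_base), 0);
  	ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(mmio_base), 0);

  	for (i = 0; i < 4; i += 2)
  		printf("dword %d: offset 0x%x value 0x%x\n", i,
  		       (unsigned)reg_state[i], (unsigned)reg_state[i + 1]);
  	return 0;
  }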
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4e60d54..0b821b9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -29,16 +29,16 @@
 #define GEN8_CSB_PTR_MASK 0x07
 
 /* Execlists regs */
-#define RING_ELSP(ring)			((ring)->mmio_base+0x230)
-#define RING_EXECLIST_STATUS_LO(ring)	((ring)->mmio_base+0x234)
-#define RING_EXECLIST_STATUS_HI(ring)	((ring)->mmio_base+0x234 + 4)
-#define RING_CONTEXT_CONTROL(ring)	((ring)->mmio_base+0x244)
+#define RING_ELSP(ring)				_MMIO((ring)->mmio_base + 0x230)
+#define RING_EXECLIST_STATUS_LO(ring)		_MMIO((ring)->mmio_base + 0x234)
+#define RING_EXECLIST_STATUS_HI(ring)		_MMIO((ring)->mmio_base + 0x234 + 4)
+#define RING_CONTEXT_CONTROL(ring)		_MMIO((ring)->mmio_base + 0x244)
 #define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	(1 << 3)
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	(1 << 0)
 #define   CTX_CTRL_RS_CTX_ENABLE                (1 << 1)
-#define RING_CONTEXT_STATUS_BUF_LO(ring, i)	((ring)->mmio_base+0x370 + (i) * 8)
-#define RING_CONTEXT_STATUS_BUF_HI(ring, i)	((ring)->mmio_base+0x370 + (i) * 8 + 4)
-#define RING_CONTEXT_STATUS_PTR(ring)	((ring)->mmio_base+0x3a0)
+#define RING_CONTEXT_STATUS_BUF_LO(ring, i)	_MMIO((ring)->mmio_base + 0x370 + (i) * 8)
+#define RING_CONTEXT_STATUS_BUF_HI(ring, i)	_MMIO((ring)->mmio_base + 0x370 + (i) * 8 + 4)
+#define RING_CONTEXT_STATUS_PTR(ring)		_MMIO((ring)->mmio_base + 0x3a0)
 
 /* Logical Rings */
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request);
@@ -70,6 +70,11 @@
 	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
 	ringbuf->tail += 4;
 }
+static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf,
+					       i915_reg_t reg)
+{
+	intel_logical_ring_emit(ringbuf, i915_mmio_reg_offset(reg));
+}
 
 /* Logical Ring Contexts */
 
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 7f39b8a..61f1145 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -51,7 +51,7 @@
 	struct intel_encoder base;
 
 	bool is_dual_link;
-	u32 reg;
+	i915_reg_t reg;
 	u32 a3_power;
 
 	struct intel_lvds_connector *attached_connector;
@@ -210,7 +210,7 @@
 	struct intel_connector *intel_connector =
 		&lvds_encoder->attached_connector->base;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 ctl_reg, stat_reg;
+	i915_reg_t ctl_reg, stat_reg;
 
 	if (HAS_PCH_SPLIT(dev)) {
 		ctl_reg = PCH_PP_CONTROL;
@@ -235,7 +235,7 @@
 	struct drm_device *dev = encoder->base.dev;
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 ctl_reg, stat_reg;
+	i915_reg_t ctl_reg, stat_reg;
 
 	if (HAS_PCH_SPLIT(dev)) {
 		ctl_reg = PCH_PP_CONTROL;
@@ -939,7 +939,7 @@
 	struct drm_display_mode *downclock_mode = NULL;
 	struct edid *edid;
 	struct drm_crtc *crtc;
-	u32 lvds_reg;
+	i915_reg_t lvds_reg;
 	u32 lvds;
 	int pipe;
 	u8 pin;
@@ -1164,8 +1164,7 @@
 	DRM_DEBUG_KMS("detected %s-link lvds configuration\n",
 		      lvds_encoder->is_dual_link ? "dual" : "single");
 
-	lvds_encoder->a3_power = I915_READ(lvds_encoder->reg) &
-				 LVDS_A3_POWER_MASK;
+	lvds_encoder->a3_power = lvds & LVDS_A3_POWER_MASK;
 
 	lvds_connector->lid_notifier.notifier_call = intel_lid_notify;
 	if (acpi_lid_notifier_register(&lvds_connector->lid_notifier)) {
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 6d3c6c0..fed7bea 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -143,7 +143,7 @@
 {
 	bool result = false;
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		table->size  = ARRAY_SIZE(skylake_mocs_table);
 		table->table = skylake_mocs_table;
 		result = true;
@@ -159,11 +159,30 @@
 	return result;
 }
 
+static i915_reg_t mocs_register(enum intel_ring_id ring, int index)
+{
+	switch (ring) {
+	case RCS:
+		return GEN9_GFX_MOCS(index);
+	case VCS:
+		return GEN9_MFX0_MOCS(index);
+	case BCS:
+		return GEN9_BLT_MOCS(index);
+	case VECS:
+		return GEN9_VEBOX_MOCS(index);
+	case VCS2:
+		return GEN9_MFX1_MOCS(index);
+	default:
+		MISSING_CASE(ring);
+		return INVALID_MMIO_REG;
+	}
+}
+
 /**
  * emit_mocs_control_table() - emit the mocs control table
  * @req:	Request to set up the MOCS table for.
  * @table:	The values to program into the control regs.
- * @reg_base:	The base for the engine that needs to be programmed.
+ * @ring:	The engine for which to emit the registers.
  *
  * This function simply emits a MI_LOAD_REGISTER_IMM command for the
  * given table starting at the given address.
@@ -172,7 +191,7 @@
  */
 static int emit_mocs_control_table(struct drm_i915_gem_request *req,
 				   const struct drm_i915_mocs_table *table,
-				   u32 reg_base)
+				   enum intel_ring_id ring)
 {
 	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	unsigned int index;
@@ -191,7 +210,7 @@
 				MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES));
 
 	for (index = 0; index < table->size; index++) {
-		intel_logical_ring_emit(ringbuf, reg_base + index * 4);
+		intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index));
 		intel_logical_ring_emit(ringbuf,
 					table->table[index].control_value);
 	}
@@ -205,7 +224,7 @@
 	 * that value to all the used entries.
 	 */
 	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
-		intel_logical_ring_emit(ringbuf, reg_base + index * 4);
+		intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index));
 		intel_logical_ring_emit(ringbuf, table->table[0].control_value);
 	}
 
@@ -253,7 +272,7 @@
 		value = (table->table[count].l3cc_value & 0xffff) |
 			((table->table[count + 1].l3cc_value & 0xffff) << 16);
 
-		intel_logical_ring_emit(ringbuf, GEN9_LNCFCMOCS0 + i * 4);
+		intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i));
 		intel_logical_ring_emit(ringbuf, value);
 	}
 
@@ -270,7 +289,7 @@
 	 * they are reserved by the hardware.
 	 */
 	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
-		intel_logical_ring_emit(ringbuf, GEN9_LNCFCMOCS0 + i * 4);
+		intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i));
 		intel_logical_ring_emit(ringbuf, value);
 
 		value = filler;
@@ -304,26 +323,16 @@
 	int ret;
 
 	if (get_mocs_settings(req->ring->dev, &t)) {
+		struct drm_i915_private *dev_priv = req->i915;
+		struct intel_engine_cs *ring;
+		enum intel_ring_id ring_id;
+
 		/* Program the control registers */
-		ret = emit_mocs_control_table(req, &t, GEN9_GFX_MOCS_0);
-		if (ret)
-			return ret;
-
-		ret = emit_mocs_control_table(req, &t, GEN9_MFX0_MOCS_0);
-		if (ret)
-			return ret;
-
-		ret = emit_mocs_control_table(req, &t, GEN9_MFX1_MOCS_0);
-		if (ret)
-			return ret;
-
-		ret = emit_mocs_control_table(req, &t, GEN9_VEBOX_MOCS_0);
-		if (ret)
-			return ret;
-
-		ret = emit_mocs_control_table(req, &t, GEN9_BLT_MOCS_0);
-		if (ret)
-			return ret;
+		for_each_ring(ring, dev_priv, ring_id) {
+			ret = emit_mocs_control_table(req, &t, ring_id);
+			if (ret)
+				return ret;
+		}
 
 		/* Now program the l3cc registers */
 		ret = emit_mocs_l3cc_table(req, &t);
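The MOCS change above drops five explicit emit_mocs_control_table() calls in favour of a for_each_ring() loop plus mocs_register(ring, index), which picks the per-engine register bank for a given table entry. A hedged, self-contained sketch of that dispatch shape; the register bases and entry count are placeholders rather than the GEN9 values:

  /* Sketch of per-engine register-bank dispatch, in the spirit of
   * mocs_register().  Bases and counts are placeholders.
   */
  #include <stdint.h>
  #include <stdio.h>

  typedef struct { uint32_t reg; } i915_reg_t;
  #define _MMIO(r)		((i915_reg_t){ .reg = (r) })
  #define INVALID_MMIO_REG	_MMIO(0)

  enum ring_id { RCS, VCS, BCS, VECS, NUM_RINGS };

  /* One register per MOCS entry, laid out as base + 4 * index. */
  static i915_reg_t mocs_register(enum ring_id ring, int index)
  {
  	switch (ring) {
  	case RCS:  return _MMIO(0xc800 + index * 4);
  	case VCS:  return _MMIO(0xc900 + index * 4);
  	case BCS:  return _MMIO(0xcc00 + index * 4);
  	case VECS: return _MMIO(0xcb00 + index * 4);
  	default:   return INVALID_MMIO_REG;
  	}
  }

  int main(void)
  {
  	enum ring_id ring;
  	int index;

  	/* Same shape as the for_each_ring() loop in the patch: every
  	 * engine gets the same table, only the register bank differs. */
  	for (ring = RCS; ring < NUM_RINGS; ring++)
  		for (index = 0; index < 2; index++)
  			printf("ring %d entry %d -> 0x%x\n", ring, index,
  			       (unsigned)mocs_register(ring, index).reg);
  	return 0;
  }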
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 6dc13c0..e362a30 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -682,7 +682,7 @@
 	}
 
 	if (!acpi_video_bus) {
-		DRM_ERROR("No ACPI video bus found\n");
+		DRM_DEBUG_KMS("No ACPI video bus found\n");
 		return;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 4445426..76f1980 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -749,7 +749,7 @@
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL,
+	ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
 						   &i915_ggtt_view_normal);
 	if (ret != 0)
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 071a76b..96f45d7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -1708,13 +1708,6 @@
 	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
 }
 
-struct skl_pipe_wm_parameters {
-	bool active;
-	uint32_t pipe_htotal;
-	uint32_t pixel_rate; /* in KHz */
-	struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
-};
-
 struct ilk_wm_maximums {
 	uint16_t pri;
 	uint16_t spr;
@@ -1722,13 +1715,6 @@
 	uint16_t fbc;
 };
 
-/* used in computing the new watermarks state */
-struct intel_wm_config {
-	unsigned int num_pipes_active;
-	bool sprites_enabled;
-	bool sprites_scaled;
-};
-
 /*
  * For both WM_PIPE and WM_LP.
  * mem_value must be in 0.1us units.
@@ -1979,9 +1965,11 @@
 				 const struct intel_crtc *intel_crtc,
 				 int level,
 				 struct intel_crtc_state *cstate,
+				 struct intel_plane_state *pristate,
+				 struct intel_plane_state *sprstate,
+				 struct intel_plane_state *curstate,
 				 struct intel_wm_level *result)
 {
-	struct intel_plane *intel_plane;
 	uint16_t pri_latency = dev_priv->wm.pri_latency[level];
 	uint16_t spr_latency = dev_priv->wm.spr_latency[level];
 	uint16_t cur_latency = dev_priv->wm.cur_latency[level];
@@ -1993,29 +1981,11 @@
 		cur_latency *= 5;
 	}
 
-	for_each_intel_plane_on_crtc(dev_priv->dev, intel_crtc, intel_plane) {
-		struct intel_plane_state *pstate =
-			to_intel_plane_state(intel_plane->base.state);
-
-		switch (intel_plane->base.type) {
-		case DRM_PLANE_TYPE_PRIMARY:
-			result->pri_val = ilk_compute_pri_wm(cstate, pstate,
-							     pri_latency,
-							     level);
-			result->fbc_val = ilk_compute_fbc_wm(cstate, pstate,
-							     result->pri_val);
-			break;
-		case DRM_PLANE_TYPE_OVERLAY:
-			result->spr_val = ilk_compute_spr_wm(cstate, pstate,
-							     spr_latency);
-			break;
-		case DRM_PLANE_TYPE_CURSOR:
-			result->cur_val = ilk_compute_cur_wm(cstate, pstate,
-							     cur_latency);
-			break;
-		}
-	}
-
+	result->pri_val = ilk_compute_pri_wm(cstate, pristate,
+					     pri_latency, level);
+	result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
+	result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
+	result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
 	result->enable = true;
 }
 
@@ -2274,34 +2244,19 @@
 	intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
 }
 
-static void ilk_compute_wm_config(struct drm_device *dev,
-				  struct intel_wm_config *config)
-{
-	struct intel_crtc *intel_crtc;
-
-	/* Compute the currently _active_ config */
-	for_each_intel_crtc(dev, intel_crtc) {
-		const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
-
-		if (!wm->pipe_enabled)
-			continue;
-
-		config->sprites_enabled |= wm->sprites_enabled;
-		config->sprites_scaled |= wm->sprites_scaled;
-		config->num_pipes_active++;
-	}
-}
-
 /* Compute new watermarks for the pipe */
-static bool intel_compute_pipe_wm(struct intel_crtc_state *cstate,
-				  struct intel_pipe_wm *pipe_wm)
+static int ilk_compute_pipe_wm(struct intel_crtc *intel_crtc,
+			       struct drm_atomic_state *state)
 {
-	struct drm_crtc *crtc = cstate->base.crtc;
-	struct drm_device *dev = crtc->dev;
+	struct intel_pipe_wm *pipe_wm;
+	struct drm_device *dev = intel_crtc->base.dev;
 	const struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc_state *cstate = NULL;
 	struct intel_plane *intel_plane;
+	struct drm_plane_state *ps;
+	struct intel_plane_state *pristate = NULL;
 	struct intel_plane_state *sprstate = NULL;
+	struct intel_plane_state *curstate = NULL;
 	int level, max_level = ilk_wm_max_level(dev);
 	/* LP0 watermark maximums depend on this pipe alone */
 	struct intel_wm_config config = {
@@ -2309,11 +2264,24 @@
 	};
 	struct ilk_wm_maximums max;
 
+	cstate = intel_atomic_get_crtc_state(state, intel_crtc);
+	if (IS_ERR(cstate))
+		return PTR_ERR(cstate);
+
+	pipe_wm = &cstate->wm.optimal.ilk;
+
 	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
-		if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) {
-			sprstate = to_intel_plane_state(intel_plane->base.state);
-			break;
-		}
+		ps = drm_atomic_get_plane_state(state,
+						&intel_plane->base);
+		if (IS_ERR(ps))
+			return PTR_ERR(ps);
+
+		if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY)
+			pristate = to_intel_plane_state(ps);
+		else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY)
+			sprstate = to_intel_plane_state(ps);
+		else if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR)
+			curstate = to_intel_plane_state(ps);
 	}
 
 	config.sprites_enabled = sprstate->visible;
@@ -2322,7 +2290,7 @@
 		drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16);
 
 	pipe_wm->pipe_enabled = cstate->base.active;
-	pipe_wm->sprites_enabled = sprstate->visible;
+	pipe_wm->sprites_enabled = config.sprites_enabled;
 	pipe_wm->sprites_scaled = config.sprites_scaled;
 
 	/* ILK/SNB: LP2+ watermarks only w/o sprites */
@@ -2333,24 +2301,27 @@
 	if (config.sprites_scaled)
 		max_level = 0;
 
-	ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, &pipe_wm->wm[0]);
+	ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
+			     pristate, sprstate, curstate, &pipe_wm->wm[0]);
 
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
-		pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
+		pipe_wm->linetime = hsw_compute_linetime_wm(dev,
+							    &intel_crtc->base);
 
 	/* LP0 watermarks always use 1/2 DDB partitioning */
 	ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
 
 	/* At least LP0 must be valid */
 	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
-		return false;
+		return -EINVAL;
 
 	ilk_compute_wm_reg_maximums(dev, 1, &max);
 
 	for (level = 1; level <= max_level; level++) {
 		struct intel_wm_level wm = {};
 
-		ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, &wm);
+		ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
+				     pristate, sprstate, curstate, &wm);
 
 		/*
 		 * Disable any watermark level that exceeds the
@@ -2363,7 +2334,7 @@
 		pipe_wm->wm[level] = wm;
 	}
 
-	return true;
+	return 0;
 }
 
 /*
@@ -2378,7 +2349,9 @@
 	ret_wm->enable = true;
 
 	for_each_intel_crtc(dev, intel_crtc) {
-		const struct intel_pipe_wm *active = &intel_crtc->wm.active;
+		const struct intel_crtc_state *cstate =
+			to_intel_crtc_state(intel_crtc->base.state);
+		const struct intel_pipe_wm *active = &cstate->wm.optimal.ilk;
 		const struct intel_wm_level *wm = &active->wm[level];
 
 		if (!active->pipe_enabled)
@@ -2526,14 +2499,15 @@
 
 	/* LP0 register values */
 	for_each_intel_crtc(dev, intel_crtc) {
+		const struct intel_crtc_state *cstate =
+			to_intel_crtc_state(intel_crtc->base.state);
 		enum pipe pipe = intel_crtc->pipe;
-		const struct intel_wm_level *r =
-			&intel_crtc->wm.active.wm[0];
+		const struct intel_wm_level *r = &cstate->wm.optimal.ilk.wm[0];
 
 		if (WARN_ON(!r->enable))
 			continue;
 
-		results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
+		results->wm_linetime[pipe] = cstate->wm.optimal.ilk.linetime;
 
 		results->wm_pipe[pipe] =
 			(r->pri_val << WM0_PIPE_PLANE_SHIFT) |
@@ -2755,18 +2729,40 @@
 #define SKL_DDB_SIZE		896	/* in blocks */
 #define BXT_DDB_SIZE		512
 
+/*
+ * Return the index of a plane in the SKL DDB and wm result arrays.  Primary
+ * plane is always in slot 0, cursor is always in slot I915_MAX_PLANES-1, and
+ * other universal planes are in indices 1..n.  Note that this may leave unused
+ * indices between the top "sprite" plane and the cursor.
+ */
+static int
+skl_wm_plane_id(const struct intel_plane *plane)
+{
+	switch (plane->base.type) {
+	case DRM_PLANE_TYPE_PRIMARY:
+		return 0;
+	case DRM_PLANE_TYPE_CURSOR:
+		return PLANE_CURSOR;
+	case DRM_PLANE_TYPE_OVERLAY:
+		return plane->plane + 1;
+	default:
+		MISSING_CASE(plane->base.type);
+		return plane->plane;
+	}
+}
+
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
-				   struct drm_crtc *for_crtc,
+				   const struct intel_crtc_state *cstate,
 				   const struct intel_wm_config *config,
-				   const struct skl_pipe_wm_parameters *params,
 				   struct skl_ddb_entry *alloc /* out */)
 {
+	struct drm_crtc *for_crtc = cstate->base.crtc;
 	struct drm_crtc *crtc;
 	unsigned int pipe_size, ddb_size;
 	int nth_active_pipe;
 
-	if (!params->active) {
+	if (!cstate->base.active) {
 		alloc->start = 0;
 		alloc->end = 0;
 		return;
@@ -2837,19 +2833,29 @@
 }
 
 static unsigned int
-skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
+skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
+			     const struct drm_plane_state *pstate,
+			     int y)
 {
+	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
+	struct drm_framebuffer *fb = pstate->fb;
 
 	/* for planar format */
-	if (p->y_bytes_per_pixel) {
+	if (fb->pixel_format == DRM_FORMAT_NV12) {
 		if (y)  /* y-plane data rate */
-			return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
+			return intel_crtc->config->pipe_src_w *
+				intel_crtc->config->pipe_src_h *
+				drm_format_plane_cpp(fb->pixel_format, 0);
 		else    /* uv-plane data rate */
-			return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
+			return (intel_crtc->config->pipe_src_w/2) *
+				(intel_crtc->config->pipe_src_h/2) *
+				drm_format_plane_cpp(fb->pixel_format, 1);
 	}
 
 	/* for packed formats */
-	return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
+	return intel_crtc->config->pipe_src_w *
+		intel_crtc->config->pipe_src_h *
+		drm_format_plane_cpp(fb->pixel_format, 0);
 }
 
 /*
@@ -2858,46 +2864,55 @@
  *   3 * 4096 * 8192  * 4 < 2^32
  */
 static unsigned int
-skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
-				 const struct skl_pipe_wm_parameters *params)
+skl_get_total_relative_data_rate(const struct intel_crtc_state *cstate)
 {
+	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
+	struct drm_device *dev = intel_crtc->base.dev;
+	const struct intel_plane *intel_plane;
 	unsigned int total_data_rate = 0;
-	int plane;
 
-	for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
-		const struct intel_plane_wm_parameters *p;
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		const struct drm_plane_state *pstate = intel_plane->base.state;
 
-		p = &params->plane[plane];
-		if (!p->enabled)
+		if (pstate->fb == NULL)
 			continue;
 
-		total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
-		if (p->y_bytes_per_pixel) {
-			total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
-		}
+		if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR)
+			continue;
+
+		/* packed/uv */
+		total_data_rate += skl_plane_relative_data_rate(cstate,
+								pstate,
+								0);
+
+		if (pstate->fb->pixel_format == DRM_FORMAT_NV12)
+			/* y-plane */
+			total_data_rate += skl_plane_relative_data_rate(cstate,
+									pstate,
+									1);
 	}
 
 	return total_data_rate;
 }
 
 static void
-skl_allocate_pipe_ddb(struct drm_crtc *crtc,
-		      const struct intel_wm_config *config,
-		      const struct skl_pipe_wm_parameters *params,
+skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 		      struct skl_ddb_allocation *ddb /* out */)
 {
+	struct drm_crtc *crtc = cstate->base.crtc;
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_wm_config *config = &dev_priv->wm.config;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_plane *intel_plane;
 	enum pipe pipe = intel_crtc->pipe;
 	struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
 	uint16_t alloc_size, start, cursor_blocks;
 	uint16_t minimum[I915_MAX_PLANES];
 	uint16_t y_minimum[I915_MAX_PLANES];
 	unsigned int total_data_rate;
-	int plane;
 
-	skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
+	skl_ddb_get_pipe_allocation_limits(dev, cstate, config, alloc);
 	alloc_size = skl_ddb_entry_size(alloc);
 	if (alloc_size == 0) {
 		memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
@@ -2914,17 +2929,20 @@
 	alloc->end -= cursor_blocks;
 
 	/* 1. Allocate the minimum required blocks for each active plane */
-	for_each_plane(dev_priv, pipe, plane) {
-		const struct intel_plane_wm_parameters *p;
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		struct drm_plane *plane = &intel_plane->base;
+		struct drm_framebuffer *fb = plane->state->fb;
+		int id = skl_wm_plane_id(intel_plane);
 
-		p = &params->plane[plane];
-		if (!p->enabled)
+		if (fb == NULL)
+			continue;
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
 			continue;
 
-		minimum[plane] = 8;
-		alloc_size -= minimum[plane];
-		y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
-		alloc_size -= y_minimum[plane];
+		minimum[id] = 8;
+		alloc_size -= minimum[id];
+		y_minimum[id] = (fb->pixel_format == DRM_FORMAT_NV12) ? 8 : 0;
+		alloc_size -= y_minimum[id];
 	}
 
 	/*
@@ -2933,45 +2951,50 @@
 	 *
 	 * FIXME: we may not allocate every single block here.
 	 */
-	total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
+	total_data_rate = skl_get_total_relative_data_rate(cstate);
 
 	start = alloc->start;
-	for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
-		const struct intel_plane_wm_parameters *p;
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		struct drm_plane *plane = &intel_plane->base;
+		struct drm_plane_state *pstate = intel_plane->base.state;
 		unsigned int data_rate, y_data_rate;
 		uint16_t plane_blocks, y_plane_blocks = 0;
+		int id = skl_wm_plane_id(intel_plane);
 
-		p = &params->plane[plane];
-		if (!p->enabled)
+		if (pstate->fb == NULL)
+			continue;
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
 			continue;
 
-		data_rate = skl_plane_relative_data_rate(p, 0);
+		data_rate = skl_plane_relative_data_rate(cstate, pstate, 0);
 
 		/*
 		 * allocation for (packed formats) or (uv-plane part of planar format):
 		 * promote the expression to 64 bits to avoid overflowing, the
 		 * result is < available as data_rate / total_data_rate < 1
 		 */
-		plane_blocks = minimum[plane];
+		plane_blocks = minimum[id];
 		plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
 					total_data_rate);
 
-		ddb->plane[pipe][plane].start = start;
-		ddb->plane[pipe][plane].end = start + plane_blocks;
+		ddb->plane[pipe][id].start = start;
+		ddb->plane[pipe][id].end = start + plane_blocks;
 
 		start += plane_blocks;
 
 		/*
 		 * allocation for y_plane part of planar format:
 		 */
-		if (p->y_bytes_per_pixel) {
-			y_data_rate = skl_plane_relative_data_rate(p, 1);
-			y_plane_blocks = y_minimum[plane];
+		if (pstate->fb->pixel_format == DRM_FORMAT_NV12) {
+			y_data_rate = skl_plane_relative_data_rate(cstate,
+								   pstate,
+								   1);
+			y_plane_blocks = y_minimum[id];
 			y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
 						total_data_rate);
 
-			ddb->y_plane[pipe][plane].start = start;
-			ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
+			ddb->y_plane[pipe][id].start = start;
+			ddb->y_plane[pipe][id].end = start + y_plane_blocks;
 
 			start += y_plane_blocks;
 		}
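The loop above gives every plane its minimum blocks plus a share of the remaining DDB proportional to its data rate, promoting the product to 64 bits first. As the comment before skl_get_total_relative_data_rate() notes, the total rate itself fits in 32 bits (3 * 4096 * 8192 * 4 = 402,653,184 < 2^32), but alloc_size * data_rate can exceed 2^32, hence the div_u64-style promotion. A worked model of the split with made-up plane rates:

  /* Worked model of the proportional DDB split used above.  Plane data
   * rates and the block budget are made up; only the arithmetic shape
   * (64-bit promotion, minimum first) follows the patch.
   */
  #include <stdint.h>
  #include <stdio.h>

  #define NUM_PLANES	3

  int main(void)
  {
  	/* e.g. a 4bpp primary, a smaller 4bpp sprite, an NV12 uv plane */
  	const uint32_t data_rate[NUM_PLANES] = { 33177600, 8294400, 4147200 };
  	const uint16_t minimum = 8;
  	uint16_t alloc_size = 896 - NUM_PLANES * minimum; /* DDB size minus minimums */
  	uint64_t total_rate = 0;
  	uint16_t start = 0;
  	int i;

  	for (i = 0; i < NUM_PLANES; i++)
  		total_rate += data_rate[i];

  	for (i = 0; i < NUM_PLANES; i++) {
  		/* promote to 64 bits so alloc_size * data_rate cannot wrap */
  		uint16_t blocks = minimum +
  			(uint16_t)((uint64_t)alloc_size * data_rate[i] / total_rate);

  		printf("plane %d: blocks [%u, %u)\n", i,
  		       (unsigned)start, (unsigned)(start + blocks));
  		start += blocks;
  	}
  	return 0;
  }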
@@ -3041,104 +3064,27 @@
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
-	enum pipe pipe = intel_crtc->pipe;
 
-	if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
-		   sizeof(new_ddb->plane[pipe])))
-		return true;
-
-	if (memcmp(&new_ddb->plane[pipe][PLANE_CURSOR], &cur_ddb->plane[pipe][PLANE_CURSOR],
-		    sizeof(new_ddb->plane[pipe][PLANE_CURSOR])))
+	/*
+	 * If ddb allocation of pipes changed, it may require recalculation of
+	 * watermarks
+	 */
+	if (memcmp(new_ddb->pipe, cur_ddb->pipe, sizeof(new_ddb->pipe)))
 		return true;
 
 	return false;
 }
 
-static void skl_compute_wm_global_parameters(struct drm_device *dev,
-					     struct intel_wm_config *config)
-{
-	struct drm_crtc *crtc;
-	struct drm_plane *plane;
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		config->num_pipes_active += to_intel_crtc(crtc)->active;
-
-	/* FIXME: I don't think we need those two global parameters on SKL */
-	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
-		struct intel_plane *intel_plane = to_intel_plane(plane);
-
-		config->sprites_enabled |= intel_plane->wm.enabled;
-		config->sprites_scaled |= intel_plane->wm.scaled;
-	}
-}
-
-static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
-					   struct skl_pipe_wm_parameters *p)
-{
-	struct drm_device *dev = crtc->dev;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum pipe pipe = intel_crtc->pipe;
-	struct drm_plane *plane;
-	struct drm_framebuffer *fb;
-	int i = 1; /* Index for sprite planes start */
-
-	p->active = intel_crtc->active;
-	if (p->active) {
-		p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
-		p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
-
-		fb = crtc->primary->state->fb;
-		/* For planar: Bpp is for uv plane, y_Bpp is for y plane */
-		if (fb) {
-			p->plane[0].enabled = true;
-			p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
-				drm_format_plane_cpp(fb->pixel_format, 1) :
-				drm_format_plane_cpp(fb->pixel_format, 0);
-			p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
-				drm_format_plane_cpp(fb->pixel_format, 0) : 0;
-			p->plane[0].tiling = fb->modifier[0];
-		} else {
-			p->plane[0].enabled = false;
-			p->plane[0].bytes_per_pixel = 0;
-			p->plane[0].y_bytes_per_pixel = 0;
-			p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
-		}
-		p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
-		p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h;
-		p->plane[0].rotation = crtc->primary->state->rotation;
-
-		fb = crtc->cursor->state->fb;
-		p->plane[PLANE_CURSOR].y_bytes_per_pixel = 0;
-		if (fb) {
-			p->plane[PLANE_CURSOR].enabled = true;
-			p->plane[PLANE_CURSOR].bytes_per_pixel = fb->bits_per_pixel / 8;
-			p->plane[PLANE_CURSOR].horiz_pixels = crtc->cursor->state->crtc_w;
-			p->plane[PLANE_CURSOR].vert_pixels = crtc->cursor->state->crtc_h;
-		} else {
-			p->plane[PLANE_CURSOR].enabled = false;
-			p->plane[PLANE_CURSOR].bytes_per_pixel = 0;
-			p->plane[PLANE_CURSOR].horiz_pixels = 64;
-			p->plane[PLANE_CURSOR].vert_pixels = 64;
-		}
-	}
-
-	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
-		struct intel_plane *intel_plane = to_intel_plane(plane);
-
-		if (intel_plane->pipe == pipe &&
-			plane->type == DRM_PLANE_TYPE_OVERLAY)
-			p->plane[i++] = intel_plane->wm;
-	}
-}
-
 static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
-				 struct skl_pipe_wm_parameters *p,
-				 struct intel_plane_wm_parameters *p_params,
+				 struct intel_crtc_state *cstate,
+				 struct intel_plane *intel_plane,
 				 uint16_t ddb_allocation,
 				 int level,
 				 uint16_t *out_blocks, /* out */
 				 uint8_t *out_lines /* out */)
 {
+	struct drm_plane *plane = &intel_plane->base;
+	struct drm_framebuffer *fb = plane->state->fb;
 	uint32_t latency = dev_priv->wm.skl_latency[level];
 	uint32_t method1, method2;
 	uint32_t plane_bytes_per_line, plane_blocks_per_line;
@@ -3146,31 +3092,33 @@
 	uint32_t selected_result;
 	uint8_t bytes_per_pixel;
 
-	if (latency == 0 || !p->active || !p_params->enabled)
+	if (latency == 0 || !cstate->base.active || !fb)
 		return false;
 
-	bytes_per_pixel = p_params->y_bytes_per_pixel ?
-		p_params->y_bytes_per_pixel :
-		p_params->bytes_per_pixel;
-	method1 = skl_wm_method1(p->pixel_rate,
+	bytes_per_pixel = drm_format_plane_cpp(fb->pixel_format, 0);
+	method1 = skl_wm_method1(skl_pipe_pixel_rate(cstate),
 				 bytes_per_pixel,
 				 latency);
-	method2 = skl_wm_method2(p->pixel_rate,
-				 p->pipe_htotal,
-				 p_params->horiz_pixels,
+	method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate),
+				 cstate->base.adjusted_mode.crtc_htotal,
+				 cstate->pipe_src_w,
 				 bytes_per_pixel,
-				 p_params->tiling,
+				 fb->modifier[0],
 				 latency);
 
-	plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
+	plane_bytes_per_line = cstate->pipe_src_w * bytes_per_pixel;
 	plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
 
-	if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
-	    p_params->tiling == I915_FORMAT_MOD_Yf_TILED) {
+	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
+	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
 		uint32_t min_scanlines = 4;
 		uint32_t y_tile_minimum;
-		if (intel_rotation_90_or_270(p_params->rotation)) {
-			switch (p_params->bytes_per_pixel) {
+		if (intel_rotation_90_or_270(plane->state->rotation)) {
+			int bpp = (fb->pixel_format == DRM_FORMAT_NV12) ?
+				drm_format_plane_cpp(fb->pixel_format, 1) :
+				drm_format_plane_cpp(fb->pixel_format, 0);
+
+			switch (bpp) {
 			case 1:
 				min_scanlines = 16;
 				break;
@@ -3194,8 +3142,8 @@
 	res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);
 
 	if (level >= 1 && level <= 7) {
-		if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
-		    p_params->tiling == I915_FORMAT_MOD_Yf_TILED)
+		if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
+		    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)
 			res_lines += 4;
 		else
 			res_blocks++;
@@ -3212,84 +3160,80 @@
 
 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
 				 struct skl_ddb_allocation *ddb,
-				 struct skl_pipe_wm_parameters *p,
-				 enum pipe pipe,
+				 struct intel_crtc_state *cstate,
 				 int level,
-				 int num_planes,
 				 struct skl_wm_level *result)
 {
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
+	struct intel_plane *intel_plane;
 	uint16_t ddb_blocks;
-	int i;
+	enum pipe pipe = intel_crtc->pipe;
 
-	for (i = 0; i < num_planes; i++) {
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		int i = skl_wm_plane_id(intel_plane);
+
 		ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
 
 		result->plane_en[i] = skl_compute_plane_wm(dev_priv,
-						p, &p->plane[i],
+						cstate,
+						intel_plane,
 						ddb_blocks,
 						level,
 						&result->plane_res_b[i],
 						&result->plane_res_l[i]);
 	}
-
-	ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]);
-	result->plane_en[PLANE_CURSOR] = skl_compute_plane_wm(dev_priv, p,
-						 &p->plane[PLANE_CURSOR],
-						 ddb_blocks, level,
-						 &result->plane_res_b[PLANE_CURSOR],
-						 &result->plane_res_l[PLANE_CURSOR]);
 }
 
 static uint32_t
-skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
+skl_compute_linetime_wm(struct intel_crtc_state *cstate)
 {
-	if (!to_intel_crtc(crtc)->active)
+	if (!cstate->base.active)
 		return 0;
 
-	if (WARN_ON(p->pixel_rate == 0))
+	if (WARN_ON(skl_pipe_pixel_rate(cstate) == 0))
 		return 0;
 
-	return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
+	return DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 1000,
+			    skl_pipe_pixel_rate(cstate));
 }
 
-static void skl_compute_transition_wm(struct drm_crtc *crtc,
-				      struct skl_pipe_wm_parameters *params,
+static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
 				      struct skl_wm_level *trans_wm /* out */)
 {
+	struct drm_crtc *crtc = cstate->base.crtc;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int i;
+	struct intel_plane *intel_plane;
 
-	if (!params->active)
+	if (!cstate->base.active)
 		return;
 
 	/* Until we know more, just disable transition WMs */
-	for (i = 0; i < intel_num_planes(intel_crtc); i++)
+	for_each_intel_plane_on_crtc(crtc->dev, intel_crtc, intel_plane) {
+		int i = skl_wm_plane_id(intel_plane);
+
 		trans_wm->plane_en[i] = false;
-	trans_wm->plane_en[PLANE_CURSOR] = false;
+	}
 }
 
-static void skl_compute_pipe_wm(struct drm_crtc *crtc,
+static void skl_compute_pipe_wm(struct intel_crtc_state *cstate,
 				struct skl_ddb_allocation *ddb,
-				struct skl_pipe_wm_parameters *params,
 				struct skl_pipe_wm *pipe_wm)
 {
-	struct drm_device *dev = crtc->dev;
+	struct drm_device *dev = cstate->base.crtc->dev;
 	const struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int level, max_level = ilk_wm_max_level(dev);
 
 	for (level = 0; level <= max_level; level++) {
-		skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
-				     level, intel_num_planes(intel_crtc),
-				     &pipe_wm->wm[level]);
+		skl_compute_wm_level(dev_priv, ddb, cstate,
+				     level, &pipe_wm->wm[level]);
 	}
-	pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
+	pipe_wm->linetime = skl_compute_linetime_wm(cstate);
 
-	skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
+	skl_compute_transition_wm(cstate, &pipe_wm->trans_wm);
 }
 
 static void skl_compute_wm_results(struct drm_device *dev,
-				   struct skl_pipe_wm_parameters *p,
 				   struct skl_pipe_wm *p_wm,
 				   struct skl_wm_values *r,
 				   struct intel_crtc *intel_crtc)
@@ -3346,7 +3290,8 @@
 	r->wm_linetime[pipe] = p_wm->linetime;
 }
 
-static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
+static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
+				i915_reg_t reg,
 				const struct skl_ddb_entry *entry)
 {
 	if (entry->end)
@@ -3533,28 +3478,25 @@
 }
 
 static bool skl_update_pipe_wm(struct drm_crtc *crtc,
-			       struct skl_pipe_wm_parameters *params,
-			       struct intel_wm_config *config,
 			       struct skl_ddb_allocation *ddb, /* out */
 			       struct skl_pipe_wm *pipe_wm /* out */)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
 
-	skl_compute_wm_pipe_parameters(crtc, params);
-	skl_allocate_pipe_ddb(crtc, config, params, ddb);
-	skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
+	skl_allocate_pipe_ddb(cstate, ddb);
+	skl_compute_pipe_wm(cstate, ddb, pipe_wm);
 
-	if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
+	if (!memcmp(&intel_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm)))
 		return false;
 
-	intel_crtc->wm.skl_active = *pipe_wm;
+	intel_crtc->wm.active.skl = *pipe_wm;
 
 	return true;
 }
 
 static void skl_update_other_pipe_wm(struct drm_device *dev,
 				     struct drm_crtc *crtc,
-				     struct intel_wm_config *config,
 				     struct skl_wm_values *r)
 {
 	struct intel_crtc *intel_crtc;
@@ -3575,7 +3517,6 @@
 	 */
 	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
 				base.head) {
-		struct skl_pipe_wm_parameters params = {};
 		struct skl_pipe_wm pipe_wm = {};
 		bool wm_changed;
 
@@ -3586,7 +3527,6 @@
 			continue;
 
 		wm_changed = skl_update_pipe_wm(&intel_crtc->base,
-						&params, config,
 						&r->ddb, &pipe_wm);
 
 		/*
@@ -3596,7 +3536,7 @@
 		 */
 		WARN_ON(!wm_changed);
 
-		skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
+		skl_compute_wm_results(dev, &pipe_wm, r, intel_crtc);
 		r->dirty[intel_crtc->pipe] = true;
 	}
 }
@@ -3626,10 +3566,9 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct skl_pipe_wm_parameters params = {};
 	struct skl_wm_values *results = &dev_priv->wm.skl_results;
-	struct skl_pipe_wm pipe_wm = {};
-	struct intel_wm_config config = {};
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
+	struct skl_pipe_wm *pipe_wm = &cstate->wm.optimal.skl;
 
 
 	/* Clear all dirty flags */
@@ -3637,16 +3576,13 @@
 
 	skl_clear_wm(results, intel_crtc->pipe);
 
-	skl_compute_wm_global_parameters(dev, &config);
-
-	if (!skl_update_pipe_wm(crtc, &params, &config,
-				&results->ddb, &pipe_wm))
+	if (!skl_update_pipe_wm(crtc, &results->ddb, pipe_wm))
 		return;
 
-	skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
+	skl_compute_wm_results(dev, pipe_wm, results, intel_crtc);
 	results->dirty[intel_crtc->pipe] = true;
 
-	skl_update_other_pipe_wm(dev, crtc, &config, results);
+	skl_update_other_pipe_wm(dev, crtc, results);
 	skl_write_wm_values(dev_priv, results);
 	skl_flush_wm_values(dev_priv, results);
 
@@ -3654,71 +3590,23 @@
 	dev_priv->wm.skl_hw = *results;
 }
 
-static void
-skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
-		     uint32_t sprite_width, uint32_t sprite_height,
-		     int pixel_size, bool enabled, bool scaled)
+static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
 {
-	struct intel_plane *intel_plane = to_intel_plane(plane);
-	struct drm_framebuffer *fb = plane->state->fb;
-
-	intel_plane->wm.enabled = enabled;
-	intel_plane->wm.scaled = scaled;
-	intel_plane->wm.horiz_pixels = sprite_width;
-	intel_plane->wm.vert_pixels = sprite_height;
-	intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
-
-	/* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
-	intel_plane->wm.bytes_per_pixel =
-		(fb && fb->pixel_format == DRM_FORMAT_NV12) ?
-		drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
-	intel_plane->wm.y_bytes_per_pixel =
-		(fb && fb->pixel_format == DRM_FORMAT_NV12) ?
-		drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
-
-	/*
-	 * Framebuffer can be NULL on plane disable, but it does not
-	 * matter for watermarks if we assume no tiling in that case.
-	 */
-	if (fb)
-		intel_plane->wm.tiling = fb->modifier[0];
-	intel_plane->wm.rotation = plane->state->rotation;
-
-	skl_update_wm(crtc);
-}
-
-static void ilk_update_wm(struct drm_crtc *crtc)
-{
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
 	struct ilk_wm_maximums max;
+	struct intel_wm_config *config = &dev_priv->wm.config;
 	struct ilk_wm_values results = {};
 	enum intel_ddb_partitioning partitioning;
-	struct intel_pipe_wm pipe_wm = {};
-	struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
-	struct intel_wm_config config = {};
 
-	WARN_ON(cstate->base.active != intel_crtc->active);
-
-	intel_compute_pipe_wm(cstate, &pipe_wm);
-
-	if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
-		return;
-
-	intel_crtc->wm.active = pipe_wm;
-
-	ilk_compute_wm_config(dev, &config);
-
-	ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
-	ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
+	ilk_compute_wm_maximums(dev, 1, config, INTEL_DDB_PART_1_2, &max);
+	ilk_wm_merge(dev, config, &max, &lp_wm_1_2);
 
 	/* 5/6 split only in single pipe config on IVB+ */
 	if (INTEL_INFO(dev)->gen >= 7 &&
-	    config.num_pipes_active == 1 && config.sprites_enabled) {
-		ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
-		ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
+	    config->num_pipes_active == 1 && config->sprites_enabled) {
+		ilk_compute_wm_maximums(dev, 1, config, INTEL_DDB_PART_5_6, &max);
+		ilk_wm_merge(dev, config, &max, &lp_wm_5_6);
 
 		best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
 	} else {
@@ -3733,14 +3621,13 @@
 	ilk_write_wm_values(dev_priv, &results);
 }
 
-static void
-ilk_update_sprite_wm(struct drm_plane *plane,
-		     struct drm_crtc *crtc,
-		     uint32_t sprite_width, uint32_t sprite_height,
-		     int pixel_size, bool enabled, bool scaled)
+static void ilk_update_wm(struct drm_crtc *crtc)
 {
-	struct drm_device *dev = plane->dev;
-	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
+
+	WARN_ON(cstate->base.active != intel_crtc->active);
 
 	/*
 	 * IVB workaround: must disable low power watermarks for at least
@@ -3749,10 +3636,14 @@
 	 *
 	 * WaCxSRDisabledForSpriteScaling:ivb
 	 */
-	if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
-		intel_wait_for_vblank(dev, intel_plane->pipe);
+	if (cstate->disable_lp_wm) {
+		ilk_disable_lp_wm(crtc->dev);
+		intel_wait_for_vblank(crtc->dev, intel_crtc->pipe);
+	}
 
-	ilk_update_wm(crtc);
+	intel_crtc->wm.active.ilk = cstate->wm.optimal.ilk;
+
+	ilk_program_watermarks(dev_priv);
 }
 
 static void skl_pipe_wm_active_state(uint32_t val,
@@ -3805,7 +3696,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
+	struct skl_pipe_wm *active = &cstate->wm.optimal.skl;
 	enum pipe pipe = intel_crtc->pipe;
 	int level, i, max_level;
 	uint32_t temp;
@@ -3849,6 +3741,8 @@
 
 	temp = hw->plane_trans[pipe][PLANE_CURSOR];
 	skl_pipe_wm_active_state(temp, active, true, true, i, 0);
+
+	intel_crtc->wm.active.skl = *active;
 }
 
 void skl_wm_get_hw_state(struct drm_device *dev)
@@ -3868,9 +3762,10 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct ilk_wm_values *hw = &dev_priv->wm.hw;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_pipe_wm *active = &intel_crtc->wm.active;
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
+	struct intel_pipe_wm *active = &cstate->wm.optimal.ilk;
 	enum pipe pipe = intel_crtc->pipe;
-	static const unsigned int wm0_pipe_reg[] = {
+	static const i915_reg_t wm0_pipe_reg[] = {
 		[PIPE_A] = WM0_PIPEA_ILK,
 		[PIPE_B] = WM0_PIPEB_ILK,
 		[PIPE_C] = WM0_PIPEC_IVB,
@@ -3907,6 +3802,8 @@
 		for (level = 0; level <= max_level; level++)
 			active->wm[level].enable = true;
 	}
+
+	intel_crtc->wm.active.ilk = *active;
 }
 
 #define _FW_WM(value, plane) \
@@ -4132,21 +4029,6 @@
 		dev_priv->display.update_wm(crtc);
 }
 
-void intel_update_sprite_watermarks(struct drm_plane *plane,
-				    struct drm_crtc *crtc,
-				    uint32_t sprite_width,
-				    uint32_t sprite_height,
-				    int pixel_size,
-				    bool enabled, bool scaled)
-{
-	struct drm_i915_private *dev_priv = plane->dev->dev_private;
-
-	if (dev_priv->display.update_sprite_wm)
-		dev_priv->display.update_sprite_wm(plane, crtc,
-						   sprite_width, sprite_height,
-						   pixel_size, enabled, scaled);
-}
-
 /**
  * Lock protecting IPS related data structures
  */
@@ -4414,7 +4296,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
-	if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0))
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		return;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
@@ -4689,7 +4571,8 @@
 	dev_priv->rps.max_freq		= dev_priv->rps.rp0_freq;
 
 	dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
-	if (IS_HASWELL(dev) || IS_BROADWELL(dev) || IS_SKYLAKE(dev)) {
+	if (IS_HASWELL(dev) || IS_BROADWELL(dev) ||
+	    IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		ret = sandybridge_pcode_read(dev_priv,
 					HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
 					&ddcc_status);
@@ -4701,7 +4584,7 @@
 					dev_priv->rps.max_freq);
 	}
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		/* Store the frequency values in 16.66 MHZ units, which is
 		   the natural hardware unit for SKL */
 		dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
@@ -4738,7 +4621,7 @@
 	gen6_init_rps_frequencies(dev);
 
 	/* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
-	if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 		return;
 	}
@@ -4783,7 +4666,7 @@
 
 	/* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
 	if (IS_SKYLAKE(dev) && !((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) &&
-				 (INTEL_REVID(dev) <= SKL_REVID_E0)))
+				 IS_SKL_REVID(dev, 0, SKL_REVID_E0)))
 		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
 	else
 		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
@@ -4807,8 +4690,8 @@
 	DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
 			"on" : "off");
 	/* WaRsUseTimeoutMode */
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_D0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_A0)) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */
 		I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
 			   GEN7_RC_CTL_TO_MODE |
@@ -4824,8 +4707,9 @@
 	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
 	 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
 	 */
-	if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
-	    ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_E0)))
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1) ||
+	    ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) &&
+	     IS_SKL_REVID(dev, 0, SKL_REVID_E0)))
 		I915_WRITE(GEN9_PG_ENABLE, 0);
 	else
 		I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
@@ -5056,7 +4940,7 @@
 	/* convert DDR frequency from units of 266.6MHz to bandwidth */
 	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		/* Convert GT frequency to 50 HZ units */
 		min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
 		max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
@@ -5074,7 +4958,7 @@
 		int diff = max_gpu_freq - gpu_freq;
 		unsigned int ia_freq = 0, ring_freq = 0;
 
-		if (IS_SKYLAKE(dev)) {
+		if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 			/*
 			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
 			 * No floor required for ring frequency on SKL.
@@ -6202,7 +6086,7 @@
 	} else if (INTEL_INFO(dev)->gen >= 9) {
 		gen9_enable_rc6(dev);
 		gen9_enable_rps(dev);
-		if (IS_SKYLAKE(dev))
+		if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 			__gen6_update_ring_freq(dev);
 	} else if (IS_BROADWELL(dev)) {
 		gen8_enable_rps(dev);
@@ -7058,7 +6942,6 @@
 			dev_priv->display.init_clock_gating =
 				bxt_init_clock_gating;
 		dev_priv->display.update_wm = skl_update_wm;
-		dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
 	} else if (HAS_PCH_SPLIT(dev)) {
 		ilk_setup_wm_latency(dev);
 
@@ -7067,7 +6950,7 @@
 		    (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
 		     dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
 			dev_priv->display.update_wm = ilk_update_wm;
-			dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
+			dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
 		} else {
 			DRM_DEBUG_KMS("Failed to read display plane latency. "
 				      "Disable CxSR\n");
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 213581c..bc5ea2a 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -80,7 +80,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc);
 	enum transcoder cpu_transcoder = crtc->config->cpu_transcoder;
-	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder);
+	i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder);
 	uint32_t *data = (uint32_t *) vsc_psr;
 	unsigned int i;
 
@@ -151,13 +151,31 @@
 			   DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE);
 }
 
+static i915_reg_t psr_aux_ctl_reg(struct drm_i915_private *dev_priv,
+				       enum port port)
+{
+	if (INTEL_INFO(dev_priv)->gen >= 9)
+		return DP_AUX_CH_CTL(port);
+	else
+		return EDP_PSR_AUX_CTL;
+}
+
+static i915_reg_t psr_aux_data_reg(struct drm_i915_private *dev_priv,
+					enum port port, int index)
+{
+	if (INTEL_INFO(dev_priv)->gen >= 9)
+		return DP_AUX_CH_DATA(port, index);
+	else
+		return EDP_PSR_AUX_DATA(index);
+}
+
 static void hsw_psr_enable_sink(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t aux_clock_divider;
-	uint32_t aux_data_reg, aux_ctl_reg;
+	i915_reg_t aux_ctl_reg;
 	int precharge = 0x3;
 	static const uint8_t aux_msg[] = {
 		[0] = DP_AUX_NATIVE_WRITE << 4,
@@ -166,6 +184,7 @@
 		[3] = 1 - 1,
 		[4] = DP_SET_POWER_D0,
 	};
+	enum port port = dig_port->port;
 	int i;
 
 	BUILD_BUG_ON(sizeof(aux_msg) > 20);
@@ -181,14 +200,11 @@
 				DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF,
 				DP_AUX_FRAME_SYNC_ENABLE);
 
-	aux_data_reg = (INTEL_INFO(dev)->gen >= 9) ?
-				DPA_AUX_CH_DATA1 : EDP_PSR_AUX_DATA1(dev);
-	aux_ctl_reg = (INTEL_INFO(dev)->gen >= 9) ?
-				DPA_AUX_CH_CTL : EDP_PSR_AUX_CTL(dev);
+	aux_ctl_reg = psr_aux_ctl_reg(dev_priv, port);
 
 	/* Setup AUX registers */
 	for (i = 0; i < sizeof(aux_msg); i += 4)
-		I915_WRITE(aux_data_reg + i,
+		I915_WRITE(psr_aux_data_reg(dev_priv, port, i >> 2),
 			   intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i));
 
 	if (INTEL_INFO(dev)->gen >= 9) {
@@ -267,16 +283,11 @@
 	const uint32_t link_entry_time = EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES;
 
 	if (intel_dp->psr_dpcd[1] & DP_PSR_NO_TRAIN_ON_EXIT) {
-		/* It doesn't mean we shouldn't send TPS patters, so let's
-		   send the minimal TP1 possible and skip TP2. */
-		val |= EDP_PSR_TP1_TIME_100us;
-		val |= EDP_PSR_TP2_TP3_TIME_0us;
-		val |= EDP_PSR_SKIP_AUX_EXIT;
 		/* Sink should be able to train with the 5 or 6 idle patterns */
 		idle_frames += 4;
 	}
 
-	I915_WRITE(EDP_PSR_CTL(dev), val |
+	I915_WRITE(EDP_PSR_CTL, val |
 		   (IS_BROADWELL(dev) ? 0 : link_entry_time) |
 		   max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT |
 		   idle_frames << EDP_PSR_IDLE_FRAME_SHIFT |
@@ -340,7 +351,7 @@
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	WARN_ON(I915_READ(EDP_PSR_CTL(dev)) & EDP_PSR_ENABLE);
+	WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE);
 	WARN_ON(dev_priv->psr.active);
 	lockdep_assert_held(&dev_priv->psr.lock);
 
@@ -404,7 +415,7 @@
 		}
 
 		/* Avoid continuous PSR exit by masking memup and hpd */
-		I915_WRITE(EDP_PSR_DEBUG_CTL(dev), EDP_PSR_DEBUG_MASK_MEMUP |
+		I915_WRITE(EDP_PSR_DEBUG_CTL, EDP_PSR_DEBUG_MASK_MEMUP |
 			   EDP_PSR_DEBUG_MASK_HPD);
 
 		/* Enable PSR on the panel */
@@ -427,6 +438,19 @@
 		vlv_psr_enable_source(intel_dp);
 	}
 
+	/*
+	 * FIXME: Activation should happen immediately since this function
+	 * is just called after pipe is fully trained and enabled.
+	 * However on every platform we face issues when first activation
+	 * follows a modeset so quickly.
+	 *     - On VLV/CHV we get a blank screen on first activation
+	 *     - On HSW/BDW we get a recoverable frozen screen until next
+	 *       exit-activate sequence.
+	 */
+	if (INTEL_INFO(dev)->gen < 9)
+		schedule_delayed_work(&dev_priv->psr.work,
+				      msecs_to_jiffies(intel_dp->panel_power_cycle_delay * 5));
+
 	dev_priv->psr.enabled = intel_dp;
 unlock:
 	mutex_unlock(&dev_priv->psr.lock);
@@ -466,17 +490,17 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (dev_priv->psr.active) {
-		I915_WRITE(EDP_PSR_CTL(dev),
-			   I915_READ(EDP_PSR_CTL(dev)) & ~EDP_PSR_ENABLE);
+		I915_WRITE(EDP_PSR_CTL,
+			   I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE);
 
 		/* Wait till PSR is idle */
-		if (_wait_for((I915_READ(EDP_PSR_STATUS_CTL(dev)) &
+		if (_wait_for((I915_READ(EDP_PSR_STATUS_CTL) &
 			       EDP_PSR_STATUS_STATE_MASK) == 0, 2000, 10))
 			DRM_ERROR("Timed out waiting for PSR Idle State\n");
 
 		dev_priv->psr.active = false;
 	} else {
-		WARN_ON(I915_READ(EDP_PSR_CTL(dev)) & EDP_PSR_ENABLE);
+		WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE);
 	}
 }
 
@@ -523,7 +547,7 @@
 	 * and be ready for re-enable.
 	 */
 	if (HAS_DDI(dev_priv->dev)) {
-		if (wait_for((I915_READ(EDP_PSR_STATUS_CTL(dev_priv->dev)) &
+		if (wait_for((I915_READ(EDP_PSR_STATUS_CTL) &
 			      EDP_PSR_STATUS_STATE_MASK) == 0, 50)) {
 			DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n");
 			return;
@@ -566,11 +590,11 @@
 		return;
 
 	if (HAS_DDI(dev)) {
-		val = I915_READ(EDP_PSR_CTL(dev));
+		val = I915_READ(EDP_PSR_CTL);
 
 		WARN_ON(!(val & EDP_PSR_ENABLE));
 
-		I915_WRITE(EDP_PSR_CTL(dev), val & ~EDP_PSR_ENABLE);
+		I915_WRITE(EDP_PSR_CTL, val & ~EDP_PSR_ENABLE);
 	} else {
 		val = I915_READ(VLV_PSRCTL(pipe));
 
@@ -700,7 +724,6 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
 	enum pipe pipe;
-	int delay_ms = HAS_DDI(dev) ? 100 : 500;
 
 	mutex_lock(&dev_priv->psr.lock);
 	if (!dev_priv->psr.enabled) {
@@ -735,8 +758,9 @@
 	}
 
 	if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits)
-		schedule_delayed_work(&dev_priv->psr.work,
-				      msecs_to_jiffies(delay_ms));
+		if (!work_busy(&dev_priv->psr.work.work))
+			schedule_delayed_work(&dev_priv->psr.work,
+					      msecs_to_jiffies(100));
 	mutex_unlock(&dev_priv->psr.lock);
 }
 
@@ -751,6 +775,9 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ?
+		HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE;
+
 	INIT_DELAYED_WORK(&dev_priv->psr.work, intel_psr_work);
 	mutex_init(&dev_priv->psr.lock);
 }
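
For context, the PSR register macros can drop their dev argument because the PSR MMIO block base is now cached in dev_priv->psr_mmio_base at init time (HSW vs BDW+). A hedged sketch of what such macros look like, assuming the _MMIO() wrapper from the type-safe register work; the names carry an EXAMPLE_ prefix and the offsets are illustrative, not authoritative:

	/* Sketch only: register macros defined relative to the cached PSR base. */
	#define EXAMPLE_EDP_PSR_CTL		_MMIO(dev_priv->psr_mmio_base + 0x00)
	#define EXAMPLE_EDP_PSR_STATUS_CTL	_MMIO(dev_priv->psr_mmio_base + 0x40)	/* offset assumed */
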
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9461a23..57d78f2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -481,7 +481,7 @@
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 mmio = 0;
+	i915_reg_t mmio;
 
 	/* The ring status page addresses are no longer next to the rest of
 	 * the ring registers as of gen7.
@@ -524,7 +524,7 @@
 	 * invalidating the TLB?
 	 */
 	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
-		u32 reg = RING_INSTPM(ring->mmio_base);
+		i915_reg_t reg = RING_INSTPM(ring->mmio_base);
 
 		/* ring should be idle before issuing a sync flush*/
 		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
@@ -733,7 +733,7 @@
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
 	for (i = 0; i < w->count; i++) {
-		intel_ring_emit(ring, w->reg[i].addr);
+		intel_ring_emit_reg(ring, w->reg[i].addr);
 		intel_ring_emit(ring, w->reg[i].value);
 	}
 	intel_ring_emit(ring, MI_NOOP);
@@ -766,7 +766,8 @@
 }
 
 static int wa_add(struct drm_i915_private *dev_priv,
-		  const u32 addr, const u32 mask, const u32 val)
+		  i915_reg_t addr,
+		  const u32 mask, const u32 val)
 {
 	const u32 idx = dev_priv->workarounds.count;
 
@@ -924,17 +925,15 @@
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
 
-	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
-	    INTEL_REVID(dev) == SKL_REVID_B0)) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
-		/* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
+	/* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 				  GEN9_DG_MIRROR_FIX_ENABLE);
-	}
 
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
-		/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
+	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
 				  GEN9_RHWO_OPTIMIZATION_DISABLE);
 		/*
@@ -944,12 +943,10 @@
 		 */
 	}
 
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
-	    IS_BROXTON(dev)) {
-		/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
+	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
+	if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER) || IS_BROXTON(dev))
 		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 				  GEN9_ENABLE_YV12_BUGFIX);
-	}
 
 	/* Wa4x4STCOptimizationDisable:skl,bxt */
 	/* WaDisablePartialResolveInVc:skl,bxt */
@@ -961,24 +958,22 @@
 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 
 	/* WaDisableMaskBasedCammingInRCC:skl,bxt */
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
+	if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
 				  PIXEL_MASK_CAMMING_DISABLE);
 
 	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
 	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
+	if (IS_SKL_REVID(dev, SKL_REVID_F0, SKL_REVID_F0) ||
+	    IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
 		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
 	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
 
 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
-	if (IS_SKYLAKE(dev) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_B0)) {
+	if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
-	}
 
 	/* WaDisableSTUnitPowerOptimization:skl,bxt */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
@@ -1038,7 +1033,7 @@
 	if (ret)
 		return ret;
 
-	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
 		/* WaDisableHDCInvalidation:skl */
 		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
 			   BDW_DISABLE_HDC_INVALIDATION);
@@ -1051,23 +1046,23 @@
 	/* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
 	 * involving this register should also be added to WA batch as required.
 	 */
-	if (INTEL_REVID(dev) <= SKL_REVID_E0)
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_E0))
 		/* WaDisableLSQCROPERFforOCL:skl */
 		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
 			   GEN8_LQSC_RO_PERF_DIS);
 
 	/* WaEnableGapsTsvCreditFix:skl */
-	if (IS_SKYLAKE(dev) && (INTEL_REVID(dev) >= SKL_REVID_C0)) {
+	if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) {
 		I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
 					   GEN9_GAPS_TSV_CREDIT_DISABLE));
 	}
 
 	/* WaDisablePowerCompilerClockGating:skl */
-	if (INTEL_REVID(dev) == SKL_REVID_B0)
+	if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0))
 		WA_SET_BIT_MASKED(HIZ_CHICKEN,
 				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
-	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
 		/*
 		 *Use Force Non-Coherent whenever executing a 3D context. This
 		 * is a workaround for a possible hang in the unlikely event
@@ -1078,19 +1073,17 @@
 				  HDC_FORCE_NON_COHERENT);
 	}
 
-	if (INTEL_REVID(dev) == SKL_REVID_C0 ||
-	    INTEL_REVID(dev) == SKL_REVID_D0)
-		/* WaBarrierPerformanceFixDisable:skl */
+	/* WaBarrierPerformanceFixDisable:skl */
+	if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
 		WA_SET_BIT_MASKED(HDC_CHICKEN0,
 				  HDC_FENCE_DEST_SLM_DISABLE |
 				  HDC_BARRIER_PERFORMANCE_DISABLE);
 
 	/* WaDisableSbeCacheDispatchPortSharing:skl */
-	if (INTEL_REVID(dev) <= SKL_REVID_F0) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_F0))
 		WA_SET_BIT_MASKED(
 			GEN7_HALF_SLICE_CHICKEN1,
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-	}
 
 	return skl_tune_iz_hashing(ring);
 }
@@ -1107,11 +1100,11 @@
 
 	/* WaStoreMultiplePTEenable:bxt */
 	/* This is a requirement according to Hardware specification */
-	if (INTEL_REVID(dev) == BXT_REVID_A0)
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
 
 	/* WaSetClckGatingDisableMedia:bxt */
-	if (INTEL_REVID(dev) == BXT_REVID_A0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
 					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
 	}
@@ -1121,7 +1114,7 @@
 			  STALL_DOP_GATING_DISABLE);
 
 	/* WaDisableSbeCacheDispatchPortSharing:bxt */
-	if (INTEL_REVID(dev) <= BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
 		WA_SET_BIT_MASKED(
 			GEN7_HALF_SLICE_CHICKEN1,
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
@@ -1319,11 +1312,13 @@
 		return ret;
 
 	for_each_ring(useless, dev_priv, i) {
-		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
-		if (mbox_reg != GEN6_NOSYNC) {
+		i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[i];
+
+		if (i915_mmio_reg_valid(mbox_reg)) {
 			u32 seqno = i915_gem_request_get_seqno(signaller_req);
+
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
-			intel_ring_emit(signaller, mbox_reg);
+			intel_ring_emit_reg(signaller, mbox_reg);
 			intel_ring_emit(signaller, seqno);
 		}
 	}
@@ -2004,11 +1999,35 @@
 
 void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 {
-	iounmap(ringbuf->virtual_start);
+	if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
+		vunmap(ringbuf->virtual_start);
+	else
+		iounmap(ringbuf->virtual_start);
 	ringbuf->virtual_start = NULL;
 	i915_gem_object_ggtt_unpin(ringbuf->obj);
 }
 
+static u32 *vmap_obj(struct drm_i915_gem_object *obj)
+{
+	struct sg_page_iter sg_iter;
+	struct page **pages;
+	void *addr;
+	int i;
+
+	pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
+	if (pages == NULL)
+		return NULL;
+
+	i = 0;
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0)
+		pages[i++] = sg_page_iter_page(&sg_iter);
+
+	addr = vmap(pages, i, 0, PAGE_KERNEL);
+	drm_free_large(pages);
+
+	return addr;
+}
+
 int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
 				     struct intel_ringbuffer *ringbuf)
 {
@@ -2016,21 +2035,39 @@
 	struct drm_i915_gem_object *obj = ringbuf->obj;
 	int ret;
 
-	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
-	if (ret)
-		return ret;
+	if (HAS_LLC(dev_priv) && !obj->stolen) {
+		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 0);
+		if (ret)
+			return ret;
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret) {
-		i915_gem_object_ggtt_unpin(obj);
-		return ret;
-	}
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret) {
+			i915_gem_object_ggtt_unpin(obj);
+			return ret;
+		}
 
-	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
-			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
-	if (ringbuf->virtual_start == NULL) {
-		i915_gem_object_ggtt_unpin(obj);
-		return -EINVAL;
+		ringbuf->virtual_start = vmap_obj(obj);
+		if (ringbuf->virtual_start == NULL) {
+			i915_gem_object_ggtt_unpin(obj);
+			return -ENOMEM;
+		}
+	} else {
+		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
+		if (ret)
+			return ret;
+
+		ret = i915_gem_object_set_to_gtt_domain(obj, true);
+		if (ret) {
+			i915_gem_object_ggtt_unpin(obj);
+			return ret;
+		}
+
+		ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
+						    i915_gem_obj_ggtt_offset(obj), ringbuf->size);
+		if (ringbuf->virtual_start == NULL) {
+			i915_gem_object_ggtt_unpin(obj);
+			return -EINVAL;
+		}
 	}
 
 	return 0;
@@ -2070,10 +2107,14 @@
 	int ret;
 
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-	if (ring == NULL)
+	if (ring == NULL) {
+		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
+				 engine->name);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	ring->ring = engine;
+	list_add(&ring->link, &engine->buffers);
 
 	ring->size = size;
 	/* Workaround an erratum on the i830 which causes a hang if
@@ -2089,8 +2130,9 @@
 
 	ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
 	if (ret) {
-		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
-			  engine->name, ret);
+		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
+				 engine->name, ret);
+		list_del(&ring->link);
 		kfree(ring);
 		return ERR_PTR(ret);
 	}
@@ -2102,6 +2144,7 @@
 intel_ringbuffer_free(struct intel_ringbuffer *ring)
 {
 	intel_destroy_ringbuffer_obj(ring);
+	list_del(&ring->link);
 	kfree(ring);
 }
 
@@ -2117,6 +2160,7 @@
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
+	INIT_LIST_HEAD(&ring->buffers);
 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 49fa41d..5d1eb20 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -100,6 +100,7 @@
 	void __iomem *virtual_start;
 
 	struct intel_engine_cs *ring;
+	struct list_head link;
 
 	u32 head;
 	u32 tail;
@@ -157,6 +158,7 @@
 	u32		mmio_base;
 	struct		drm_device *dev;
 	struct intel_ringbuffer *buffer;
+	struct list_head buffers;
 
 	/*
 	 * A pool of objects to use as shadow copies of client batch buffers
@@ -247,7 +249,7 @@
 				/* our mbox written by others */
 				u32		wait[I915_NUM_RINGS];
 				/* mboxes this ring signals to */
-				u32		signal[I915_NUM_RINGS];
+				i915_reg_t	signal[I915_NUM_RINGS];
 			} mbox;
 			u64		signal_ggtt[I915_NUM_RINGS];
 		};
@@ -441,6 +443,11 @@
 	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
 	ringbuf->tail += 4;
 }
+static inline void intel_ring_emit_reg(struct intel_engine_cs *ring,
+				       i915_reg_t reg)
+{
+	intel_ring_emit(ring, i915_mmio_reg_offset(reg));
+}
 static inline void intel_ring_advance(struct intel_engine_cs *ring)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
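
intel_ring_emit_reg() above is what keeps i915_reg_t out of the raw dword stream: the register is unwrapped explicitly via i915_mmio_reg_offset() at the emit site. A hedged sketch of the type itself, defined elsewhere in this series and shown here only to make the helpers readable:

	/* Sketch: wrapping the offset in a struct stops register offsets and
	 * plain u32 values from being mixed up silently. */
	typedef struct {
		u32 reg;
	} i915_reg_t;

	#define _MMIO(r) ((const i915_reg_t){ .reg = (r) })

	static inline u32 i915_mmio_reg_offset(i915_reg_t reg)
	{
		return reg.reg;
	}

	static inline bool i915_mmio_reg_valid(i915_reg_t reg)
	{
		return reg.reg != 0;	/* replaces checks like the old GEN6_NOSYNC comparison above */
	}
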
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 13f1420..afca6c9 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -49,9 +49,6 @@
  * present for a given platform.
  */
 
-#define GEN9_ENABLE_DC5(dev) 0
-#define SKL_ENABLE_DC6(dev) IS_SKYLAKE(dev)
-
 #define for_each_power_well(i, power_well, domain_mask, power_domains)	\
 	for (i = 0;							\
 	     i < (power_domains)->power_well_count &&			\
@@ -244,12 +241,6 @@
 		gen8_irq_power_well_post_enable(dev_priv,
 						1 << PIPE_C | 1 << PIPE_B);
 	}
-
-	if (power_well->data == SKL_DISP_PW_1) {
-		if (!dev_priv->power_domains.initializing)
-			intel_prepare_ddi(dev);
-		gen8_irq_power_well_post_enable(dev_priv, 1 << PIPE_A);
-	}
 }
 
 static void hsw_set_power_well(struct drm_i915_private *dev_priv,
@@ -292,58 +283,38 @@
 	BIT(POWER_DOMAIN_TRANSCODER_C) |		\
 	BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
 	BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_E_2_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_E_LANES) |		\
 	BIT(POWER_DOMAIN_AUX_B) |                       \
 	BIT(POWER_DOMAIN_AUX_C) |			\
 	BIT(POWER_DOMAIN_AUX_D) |			\
 	BIT(POWER_DOMAIN_AUDIO) |			\
 	BIT(POWER_DOMAIN_VGA) |				\
 	BIT(POWER_DOMAIN_INIT))
-#define SKL_DISPLAY_POWERWELL_1_POWER_DOMAINS (		\
-	SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
-	BIT(POWER_DOMAIN_PLLS) |			\
-	BIT(POWER_DOMAIN_PIPE_A) |			\
-	BIT(POWER_DOMAIN_TRANSCODER_EDP) |		\
-	BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |		\
-	BIT(POWER_DOMAIN_AUX_A) |			\
-	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DDI_A_E_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_E_2_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_E_LANES) |		\
 	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DDI_B_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
 	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DDI_C_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
 	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DDI_D_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
 	BIT(POWER_DOMAIN_INIT))
-#define SKL_DISPLAY_MISC_IO_POWER_DOMAINS (		\
-	SKL_DISPLAY_POWERWELL_1_POWER_DOMAINS |		\
-	BIT(POWER_DOMAIN_PLLS) |			\
+#define SKL_DISPLAY_DC_OFF_POWER_DOMAINS (		\
+	SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT(POWER_DOMAIN_MODESET) |			\
+	BIT(POWER_DOMAIN_AUX_A) |			\
 	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_ALWAYS_ON_POWER_DOMAINS (		\
-	(POWER_DOMAIN_MASK & ~(SKL_DISPLAY_POWERWELL_1_POWER_DOMAINS |	\
+	(POWER_DOMAIN_MASK & ~(				\
 	SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
-	SKL_DISPLAY_DDI_A_E_POWER_DOMAINS |		\
-	SKL_DISPLAY_DDI_B_POWER_DOMAINS |		\
-	SKL_DISPLAY_DDI_C_POWER_DOMAINS |		\
-	SKL_DISPLAY_DDI_D_POWER_DOMAINS |		\
-	SKL_DISPLAY_MISC_IO_POWER_DOMAINS)) |		\
+	SKL_DISPLAY_DC_OFF_POWER_DOMAINS)) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
@@ -354,25 +325,28 @@
 	BIT(POWER_DOMAIN_TRANSCODER_C) |		\
 	BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
 	BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
 	BIT(POWER_DOMAIN_AUX_B) |			\
 	BIT(POWER_DOMAIN_AUX_C) |			\
 	BIT(POWER_DOMAIN_AUDIO) |			\
 	BIT(POWER_DOMAIN_VGA) |				\
+	BIT(POWER_DOMAIN_GMBUS) |			\
 	BIT(POWER_DOMAIN_INIT))
 #define BXT_DISPLAY_POWERWELL_1_POWER_DOMAINS (		\
 	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
 	BIT(POWER_DOMAIN_PIPE_A) |			\
 	BIT(POWER_DOMAIN_TRANSCODER_EDP) |		\
 	BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
 	BIT(POWER_DOMAIN_AUX_A) |			\
 	BIT(POWER_DOMAIN_PLLS) |			\
 	BIT(POWER_DOMAIN_INIT))
+#define BXT_DISPLAY_DC_OFF_POWER_DOMAINS (		\
+	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT(POWER_DOMAIN_MODESET) |			\
+	BIT(POWER_DOMAIN_AUX_A) |			\
+	BIT(POWER_DOMAIN_INIT))
 #define BXT_DISPLAY_ALWAYS_ON_POWER_DOMAINS (		\
 	(POWER_DOMAIN_MASK & ~(BXT_DISPLAY_POWERWELL_1_POWER_DOMAINS |	\
 	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS)) |	\
@@ -416,34 +390,6 @@
 	  */
 }
 
-void bxt_enable_dc9(struct drm_i915_private *dev_priv)
-{
-	uint32_t val;
-
-	assert_can_enable_dc9(dev_priv);
-
-	DRM_DEBUG_KMS("Enabling DC9\n");
-
-	val = I915_READ(DC_STATE_EN);
-	val |= DC_STATE_EN_DC9;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
-}
-
-void bxt_disable_dc9(struct drm_i915_private *dev_priv)
-{
-	uint32_t val;
-
-	assert_can_disable_dc9(dev_priv);
-
-	DRM_DEBUG_KMS("Disabling DC9\n");
-
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_DC9;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
-}
-
 static void gen9_set_dc_state_debugmask_memory_up(
 			struct drm_i915_private *dev_priv)
 {
@@ -458,6 +404,62 @@
 	}
 }
 
+static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
+{
+	uint32_t val;
+	uint32_t mask;
+
+	mask = DC_STATE_EN_UPTO_DC5;
+	if (IS_BROXTON(dev_priv))
+		mask |= DC_STATE_EN_DC9;
+	else
+		mask |= DC_STATE_EN_UPTO_DC6;
+
+	WARN_ON_ONCE(state & ~mask);
+
+	if (i915.enable_dc == 0)
+		state = DC_STATE_DISABLE;
+	else if (i915.enable_dc == 1 && state > DC_STATE_EN_UPTO_DC5)
+		state = DC_STATE_EN_UPTO_DC5;
+
+	if (state & DC_STATE_EN_UPTO_DC5_DC6_MASK)
+		gen9_set_dc_state_debugmask_memory_up(dev_priv);
+
+	val = I915_READ(DC_STATE_EN);
+	DRM_DEBUG_KMS("Setting DC state from %02x to %02x\n",
+		      val & mask, state);
+	val &= ~mask;
+	val |= state;
+	I915_WRITE(DC_STATE_EN, val);
+	POSTING_READ(DC_STATE_EN);
+}
+
+void bxt_enable_dc9(struct drm_i915_private *dev_priv)
+{
+	assert_can_enable_dc9(dev_priv);
+
+	DRM_DEBUG_KMS("Enabling DC9\n");
+
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_DC9);
+}
+
+void bxt_disable_dc9(struct drm_i915_private *dev_priv)
+{
+	assert_can_disable_dc9(dev_priv);
+
+	DRM_DEBUG_KMS("Disabling DC9\n");
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+}
+
+static void assert_csr_loaded(struct drm_i915_private *dev_priv)
+{
+	WARN_ONCE(!I915_READ(CSR_PROGRAM(0)),
+		  "CSR program storage start is NULL\n");
+	WARN_ONCE(!I915_READ(CSR_SSP_BASE), "CSR SSP Base Not fine\n");
+	WARN_ONCE(!I915_READ(CSR_HTP_SKL), "CSR HTP Not fine\n");
+}
+
 static void assert_can_enable_dc5(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -478,8 +480,6 @@
 
 static void assert_can_disable_dc5(struct drm_i915_private *dev_priv)
 {
-	bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv,
-					SKL_DISP_PW_2);
 	/*
 	 * During initialization, the firmware may not be loaded yet.
 	 * We still want to make sure that the DC enabling flag is cleared.
@@ -487,40 +487,17 @@
 	if (dev_priv->power_domains.initializing)
 		return;
 
-	WARN_ONCE(!pg2_enabled, "PG2 not enabled to disable DC5.\n");
 	WARN_ONCE(dev_priv->pm.suspended,
 		"Disabling of DC5 while platform is runtime-suspended should never happen.\n");
 }
 
 static void gen9_enable_dc5(struct drm_i915_private *dev_priv)
 {
-	uint32_t val;
-
 	assert_can_enable_dc5(dev_priv);
 
 	DRM_DEBUG_KMS("Enabling DC5\n");
 
-	gen9_set_dc_state_debugmask_memory_up(dev_priv);
-
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_UPTO_DC5_DC6_MASK;
-	val |= DC_STATE_EN_UPTO_DC5;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
-}
-
-static void gen9_disable_dc5(struct drm_i915_private *dev_priv)
-{
-	uint32_t val;
-
-	assert_can_disable_dc5(dev_priv);
-
-	DRM_DEBUG_KMS("Disabling DC5\n");
-
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_UPTO_DC5;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
 }
 
 static void assert_can_enable_dc6(struct drm_i915_private *dev_priv)
@@ -546,40 +523,37 @@
 	if (dev_priv->power_domains.initializing)
 		return;
 
-	assert_csr_loaded(dev_priv);
 	WARN_ONCE(!(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6),
 		  "DC6 already programmed to be disabled.\n");
 }
 
-static void skl_enable_dc6(struct drm_i915_private *dev_priv)
+static void gen9_disable_dc5_dc6(struct drm_i915_private *dev_priv)
 {
-	uint32_t val;
+	assert_can_disable_dc5(dev_priv);
 
+	if (IS_SKYLAKE(dev_priv) && i915.enable_dc != 0 && i915.enable_dc != 1)
+		assert_can_disable_dc6(dev_priv);
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+}
+
+void skl_enable_dc6(struct drm_i915_private *dev_priv)
+{
 	assert_can_enable_dc6(dev_priv);
 
 	DRM_DEBUG_KMS("Enabling DC6\n");
 
-	gen9_set_dc_state_debugmask_memory_up(dev_priv);
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
 
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_UPTO_DC5_DC6_MASK;
-	val |= DC_STATE_EN_UPTO_DC6;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
 }
 
-static void skl_disable_dc6(struct drm_i915_private *dev_priv)
+void skl_disable_dc6(struct drm_i915_private *dev_priv)
 {
-	uint32_t val;
-
 	assert_can_disable_dc6(dev_priv);
 
 	DRM_DEBUG_KMS("Disabling DC6\n");
 
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_UPTO_DC6;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
 }
 
 static void skl_set_power_well(struct drm_i915_private *dev_priv,
@@ -629,20 +603,16 @@
 				!I915_READ(HSW_PWR_WELL_BIOS),
 				"Invalid for power well status to be enabled, unless done by the BIOS, \
 				when request is to disable!\n");
-			if ((GEN9_ENABLE_DC5(dev) || SKL_ENABLE_DC6(dev)) &&
-				power_well->data == SKL_DISP_PW_2) {
-				if (SKL_ENABLE_DC6(dev)) {
-					skl_disable_dc6(dev_priv);
-					/*
-					 * DDI buffer programming unnecessary during driver-load/resume
-					 * as it's already done during modeset initialization then.
-					 * It's also invalid here as encoder list is still uninitialized.
-					 */
-					if (!dev_priv->power_domains.initializing)
-						intel_prepare_ddi(dev);
-				} else {
-					gen9_disable_dc5(dev_priv);
-				}
+			if (power_well->data == SKL_DISP_PW_2) {
+				/*
+				 * DDI buffer programming unnecessary during
+				 * driver-load/resume as it's already done
+				 * during modeset initialization then. It's
+				 * also invalid here as encoder list is still
+				 * uninitialized.
+				 */
+				if (!dev_priv->power_domains.initializing)
+					intel_prepare_ddi(dev);
 			}
 			I915_WRITE(HSW_PWR_WELL_DRIVER, tmp | req_mask);
 		}
@@ -657,34 +627,9 @@
 		}
 	} else {
 		if (enable_requested) {
-			if (IS_SKYLAKE(dev) &&
-				(power_well->data == SKL_DISP_PW_1) &&
-				(intel_csr_load_status_get(dev_priv) == FW_LOADED))
-				DRM_DEBUG_KMS("Not Disabling PW1, dmc will handle\n");
-			else {
-				I915_WRITE(HSW_PWR_WELL_DRIVER,	tmp & ~req_mask);
-				POSTING_READ(HSW_PWR_WELL_DRIVER);
-				DRM_DEBUG_KMS("Disabling %s\n", power_well->name);
-			}
-
-			if ((GEN9_ENABLE_DC5(dev) || SKL_ENABLE_DC6(dev)) &&
-				power_well->data == SKL_DISP_PW_2) {
-				enum csr_state state;
-				/* TODO: wait for a completion event or
-				 * similar here instead of busy
-				 * waiting using wait_for function.
-				 */
-				wait_for((state = intel_csr_load_status_get(dev_priv)) !=
-						FW_UNINITIALIZED, 1000);
-				if (state != FW_LOADED)
-					DRM_DEBUG("CSR firmware not ready (%d)\n",
-							state);
-				else
-					if (SKL_ENABLE_DC6(dev))
-						skl_enable_dc6(dev_priv);
-					else
-						gen9_enable_dc5(dev_priv);
-			}
+			I915_WRITE(HSW_PWR_WELL_DRIVER,	tmp & ~req_mask);
+			POSTING_READ(HSW_PWR_WELL_DRIVER);
+			DRM_DEBUG_KMS("Disabling %s\n", power_well->name);
 		}
 	}
 
@@ -759,6 +704,41 @@
 	skl_set_power_well(dev_priv, power_well, false);
 }
 
+static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0;
+}
+
+static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv,
+					  struct i915_power_well *power_well)
+{
+	gen9_disable_dc5_dc6(dev_priv);
+}
+
+static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	if (IS_SKYLAKE(dev_priv) && i915.enable_dc != 0 && i915.enable_dc != 1)
+		skl_enable_dc6(dev_priv);
+	else
+		gen9_enable_dc5(dev_priv);
+}
+
+static void gen9_dc_off_power_well_sync_hw(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	if (power_well->count > 0) {
+		gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+	} else {
+		if (IS_SKYLAKE(dev_priv) && i915.enable_dc != 0 &&
+		    i915.enable_dc != 1)
+			gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
+		else
+			gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
+	}
+}
+
 static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv,
 					   struct i915_power_well *power_well)
 {
@@ -973,10 +953,12 @@
 						 int power_well_id)
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *power_well;
 	int i;
 
-	for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) {
+	for (i = 0; i < power_domains->power_well_count; i++) {
+		struct i915_power_well *power_well;
+
+		power_well = &power_domains->power_wells[i];
 		if (power_well->data == power_well_id)
 			return power_well;
 	}
@@ -1457,7 +1439,7 @@
 	for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
 		WARN_ON(!power_well->count);
 
-		if (!--power_well->count && i915.disable_power_well)
+		if (!--power_well->count)
 			intel_power_well_disable(dev_priv, power_well);
 	}
 
@@ -1469,20 +1451,17 @@
 #define HSW_ALWAYS_ON_POWER_DOMAINS (			\
 	BIT(POWER_DOMAIN_PIPE_A) |			\
 	BIT(POWER_DOMAIN_TRANSCODER_EDP) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
 	BIT(POWER_DOMAIN_PORT_CRT) |			\
 	BIT(POWER_DOMAIN_PLLS) |			\
 	BIT(POWER_DOMAIN_AUX_A) |			\
 	BIT(POWER_DOMAIN_AUX_B) |			\
 	BIT(POWER_DOMAIN_AUX_C) |			\
 	BIT(POWER_DOMAIN_AUX_D) |			\
+	BIT(POWER_DOMAIN_GMBUS) |			\
 	BIT(POWER_DOMAIN_INIT))
 #define HSW_DISPLAY_POWER_DOMAINS (				\
 	(POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) |	\
@@ -1499,49 +1478,42 @@
 #define VLV_DISPLAY_POWER_DOMAINS	POWER_DOMAIN_MASK
 
 #define VLV_DPIO_CMN_BC_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
 	BIT(POWER_DOMAIN_PORT_CRT) |		\
 	BIT(POWER_DOMAIN_AUX_B) |		\
 	BIT(POWER_DOMAIN_AUX_C) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS (	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_B) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS (	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_B) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS (	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_C) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS (	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_C) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define CHV_DPIO_CMN_BC_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_B) |		\
 	BIT(POWER_DOMAIN_AUX_C) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define CHV_DPIO_CMN_D_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_D_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_D) |		\
 	BIT(POWER_DOMAIN_INIT))
 
@@ -1589,6 +1561,13 @@
 	.is_enabled = skl_power_well_enabled,
 };
 
+static const struct i915_power_well_ops gen9_dc_off_power_well_ops = {
+	.sync_hw = gen9_dc_off_power_well_sync_hw,
+	.enable = gen9_dc_off_power_well_enable,
+	.disable = gen9_dc_off_power_well_disable,
+	.is_enabled = gen9_dc_off_power_well_enabled,
+};
+
 static struct i915_power_well hsw_power_wells[] = {
 	{
 		.name = "always-on",
@@ -1644,6 +1623,7 @@
 		.always_on = 1,
 		.domains = VLV_ALWAYS_ON_POWER_DOMAINS,
 		.ops = &i9xx_always_on_power_well_ops,
+		.data = PUNIT_POWER_WELL_ALWAYS_ON,
 	},
 	{
 		.name = "display",
@@ -1745,20 +1725,29 @@
 		.always_on = 1,
 		.domains = SKL_DISPLAY_ALWAYS_ON_POWER_DOMAINS,
 		.ops = &i9xx_always_on_power_well_ops,
+		.data = SKL_DISP_PW_ALWAYS_ON,
 	},
 	{
 		.name = "power well 1",
-		.domains = SKL_DISPLAY_POWERWELL_1_POWER_DOMAINS,
+		/* Handled by the DMC firmware */
+		.domains = 0,
 		.ops = &skl_power_well_ops,
 		.data = SKL_DISP_PW_1,
 	},
 	{
 		.name = "MISC IO power well",
-		.domains = SKL_DISPLAY_MISC_IO_POWER_DOMAINS,
+		/* Handled by the DMC firmware */
+		.domains = 0,
 		.ops = &skl_power_well_ops,
 		.data = SKL_DISP_PW_MISC_IO,
 	},
 	{
+		.name = "DC off",
+		.domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.data = SKL_DISP_PW_DC_OFF,
+	},
+	{
 		.name = "power well 2",
 		.domains = SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS,
 		.ops = &skl_power_well_ops,
@@ -1790,6 +1779,34 @@
 	},
 };
 
+void skl_pw1_misc_io_init(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_well *well;
+
+	if (!IS_SKYLAKE(dev_priv))
+		return;
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_enable(dev_priv, well);
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_MISC_IO);
+	intel_power_well_enable(dev_priv, well);
+}
+
+void skl_pw1_misc_io_fini(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_well *well;
+
+	if (!IS_SKYLAKE(dev_priv))
+		return;
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_disable(dev_priv, well);
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_MISC_IO);
+	intel_power_well_disable(dev_priv, well);
+}
+
 static struct i915_power_well bxt_power_wells[] = {
 	{
 		.name = "always-on",
@@ -1804,11 +1821,17 @@
 		.data = SKL_DISP_PW_1,
 	},
 	{
+		.name = "DC off",
+		.domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.data = SKL_DISP_PW_DC_OFF,
+	},
+	{
 		.name = "power well 2",
 		.domains = BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS,
 		.ops = &skl_power_well_ops,
 		.data = SKL_DISP_PW_2,
-	}
+	},
 };
 
 static int
@@ -1845,6 +1868,8 @@
 	i915.disable_power_well = sanitize_disable_power_well_option(dev_priv,
 						     i915.disable_power_well);
 
+	BUILD_BUG_ON(POWER_DOMAIN_NUM > 31);
+
 	mutex_init(&power_domains->lock);
 
 	/*
@@ -1855,7 +1880,7 @@
 		set_power_wells(power_domains, hsw_power_wells);
 	} else if (IS_BROADWELL(dev_priv->dev)) {
 		set_power_wells(power_domains, bdw_power_wells);
-	} else if (IS_SKYLAKE(dev_priv->dev)) {
+	} else if (IS_SKYLAKE(dev_priv->dev) || IS_KABYLAKE(dev_priv->dev)) {
 		set_power_wells(power_domains, skl_power_wells);
 	} else if (IS_BROXTON(dev_priv->dev)) {
 		set_power_wells(power_domains, bxt_power_wells);
@@ -1870,21 +1895,6 @@
 	return 0;
 }
 
-static void intel_runtime_pm_disable(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-	struct device *device = &dev->pdev->dev;
-
-	if (!HAS_RUNTIME_PM(dev))
-		return;
-
-	if (!intel_enable_rc6(dev))
-		return;
-
-	/* Make sure we're not suspended first. */
-	pm_runtime_get_sync(device);
-}
-
 /**
  * intel_power_domains_fini - finalizes the power domain structures
  * @dev_priv: i915 device instance
@@ -1895,15 +1905,17 @@
  */
 void intel_power_domains_fini(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_disable(dev_priv);
-
 	/* The i915.ko module is still not prepared to be loaded when
 	 * the power well is not enabled, so just enable it in case
 	 * we're going to unload/reload. */
 	intel_display_set_init_power(dev_priv, true);
+
+	/* Remove the refcount we took to keep power well support disabled. */
+	if (!i915.disable_power_well)
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
 }
 
-static void intel_power_domains_resume(struct drm_i915_private *dev_priv)
+static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv)
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 	struct i915_power_well *power_well;
@@ -1918,6 +1930,47 @@
 	mutex_unlock(&power_domains->lock);
 }
 
+static void skl_display_core_init(struct drm_i915_private *dev_priv,
+				  bool resume)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	uint32_t val;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/* enable PCH reset handshake */
+	val = I915_READ(HSW_NDE_RSTWRN_OPT);
+	I915_WRITE(HSW_NDE_RSTWRN_OPT, val | RESET_PCH_HANDSHAKE_ENABLE);
+
+	/* enable PG1 and Misc I/O */
+	mutex_lock(&power_domains->lock);
+	skl_pw1_misc_io_init(dev_priv);
+	mutex_unlock(&power_domains->lock);
+
+	if (!resume)
+		return;
+
+	skl_init_cdclk(dev_priv);
+
+	if (dev_priv->csr.dmc_payload)
+		intel_csr_load_program(dev_priv);
+}
+
+static void skl_display_core_uninit(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	skl_uninit_cdclk(dev_priv);
+
+	/* The spec doesn't call for removing the reset handshake flag */
+	/* disable PG1 and Misc I/O */
+	mutex_lock(&power_domains->lock);
+	skl_pw1_misc_io_fini(dev_priv);
+	mutex_unlock(&power_domains->lock);
+}
+
 static void chv_phy_control_init(struct drm_i915_private *dev_priv)
 {
 	struct i915_power_well *cmn_bc =
@@ -2040,14 +2093,16 @@
  * This function initializes the hardware power domain state and enables all
  * power domains using intel_display_set_init_power().
  */
-void intel_power_domains_init_hw(struct drm_i915_private *dev_priv)
+void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
 {
 	struct drm_device *dev = dev_priv->dev;
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 
 	power_domains->initializing = true;
 
-	if (IS_CHERRYVIEW(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
+		skl_display_core_init(dev_priv, resume);
+	} else if (IS_CHERRYVIEW(dev)) {
 		mutex_lock(&power_domains->lock);
 		chv_phy_control_init(dev_priv);
 		mutex_unlock(&power_domains->lock);
@@ -2059,38 +2114,31 @@
 
 	/* For now, we need the power well to be always enabled. */
 	intel_display_set_init_power(dev_priv, true);
-	intel_power_domains_resume(dev_priv);
+	/* Disable power support if the user asked so. */
+	if (!i915.disable_power_well)
+		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	intel_power_domains_sync_hw(dev_priv);
 	power_domains->initializing = false;
 }
 
 /**
- * intel_aux_display_runtime_get - grab an auxiliary power domain reference
+ * intel_power_domains_suspend - suspend power domain state
  * @dev_priv: i915 device instance
  *
- * This function grabs a power domain reference for the auxiliary power domain
- * (for access to the GMBUS and DP AUX blocks) and ensures that it and all its
- * parents are powered up. Therefore users should only grab a reference to the
- * innermost power domain they need.
- *
- * Any power domain reference obtained by this function must have a symmetric
- * call to intel_aux_display_runtime_put() to release the reference again.
+ * This function prepares the hardware power domain state before entering
+ * system suspend. It must be paired with intel_power_domains_init_hw().
  */
-void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv)
+void intel_power_domains_suspend(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_get(dev_priv);
-}
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+		skl_display_core_uninit(dev_priv);
 
-/**
- * intel_aux_display_runtime_put - release an auxiliary power domain reference
- * @dev_priv: i915 device instance
- *
- * This function drops the auxiliary power domain reference obtained by
- * intel_aux_display_runtime_get() and might power down the corresponding
- * hardware block right away if this is the last reference.
- */
-void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
-{
-	intel_runtime_pm_put(dev_priv);
+	/*
+	 * Even if power well support was disabled we still want to disable
+	 * power wells while we are system suspended.
+	 */
+	if (!i915.disable_power_well)
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
 }
 
 /**
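
The new "DC off" power well inverts the old flow: instead of enabling DC5/DC6 explicitly after a power well goes down, DC states are allowed only while no reference to the DC-off well is held, and the deepest permitted state is clamped by the i915.enable_dc parameter in gen9_set_dc_state(). An illustrative usage sketch (not part of this patch) of how a caller keeps DC states out of the way:

	/* Sketch: code that must not race a DC5/DC6 transition takes a domain
	 * that maps to the "DC off" well, e.g. POWER_DOMAIN_MODESET. */
	static void example_no_dc_section(struct drm_i915_private *dev_priv)
	{
		intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
		/* ... program registers that must not race a DC-state entry ... */
		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
		/* On the last put, gen9_dc_off_power_well_disable() re-enables
		 * DC6 on SKL (unless i915.enable_dc limits it) or DC5 otherwise. */
	}
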
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index c42b636..06679f1 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -74,7 +74,7 @@
 	struct i2c_adapter ddc;
 
 	/* Register for the SDVO device: SDVOB or SDVOC */
-	uint32_t sdvo_reg;
+	i915_reg_t sdvo_reg;
 
 	/* Active outputs controlled by this SDVO output */
 	uint16_t controlled_output;
@@ -120,8 +120,7 @@
 	 */
 	bool is_tv;
 
-	/* On different gens SDVOB is at different places. */
-	bool is_sdvob;
+	enum port port;
 
 	/* This is for current tv format name */
 	int tv_format_index;
@@ -245,7 +244,7 @@
 	u32 bval = val, cval = val;
 	int i;
 
-	if (intel_sdvo->sdvo_reg == PCH_SDVOB) {
+	if (HAS_PCH_SPLIT(dev_priv)) {
 		I915_WRITE(intel_sdvo->sdvo_reg, val);
 		POSTING_READ(intel_sdvo->sdvo_reg);
 		/*
@@ -259,7 +258,7 @@
 		return;
 	}
 
-	if (intel_sdvo->sdvo_reg == GEN3_SDVOB)
+	if (intel_sdvo->port == PORT_B)
 		cval = I915_READ(GEN3_SDVOC);
 	else
 		bval = I915_READ(GEN3_SDVOB);
@@ -422,7 +421,7 @@
 	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_DATA),
 };
 
-#define SDVO_NAME(svdo) ((svdo)->is_sdvob ? "SDVOB" : "SDVOC")
+#define SDVO_NAME(svdo) ((svdo)->port == PORT_B ? "SDVOB" : "SDVOC")
 
 static void intel_sdvo_debug_write(struct intel_sdvo *intel_sdvo, u8 cmd,
 				   const void *args, int args_len)
@@ -1282,14 +1281,10 @@
 			sdvox |= SDVO_BORDER_ENABLE;
 	} else {
 		sdvox = I915_READ(intel_sdvo->sdvo_reg);
-		switch (intel_sdvo->sdvo_reg) {
-		case GEN3_SDVOB:
+		if (intel_sdvo->port == PORT_B)
 			sdvox &= SDVOB_PRESERVE_MASK;
-			break;
-		case GEN3_SDVOC:
+		else
 			sdvox &= SDVOC_PRESERVE_MASK;
-			break;
-		}
 		sdvox |= (9 << 19) | SDVO_BORDER_ENABLE;
 	}
 
@@ -1464,12 +1459,23 @@
 	 * matching DP port to be enabled on transcoder A.
 	 */
 	if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) {
+		/*
+		 * We get CPU/PCH FIFO underruns on the other pipe when
+		 * doing the workaround. Sweep them under the rug.
+		 */
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+
 		temp &= ~SDVO_PIPE_B_SELECT;
 		temp |= SDVO_ENABLE;
 		intel_sdvo_write_sdvox(intel_sdvo, temp);
 
 		temp &= ~SDVO_ENABLE;
 		intel_sdvo_write_sdvox(intel_sdvo, temp);
+
+		intel_wait_for_vblank_if_active(dev_priv->dev, PIPE_A);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, true);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
 	}
 }
 
@@ -2251,7 +2257,7 @@
 {
 	struct sdvo_device_mapping *mapping;
 
-	if (sdvo->is_sdvob)
+	if (sdvo->port == PORT_B)
 		mapping = &(dev_priv->sdvo_mappings[0]);
 	else
 		mapping = &(dev_priv->sdvo_mappings[1]);
@@ -2269,7 +2275,7 @@
 	struct sdvo_device_mapping *mapping;
 	u8 pin;
 
-	if (sdvo->is_sdvob)
+	if (sdvo->port == PORT_B)
 		mapping = &dev_priv->sdvo_mappings[0];
 	else
 		mapping = &dev_priv->sdvo_mappings[1];
@@ -2307,7 +2313,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct sdvo_device_mapping *my_mapping, *other_mapping;
 
-	if (sdvo->is_sdvob) {
+	if (sdvo->port == PORT_B) {
 		my_mapping = &dev_priv->sdvo_mappings[0];
 		other_mapping = &dev_priv->sdvo_mappings[1];
 	} else {
@@ -2332,7 +2338,7 @@
 	/* No SDVO device info is found for another DVO port,
 	 * so use mapping assumption we had before BIOS parsing.
 	 */
-	if (sdvo->is_sdvob)
+	if (sdvo->port == PORT_B)
 		return 0x70;
 	else
 		return 0x72;
@@ -2939,18 +2945,31 @@
 	return i2c_add_adapter(&sdvo->ddc) == 0;
 }
 
-bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob)
+static void assert_sdvo_port_valid(const struct drm_i915_private *dev_priv,
+				   enum port port)
+{
+	if (HAS_PCH_SPLIT(dev_priv))
+		WARN_ON(port != PORT_B);
+	else
+		WARN_ON(port != PORT_B && port != PORT_C);
+}
+
+bool intel_sdvo_init(struct drm_device *dev,
+		     i915_reg_t sdvo_reg, enum port port)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_encoder *intel_encoder;
 	struct intel_sdvo *intel_sdvo;
 	int i;
+
+	assert_sdvo_port_valid(dev_priv, port);
+
 	intel_sdvo = kzalloc(sizeof(*intel_sdvo), GFP_KERNEL);
 	if (!intel_sdvo)
 		return false;
 
 	intel_sdvo->sdvo_reg = sdvo_reg;
-	intel_sdvo->is_sdvob = is_sdvob;
+	intel_sdvo->port = port;
 	intel_sdvo->slave_addr = intel_sdvo_get_slave_addr(dev, intel_sdvo) >> 1;
 	intel_sdvo_select_i2c_bus(dev_priv, intel_sdvo);
 	if (!intel_sdvo_init_ddc_proxy(intel_sdvo, dev))
@@ -3000,8 +3019,10 @@
 	 * hotplug lines.
 	 */
 	if (intel_sdvo->hotplug_active) {
-		intel_encoder->hpd_pin =
-			intel_sdvo->is_sdvob ?  HPD_SDVO_B : HPD_SDVO_C;
+		if (intel_sdvo->port == PORT_B)
+			intel_encoder->hpd_pin = HPD_SDVO_B;
+		else
+			intel_encoder->hpd_pin = HPD_SDVO_C;
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index c3b7359..2b96f33 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -192,10 +192,9 @@
 	const int pipe = intel_plane->pipe;
 	const int plane = intel_plane->plane + 1;
 	u32 plane_ctl, stride_div, stride;
-	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
 	const struct drm_intel_sprite_colorkey *key =
 		&to_intel_plane_state(drm_plane->state)->ckey;
-	unsigned long surf_addr;
+	u32 surf_addr;
 	u32 tile_height, plane_offset, plane_size;
 	unsigned int rotation;
 	int x_offset, y_offset;
@@ -212,10 +211,6 @@
 	rotation = drm_plane->state->rotation;
 	plane_ctl |= skl_plane_ctl_rotation(rotation);
 
-	intel_update_sprite_watermarks(drm_plane, crtc, src_w, src_h,
-				       pixel_size, true,
-				       src_w != crtc_w || src_h != crtc_h);
-
 	stride_div = intel_fb_stride_alignment(dev, fb->modifier[0],
 					       fb->pixel_format);
 
@@ -297,8 +292,6 @@
 
 	I915_WRITE(PLANE_SURF(pipe, plane), 0);
 	POSTING_READ(PLANE_SURF(pipe, plane));
-
-	intel_update_sprite_watermarks(dplane, crtc, 0, 0, 0, false, false);
 }
 
 static void
@@ -541,10 +534,6 @@
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		sprctl |= SPRITE_PIPE_CSC_ENABLE;
 
-	intel_update_sprite_watermarks(plane, crtc, src_w, src_h, pixel_size,
-				       true,
-				       src_w != crtc_w || src_h != crtc_h);
-
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@@ -678,10 +667,6 @@
 	if (IS_GEN6(dev))
 		dvscntr |= DVS_TRICKLE_FEED_DISABLE; /* must disable */
 
-	intel_update_sprite_watermarks(plane, crtc, src_w, src_h,
-				       pixel_size, true,
-				       src_w != crtc_w || src_h != crtc_h);
-
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@@ -938,9 +923,6 @@
 
 	crtc = crtc ? crtc : plane->crtc;
 
-	if (!crtc->state->active)
-		return;
-
 	if (state->visible) {
 		intel_plane->update_plane(plane, crtc, fb,
 					  state->dst.x1, state->dst.y1,
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 43cba12..c2358ba 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -29,19 +29,7 @@
 
 #define FORCEWAKE_ACK_TIMEOUT_MS 50
 
-#define __raw_i915_read8(dev_priv__, reg__) readb((dev_priv__)->regs + (reg__))
-#define __raw_i915_write8(dev_priv__, reg__, val__) writeb(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__))
-#define __raw_i915_write16(dev_priv__, reg__, val__) writew(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
-#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read64(dev_priv__, reg__) readq((dev_priv__)->regs + (reg__))
-#define __raw_i915_write64(dev_priv__, reg__, val__) writeq(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__)
+#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32((dev_priv__), (reg__))
 
 static const char * const forcewake_domain_names[] = {
 	"render",
@@ -72,7 +60,7 @@
 static inline void
 fw_domain_reset(const struct intel_uncore_forcewake_domain *d)
 {
-	WARN_ON(d->reg_set == 0);
+	WARN_ON(!i915_mmio_reg_valid(d->reg_set));
 	__raw_i915_write32(d->i915, d->reg_set, d->val_reset);
 }
 
@@ -118,7 +106,7 @@
 fw_domain_posting_read(const struct intel_uncore_forcewake_domain *d)
 {
 	/* something from same cacheline, but not from the set register */
-	if (d->reg_post)
+	if (i915_mmio_reg_valid(d->reg_post))
 		__raw_posting_read(d->i915, d->reg_post);
 }
 
@@ -525,8 +513,7 @@
 }
 
 /* We give fast paths for the really cool registers */
-#define NEEDS_FORCE_WAKE(reg) \
-	 ((reg) < 0x40000 && (reg) != FORCEWAKE)
+#define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000)
 
 #define REG_RANGE(reg, start, end) ((reg) >= (start) && (reg) < (end))
 
@@ -589,7 +576,7 @@
 	REG_RANGE((reg), 0x9400, 0x9800)
 
 #define FORCEWAKE_GEN9_BLITTER_RANGE_OFFSET(reg) \
-	((reg) < 0x40000 &&\
+	((reg) < 0x40000 && \
 	 !FORCEWAKE_GEN9_UNCORE_RANGE_OFFSET(reg) && \
 	 !FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(reg) && \
 	 !FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(reg) && \
@@ -605,8 +592,8 @@
 }
 
 static void
-hsw_unclaimed_reg_debug(struct drm_i915_private *dev_priv, u32 reg, bool read,
-			bool before)
+hsw_unclaimed_reg_debug(struct drm_i915_private *dev_priv,
+			i915_reg_t reg, bool read, bool before)
 {
 	const char *op = read ? "reading" : "writing to";
 	const char *when = before ? "before" : "after";
@@ -616,7 +603,7 @@
 
 	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
 		WARN(1, "Unclaimed register detected %s %s register 0x%x\n",
-		     when, op, reg);
+		     when, op, i915_mmio_reg_offset(reg));
 		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
 		i915.mmio_debug--; /* Only report the first N failures */
 	}
@@ -649,7 +636,7 @@
 
 #define __gen2_read(x) \
 static u##x \
-gen2_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+gen2_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	GEN2_READ_HEADER(x); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	GEN2_READ_FOOTER; \
@@ -657,7 +644,7 @@
 
 #define __gen5_read(x) \
 static u##x \
-gen5_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+gen5_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	GEN2_READ_HEADER(x); \
 	ilk_dummy_write(dev_priv); \
 	val = __raw_i915_read##x(dev_priv, reg); \
@@ -680,6 +667,7 @@
 #undef GEN2_READ_HEADER
 
 #define GEN6_READ_HEADER(x) \
+	u32 offset = i915_mmio_reg_offset(reg); \
 	unsigned long irqflags; \
 	u##x val = 0; \
 	assert_device_not_suspended(dev_priv); \
@@ -714,20 +702,12 @@
 		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
 }
 
-#define __vgpu_read(x) \
-static u##x \
-vgpu_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
-	GEN6_READ_HEADER(x); \
-	val = __raw_i915_read##x(dev_priv, reg); \
-	GEN6_READ_FOOTER; \
-}
-
 #define __gen6_read(x) \
 static u##x \
-gen6_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	GEN6_READ_HEADER(x); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, true, true); \
-	if (NEEDS_FORCE_WAKE(reg)) \
+	if (NEEDS_FORCE_WAKE(offset)) \
 		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, true, false); \
@@ -736,47 +716,56 @@
 
 #define __vlv_read(x) \
 static u##x \
-vlv_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+vlv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine = 0; \
 	GEN6_READ_HEADER(x); \
-	if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
-	else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, FORCEWAKE_MEDIA); \
+	if (!NEEDS_FORCE_WAKE(offset)) \
+		fw_engine = 0; \
+	else if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_MEDIA; \
+	if (fw_engine) \
+		__force_wake_get(dev_priv, fw_engine); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	GEN6_READ_FOOTER; \
 }
 
 #define __chv_read(x) \
 static u##x \
-chv_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+chv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine = 0; \
 	GEN6_READ_HEADER(x); \
-	if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
-	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, FORCEWAKE_MEDIA); \
-	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, \
-				 FORCEWAKE_RENDER | FORCEWAKE_MEDIA); \
+	if (!NEEDS_FORCE_WAKE(offset)) \
+		fw_engine = 0; \
+	else if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_MEDIA; \
+	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	if (fw_engine) \
+		__force_wake_get(dev_priv, fw_engine); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	GEN6_READ_FOOTER; \
 }
 
 #define SKL_NEEDS_FORCE_WAKE(reg) \
-	 ((reg) < 0x40000 && !FORCEWAKE_GEN9_UNCORE_RANGE_OFFSET(reg))
+	((reg) < 0x40000 && !FORCEWAKE_GEN9_UNCORE_RANGE_OFFSET(reg))
 
 #define __gen9_read(x) \
 static u##x \
-gen9_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	enum forcewake_domains fw_engine; \
 	GEN6_READ_HEADER(x); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, true, true); \
-	if (!SKL_NEEDS_FORCE_WAKE(reg)) \
+	if (!SKL_NEEDS_FORCE_WAKE(offset)) \
 		fw_engine = 0; \
-	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_RENDER; \
-	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_MEDIA; \
-	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
 	else \
 		fw_engine = FORCEWAKE_BLITTER; \
@@ -787,10 +776,6 @@
 	GEN6_READ_FOOTER; \
 }
 
-__vgpu_read(8)
-__vgpu_read(16)
-__vgpu_read(32)
-__vgpu_read(64)
 __gen9_read(8)
 __gen9_read(16)
 __gen9_read(32)
@@ -812,10 +797,37 @@
 #undef __chv_read
 #undef __vlv_read
 #undef __gen6_read
-#undef __vgpu_read
 #undef GEN6_READ_FOOTER
 #undef GEN6_READ_HEADER
 
+#define VGPU_READ_HEADER(x) \
+	unsigned long irqflags; \
+	u##x val = 0; \
+	assert_device_not_suspended(dev_priv); \
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags)
+
+#define VGPU_READ_FOOTER \
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
+	trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \
+	return val
+
+#define __vgpu_read(x) \
+static u##x \
+vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	VGPU_READ_HEADER(x); \
+	val = __raw_i915_read##x(dev_priv, reg); \
+	VGPU_READ_FOOTER; \
+}
+
+__vgpu_read(8)
+__vgpu_read(16)
+__vgpu_read(32)
+__vgpu_read(64)
+
+#undef __vgpu_read
+#undef VGPU_READ_FOOTER
+#undef VGPU_READ_HEADER
+
 #define GEN2_WRITE_HEADER \
 	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
 	assert_device_not_suspended(dev_priv); \
@@ -824,7 +836,7 @@
 
 #define __gen2_write(x) \
 static void \
-gen2_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+gen2_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	GEN2_WRITE_HEADER; \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	GEN2_WRITE_FOOTER; \
@@ -832,7 +844,7 @@
 
 #define __gen5_write(x) \
 static void \
-gen5_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+gen5_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	GEN2_WRITE_HEADER; \
 	ilk_dummy_write(dev_priv); \
 	__raw_i915_write##x(dev_priv, reg, val); \
@@ -855,6 +867,7 @@
 #undef GEN2_WRITE_HEADER
 
 #define GEN6_WRITE_HEADER \
+	u32 offset = i915_mmio_reg_offset(reg); \
 	unsigned long irqflags; \
 	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
 	assert_device_not_suspended(dev_priv); \
@@ -865,10 +878,10 @@
 
 #define __gen6_write(x) \
 static void \
-gen6_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	u32 __fifo_ret = 0; \
 	GEN6_WRITE_HEADER; \
-	if (NEEDS_FORCE_WAKE(reg)) { \
+	if (NEEDS_FORCE_WAKE(offset)) { \
 		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
 	} \
 	__raw_i915_write##x(dev_priv, reg, val); \
@@ -880,10 +893,10 @@
 
 #define __hsw_write(x) \
 static void \
-hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+hsw_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	u32 __fifo_ret = 0; \
 	GEN6_WRITE_HEADER; \
-	if (NEEDS_FORCE_WAKE(reg)) { \
+	if (NEEDS_FORCE_WAKE(offset)) { \
 		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
 	} \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \
@@ -896,15 +909,7 @@
 	GEN6_WRITE_FOOTER; \
 }
 
-#define __vgpu_write(x) \
-static void vgpu_write##x(struct drm_i915_private *dev_priv, \
-			  off_t reg, u##x val, bool trace) { \
-	GEN6_WRITE_HEADER; \
-	__raw_i915_write##x(dev_priv, reg, val); \
-	GEN6_WRITE_FOOTER; \
-}
-
-static const u32 gen8_shadowed_regs[] = {
+static const i915_reg_t gen8_shadowed_regs[] = {
 	FORCEWAKE_MT,
 	GEN6_RPNSWREQ,
 	GEN6_RC_VIDEO_FREQ,
@@ -915,11 +920,12 @@
 	/* TODO: Other registers are not yet used */
 };
 
-static bool is_gen8_shadowed(struct drm_i915_private *dev_priv, u32 reg)
+static bool is_gen8_shadowed(struct drm_i915_private *dev_priv,
+			     i915_reg_t reg)
 {
 	int i;
 	for (i = 0; i < ARRAY_SIZE(gen8_shadowed_regs); i++)
-		if (reg == gen8_shadowed_regs[i])
+		if (i915_mmio_reg_equal(reg, gen8_shadowed_regs[i]))
 			return true;
 
 	return false;
@@ -927,10 +933,10 @@
 
 #define __gen8_write(x) \
 static void \
-gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	GEN6_WRITE_HEADER; \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \
-	if (reg < 0x40000 && !is_gen8_shadowed(dev_priv, reg)) \
+	if (NEEDS_FORCE_WAKE(offset) && !is_gen8_shadowed(dev_priv, reg)) \
 		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
@@ -940,22 +946,25 @@
 
 #define __chv_write(x) \
 static void \
-chv_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
-	bool shadowed = is_gen8_shadowed(dev_priv, reg); \
+chv_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
+	enum forcewake_domains fw_engine = 0; \
 	GEN6_WRITE_HEADER; \
-	if (!shadowed) { \
-		if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(reg)) \
-			__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
-		else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(reg)) \
-			__force_wake_get(dev_priv, FORCEWAKE_MEDIA); \
-		else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(reg)) \
-			__force_wake_get(dev_priv, FORCEWAKE_RENDER | FORCEWAKE_MEDIA); \
-	} \
+	if (!NEEDS_FORCE_WAKE(offset) || \
+	    is_gen8_shadowed(dev_priv, reg)) \
+		fw_engine = 0; \
+	else if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_MEDIA; \
+	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	if (fw_engine) \
+		__force_wake_get(dev_priv, fw_engine); \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	GEN6_WRITE_FOOTER; \
 }
 
-static const u32 gen9_shadowed_regs[] = {
+static const i915_reg_t gen9_shadowed_regs[] = {
 	RING_TAIL(RENDER_RING_BASE),
 	RING_TAIL(GEN6_BSD_RING_BASE),
 	RING_TAIL(VEBOX_RING_BASE),
@@ -968,11 +977,12 @@
 	/* TODO: Other registers are not yet used */
 };
 
-static bool is_gen9_shadowed(struct drm_i915_private *dev_priv, u32 reg)
+static bool is_gen9_shadowed(struct drm_i915_private *dev_priv,
+			     i915_reg_t reg)
 {
 	int i;
 	for (i = 0; i < ARRAY_SIZE(gen9_shadowed_regs); i++)
-		if (reg == gen9_shadowed_regs[i])
+		if (i915_mmio_reg_equal(reg, gen9_shadowed_regs[i]))
 			return true;
 
 	return false;
@@ -980,19 +990,19 @@
 
 #define __gen9_write(x) \
 static void \
-gen9_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, \
+gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
 		bool trace) { \
 	enum forcewake_domains fw_engine; \
 	GEN6_WRITE_HEADER; \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \
-	if (!SKL_NEEDS_FORCE_WAKE(reg) || \
+	if (!SKL_NEEDS_FORCE_WAKE(offset) || \
 	    is_gen9_shadowed(dev_priv, reg)) \
 		fw_engine = 0; \
-	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_RENDER; \
-	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_MEDIA; \
-	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
 	else \
 		fw_engine = FORCEWAKE_BLITTER; \
@@ -1024,20 +1034,41 @@
 __gen6_write(16)
 __gen6_write(32)
 __gen6_write(64)
-__vgpu_write(8)
-__vgpu_write(16)
-__vgpu_write(32)
-__vgpu_write(64)
 
 #undef __gen9_write
 #undef __chv_write
 #undef __gen8_write
 #undef __hsw_write
 #undef __gen6_write
-#undef __vgpu_write
 #undef GEN6_WRITE_FOOTER
 #undef GEN6_WRITE_HEADER
 
+#define VGPU_WRITE_HEADER \
+	unsigned long irqflags; \
+	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
+	assert_device_not_suspended(dev_priv); \
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags)
+
+#define VGPU_WRITE_FOOTER \
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags)
+
+#define __vgpu_write(x) \
+static void vgpu_write##x(struct drm_i915_private *dev_priv, \
+			  i915_reg_t reg, u##x val, bool trace) { \
+	VGPU_WRITE_HEADER; \
+	__raw_i915_write##x(dev_priv, reg, val); \
+	VGPU_WRITE_FOOTER; \
+}
+
+__vgpu_write(8)
+__vgpu_write(16)
+__vgpu_write(32)
+__vgpu_write(64)
+
+#undef __vgpu_write
+#undef VGPU_WRITE_FOOTER
+#undef VGPU_WRITE_HEADER
+
 #define ASSIGN_WRITE_MMIO_VFUNCS(x) \
 do { \
 	dev_priv->uncore.funcs.mmio_writeb = x##_write8; \
@@ -1057,7 +1088,8 @@
 
 static void fw_domain_init(struct drm_i915_private *dev_priv,
 			   enum forcewake_domain_id domain_id,
-			   u32 reg_set, u32 reg_ack)
+			   i915_reg_t reg_set,
+			   i915_reg_t reg_ack)
 {
 	struct intel_uncore_forcewake_domain *d;
 
@@ -1087,8 +1119,6 @@
 		d->reg_post = FORCEWAKE_ACK_VLV;
 	else if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv) || IS_GEN8(dev_priv))
 		d->reg_post = ECOBUS;
-	else
-		d->reg_post = 0;
 
 	d->i915 = dev_priv;
 	d->id = domain_id;
@@ -1262,12 +1292,14 @@
 #define GEN_RANGE(l, h) GENMASK(h, l)
 
 static const struct register_whitelist {
-	uint64_t offset;
+	i915_reg_t offset_ldw, offset_udw;
 	uint32_t size;
 	/* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
 	uint32_t gen_bitmask;
 } whitelist[] = {
-	{ RING_TIMESTAMP(RENDER_RING_BASE), 8, GEN_RANGE(4, 9) },
+	{ .offset_ldw = RING_TIMESTAMP(RENDER_RING_BASE),
+	  .offset_udw = RING_TIMESTAMP_UDW(RENDER_RING_BASE),
+	  .size = 8, .gen_bitmask = GEN_RANGE(4, 9) },
 };
 
 int i915_reg_read_ioctl(struct drm_device *dev,
@@ -1277,11 +1309,11 @@
 	struct drm_i915_reg_read *reg = data;
 	struct register_whitelist const *entry = whitelist;
 	unsigned size;
-	u64 offset;
+	i915_reg_t offset_ldw, offset_udw;
 	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
-		if (entry->offset == (reg->offset & -entry->size) &&
+		if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) &&
 		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
 			break;
 	}
@@ -1293,27 +1325,28 @@
 	 * be naturally aligned (and those that are not so aligned merely
 	 * limit the available flags for that register).
 	 */
-	offset = entry->offset;
+	offset_ldw = entry->offset_ldw;
+	offset_udw = entry->offset_udw;
 	size = entry->size;
-	size |= reg->offset ^ offset;
+	size |= reg->offset ^ i915_mmio_reg_offset(offset_ldw);
 
 	intel_runtime_pm_get(dev_priv);
 
 	switch (size) {
 	case 8 | 1:
-		reg->val = I915_READ64_2x32(offset, offset+4);
+		reg->val = I915_READ64_2x32(offset_ldw, offset_udw);
 		break;
 	case 8:
-		reg->val = I915_READ64(offset);
+		reg->val = I915_READ64(offset_ldw);
 		break;
 	case 4:
-		reg->val = I915_READ(offset);
+		reg->val = I915_READ(offset_ldw);
 		break;
 	case 2:
-		reg->val = I915_READ16(offset);
+		reg->val = I915_READ16(offset_ldw);
 		break;
 	case 1:
-		reg->val = I915_READ8(offset);
+		reg->val = I915_READ8(offset_ldw);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1470,7 +1503,7 @@
 }
 
 static int wait_for_register(struct drm_i915_private *dev_priv,
-			     const u32 reg,
+			     i915_reg_t reg,
 			     const u32 mask,
 			     const u32 value,
 			     const unsigned long timeout_ms)
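The conversions in this file lean on a handful of i915_reg_t helpers (i915_mmio_reg_offset(), i915_mmio_reg_valid(), i915_mmio_reg_equal()) whose definitions live in i915_reg.h and are not part of this hunk. A rough sketch of the shape they plausibly take, reconstructed from how they are used above and written as freestanding C for readability:

#include <stdbool.h>
#include <stdint.h>

/* Sketch of the typesafe register handle; the in-kernel version uses u32
 * and lives in i915_reg.h. */
typedef struct {
	uint32_t reg;
} i915_reg_t;

#define _MMIO(r)		((const i915_reg_t){ .reg = (r) })
#define INVALID_MMIO_REG	_MMIO(0)

static inline uint32_t i915_mmio_reg_offset(i915_reg_t reg)
{
	return reg.reg;		/* raw offset for range checks and tracing */
}

static inline bool i915_mmio_reg_equal(i915_reg_t a, i915_reg_t b)
{
	return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b);
}

static inline bool i915_mmio_reg_valid(i915_reg_t reg)
{
	return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
}

Wrapping the offset in a one-member struct is what buys the type safety: passing a bare integer where an i915_reg_t is expected now fails to compile instead of silently poking the wrong register.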
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 7e32730..c2dd52e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3405,7 +3405,9 @@
 	return 0;
 }
 
-#include "../gpu/drm/i915/i915_reg.h"
+#define SOUTH_CHICKEN2		0xc2004
+#define PCH_PP_STATUS		0xc7200
+#define PCH_PP_CONTROL		0xc7204
 #define MSG_CTL			0x45010
 #define NSDE_PWR_STATE		0xd0100
 #define IGD_OPERATION_TIMEOUT	10000     /* set timeout 10 seconds */
diff --git a/include/drm/i915_component.h b/include/drm/i915_component.h
index 30d89e0..fab1385 100644
--- a/include/drm/i915_component.h
+++ b/include/drm/i915_component.h
@@ -31,47 +31,80 @@
 #define MAX_PORTS 5
 
 /**
- * struct i915_audio_component_ops - callbacks defined in gfx driver
- * @owner: the module owner
- * @get_power: get the POWER_DOMAIN_AUDIO power well
- * @put_power: put the POWER_DOMAIN_AUDIO power well
- * @codec_wake_override: Enable/Disable generating the codec wake signal
- * @get_cdclk_freq: get the Core Display Clock in KHz
- * @sync_audio_rate: set n/cts based on the sample rate
+ * struct i915_audio_component_ops - Ops implemented by i915 driver, called by hda driver
  */
 struct i915_audio_component_ops {
+	/**
+	 * @owner: i915 module
+	 */
 	struct module *owner;
+	/**
+	 * @get_power: get the POWER_DOMAIN_AUDIO power well
+	 *
+	 * Request the power well to be turned on.
+	 */
 	void (*get_power)(struct device *);
+	/**
+	 * @put_power: put the POWER_DOMAIN_AUDIO power well
+	 *
+	 * Allow the power well to be turned off.
+	 */
 	void (*put_power)(struct device *);
+	/**
+	 * @codec_wake_override: Enable/disable codec wake signal
+	 */
 	void (*codec_wake_override)(struct device *, bool enable);
+	/**
+	 * @get_cdclk_freq: Get the Core Display Clock in kHz
+	 */
 	int (*get_cdclk_freq)(struct device *);
+	/**
+	 * @sync_audio_rate: set n/cts based on the sample rate
+	 *
+	 * Called from the audio driver. After the audio driver sets the
+	 * sample rate, it will call this function to set n/cts.
+	 */
 	int (*sync_audio_rate)(struct device *, int port, int rate);
 };
 
+/**
+ * struct i915_audio_component_audio_ops - Ops implemented by hda driver, called by i915 driver
+ */
 struct i915_audio_component_audio_ops {
+	/**
+	 * @audio_ptr: Pointer to be used in call to pin_eld_notify
+	 */
 	void *audio_ptr;
 	/**
-	 * Call from i915 driver, notifying the HDA driver that
-	 * pin sense and/or ELD information has changed.
-	 * @audio_ptr:		HDA driver object
-	 * @port:	Which port has changed (PORTA / PORTB / PORTC etc)
+	 * @pin_eld_notify: Notify the HDA driver that pin sense and/or ELD information has changed
+	 *
+	 * Called when the i915 driver has set up the audio pipeline or has just
+	 * begun to tear it down. This allows the HDA driver to update its
+	 * status accordingly (even when the HDA controller is in power save
+	 * mode).
 	 */
 	void (*pin_eld_notify)(void *audio_ptr, int port);
 };
 
 /**
- * struct i915_audio_component - used for audio video interaction
- * @dev: the device from gfx driver
- * @aud_sample_rate: the array of audio sample rate per port
- * @ops: callback for audio driver calling
- * @audio_ops: Call from i915 driver
+ * struct i915_audio_component - Used for direct communication between i915 and hda drivers
  */
 struct i915_audio_component {
+	/**
+	 * @dev: i915 device, used as parameter for ops
+	 */
 	struct device *dev;
+	/**
+	 * @aud_sample_rate: the array of audio sample rate per port
+	 */
 	int aud_sample_rate[MAX_PORTS];
-
+	/**
+	 * @ops: Ops implemented by i915 driver, called by hda driver
+	 */
 	const struct i915_audio_component_ops *ops;
-
+	/**
+	 * @audio_ops: Ops implemented by hda driver, called by i915 driver
+	 */
 	const struct i915_audio_component_audio_ops *audio_ops;
 };
 
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 17c4456..f1a113e 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -291,4 +291,40 @@
 	INTEL_VGA_DEVICE(0x1A84, info), \
 	INTEL_VGA_DEVICE(0x5A84, info)
 
+#define INTEL_KBL_GT1_IDS(info)	\
+	INTEL_VGA_DEVICE(0x5913, info), /* ULT GT1.5 */ \
+	INTEL_VGA_DEVICE(0x5915, info), /* ULX GT1.5 */ \
+	INTEL_VGA_DEVICE(0x5917, info), /* DT  GT1.5 */ \
+	INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \
+	INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \
+	INTEL_VGA_DEVICE(0x5902, info), /* DT  GT1 */ \
+	INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \
+	INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */
+
+#define INTEL_KBL_GT2_IDS(info)	\
+	INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \
+	INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \
+	INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \
+	INTEL_VGA_DEVICE(0x5912, info), /* DT  GT2 */ \
+	INTEL_VGA_DEVICE(0x591B, info), /* Halo GT2 */ \
+	INTEL_VGA_DEVICE(0x591A, info), /* SRV GT2 */ \
+	INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */
+
+#define INTEL_KBL_GT3_IDS(info) \
+	INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \
+	INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \
+	INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */
+
+#define INTEL_KBL_GT4_IDS(info) \
+	INTEL_VGA_DEVICE(0x5932, info), /* DT  GT4 */ \
+	INTEL_VGA_DEVICE(0x593B, info), /* Halo GT4 */ \
+	INTEL_VGA_DEVICE(0x593A, info), /* SRV GT4 */ \
+	INTEL_VGA_DEVICE(0x593D, info)  /* WKS GT4 */
+
+#define INTEL_KBL_IDS(info) \
+	INTEL_KBL_GT1_IDS(info), \
+	INTEL_KBL_GT2_IDS(info), \
+	INTEL_KBL_GT3_IDS(info), \
+	INTEL_KBL_GT4_IDS(info)
+
 #endif /* _I915_PCIIDS_H */
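Each INTEL_KBL_*_IDS() macro above expands to a comma-separated run of INTEL_VGA_DEVICE() entries, so the aggregate INTEL_KBL_IDS() is meant to drop straight into a PCI ID table. A sketch of that usage; intel_kabylake_info is a stand-in for whatever driver-private device-info object the consumer supplies and is not defined in this patch:

#include <linux/pci.h>
#include <drm/i915_pciids.h>

/* Purely illustrative stand-in for the driver's device-info structure. */
static const struct { int dummy; } intel_kabylake_info;

static const struct pci_device_id example_pciidlist[] = {
	INTEL_KBL_IDS(&intel_kabylake_info),
	{ 0, 0, 0 }	/* terminator */
};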
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 484a9fb..67ef73a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1079,6 +1079,12 @@
 };
 
 struct drm_i915_reg_read {
+	/*
+	 * Register offset.
+	 * For 64-bit wide registers where the upper 32 bits don't immediately
+	 * follow the lower 32 bits, the offset of the lower 32 bits must
+	 * be specified.
+	 */
 	__u64 offset;
 	__u64 val; /* Return value */
 };
@@ -1125,8 +1131,9 @@
 	__u32 ctx_id;
 	__u32 size;
 	__u64 param;
-#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
-#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
+#define I915_CONTEXT_PARAM_BAN_PERIOD	0x1
+#define I915_CONTEXT_PARAM_NO_ZEROMAP	0x2
+#define I915_CONTEXT_PARAM_GTT_SIZE	0x3
 	__u64 value;
 };
 
diff --git a/kernel/async.c b/kernel/async.c
index 4c3773c..d2edd6e 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -326,3 +326,4 @@
 
 	return worker && worker->current_func == async_run_entry_fn;
 }
+EXPORT_SYMBOL_GPL(current_is_async);
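Exporting current_is_async() lets modular code ask whether it is currently running from the async work context, which matters for teardown paths that would otherwise flush the async queue and risk waiting on themselves. A minimal sketch of the pattern this export enables; example_teardown() is an assumed wrapper, and the actual in-tree user (an i915 fbdev teardown path) sits outside this hunk:

#include <linux/async.h>

static void example_teardown(void)
{
	/*
	 * async_synchronize_full() would wait for the very work item we
	 * may be executing from, so only flush when we are not already
	 * on the async workqueue.
	 */
	if (!current_is_async())
		async_synchronize_full();
}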