Merge tag 'drm-intel-next-2015-04-23-fixed' of git://anongit.freedesktop.org/drm-intel into drm-next

drm-intel-next-2015-04-23:
- dither support for ns2501 dvo (Thomas Richter)
- some polish for the gtt code and fixes to finally enable the cmd parser on hsw
- first pile of bxt stage 1 enabling (too many different people to list ...)
- more psr fixes from Rodrigo
- skl rotation support from Chandra
- more atomic work from Ander and Matt
- pile of cleanups and micro-ops for execlist from Chris
drm-intel-next-2015-04-10:
- cdclk handling cleanup and fixes from Ville
- more prep patches for olr removal from John Harrison
- gmbus pin naming rework from Jani (prep for bxt)
- remove ->new_config from Ander (more atomic conversion work)
- rps (boost) tuning and unification with byt/bsw from Chris
- cmd parser batch bool tuning from Chris
- gen8 dynamic pte allocation (Michel Thierry, based on work from Ben Widawsky)
- execlist tuning (not yet all of it) from Chris
- add drm_plane_from_index (Chandra)
- various small things all over

* tag 'drm-intel-next-2015-04-23-fixed' of git://anongit.freedesktop.org/drm-intel: (204 commits)
  drm/i915/gtt: Allocate va range only if vma is not bound
  drm/i915: Enable cmd parser to do secure batch promotion for aliasing ppgtt
  drm/i915: fix intel_prepare_ddi
  drm/i915: factor out ddi_get_encoder_port
  drm/i915/hdmi: check port in ibx_infoframe_enabled
  drm/i915/hdmi: fix vlv infoframe port check
  drm/i915: Silence compiler warning in dvo
  drm/i915: Update DRIVER_DATE to 20150423
  drm/i915: Enable dithering on NatSemi DVO2501 for Fujitsu S6010
  drm/i915: Move i915_get_ggtt_vma_pages into ggtt_bind_vma
  drm/i915: Don't try to outsmart gcc in i915_gem_gtt.c
  drm/i915: Unduplicate i915_ggtt_unbind/bind_vma
  drm/i915: Move ppgtt_bind/unbind around
  drm/i915: move i915_gem_restore_gtt_mappings around
  drm/i915: Fix up the vma aliasing ppgtt binding
  drm/i915: Remove misleading comment around bind_to_vm
  drm/i915: Don't use atomics for pg_dirty_rings
  drm/i915: Don't look at pg_dirty_rings for aliasing ppgtt
  drm/i915/skl: Support Y tiling in MMIO flips
  drm/i915: Fixup kerneldoc for struct intel_context
  ...

Conflicts:
	drivers/gpu/drm/i915/i915_drv.c
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 9765a4c..7c68ecc 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -4067,7 +4067,7 @@
         <title>DPIO</title>
 !Pdrivers/gpu/drm/i915/i915_reg.h DPIO
 	<table id="dpiox2">
-	  <title>Dual channel PHY (VLV/CHV)</title>
+	  <title>Dual channel PHY (VLV/CHV/BXT)</title>
 	  <tgroup cols="8">
 	    <colspec colname="c0" />
 	    <colspec colname="c1" />
@@ -4118,7 +4118,7 @@
 	  </tgroup>
 	</table>
 	<table id="dpiox1">
-	  <title>Single channel PHY (CHV)</title>
+	  <title>Single channel PHY (CHV/BXT)</title>
 	  <tgroup cols="4">
 	    <colspec colname="c0" />
 	    <colspec colname="c1" />
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index fe9f0b7..ab470e4 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -546,6 +546,7 @@
 	INTEL_BDW_D_IDS(&gen8_stolen_funcs),
 	INTEL_CHV_IDS(&chv_stolen_funcs),
 	INTEL_SKL_IDS(&gen9_stolen_funcs),
+	INTEL_BXT_IDS(&gen9_stolen_funcs),
 };
 
 static void __init intel_graphics_stolen(int num, int slot, int func)
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 3007b44..160647a 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1289,6 +1289,29 @@
 EXPORT_SYMBOL(drm_plane_index);
 
 /**
+ * drm_plane_from_index - find the registered plane at an index
+ * @dev: DRM device
+ * @idx: index of the registered plane to find
+ *
+ * Given a plane index, return the registered plane from DRM device's
+ * list of planes with matching index.
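+ *
+ * Returns:
+ * The plane at @idx, or NULL if @idx is out of range.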
+ */
+struct drm_plane *
+drm_plane_from_index(struct drm_device *dev, int idx)
+{
+	struct drm_plane *plane;
+	unsigned int i = 0;
+
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+		if (i == idx)
+			return plane;
+		i++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(drm_plane_from_index);
+
+/**
  * drm_plane_force_disable - Forcibly disable a plane
  * @plane: plane to disable
  *
diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/dvo_ivch.c
index 0f2587ff..89b08a8 100644
--- a/drivers/gpu/drm/i915/dvo_ivch.c
+++ b/drivers/gpu/drm/i915/dvo_ivch.c
@@ -23,6 +23,9 @@
  * Authors:
  *    Eric Anholt <eric@anholt.net>
  *
+ * Minor modifications (Dithering enable):
+ *    Thomas Richter <thor@math.tu-berlin.de>
+ *
  */
 
 #include "dvo.h"
@@ -59,6 +62,8 @@
 # define VR01_DVO_BYPASS_ENABLE		(1 << 1)
 /** Enables the DVO clock */
 # define VR01_DVO_ENABLE		(1 << 0)
+/** Enable dithering for 18bpp panels. Not documented. */
+# define VR01_DITHER_ENABLE             (1 << 4)
 
 /*
  * LCD Interface Format
@@ -74,6 +79,8 @@
 # define VR10_INTERFACE_2X18		(2 << 2)
 /** Enables 2x24-bit LVDS output */
 # define VR10_INTERFACE_2X24		(3 << 2)
+/** Mask that defines the depth of the pipeline */
+# define VR10_INTERFACE_DEPTH_MASK      (3 << 2)
 
 /*
  * VR20 LCD Horizontal Display Size
@@ -342,9 +349,15 @@
 			  struct drm_display_mode *adjusted_mode)
 {
 	uint16_t vr40 = 0;
-	uint16_t vr01;
+	uint16_t vr01 = 0;
+	uint16_t vr10;
 
-	vr01 = 0;
+	ivch_read(dvo, VR10, &vr10);
+	/* Enable dithering for 18 bpp pipelines */
+	vr10 &= VR10_INTERFACE_DEPTH_MASK;
+	if (vr10 == VR10_INTERFACE_2X18 || vr10 == VR10_INTERFACE_1X18)
+		vr01 = VR01_DITHER_ENABLE;
+
 	vr40 = (VR40_STALL_ENABLE | VR40_VERTICAL_INTERP_ENABLE |
 		VR40_HORIZONTAL_INTERP_ENABLE);
 
@@ -353,7 +366,7 @@
 		uint16_t x_ratio, y_ratio;
 
 		vr01 |= VR01_PANEL_FIT_ENABLE;
-		vr40 |= VR40_CLOCK_GATING_ENABLE;
+		vr40 |= VR40_CLOCK_GATING_ENABLE | VR40_ENHANCED_PANEL_FITTING;
 		x_ratio = (((mode->hdisplay - 1) << 16) /
 			   (adjusted_mode->hdisplay - 1)) >> 2;
 		y_ratio = (((mode->vdisplay - 1) << 16) /
@@ -380,6 +393,8 @@
 	DRM_DEBUG_KMS("VR00: 0x%04x\n", val);
 	ivch_read(dvo, VR01, &val);
 	DRM_DEBUG_KMS("VR01: 0x%04x\n", val);
+	ivch_read(dvo, VR10, &val);
+	DRM_DEBUG_KMS("VR10: 0x%04x\n", val);
 	ivch_read(dvo, VR30, &val);
 	DRM_DEBUG_KMS("VR30: 0x%04x\n", val);
 	ivch_read(dvo, VR40, &val);
diff --git a/drivers/gpu/drm/i915/dvo_ns2501.c b/drivers/gpu/drm/i915/dvo_ns2501.c
index 4416304..97ae8aa 100644
--- a/drivers/gpu/drm/i915/dvo_ns2501.c
+++ b/drivers/gpu/drm/i915/dvo_ns2501.c
@@ -60,6 +60,130 @@
 
 #define NS2501_REGC 0x0c
 
+/*
+ * The following registers are not part of the official datasheet
+ * and are the result of reverse engineering.
+ */
+
+/*
+ * Register c0 controls how the DVO synchronizes with
+ * its input.
+ */
+#define NS2501_REGC0 0xc0
+#define NS2501_C0_ENABLE (1<<0)	/* enable the DVO sync in general */
+#define NS2501_C0_HSYNC (1<<1)	/* synchronize horizontal with input */
+#define NS2501_C0_VSYNC (1<<2)	/* synchronize vertical with input */
+#define NS2501_C0_RESET (1<<7)	/* reset the synchronization flip/flops */
+
+/*
+ * Register 41 is somehow related to the sync register and sync
+ * configuration. It should be 0x32 whenever regC0 is 0x05 (hsync off)
+ * and 0x00 otherwise.
+ */
+#define NS2501_REG41 0x41
+
+/*
+ * This register controls the dithering of the DVO.
+ * One bit enables it, the other bits define the dithering depth.
+ * The higher the value, the lower the dithering depth.
+ */
+#define NS2501_F9_REG 0xf9
+#define NS2501_F9_ENABLE (1<<0)		/* if set, dithering is enabled */
+#define NS2501_F9_DITHER_MASK (0x7f<<1)	/* controls the dither depth */
+#define NS2501_F9_DITHER_SHIFT 1	/* shifts the dither mask */
+
+/*
+ * PLL configuration register. This is a pair of registers,
+ * one single byte register at 1B, and a pair at 1C,1D.
+ * These registers are counters/dividers.
+ */
+#define NS2501_REG1B 0x1b /* one byte PLL control register */
+#define NS2501_REG1C 0x1c /* low-part of the second register */
+#define NS2501_REG1D 0x1d /* high-part of the second register */
+
+/*
+ * Scaler control registers. Horizontal at b8,b9,
+ * vertical at 10,11. The scale factor is computed as
+ * 2^16/control-value. The low-byte comes first.
+ */
+#define NS2501_REG10 0x10 /* low-byte vertical scaler */
+#define NS2501_REG11 0x11 /* high-byte vertical scaler */
+#define NS2501_REGB8 0xb8 /* low-byte horizontal scaler */
+#define NS2501_REGB9 0xb9 /* high-byte horizontal scaler */
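+
+/*
+ * For example, assuming the 1024x768 panel the mode tables below are
+ * written for: upscaling a 640 pixel wide mode to the full panel width
+ * needs a factor of 1024/640 = 1.6, i.e. a control value of
+ * 65536/1.6 = 40960, which is exactly the .hscale/.vscale value used
+ * by MODE_640x480 in ns2501_modes.
+ */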
+
+/*
+ * Display window definition. This consists of four registers
+ * per dimension. One register pair defines the start of the
+ * display, one the end.
+ * As far as I understand, this defines the window within which
+ * the scaler samples the input.
+ */
+#define NS2501_REGC1 0xc1 /* low-byte horizontal display start */
+#define NS2501_REGC2 0xc2 /* high-byte horizontal display start */
+#define NS2501_REGC3 0xc3 /* low-byte horizontal display stop */
+#define NS2501_REGC4 0xc4 /* high-byte horizontal display stop */
+#define NS2501_REGC5 0xc5 /* low-byte vertical display start */
+#define NS2501_REGC6 0xc6 /* high-byte vertical display start */
+#define NS2501_REGC7 0xc7 /* low-byte vertical display stop */
+#define NS2501_REGC8 0xc8 /* high-byte vertical display stop */
+
+/*
+ * The following register pair seems to define the start of
+ * the vertical sync. If automatic syncing is enabled, and the
+ * register value defines a sync pulse that is later than the
+ * incoming sync, then the register value is ignored and the
+ * external hsync triggers the synchronization.
+ */
+#define NS2501_REG80 0x80 /* low-byte vsync-start */
+#define NS2501_REG81 0x81 /* high-byte vsync-start */
+
+/*
+ * The following register pair seems to define the total number
+ * of lines created at the output side of the scaler.
+ * This is again a low-high register pair.
+ */
+#define NS2501_REG82 0x82 /* output display height, low byte */
+#define NS2501_REG83 0x83 /* output display height, high byte */
+
+/*
+ * The following registers define the end of the front-porch
+ * in horizontal and vertical position and hence allow shifting
+ * the image left/right or up/down.
+ */
+#define NS2501_REG98 0x98 /* horizontal start of display + 256, low */
+#define NS2501_REG99 0x99 /* horizontal start of display + 256, high */
+#define NS2501_REG8E 0x8e /* vertical start of the display, low byte */
+#define NS2501_REG8F 0x8f /* vertical start of the display, high byte */
+
+/*
+ * The following register pair controls the function of the
+ * backlight and the DVO output. To enable the corresponding
+ * function, the corresponding bit must be set in both registers.
+ */
+#define NS2501_REG34 0x34 /* DVO enable functions, first register */
+#define NS2501_REG35 0x35 /* DVO enable functions, second register */
+#define NS2501_34_ENABLE_OUTPUT (1<<0) /* enable DVO output */
+#define NS2501_34_ENABLE_BACKLIGHT (1<<1) /* enable backlight */
+
+/*
+ * Registers 9C and 9D define the vertical output offset
+ * of the visible region.
+ */
+#define NS2501_REG9C 0x9c
+#define NS2501_REG9D 0x9d
+
+/*
+ * The register 9F defines the dithering. This requires the
+ * scaler to be ON. Bit 0 enables dithering, the remaining
+ * bits control the depth of the dither. The higher the value,
+ * the LOWER the dithering amplitude. A good value seems to be
+ * 15 (total register value).
+ */
+#define NS2501_REGF9 0xf9
+#define NS2501_F9_ENABLE_DITHER (1<<0) /* enable dithering */
+#define NS2501_F9_DITHER_MASK (0x7f<<1) /* dither masking */
+#define NS2501_F9_DITHER_SHIFT 1	/* upshift of the dither mask */
+
 enum {
 	MODE_640x480,
 	MODE_800x600,
@@ -72,274 +196,178 @@
 };
 
 /*
- * Magic values based on what the BIOS on
- * Fujitsu-Siemens Lifebook S6010 programs (1024x768 panel).
+ * The following structure keeps the complete configuration of
+ * the DVO, given a specific output configuration.
+ * This is pretty much guess-work from reverse-engineering, so
+ * read all this with a grain of salt.
  */
-static const struct ns2501_reg regs_1024x768[][86] = {
+struct ns2501_configuration {
+	uint8_t sync;		/* configuration of the C0 register */
+	uint8_t conf;		/* configuration register 8 */
+	uint8_t syncb;		/* configuration register 41 */
+	uint8_t	dither;		/* configuration of the dithering */
+	uint8_t pll_a;		/* PLL configuration, register A, 1B */
+	uint16_t pll_b;		/* PLL configuration, register B, 1C/1D */
+	uint16_t hstart;	/* horizontal start, registers C1/C2 */
+	uint16_t hstop;		/* horizontal total, registers C3/C4 */
+	uint16_t vstart;	/* vertical start, registers C5/C6 */
+	uint16_t vstop;		/* vertical total, registers C7/C8 */
+	uint16_t vsync;         /* manual vertical sync start, 80/81 */
+	uint16_t vtotal;        /* number of lines generated, 82/83 */
+	uint16_t hpos;		/* horizontal position + 256, 98/99  */
+	uint16_t vpos;		/* vertical position, 8e/8f */
+	uint16_t voffs;		/* vertical output offset, 9c/9d */
+	uint16_t hscale;	/* horizontal scaling factor, b8/b9 */
+	uint16_t vscale;	/* vertical scaling factor, 10/11 */
+};
+
+/*
+ * DVO configuration values, partially based on what the BIOS
+ * of the Fujitsu Lifebook S6010 writes into registers,
+ * partially found by manual tweaking. These configurations assume
+ * a 1024x768 panel.
+ */
+static const struct ns2501_configuration ns2501_modes[] = {
 	[MODE_640x480] = {
-		[0] = { .offset = 0x0a, .value = 0x81, },
-		[1] = { .offset = 0x18, .value = 0x07, },
-		[2] = { .offset = 0x19, .value = 0x00, },
-		[3] = { .offset = 0x1a, .value = 0x00, },
-		[4] = { .offset = 0x1b, .value = 0x11, },
-		[5] = { .offset = 0x1c, .value = 0x54, },
-		[6] = { .offset = 0x1d, .value = 0x03, },
-		[7] = { .offset = 0x1e, .value = 0x02, },
-		[8] = { .offset = 0xf3, .value = 0x90, },
-		[9] = { .offset = 0xf9, .value = 0x00, },
-		[10] = { .offset = 0xc1, .value = 0x90, },
-		[11] = { .offset = 0xc2, .value = 0x00, },
-		[12] = { .offset = 0xc3, .value = 0x0f, },
-		[13] = { .offset = 0xc4, .value = 0x03, },
-		[14] = { .offset = 0xc5, .value = 0x16, },
-		[15] = { .offset = 0xc6, .value = 0x00, },
-		[16] = { .offset = 0xc7, .value = 0x02, },
-		[17] = { .offset = 0xc8, .value = 0x02, },
-		[18] = { .offset = 0xf4, .value = 0x00, },
-		[19] = { .offset = 0x80, .value = 0xff, },
-		[20] = { .offset = 0x81, .value = 0x07, },
-		[21] = { .offset = 0x82, .value = 0x3d, },
-		[22] = { .offset = 0x83, .value = 0x05, },
-		[23] = { .offset = 0x94, .value = 0x00, },
-		[24] = { .offset = 0x95, .value = 0x00, },
-		[25] = { .offset = 0x96, .value = 0x05, },
-		[26] = { .offset = 0x97, .value = 0x00, },
-		[27] = { .offset = 0x9a, .value = 0x88, },
-		[28] = { .offset = 0x9b, .value = 0x00, },
-		[29] = { .offset = 0x98, .value = 0x00, },
-		[30] = { .offset = 0x99, .value = 0x00, },
-		[31] = { .offset = 0xf7, .value = 0x88, },
-		[32] = { .offset = 0xf8, .value = 0x0a, },
-		[33] = { .offset = 0x9c, .value = 0x24, },
-		[34] = { .offset = 0x9d, .value = 0x00, },
-		[35] = { .offset = 0x9e, .value = 0x25, },
-		[36] = { .offset = 0x9f, .value = 0x03, },
-		[37] = { .offset = 0xa0, .value = 0x28, },
-		[38] = { .offset = 0xa1, .value = 0x01, },
-		[39] = { .offset = 0xa2, .value = 0x28, },
-		[40] = { .offset = 0xa3, .value = 0x05, },
-		[41] = { .offset = 0xb6, .value = 0x09, },
-		[42] = { .offset = 0xb8, .value = 0x00, },
-		[43] = { .offset = 0xb9, .value = 0xa0, },
-		[44] = { .offset = 0xba, .value = 0x00, },
-		[45] = { .offset = 0xbb, .value = 0x20, },
-		[46] = { .offset = 0x10, .value = 0x00, },
-		[47] = { .offset = 0x11, .value = 0xa0, },
-		[48] = { .offset = 0x12, .value = 0x02, },
-		[49] = { .offset = 0x20, .value = 0x00, },
-		[50] = { .offset = 0x22, .value = 0x00, },
-		[51] = { .offset = 0x23, .value = 0x00, },
-		[52] = { .offset = 0x24, .value = 0x00, },
-		[53] = { .offset = 0x25, .value = 0x00, },
-		[54] = { .offset = 0x8c, .value = 0x10, },
-		[55] = { .offset = 0x8d, .value = 0x02, },
-		[56] = { .offset = 0x8e, .value = 0x10, },
-		[57] = { .offset = 0x8f, .value = 0x00, },
-		[58] = { .offset = 0x90, .value = 0xff, },
-		[59] = { .offset = 0x91, .value = 0x07, },
-		[60] = { .offset = 0x92, .value = 0xa0, },
-		[61] = { .offset = 0x93, .value = 0x02, },
-		[62] = { .offset = 0xa5, .value = 0x00, },
-		[63] = { .offset = 0xa6, .value = 0x00, },
-		[64] = { .offset = 0xa7, .value = 0x00, },
-		[65] = { .offset = 0xa8, .value = 0x00, },
-		[66] = { .offset = 0xa9, .value = 0x04, },
-		[67] = { .offset = 0xaa, .value = 0x70, },
-		[68] = { .offset = 0xab, .value = 0x4f, },
-		[69] = { .offset = 0xac, .value = 0x00, },
-		[70] = { .offset = 0xa4, .value = 0x84, },
-		[71] = { .offset = 0x7e, .value = 0x18, },
-		[72] = { .offset = 0x84, .value = 0x00, },
-		[73] = { .offset = 0x85, .value = 0x00, },
-		[74] = { .offset = 0x86, .value = 0x00, },
-		[75] = { .offset = 0x87, .value = 0x00, },
-		[76] = { .offset = 0x88, .value = 0x00, },
-		[77] = { .offset = 0x89, .value = 0x00, },
-		[78] = { .offset = 0x8a, .value = 0x00, },
-		[79] = { .offset = 0x8b, .value = 0x00, },
-		[80] = { .offset = 0x26, .value = 0x00, },
-		[81] = { .offset = 0x27, .value = 0x00, },
-		[82] = { .offset = 0xad, .value = 0x00, },
-		[83] = { .offset = 0x08, .value = 0x30, }, /* 0x31 */
-		[84] = { .offset = 0x41, .value = 0x00, },
-		[85] = { .offset = 0xc0, .value = 0x05, },
+		.sync	= NS2501_C0_ENABLE | NS2501_C0_VSYNC,
+		.conf	= NS2501_8_VEN | NS2501_8_HEN | NS2501_8_PD,
+		.syncb	= 0x32,
+		.dither	= 0x0f,
+		.pll_a	= 17,
+		.pll_b	= 852,
+		.hstart	= 144,
+		.hstop	= 783,
+		.vstart	= 22,
+		.vstop	= 514,
+		.vsync	= 2047, /* actually, ignored with this config */
+		.vtotal	= 1341,
+		.hpos	= 0,
+		.vpos	= 16,
+		.voffs	= 36,
+		.hscale	= 40960,
+		.vscale	= 40960
 	},
 	[MODE_800x600] = {
-		[0] = { .offset = 0x0a, .value = 0x81, },
-		[1] = { .offset = 0x18, .value = 0x07, },
-		[2] = { .offset = 0x19, .value = 0x00, },
-		[3] = { .offset = 0x1a, .value = 0x00, },
-		[4] = { .offset = 0x1b, .value = 0x19, },
-		[5] = { .offset = 0x1c, .value = 0x64, },
-		[6] = { .offset = 0x1d, .value = 0x02, },
-		[7] = { .offset = 0x1e, .value = 0x02, },
-		[8] = { .offset = 0xf3, .value = 0x90, },
-		[9] = { .offset = 0xf9, .value = 0x00, },
-		[10] = { .offset = 0xc1, .value = 0xd7, },
-		[11] = { .offset = 0xc2, .value = 0x00, },
-		[12] = { .offset = 0xc3, .value = 0xf8, },
-		[13] = { .offset = 0xc4, .value = 0x03, },
-		[14] = { .offset = 0xc5, .value = 0x1a, },
-		[15] = { .offset = 0xc6, .value = 0x00, },
-		[16] = { .offset = 0xc7, .value = 0x73, },
-		[17] = { .offset = 0xc8, .value = 0x02, },
-		[18] = { .offset = 0xf4, .value = 0x00, },
-		[19] = { .offset = 0x80, .value = 0x27, },
-		[20] = { .offset = 0x81, .value = 0x03, },
-		[21] = { .offset = 0x82, .value = 0x41, },
-		[22] = { .offset = 0x83, .value = 0x05, },
-		[23] = { .offset = 0x94, .value = 0x00, },
-		[24] = { .offset = 0x95, .value = 0x00, },
-		[25] = { .offset = 0x96, .value = 0x05, },
-		[26] = { .offset = 0x97, .value = 0x00, },
-		[27] = { .offset = 0x9a, .value = 0x88, },
-		[28] = { .offset = 0x9b, .value = 0x00, },
-		[29] = { .offset = 0x98, .value = 0x00, },
-		[30] = { .offset = 0x99, .value = 0x00, },
-		[31] = { .offset = 0xf7, .value = 0x88, },
-		[32] = { .offset = 0xf8, .value = 0x06, },
-		[33] = { .offset = 0x9c, .value = 0x23, },
-		[34] = { .offset = 0x9d, .value = 0x00, },
-		[35] = { .offset = 0x9e, .value = 0x25, },
-		[36] = { .offset = 0x9f, .value = 0x03, },
-		[37] = { .offset = 0xa0, .value = 0x28, },
-		[38] = { .offset = 0xa1, .value = 0x01, },
-		[39] = { .offset = 0xa2, .value = 0x28, },
-		[40] = { .offset = 0xa3, .value = 0x05, },
-		[41] = { .offset = 0xb6, .value = 0x09, },
-		[42] = { .offset = 0xb8, .value = 0x30, },
-		[43] = { .offset = 0xb9, .value = 0xc8, },
-		[44] = { .offset = 0xba, .value = 0x00, },
-		[45] = { .offset = 0xbb, .value = 0x20, },
-		[46] = { .offset = 0x10, .value = 0x20, },
-		[47] = { .offset = 0x11, .value = 0xc8, },
-		[48] = { .offset = 0x12, .value = 0x02, },
-		[49] = { .offset = 0x20, .value = 0x00, },
-		[50] = { .offset = 0x22, .value = 0x00, },
-		[51] = { .offset = 0x23, .value = 0x00, },
-		[52] = { .offset = 0x24, .value = 0x00, },
-		[53] = { .offset = 0x25, .value = 0x00, },
-		[54] = { .offset = 0x8c, .value = 0x10, },
-		[55] = { .offset = 0x8d, .value = 0x02, },
-		[56] = { .offset = 0x8e, .value = 0x04, },
-		[57] = { .offset = 0x8f, .value = 0x00, },
-		[58] = { .offset = 0x90, .value = 0xff, },
-		[59] = { .offset = 0x91, .value = 0x07, },
-		[60] = { .offset = 0x92, .value = 0xa0, },
-		[61] = { .offset = 0x93, .value = 0x02, },
-		[62] = { .offset = 0xa5, .value = 0x00, },
-		[63] = { .offset = 0xa6, .value = 0x00, },
-		[64] = { .offset = 0xa7, .value = 0x00, },
-		[65] = { .offset = 0xa8, .value = 0x00, },
-		[66] = { .offset = 0xa9, .value = 0x83, },
-		[67] = { .offset = 0xaa, .value = 0x40, },
-		[68] = { .offset = 0xab, .value = 0x32, },
-		[69] = { .offset = 0xac, .value = 0x00, },
-		[70] = { .offset = 0xa4, .value = 0x80, },
-		[71] = { .offset = 0x7e, .value = 0x18, },
-		[72] = { .offset = 0x84, .value = 0x00, },
-		[73] = { .offset = 0x85, .value = 0x00, },
-		[74] = { .offset = 0x86, .value = 0x00, },
-		[75] = { .offset = 0x87, .value = 0x00, },
-		[76] = { .offset = 0x88, .value = 0x00, },
-		[77] = { .offset = 0x89, .value = 0x00, },
-		[78] = { .offset = 0x8a, .value = 0x00, },
-		[79] = { .offset = 0x8b, .value = 0x00, },
-		[80] = { .offset = 0x26, .value = 0x00, },
-		[81] = { .offset = 0x27, .value = 0x00, },
-		[82] = { .offset = 0xad, .value = 0x00, },
-		[83] = { .offset = 0x08, .value = 0x30, }, /* 0x31 */
-		[84] = { .offset = 0x41, .value = 0x00, },
-		[85] = { .offset = 0xc0, .value = 0x07, },
+		.sync	= NS2501_C0_ENABLE |
+			  NS2501_C0_HSYNC | NS2501_C0_VSYNC,
+		.conf   = NS2501_8_VEN | NS2501_8_HEN | NS2501_8_PD,
+		.syncb	= 0x00,
+		.dither	= 0x0f,
+		.pll_a	= 25,
+		.pll_b	= 612,
+		.hstart	= 215,
+		.hstop	= 1016,
+		.vstart	= 26,
+		.vstop	= 627,
+		.vsync	= 807,
+		.vtotal	= 1341,
+		.hpos	= 0,
+		.vpos	= 4,
+		.voffs	= 35,
+		.hscale	= 51248,
+		.vscale	= 51232
 	},
 	[MODE_1024x768] = {
-		[0] = { .offset = 0x0a, .value = 0x81, },
-		[1] = { .offset = 0x18, .value = 0x07, },
-		[2] = { .offset = 0x19, .value = 0x00, },
-		[3] = { .offset = 0x1a, .value = 0x00, },
-		[4] = { .offset = 0x1b, .value = 0x11, },
-		[5] = { .offset = 0x1c, .value = 0x54, },
-		[6] = { .offset = 0x1d, .value = 0x03, },
-		[7] = { .offset = 0x1e, .value = 0x02, },
-		[8] = { .offset = 0xf3, .value = 0x90, },
-		[9] = { .offset = 0xf9, .value = 0x00, },
-		[10] = { .offset = 0xc1, .value = 0x90, },
-		[11] = { .offset = 0xc2, .value = 0x00, },
-		[12] = { .offset = 0xc3, .value = 0x0f, },
-		[13] = { .offset = 0xc4, .value = 0x03, },
-		[14] = { .offset = 0xc5, .value = 0x16, },
-		[15] = { .offset = 0xc6, .value = 0x00, },
-		[16] = { .offset = 0xc7, .value = 0x02, },
-		[17] = { .offset = 0xc8, .value = 0x02, },
-		[18] = { .offset = 0xf4, .value = 0x00, },
-		[19] = { .offset = 0x80, .value = 0xff, },
-		[20] = { .offset = 0x81, .value = 0x07, },
-		[21] = { .offset = 0x82, .value = 0x3d, },
-		[22] = { .offset = 0x83, .value = 0x05, },
-		[23] = { .offset = 0x94, .value = 0x00, },
-		[24] = { .offset = 0x95, .value = 0x00, },
-		[25] = { .offset = 0x96, .value = 0x05, },
-		[26] = { .offset = 0x97, .value = 0x00, },
-		[27] = { .offset = 0x9a, .value = 0x88, },
-		[28] = { .offset = 0x9b, .value = 0x00, },
-		[29] = { .offset = 0x98, .value = 0x00, },
-		[30] = { .offset = 0x99, .value = 0x00, },
-		[31] = { .offset = 0xf7, .value = 0x88, },
-		[32] = { .offset = 0xf8, .value = 0x0a, },
-		[33] = { .offset = 0x9c, .value = 0x24, },
-		[34] = { .offset = 0x9d, .value = 0x00, },
-		[35] = { .offset = 0x9e, .value = 0x25, },
-		[36] = { .offset = 0x9f, .value = 0x03, },
-		[37] = { .offset = 0xa0, .value = 0x28, },
-		[38] = { .offset = 0xa1, .value = 0x01, },
-		[39] = { .offset = 0xa2, .value = 0x28, },
-		[40] = { .offset = 0xa3, .value = 0x05, },
-		[41] = { .offset = 0xb6, .value = 0x09, },
-		[42] = { .offset = 0xb8, .value = 0x00, },
-		[43] = { .offset = 0xb9, .value = 0xa0, },
-		[44] = { .offset = 0xba, .value = 0x00, },
-		[45] = { .offset = 0xbb, .value = 0x20, },
-		[46] = { .offset = 0x10, .value = 0x00, },
-		[47] = { .offset = 0x11, .value = 0xa0, },
-		[48] = { .offset = 0x12, .value = 0x02, },
-		[49] = { .offset = 0x20, .value = 0x00, },
-		[50] = { .offset = 0x22, .value = 0x00, },
-		[51] = { .offset = 0x23, .value = 0x00, },
-		[52] = { .offset = 0x24, .value = 0x00, },
-		[53] = { .offset = 0x25, .value = 0x00, },
-		[54] = { .offset = 0x8c, .value = 0x10, },
-		[55] = { .offset = 0x8d, .value = 0x02, },
-		[56] = { .offset = 0x8e, .value = 0x10, },
-		[57] = { .offset = 0x8f, .value = 0x00, },
-		[58] = { .offset = 0x90, .value = 0xff, },
-		[59] = { .offset = 0x91, .value = 0x07, },
-		[60] = { .offset = 0x92, .value = 0xa0, },
-		[61] = { .offset = 0x93, .value = 0x02, },
-		[62] = { .offset = 0xa5, .value = 0x00, },
-		[63] = { .offset = 0xa6, .value = 0x00, },
-		[64] = { .offset = 0xa7, .value = 0x00, },
-		[65] = { .offset = 0xa8, .value = 0x00, },
-		[66] = { .offset = 0xa9, .value = 0x04, },
-		[67] = { .offset = 0xaa, .value = 0x70, },
-		[68] = { .offset = 0xab, .value = 0x4f, },
-		[69] = { .offset = 0xac, .value = 0x00, },
-		[70] = { .offset = 0xa4, .value = 0x84, },
-		[71] = { .offset = 0x7e, .value = 0x18, },
-		[72] = { .offset = 0x84, .value = 0x00, },
-		[73] = { .offset = 0x85, .value = 0x00, },
-		[74] = { .offset = 0x86, .value = 0x00, },
-		[75] = { .offset = 0x87, .value = 0x00, },
-		[76] = { .offset = 0x88, .value = 0x00, },
-		[77] = { .offset = 0x89, .value = 0x00, },
-		[78] = { .offset = 0x8a, .value = 0x00, },
-		[79] = { .offset = 0x8b, .value = 0x00, },
-		[80] = { .offset = 0x26, .value = 0x00, },
-		[81] = { .offset = 0x27, .value = 0x00, },
-		[82] = { .offset = 0xad, .value = 0x00, },
-		[83] = { .offset = 0x08, .value = 0x34, }, /* 0x35 */
-		[84] = { .offset = 0x41, .value = 0x00, },
-		[85] = { .offset = 0xc0, .value = 0x01, },
-	},
+		.sync	= NS2501_C0_ENABLE | NS2501_C0_VSYNC,
+		.conf   = NS2501_8_VEN | NS2501_8_HEN | NS2501_8_PD,
+		.syncb	= 0x32,
+		.dither	= 0x0f,
+		.pll_a	= 11,
+		.pll_b	= 1350,
+		.hstart	= 276,
+		.hstop	= 1299,
+		.vstart	= 15,
+		.vstop	= 1056,
+		.vsync	= 2047,
+		.vtotal	= 1341,
+		.hpos	= 0,
+		.vpos	= 7,
+		.voffs	= 27,
+		.hscale	= 65535,
+		.vscale	= 65535
+	}
+};
+
+/*
+ * Other configuration values left by the BIOS of the
+ * Fujitsu S6010 in the DVO control registers. Their
+ * value does not depend on the mode and their meaning
+ * is unknown.
+ */
+
+static const struct ns2501_reg mode_agnostic_values[] = {
+	/* 08 is mode specific */
+	[0] = { .offset = 0x0a, .value = 0x81, },
+	/* 10,11 are part of the mode specific configuration */
+	[1] = { .offset = 0x12, .value = 0x02, },
+	[2] = { .offset = 0x18, .value = 0x07, },
+	[3] = { .offset = 0x19, .value = 0x00, },
+	[4] = { .offset = 0x1a, .value = 0x00, }, /* PLL?, ignored */
+	/* 1b,1c,1d are part of the mode specific configuration */
+	[5] = { .offset = 0x1e, .value = 0x02, },
+	[6] = { .offset = 0x1f, .value = 0x40, },
+	[7] = { .offset = 0x20, .value = 0x00, },
+	[8] = { .offset = 0x21, .value = 0x00, },
+	[9] = { .offset = 0x22, .value = 0x00, },
+	[10] = { .offset = 0x23, .value = 0x00, },
+	[11] = { .offset = 0x24, .value = 0x00, },
+	[12] = { .offset = 0x25, .value = 0x00, },
+	[13] = { .offset = 0x26, .value = 0x00, },
+	[14] = { .offset = 0x27, .value = 0x00, },
+	[15] = { .offset = 0x7e, .value = 0x18, },
+	/* 80-84 are part of the mode-specific configuration */
+	[16] = { .offset = 0x84, .value = 0x00, },
+	[17] = { .offset = 0x85, .value = 0x00, },
+	[18] = { .offset = 0x86, .value = 0x00, },
+	[19] = { .offset = 0x87, .value = 0x00, },
+	[20] = { .offset = 0x88, .value = 0x00, },
+	[21] = { .offset = 0x89, .value = 0x00, },
+	[22] = { .offset = 0x8a, .value = 0x00, },
+	[23] = { .offset = 0x8b, .value = 0x00, },
+	[24] = { .offset = 0x8c, .value = 0x10, },
+	[25] = { .offset = 0x8d, .value = 0x02, },
+	/* 8e,8f are part of the mode-specific configuration */
+	[26] = { .offset = 0x90, .value = 0xff, },
+	[27] = { .offset = 0x91, .value = 0x07, },
+	[28] = { .offset = 0x92, .value = 0xa0, },
+	[29] = { .offset = 0x93, .value = 0x02, },
+	[30] = { .offset = 0x94, .value = 0x00, },
+	[31] = { .offset = 0x95, .value = 0x00, },
+	[32] = { .offset = 0x96, .value = 0x05, },
+	[33] = { .offset = 0x97, .value = 0x00, },
+	/* 98,99 are part of the mode-specific configuration */
+	[34] = { .offset = 0x9a, .value = 0x88, },
+	[35] = { .offset = 0x9b, .value = 0x00, },
+	/* 9c,9d are part of the mode-specific configuration */
+	[36] = { .offset = 0x9e, .value = 0x25, },
+	[37] = { .offset = 0x9f, .value = 0x03, },
+	[38] = { .offset = 0xa0, .value = 0x28, },
+	[39] = { .offset = 0xa1, .value = 0x01, },
+	[40] = { .offset = 0xa2, .value = 0x28, },
+	[41] = { .offset = 0xa3, .value = 0x05, },
+	/* register 0xa4 is mode specific, but 0x80..0x84 works always */
+	[42] = { .offset = 0xa4, .value = 0x84, },
+	[43] = { .offset = 0xa5, .value = 0x00, },
+	[44] = { .offset = 0xa6, .value = 0x00, },
+	[45] = { .offset = 0xa7, .value = 0x00, },
+	[46] = { .offset = 0xa8, .value = 0x00, },
+	/* 0xa9 to 0xab are mode specific, but have no visible effect */
+	[47] = { .offset = 0xa9, .value = 0x04, },
+	[48] = { .offset = 0xaa, .value = 0x70, },
+	[49] = { .offset = 0xab, .value = 0x4f, },
+	[50] = { .offset = 0xac, .value = 0x00, },
+	[51] = { .offset = 0xad, .value = 0x00, },
+	[52] = { .offset = 0xb6, .value = 0x09, },
+	[53] = { .offset = 0xb7, .value = 0x03, },
+	/* b8,b9 are part of the mode-specific configuration */
+	[54] = { .offset = 0xba, .value = 0x00, },
+	[55] = { .offset = 0xbb, .value = 0x20, },
+	[56] = { .offset = 0xf3, .value = 0x90, },
+	[57] = { .offset = 0xf4, .value = 0x00, },
+	[58] = { .offset = 0xf7, .value = 0x88, },
+	/* f8 is mode specific, but the value does not matter */
+	[59] = { .offset = 0xf8, .value = 0x0a, },
+	[60] = { .offset = 0xf9, .value = 0x00, }
 };
 
 static const struct ns2501_reg regs_init[] = {
@@ -350,25 +378,12 @@
 
 struct ns2501_priv {
 	bool quiet;
-	const struct ns2501_reg *regs;
+	const struct ns2501_configuration *conf;
 };
 
 #define NSPTR(d) ((NS2501Ptr)(d->DriverPrivate.ptr))
 
 /*
- * For reasons unclear to me, the ns2501 at least on the Fujitsu/Siemens
- * laptops does not react on the i2c bus unless
- * both the PLL is running and the display is configured in its native
- * resolution.
- * This function forces the DVO on, and stores the registers it touches.
- * Afterwards, registers are restored to regular values.
- *
- * This is pretty much a hack, though it works.
- * Without that, ns2501_readb and ns2501_writeb fail
- * when switching the resolution.
- */
-
-/*
 ** Read a register from the ns2501.
 ** Returns true if successful, false otherwise.
 ** If it returns false, it might be wise to enable the
@@ -534,6 +549,7 @@
 			    struct drm_display_mode *mode,
 			    struct drm_display_mode *adjusted_mode)
 {
+	const struct ns2501_configuration *conf;
 	struct ns2501_priv *ns = (struct ns2501_priv *)(dvo->dev_priv);
 	int mode_idx, i;
 
@@ -541,6 +557,36 @@
 	    ("set mode (hdisplay=%d,htotal=%d,vdisplay=%d,vtotal=%d).\n",
 	     mode->hdisplay, mode->htotal, mode->vdisplay, mode->vtotal);
 
+	DRM_DEBUG_KMS("Detailed requested mode settings are:\n"
+			"clock		: %d kHz\n"
+			"hdisplay	: %d\n"
+			"hblank start	: %d\n"
+			"hblank end	: %d\n"
+			"hsync start	: %d\n"
+			"hsync end	: %d\n"
+			"htotal		: %d\n"
+			"hskew		: %d\n"
+			"vdisplay	: %d\n"
+			"vblank start	: %d\n"
+			"vblank end	: %d\n"
+			"vsync start	: %d\n"
+			"vsync end	: %d\n"
+			"vtotal		: %d\n",
+			adjusted_mode->crtc_clock,
+			adjusted_mode->crtc_hdisplay,
+			adjusted_mode->crtc_hblank_start,
+			adjusted_mode->crtc_hblank_end,
+			adjusted_mode->crtc_hsync_start,
+			adjusted_mode->crtc_hsync_end,
+			adjusted_mode->crtc_htotal,
+			adjusted_mode->crtc_hskew,
+			adjusted_mode->crtc_vdisplay,
+			adjusted_mode->crtc_vblank_start,
+			adjusted_mode->crtc_vblank_end,
+			adjusted_mode->crtc_vsync_start,
+			adjusted_mode->crtc_vsync_end,
+			adjusted_mode->crtc_vtotal);
+
 	if (mode->hdisplay == 640 && mode->vdisplay == 480)
 		mode_idx = MODE_640x480;
 	else if (mode->hdisplay == 800 && mode->vdisplay == 600)
@@ -554,10 +600,44 @@
 	for (i = 0; i < ARRAY_SIZE(regs_init); i++)
 		ns2501_writeb(dvo, regs_init[i].offset, regs_init[i].value);
 
-	ns->regs = regs_1024x768[mode_idx];
+	/* Write the mode-agnostic values */
+	for (i = 0; i < ARRAY_SIZE(mode_agnostic_values); i++)
+		ns2501_writeb(dvo, mode_agnostic_values[i].offset,
+				mode_agnostic_values[i].value);
 
-	for (i = 0; i < 84; i++)
-		ns2501_writeb(dvo, ns->regs[i].offset, ns->regs[i].value);
+	/* Now write the mode-specific configuration */
+	conf = ns2501_modes + mode_idx;
+	ns->conf = conf;
+
+	ns2501_writeb(dvo, NS2501_REG8, conf->conf);
+	ns2501_writeb(dvo, NS2501_REG1B, conf->pll_a);
+	ns2501_writeb(dvo, NS2501_REG1C, conf->pll_b & 0xff);
+	ns2501_writeb(dvo, NS2501_REG1D, conf->pll_b >> 8);
+	ns2501_writeb(dvo, NS2501_REGC1, conf->hstart & 0xff);
+	ns2501_writeb(dvo, NS2501_REGC2, conf->hstart >> 8);
+	ns2501_writeb(dvo, NS2501_REGC3, conf->hstop & 0xff);
+	ns2501_writeb(dvo, NS2501_REGC4, conf->hstop >> 8);
+	ns2501_writeb(dvo, NS2501_REGC5, conf->vstart & 0xff);
+	ns2501_writeb(dvo, NS2501_REGC6, conf->vstart >> 8);
+	ns2501_writeb(dvo, NS2501_REGC7, conf->vstop & 0xff);
+	ns2501_writeb(dvo, NS2501_REGC8, conf->vstop >> 8);
+	ns2501_writeb(dvo, NS2501_REG80, conf->vsync & 0xff);
+	ns2501_writeb(dvo, NS2501_REG81, conf->vsync >> 8);
+	ns2501_writeb(dvo, NS2501_REG82, conf->vtotal & 0xff);
+	ns2501_writeb(dvo, NS2501_REG83, conf->vtotal >> 8);
+	ns2501_writeb(dvo, NS2501_REG98, conf->hpos & 0xff);
+	ns2501_writeb(dvo, NS2501_REG99, conf->hpos >> 8);
+	ns2501_writeb(dvo, NS2501_REG8E, conf->vpos & 0xff);
+	ns2501_writeb(dvo, NS2501_REG8F, conf->vpos >> 8);
+	ns2501_writeb(dvo, NS2501_REG9C, conf->voffs & 0xff);
+	ns2501_writeb(dvo, NS2501_REG9D, conf->voffs >> 8);
+	ns2501_writeb(dvo, NS2501_REGB8, conf->hscale & 0xff);
+	ns2501_writeb(dvo, NS2501_REGB9, conf->hscale >> 8);
+	ns2501_writeb(dvo, NS2501_REG10, conf->vscale & 0xff);
+	ns2501_writeb(dvo, NS2501_REG11, conf->vscale >> 8);
+	ns2501_writeb(dvo, NS2501_REGF9, conf->dither);
+	ns2501_writeb(dvo, NS2501_REG41, conf->syncb);
+	ns2501_writeb(dvo, NS2501_REGC0, conf->sync);
 }
 
 /* set the NS2501 power state */
@@ -579,34 +659,32 @@
 	DRM_DEBUG_KMS("Trying set the dpms of the DVO to %i\n", enable);
 
 	if (enable) {
-		if (WARN_ON(ns->regs[83].offset != 0x08 ||
-			    ns->regs[84].offset != 0x41 ||
-			    ns->regs[85].offset != 0xc0))
-			return;
+		ns2501_writeb(dvo, NS2501_REGC0, ns->conf->sync | 0x08);
 
-		ns2501_writeb(dvo, 0xc0, ns->regs[85].value | 0x08);
+		ns2501_writeb(dvo, NS2501_REG41, ns->conf->syncb);
 
-		ns2501_writeb(dvo, 0x41, ns->regs[84].value);
-
-		ns2501_writeb(dvo, 0x34, 0x01);
+		ns2501_writeb(dvo, NS2501_REG34, NS2501_34_ENABLE_OUTPUT);
 		msleep(15);
 
-		ns2501_writeb(dvo, 0x08, 0x35);
-		if (!(ns->regs[83].value & NS2501_8_BPAS))
-			ns2501_writeb(dvo, 0x08, 0x31);
+		ns2501_writeb(dvo, NS2501_REG8,
+				ns->conf->conf | NS2501_8_BPAS);
+		if (!(ns->conf->conf & NS2501_8_BPAS))
+			ns2501_writeb(dvo, NS2501_REG8, ns->conf->conf);
 		msleep(200);
 
-		ns2501_writeb(dvo, 0x34, 0x03);
+		ns2501_writeb(dvo, NS2501_REG34,
+			NS2501_34_ENABLE_OUTPUT | NS2501_34_ENABLE_BACKLIGHT);
 
-		ns2501_writeb(dvo, 0xc0, ns->regs[85].value);
+		ns2501_writeb(dvo, NS2501_REGC0, ns->conf->sync);
 	} else {
-		ns2501_writeb(dvo, 0x34, 0x01);
+		ns2501_writeb(dvo, NS2501_REG34, NS2501_34_ENABLE_OUTPUT);
 		msleep(200);
 
-		ns2501_writeb(dvo, 0x08, 0x34);
+		ns2501_writeb(dvo, NS2501_REG8, NS2501_8_VEN | NS2501_8_HEN |
+				NS2501_8_BPAS);
 		msleep(15);
 
-		ns2501_writeb(dvo, 0x34, 0x00);
+		ns2501_writeb(dvo, NS2501_REG34, 0x00);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 61ae8ff..9605ff8 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -869,6 +869,9 @@
 	    batch_len + batch_start_offset > src_obj->base.size)
 		return ERR_PTR(-E2BIG);
 
+	if (WARN_ON(dest_obj->pages_pin_count == 0))
+		return ERR_PTR(-ENODEV);
+
 	ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
@@ -882,13 +885,6 @@
 		goto unpin_src;
 	}
 
-	ret = i915_gem_object_get_pages(dest_obj);
-	if (ret) {
-		DRM_DEBUG_DRIVER("CMD: Failed to get pages for shadow batch\n");
-		goto unmap_src;
-	}
-	i915_gem_object_pin_pages(dest_obj);
-
 	ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
@@ -898,7 +894,6 @@
 	dst = vmap_batch(dest_obj, 0, batch_len);
 	if (!dst) {
 		DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
-		i915_gem_object_unpin_pages(dest_obj);
 		ret = -ENOMEM;
 		goto unmap_src;
 	}
@@ -1129,7 +1124,6 @@
 	}
 
 	vunmap(batch_base);
-	i915_gem_object_unpin_pages(shadow_batch_obj);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 007c7d7..9c2b9e4 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -96,7 +96,7 @@
 
 static const char *get_pin_flag(struct drm_i915_gem_object *obj)
 {
-	if (i915_gem_obj_is_pinned(obj))
+	if (obj->pin_display)
 		return "p";
 	else
 		return " ";
@@ -123,8 +123,9 @@
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
+	seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
 		   &obj->base,
+		   obj->active ? "*" : " ",
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
 		   get_global_flag(obj),
@@ -159,9 +160,9 @@
 	}
 	if (obj->stolen)
 		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
-	if (obj->pin_mappable || obj->fault_mappable) {
+	if (obj->pin_display || obj->fault_mappable) {
 		char s[3], *t = s;
-		if (obj->pin_mappable)
+		if (obj->pin_display)
 			*t++ = 'p';
 		if (obj->fault_mappable)
 			*t++ = 'f';
@@ -361,31 +362,39 @@
 	return 0;
 }
 
-#define print_file_stats(m, name, stats) \
-	seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive, %zu global, %zu shared, %zu unbound)\n", \
-		   name, \
-		   stats.count, \
-		   stats.total, \
-		   stats.active, \
-		   stats.inactive, \
-		   stats.global, \
-		   stats.shared, \
-		   stats.unbound)
+#define print_file_stats(m, name, stats) do { \
+	if (stats.count) \
+		seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive, %zu global, %zu shared, %zu unbound)\n", \
+			   name, \
+			   stats.count, \
+			   stats.total, \
+			   stats.active, \
+			   stats.inactive, \
+			   stats.global, \
+			   stats.shared, \
+			   stats.unbound); \
+} while (0)
 
 static void print_batch_pool_stats(struct seq_file *m,
 				   struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj;
 	struct file_stats stats;
+	struct intel_engine_cs *ring;
+	int i, j;
 
 	memset(&stats, 0, sizeof(stats));
 
-	list_for_each_entry(obj,
-			    &dev_priv->mm.batch_pool.cache_list,
-			    batch_pool_list)
-		per_file_stats(0, obj, &stats);
+	for_each_ring(ring, dev_priv, i) {
+		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link)
+				per_file_stats(0, obj, &stats);
+		}
+	}
 
-	print_file_stats(m, "batch pool", stats);
+	print_file_stats(m, "[k]batch pool", stats);
 }
 
 #define count_vmas(list, member) do { \
@@ -449,7 +458,7 @@
 			size += i915_gem_obj_ggtt_size(obj);
 			++count;
 		}
-		if (obj->pin_mappable) {
+		if (obj->pin_display) {
 			mappable_size += i915_gem_obj_ggtt_size(obj);
 			++mappable_count;
 		}
@@ -471,8 +480,6 @@
 
 	seq_putc(m, '\n');
 	print_batch_pool_stats(m, dev_priv);
-
-	seq_putc(m, '\n');
 	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
 		struct file_stats stats;
 		struct task_struct *task;
@@ -613,24 +620,39 @@
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
-	int count = 0;
-	int ret;
+	struct intel_engine_cs *ring;
+	int total = 0;
+	int ret, i, j;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	seq_puts(m, "cache:\n");
-	list_for_each_entry(obj,
-			    &dev_priv->mm.batch_pool.cache_list,
-			    batch_pool_list) {
-		seq_puts(m, "   ");
-		describe_obj(m, obj);
-		seq_putc(m, '\n');
-		count++;
+	for_each_ring(ring, dev_priv, i) {
+		for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) {
+			int count;
+
+			count = 0;
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link)
+				count++;
+			seq_printf(m, "%s cache[%d]: %d objects\n",
+				   ring->name, j, count);
+
+			list_for_each_entry(obj,
+					    &ring->batch_pool.cache_list[j],
+					    batch_pool_link) {
+				seq_puts(m, "   ");
+				describe_obj(m, obj);
+				seq_putc(m, '\n');
+			}
+
+			total += count;
+		}
 	}
 
-	seq_printf(m, "total: %d\n", count);
+	seq_printf(m, "total: %d\n", total);
 
 	mutex_unlock(&dev->struct_mutex);
 
@@ -643,31 +665,44 @@
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	struct drm_i915_gem_request *gem_request;
-	int ret, count, i;
+	struct drm_i915_gem_request *rq;
+	int ret, any, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	count = 0;
+	any = 0;
 	for_each_ring(ring, dev_priv, i) {
-		if (list_empty(&ring->request_list))
+		int count;
+
+		count = 0;
+		list_for_each_entry(rq, &ring->request_list, list)
+			count++;
+		if (count == 0)
 			continue;
 
-		seq_printf(m, "%s requests:\n", ring->name);
-		list_for_each_entry(gem_request,
-				    &ring->request_list,
-				    list) {
-			seq_printf(m, "    %x @ %d\n",
-				   gem_request->seqno,
-				   (int) (jiffies - gem_request->emitted_jiffies));
+		seq_printf(m, "%s requests: %d\n", ring->name, count);
+		list_for_each_entry(rq, &ring->request_list, list) {
+			struct task_struct *task;
+
+			rcu_read_lock();
+			task = NULL;
+			if (rq->pid)
+				task = pid_task(rq->pid, PIDTYPE_PID);
+			seq_printf(m, "    %x @ %d: %s [%d]\n",
+				   rq->seqno,
+				   (int) (jiffies - rq->emitted_jiffies),
+				   task ? task->comm : "<unknown>",
+				   task ? task->pid : -1);
+			rcu_read_unlock();
 		}
-		count++;
+
+		any++;
 	}
 	mutex_unlock(&dev->struct_mutex);
 
-	if (count == 0)
+	if (any == 0)
 		seq_puts(m, "No requests\n");
 
 	return 0;
@@ -2153,8 +2188,6 @@
 	if (!ppgtt)
 		return;
 
-	seq_printf(m, "Page directories: %d\n", ppgtt->num_pd_pages);
-	seq_printf(m, "Page tables: %d\n", ppgtt->num_pd_entries);
 	for_each_ring(ring, dev_priv, unused) {
 		seq_printf(m, "%s\n", ring->name);
 		for (i = 0; i < 4; i++) {
@@ -2226,6 +2259,44 @@
 	return 0;
 }
 
+static int i915_rps_boost_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_file *file;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
+	if (ret)
+		goto unlock;
+
+	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
+		struct drm_i915_file_private *file_priv = file->driver_priv;
+		struct task_struct *task;
+
+		rcu_read_lock();
+		task = pid_task(file->pid, PIDTYPE_PID);
+		seq_printf(m, "%s [%d]: %d boosts%s\n",
+			   task ? task->comm : "<unknown>",
+			   task ? task->pid : -1,
+			   file_priv->rps_boosts,
+			   list_empty(&file_priv->rps_boost) ? "" : ", active");
+		rcu_read_unlock();
+	}
+	seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+
+	mutex_unlock(&dev_priv->rps.hw_lock);
+unlock:
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+
 static int i915_llc(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = m->private;
@@ -2287,9 +2358,6 @@
 		}
 	seq_puts(m, "\n");
 
-	seq_printf(m, "Link standby: %s\n",
-		   yesno((bool)dev_priv->psr.link_standby));
-
 	/* CHV PSR has no kind of performance counter */
 	if (HAS_DDI(dev)) {
 		psrperf = I915_READ(EDP_PSR_PERF_CNT(dev)) &
@@ -4470,12 +4538,116 @@
 			i915_cache_sharing_get, i915_cache_sharing_set,
 			"%llu\n");
 
+struct sseu_dev_status {
+	unsigned int slice_total;
+	unsigned int subslice_total;
+	unsigned int subslice_per_slice;
+	unsigned int eu_total;
+	unsigned int eu_per_subslice;
+};
+
+static void cherryview_sseu_device_status(struct drm_device *dev,
+					  struct sseu_dev_status *stat)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	const int ss_max = 2;
+	int ss;
+	u32 sig1[ss_max], sig2[ss_max];
+
+	sig1[0] = I915_READ(CHV_POWER_SS0_SIG1);
+	sig1[1] = I915_READ(CHV_POWER_SS1_SIG1);
+	sig2[0] = I915_READ(CHV_POWER_SS0_SIG2);
+	sig2[1] = I915_READ(CHV_POWER_SS1_SIG2);
+
+	for (ss = 0; ss < ss_max; ss++) {
+		unsigned int eu_cnt;
+
+		if (sig1[ss] & CHV_SS_PG_ENABLE)
+			/* skip disabled subslice */
+			continue;
+
+		stat->slice_total = 1;
+		stat->subslice_per_slice++;
+		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
+			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
+			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
+			 ((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2);
+		stat->eu_total += eu_cnt;
+		stat->eu_per_subslice = max(stat->eu_per_subslice, eu_cnt);
+	}
+	stat->subslice_total = stat->subslice_per_slice;
+}
+
+static void gen9_sseu_device_status(struct drm_device *dev,
+				    struct sseu_dev_status *stat)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int s_max = 3, ss_max = 4;
+	int s, ss;
+	u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2];
+
+	/* BXT has a single slice and at most 3 subslices. */
+	if (IS_BROXTON(dev)) {
+		s_max = 1;
+		ss_max = 3;
+	}
+
+	for (s = 0; s < s_max; s++) {
+		s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s));
+		eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s));
+		eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s));
+	}
+
+	eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
+		     GEN9_PGCTL_SSA_EU19_ACK |
+		     GEN9_PGCTL_SSA_EU210_ACK |
+		     GEN9_PGCTL_SSA_EU311_ACK;
+	eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK |
+		     GEN9_PGCTL_SSB_EU19_ACK |
+		     GEN9_PGCTL_SSB_EU210_ACK |
+		     GEN9_PGCTL_SSB_EU311_ACK;
+
+	for (s = 0; s < s_max; s++) {
+		unsigned int ss_cnt = 0;
+
+		if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
+			/* skip disabled slice */
+			continue;
+
+		stat->slice_total++;
+
+		if (IS_SKYLAKE(dev))
+			ss_cnt = INTEL_INFO(dev)->subslice_per_slice;
+
+		for (ss = 0; ss < ss_max; ss++) {
+			unsigned int eu_cnt;
+
+			if (IS_BROXTON(dev) &&
+			    !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
+				/* skip disabled subslice */
+				continue;
+
+			if (IS_BROXTON(dev))
+				ss_cnt++;
+
+			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
+					       eu_mask[ss%2]);
+			stat->eu_total += eu_cnt;
+			stat->eu_per_subslice = max(stat->eu_per_subslice,
+						    eu_cnt);
+		}
+
+		stat->subslice_total += ss_cnt;
+		stat->subslice_per_slice = max(stat->subslice_per_slice,
+					       ss_cnt);
+	}
+}
+
 static int i915_sseu_status(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned int s_tot = 0, ss_tot = 0, ss_per = 0, eu_tot = 0, eu_per = 0;
+	struct sseu_dev_status stat;
 
 	if ((INTEL_INFO(dev)->gen < 8) || IS_BROADWELL(dev))
 		return -ENODEV;
@@ -4499,79 +4671,22 @@
 		   yesno(INTEL_INFO(dev)->has_eu_pg));
 
 	seq_puts(m, "SSEU Device Status\n");
+	memset(&stat, 0, sizeof(stat));
 	if (IS_CHERRYVIEW(dev)) {
-		const int ss_max = 2;
-		int ss;
-		u32 sig1[ss_max], sig2[ss_max];
-
-		sig1[0] = I915_READ(CHV_POWER_SS0_SIG1);
-		sig1[1] = I915_READ(CHV_POWER_SS1_SIG1);
-		sig2[0] = I915_READ(CHV_POWER_SS0_SIG2);
-		sig2[1] = I915_READ(CHV_POWER_SS1_SIG2);
-
-		for (ss = 0; ss < ss_max; ss++) {
-			unsigned int eu_cnt;
-
-			if (sig1[ss] & CHV_SS_PG_ENABLE)
-				/* skip disabled subslice */
-				continue;
-
-			s_tot = 1;
-			ss_per++;
-			eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
-				 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
-				 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
-				 ((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2);
-			eu_tot += eu_cnt;
-			eu_per = max(eu_per, eu_cnt);
-		}
-		ss_tot = ss_per;
-	} else if (IS_SKYLAKE(dev)) {
-		const int s_max = 3, ss_max = 4;
-		int s, ss;
-		u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2];
-
-		s_reg[0] = I915_READ(GEN9_SLICE0_PGCTL_ACK);
-		s_reg[1] = I915_READ(GEN9_SLICE1_PGCTL_ACK);
-		s_reg[2] = I915_READ(GEN9_SLICE2_PGCTL_ACK);
-		eu_reg[0] = I915_READ(GEN9_SLICE0_SS01_EU_PGCTL_ACK);
-		eu_reg[1] = I915_READ(GEN9_SLICE0_SS23_EU_PGCTL_ACK);
-		eu_reg[2] = I915_READ(GEN9_SLICE1_SS01_EU_PGCTL_ACK);
-		eu_reg[3] = I915_READ(GEN9_SLICE1_SS23_EU_PGCTL_ACK);
-		eu_reg[4] = I915_READ(GEN9_SLICE2_SS01_EU_PGCTL_ACK);
-		eu_reg[5] = I915_READ(GEN9_SLICE2_SS23_EU_PGCTL_ACK);
-		eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
-			     GEN9_PGCTL_SSA_EU19_ACK |
-			     GEN9_PGCTL_SSA_EU210_ACK |
-			     GEN9_PGCTL_SSA_EU311_ACK;
-		eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK |
-			     GEN9_PGCTL_SSB_EU19_ACK |
-			     GEN9_PGCTL_SSB_EU210_ACK |
-			     GEN9_PGCTL_SSB_EU311_ACK;
-
-		for (s = 0; s < s_max; s++) {
-			if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
-				/* skip disabled slice */
-				continue;
-
-			s_tot++;
-			ss_per = INTEL_INFO(dev)->subslice_per_slice;
-			ss_tot += ss_per;
-			for (ss = 0; ss < ss_max; ss++) {
-				unsigned int eu_cnt;
-
-				eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
-						       eu_mask[ss%2]);
-				eu_tot += eu_cnt;
-				eu_per = max(eu_per, eu_cnt);
-			}
-		}
+		cherryview_sseu_device_status(dev, &stat);
+	} else if (INTEL_INFO(dev)->gen >= 9) {
+		gen9_sseu_device_status(dev, &stat);
 	}
-	seq_printf(m, "  Enabled Slice Total: %u\n", s_tot);
-	seq_printf(m, "  Enabled Subslice Total: %u\n", ss_tot);
-	seq_printf(m, "  Enabled Subslice Per Slice: %u\n", ss_per);
-	seq_printf(m, "  Enabled EU Total: %u\n", eu_tot);
-	seq_printf(m, "  Enabled EU Per Subslice: %u\n", eu_per);
+	seq_printf(m, "  Enabled Slice Total: %u\n",
+		   stat.slice_total);
+	seq_printf(m, "  Enabled Subslice Total: %u\n",
+		   stat.subslice_total);
+	seq_printf(m, "  Enabled Subslice Per Slice: %u\n",
+		   stat.subslice_per_slice);
+	seq_printf(m, "  Enabled EU Total: %u\n",
+		   stat.eu_total);
+	seq_printf(m, "  Enabled EU Per Subslice: %u\n",
+		   stat.eu_per_subslice);
 
 	return 0;
 }
@@ -4691,6 +4806,7 @@
 	{"i915_ddb_info", i915_ddb_info, 0},
 	{"i915_sseu_status", i915_sseu_status, 0},
 	{"i915_drrs_status", i915_drrs_status, 0},
+	{"i915_rps_boost_info", i915_rps_boost_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
@@ -4780,3 +4896,99 @@
 		drm_debugfs_remove_files(info_list, 1, minor);
 	}
 }
+
+struct dpcd_block {
+	/* DPCD dump start address. */
+	unsigned int offset;
+	/* DPCD dump end address, inclusive. If unset, .size will be used. */
+	unsigned int end;
+	/* DPCD dump size. Used if .end is unset. If unset, defaults to 1. */
+	size_t size;
+	/* Only valid for eDP. */
+	bool edp;
+};
+
+static const struct dpcd_block i915_dpcd_debug[] = {
+	{ .offset = DP_DPCD_REV, .size = DP_RECEIVER_CAP_SIZE },
+	{ .offset = DP_PSR_SUPPORT, .end = DP_PSR_CAPS },
+	{ .offset = DP_DOWNSTREAM_PORT_0, .size = 16 },
+	{ .offset = DP_LINK_BW_SET, .end = DP_EDP_CONFIGURATION_SET },
+	{ .offset = DP_SINK_COUNT, .end = DP_ADJUST_REQUEST_LANE2_3 },
+	{ .offset = DP_SET_POWER },
+	{ .offset = DP_EDP_DPCD_REV },
+	{ .offset = DP_EDP_GENERAL_CAP_1, .end = DP_EDP_GENERAL_CAP_3 },
+	{ .offset = DP_EDP_DISPLAY_CONTROL_REGISTER, .end = DP_EDP_BACKLIGHT_FREQ_CAP_MAX_LSB },
+	{ .offset = DP_EDP_DBC_MINIMUM_BRIGHTNESS_SET, .end = DP_EDP_DBC_MAXIMUM_BRIGHTNESS_SET },
+};
+
+static int i915_dpcd_show(struct seq_file *m, void *data)
+{
+	struct drm_connector *connector = m->private;
+	struct intel_dp *intel_dp =
+		enc_to_intel_dp(&intel_attached_encoder(connector)->base);
+	uint8_t buf[16];
+	ssize_t err;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(i915_dpcd_debug); i++) {
+		const struct dpcd_block *b = &i915_dpcd_debug[i];
+		size_t size = b->end ? b->end - b->offset + 1 : (b->size ?: 1);
+
+		if (b->edp &&
+		    connector->connector_type != DRM_MODE_CONNECTOR_eDP)
+			continue;
+
+		/* low tech for now */
+		if (WARN_ON(size > sizeof(buf)))
+			continue;
+
+		err = drm_dp_dpcd_read(&intel_dp->aux, b->offset, buf, size);
+		if (err <= 0) {
+			DRM_ERROR("dpcd read (%zu bytes at %u) failed (%zd)\n",
+				  size, b->offset, err);
+			continue;
+		}
+
+		seq_printf(m, "%04x: %*ph\n", b->offset, (int) size, buf);
+	}
+
+	return 0;
+}
+
+static int i915_dpcd_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, i915_dpcd_show, inode->i_private);
+}
+
+static const struct file_operations i915_dpcd_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_dpcd_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/**
+ * i915_debugfs_connector_add - add i915 specific connector debugfs files
+ * @connector: pointer to a registered drm_connector
+ *
+ * Cleanup will be done by drm_connector_unregister() through a call to
+ * drm_debugfs_connector_remove().
+ *
+ * Returns 0 on success, negative error codes on error.
+ */
+int i915_debugfs_connector_add(struct drm_connector *connector)
+{
+	struct dentry *root = connector->debugfs_entry;
+
+	/* The connector must have been registered beforehand. */
+	if (!root)
+		return -ENODEV;
+
+	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+	    connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+		debugfs_create_file("i915_dpcd", S_IRUGO, root, connector,
+				    &i915_dpcd_fops);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 68e0c85..e44116f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -564,6 +564,140 @@
 #undef SEP_COMMA
 }
 
+static void cherryview_sseu_info_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_device_info *info;
+	u32 fuse, eu_dis;
+
+	info = (struct intel_device_info *)&dev_priv->info;
+	fuse = I915_READ(CHV_FUSE_GT);
+
+	info->slice_total = 1;
+
+	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
+		info->subslice_per_slice++;
+		eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
+				 CHV_FGT_EU_DIS_SS0_R1_MASK);
+		info->eu_total += 8 - hweight32(eu_dis);
+	}
+
+	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
+		info->subslice_per_slice++;
+		eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
+				 CHV_FGT_EU_DIS_SS1_R1_MASK);
+		info->eu_total += 8 - hweight32(eu_dis);
+	}
+
+	info->subslice_total = info->subslice_per_slice;
+	/*
+	 * CHV expected to always have a uniform distribution of EU
+	 * across subslices.
+	*/
+	info->eu_per_subslice = info->subslice_total ?
+				info->eu_total / info->subslice_total :
+				0;
+	/*
+	 * CHV supports subslice power gating on devices with more than
+	 * one subslice, and supports EU power gating on devices with
+	 * more than one EU pair per subslice.
+	*/
+	info->has_slice_pg = 0;
+	info->has_subslice_pg = (info->subslice_total > 1);
+	info->has_eu_pg = (info->eu_per_subslice > 2);
+}
+
+static void gen9_sseu_info_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_device_info *info;
+	int s_max = 3, ss_max = 4, eu_max = 8;
+	int s, ss;
+	u32 fuse2, s_enable, ss_disable, eu_disable;
+	u8 eu_mask = 0xff;
+
+	/*
+	 * BXT has a single slice. BXT also has at most 6 EU per subslice,
+	 * and therefore only the lowest 6 bits of the 8-bit EU disable
+	 * fields are valid.
+	*/
+	if (IS_BROXTON(dev)) {
+		s_max = 1;
+		eu_max = 6;
+		eu_mask = 0x3f;
+	}
+
+	info = (struct intel_device_info *)&dev_priv->info;
+	fuse2 = I915_READ(GEN8_FUSE2);
+	s_enable = (fuse2 & GEN8_F2_S_ENA_MASK) >>
+		   GEN8_F2_S_ENA_SHIFT;
+	ss_disable = (fuse2 & GEN9_F2_SS_DIS_MASK) >>
+		     GEN9_F2_SS_DIS_SHIFT;
+
+	info->slice_total = hweight32(s_enable);
+	/*
+	 * The subslice disable field is global, i.e. it applies
+	 * to each of the enabled slices.
+	*/
+	info->subslice_per_slice = ss_max - hweight32(ss_disable);
+	info->subslice_total = info->slice_total *
+			       info->subslice_per_slice;
+
+	/*
+	 * Iterate through enabled slices and subslices to
+	 * count the total enabled EU.
+	*/
+	for (s = 0; s < s_max; s++) {
+		if (!(s_enable & (0x1 << s)))
+			/* skip disabled slice */
+			continue;
+
+		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
+		for (ss = 0; ss < ss_max; ss++) {
+			int eu_per_ss;
+
+			if (ss_disable & (0x1 << ss))
+				/* skip disabled subslice */
+				continue;
+
+			eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
+						      eu_mask);
+
+			/*
+			 * Record which subslice(s) have 7 EUs. We
+			 * can tune the hash used to spread work among
+			 * subslices if they are unbalanced.
+			 */
+			if (eu_per_ss == 7)
+				info->subslice_7eu[s] |= 1 << ss;
+
+			info->eu_total += eu_per_ss;
+		}
+	}
+
+	/*
+	 * SKL is expected to always have a uniform distribution
+	 * of EU across subslices with the exception that any one
+	 * EU in any one subslice may be fused off for die
+	 * recovery. BXT is expected to be perfectly uniform in EU
+	 * distribution.
+	 */
+	info->eu_per_subslice = info->subslice_total ?
+				DIV_ROUND_UP(info->eu_total,
+					     info->subslice_total) : 0;
+	/*
+	 * SKL supports slice power gating on devices with more than
+	 * one slice, and supports EU power gating on devices with
+	 * more than one EU pair per subslice. BXT supports subslice
+	 * power gating on devices with more than one subslice, and
+	 * supports EU power gating on devices with more than one EU
+	 * pair per subslice.
+	 */
+	info->has_slice_pg = (IS_SKYLAKE(dev) && (info->slice_total > 1));
+	info->has_subslice_pg = (IS_BROXTON(dev) && (info->subslice_total > 1));
+	info->has_eu_pg = (info->eu_per_subslice > 2);
+}
+
 /*
  * Determine various intel_device_info fields at runtime.
  *
@@ -585,7 +719,11 @@
 
 	info = (struct intel_device_info *)&dev_priv->info;
 
-	if (IS_VALLEYVIEW(dev) || INTEL_INFO(dev)->gen == 9)
+	if (IS_BROXTON(dev)) {
+		info->num_sprites[PIPE_A] = 3;
+		info->num_sprites[PIPE_B] = 3;
+		info->num_sprites[PIPE_C] = 2;
+	} else if (IS_VALLEYVIEW(dev) || INTEL_INFO(dev)->gen == 9)
 		for_each_pipe(dev_priv, pipe)
 			info->num_sprites[pipe] = 2;
 	else
@@ -620,116 +758,11 @@
 	}
 
 	/* Initialize slice/subslice/EU info */
-	if (IS_CHERRYVIEW(dev)) {
-		u32 fuse, eu_dis;
+	if (IS_CHERRYVIEW(dev))
+		cherryview_sseu_info_init(dev);
+	else if (INTEL_INFO(dev)->gen >= 9)
+		gen9_sseu_info_init(dev);
 
-		fuse = I915_READ(CHV_FUSE_GT);
-
-		info->slice_total = 1;
-
-		if (!(fuse & CHV_FGT_DISABLE_SS0)) {
-			info->subslice_per_slice++;
-			eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
-					 CHV_FGT_EU_DIS_SS0_R1_MASK);
-			info->eu_total += 8 - hweight32(eu_dis);
-		}
-
-		if (!(fuse & CHV_FGT_DISABLE_SS1)) {
-			info->subslice_per_slice++;
-			eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
-					CHV_FGT_EU_DIS_SS1_R1_MASK);
-			info->eu_total += 8 - hweight32(eu_dis);
-		}
-
-		info->subslice_total = info->subslice_per_slice;
-		/*
-		 * CHV expected to always have a uniform distribution of EU
-		 * across subslices.
-		*/
-		info->eu_per_subslice = info->subslice_total ?
-					info->eu_total / info->subslice_total :
-					0;
-		/*
-		 * CHV supports subslice power gating on devices with more than
-		 * one subslice, and supports EU power gating on devices with
-		 * more than one EU pair per subslice.
-		*/
-		info->has_slice_pg = 0;
-		info->has_subslice_pg = (info->subslice_total > 1);
-		info->has_eu_pg = (info->eu_per_subslice > 2);
-	} else if (IS_SKYLAKE(dev)) {
-		const int s_max = 3, ss_max = 4, eu_max = 8;
-		int s, ss;
-		u32 fuse2, eu_disable[s_max], s_enable, ss_disable;
-
-		fuse2 = I915_READ(GEN8_FUSE2);
-		s_enable = (fuse2 & GEN8_F2_S_ENA_MASK) >>
-			   GEN8_F2_S_ENA_SHIFT;
-		ss_disable = (fuse2 & GEN9_F2_SS_DIS_MASK) >>
-			     GEN9_F2_SS_DIS_SHIFT;
-
-		eu_disable[0] = I915_READ(GEN8_EU_DISABLE0);
-		eu_disable[1] = I915_READ(GEN8_EU_DISABLE1);
-		eu_disable[2] = I915_READ(GEN8_EU_DISABLE2);
-
-		info->slice_total = hweight32(s_enable);
-		/*
-		 * The subslice disable field is global, i.e. it applies
-		 * to each of the enabled slices.
-		*/
-		info->subslice_per_slice = ss_max - hweight32(ss_disable);
-		info->subslice_total = info->slice_total *
-				       info->subslice_per_slice;
-
-		/*
-		 * Iterate through enabled slices and subslices to
-		 * count the total enabled EU.
-		*/
-		for (s = 0; s < s_max; s++) {
-			if (!(s_enable & (0x1 << s)))
-				/* skip disabled slice */
-				continue;
-
-			for (ss = 0; ss < ss_max; ss++) {
-				u32 n_disabled;
-
-				if (ss_disable & (0x1 << ss))
-					/* skip disabled subslice */
-					continue;
-
-				n_disabled = hweight8(eu_disable[s] >>
-						      (ss * eu_max));
-
-				/*
-				 * Record which subslice(s) has(have) 7 EUs. we
-				 * can tune the hash used to spread work among
-				 * subslices if they are unbalanced.
-				 */
-				if (eu_max - n_disabled == 7)
-					info->subslice_7eu[s] |= 1 << ss;
-
-				info->eu_total += eu_max - n_disabled;
-			}
-		}
-
-		/*
-		 * SKL is expected to always have a uniform distribution
-		 * of EU across subslices with the exception that any one
-		 * EU in any one subslice may be fused off for die
-		 * recovery.
-		*/
-		info->eu_per_subslice = info->subslice_total ?
-					DIV_ROUND_UP(info->eu_total,
-						     info->subslice_total) : 0;
-		/*
-		 * SKL supports slice power gating on devices with more than
-		 * one slice, and supports EU power gating on devices with
-		 * more than one EU pair per subslice.
-		*/
-		info->has_slice_pg = (info->slice_total > 1) ? 1 : 0;
-		info->has_subslice_pg = 0;
-		info->has_eu_pg = (info->eu_per_subslice > 2) ? 1 : 0;
-	}
 	DRM_DEBUG_DRIVER("slice total: %u\n", info->slice_total);
 	DRM_DEBUG_DRIVER("subslice total: %u\n", info->subslice_total);
 	DRM_DEBUG_DRIVER("subslice per slice: %u\n", info->subslice_per_slice);
@@ -1006,8 +1039,12 @@
 put_bridge:
 	pci_dev_put(dev_priv->bridge_dev);
 free_priv:
-	if (dev_priv->slab)
-		kmem_cache_destroy(dev_priv->slab);
+	if (dev_priv->requests)
+		kmem_cache_destroy(dev_priv->requests);
+	if (dev_priv->vmas)
+		kmem_cache_destroy(dev_priv->vmas);
+	if (dev_priv->objects)
+		kmem_cache_destroy(dev_priv->objects);
 	kfree(dev_priv);
 	return ret;
 }
@@ -1072,7 +1109,6 @@
 
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_cleanup_ringbuffer(dev);
-	i915_gem_batch_pool_fini(&dev_priv->mm.batch_pool);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 	i915_gem_cleanup_stolen(dev);
@@ -1091,8 +1127,12 @@
 	if (dev_priv->regs != NULL)
 		pci_iounmap(dev->pdev, dev_priv->regs);
 
-	if (dev_priv->slab)
-		kmem_cache_destroy(dev_priv->slab);
+	if (dev_priv->requests)
+		kmem_cache_destroy(dev_priv->requests);
+	if (dev_priv->vmas)
+		kmem_cache_destroy(dev_priv->vmas);
+	if (dev_priv->objects)
+		kmem_cache_destroy(dev_priv->objects);
 
 	pci_dev_put(dev_priv->bridge_dev);
 	kfree(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c302ffb..6e9ab2fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -381,6 +381,18 @@
 	IVB_CURSOR_OFFSETS,
 };
 
+static const struct intel_device_info intel_broxton_info = {
+	.is_preliminary = 1,
+	.gen = 9,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+	.num_pipes = 3,
+	.has_ddi = 1,
+	.has_fbc = 1,
+	GEN_DEFAULT_PIPEOFFSETS,
+	IVB_CURSOR_OFFSETS,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
@@ -420,7 +432,8 @@
 	INTEL_CHV_IDS(&intel_cherryview_info),	\
 	INTEL_SKL_GT1_IDS(&intel_skylake_info),	\
 	INTEL_SKL_GT2_IDS(&intel_skylake_info),	\
-	INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info)	\
+	INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info),	\
+	INTEL_BXT_IDS(&intel_broxton_info)
 
 static const struct pci_device_id pciidlist[] = {		/* aka */
 	INTEL_PCI_IDS,
@@ -996,6 +1009,38 @@
 	return 0;
 }
 
+static int bxt_suspend_complete(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+
+	/* TODO: when DC5 support is added disable DC5 here. */
+
+	broxton_ddi_phy_uninit(dev);
+	broxton_uninit_cdclk(dev);
+	bxt_enable_dc9(dev_priv);
+
+	return 0;
+}
+
+static int bxt_resume_prepare(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+
+	/* TODO: when CSR FW support is added make sure the FW is loaded */
+
+	bxt_disable_dc9(dev_priv);
+
+	/*
+	 * TODO: when DC5 support is added enable DC5 here if the CSR FW
+	 * is available.
+	 */
+	broxton_init_cdclk(dev);
+	broxton_ddi_phy_init(dev);
+	intel_prepare_ddi(dev);
+
+	return 0;
+}
+
 /*
  * Save all Gunit registers that may be lost after a D3 and a subsequent
  * S0i[R123] transition. The list of registers needing a save/restore is
@@ -1195,7 +1240,21 @@
 	u32 val;
 	int err;
 
+	val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
+
 #define COND (I915_READ(VLV_GTLC_SURVIVABILITY_REG) & VLV_GFX_CLK_STATUS_BIT)
+	/* Wait for a previous force-off to settle */
+	if (force_on && !IS_CHERRYVIEW(dev_priv->dev)) {
+		/* WARN_ON only for Valleyview */
+		WARN_ON(!!(val & VLV_GFX_CLK_FORCE_ON_BIT) == force_on);
+
+		err = wait_for(!COND, 20);
+		if (err) {
+			DRM_ERROR("timeout waiting for GFX clock force-off (%08x)\n",
+				  I915_READ(VLV_GTLC_SURVIVABILITY_REG));
+			return err;
+		}
+	}
 
 	val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
 	val &= ~VLV_GFX_CLK_FORCE_ON_BIT;
@@ -1454,6 +1513,9 @@
 
 	if (IS_GEN6(dev_priv))
 		intel_init_pch_refclk(dev);
+
+	if (IS_BROXTON(dev))
+		ret = bxt_resume_prepare(dev_priv);
 	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		hsw_disable_pc8(dev_priv);
 	else if (IS_VALLEYVIEW(dev_priv))
@@ -1486,7 +1548,9 @@
 	struct drm_device *dev = dev_priv->dev;
 	int ret;
 
-	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+	if (IS_BROXTON(dev))
+		ret = bxt_suspend_complete(dev_priv);
+	else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		ret = hsw_suspend_complete(dev_priv);
 	else if (IS_VALLEYVIEW(dev))
 		ret = vlv_suspend_complete(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8ae6f7f..e8e8145 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -56,7 +56,7 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20150327"
+#define DRIVER_DATE		"20150423"
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -130,7 +130,7 @@
  *
  * This value doesn't count the cursor plane.
  */
-#define I915_MAX_PLANES	3
+#define I915_MAX_PLANES	4
 
 enum plane {
 	PLANE_A = 0,
@@ -251,7 +251,6 @@
 			    &dev->mode_config.connector_list,	\
 			    base.head)
 
-
 #define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \
 	list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \
 		if ((intel_encoder)->base.crtc == (__crtc))
@@ -303,6 +302,9 @@
 	uint32_t ctrl1;
 	/* HDMI only, 0 when used for DP */
 	uint32_t cfgcr1, cfgcr2;
+
+	/* bxt */
+	uint32_t ebb0, pll0, pll1, pll2, pll3, pll6, pll8, pcsdw12;
 };
 
 struct intel_shared_dpll_config {
@@ -455,6 +457,7 @@
 		u32 semaphore_seqno[I915_NUM_RINGS - 1];
 
 		/* Register state */
+		u32 start;
 		u32 tail;
 		u32 head;
 		u32 ctl;
@@ -766,7 +769,7 @@
  *	       context).
  * @hang_stats: information about the role of this context in possible GPU
  *		hangs.
- * @vm: virtual memory space used by this context.
+ * @ppgtt: virtual memory space used by this context.
  * @legacy_hw_ctx: render context backing object and whether it is correctly
  *                initialized (legacy ring submission mechanism only).
  * @link: link in the global list of contexts.
@@ -880,7 +883,8 @@
 	bool active;
 	struct delayed_work work;
 	unsigned busy_frontbuffer_bits;
-	bool link_standby;
+	bool psr2_support;
+	bool aux_frame_sync;
 };
 
 enum intel_pch {
@@ -1034,11 +1038,16 @@
 	u8 rp0_freq;		/* Non-overclocked max frequency. */
 	u32 cz_freq;
 
+	u8 up_threshold; /* Current %busy required to upclock */
+	u8 down_threshold; /* Current %busy required to downclock */
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
 	bool enabled;
 	struct delayed_work delayed_resume_work;
+	struct list_head clients;
+	unsigned boosts;
 
 	/* manual wa residency calculations */
 	struct intel_rps_ei up_ei, down_ei;
@@ -1136,11 +1145,6 @@
 	int which_slice;
 };
 
-struct i915_gem_batch_pool {
-	struct drm_device *dev;
-	struct list_head cache_list;
-};
-
 struct i915_gem_mm {
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
@@ -1154,13 +1158,6 @@
 	 */
 	struct list_head unbound_list;
 
-	/*
-	 * A pool of objects to use as shadow copies of client batch buffers
-	 * when the command parser is enabled. Prevents the client from
-	 * modifying the batch contents after software parsing.
-	 */
-	struct i915_gem_batch_pool batch_pool;
-
 	/** Usable portion of the GTT for GEM */
 	unsigned long stolen_base; /* limited to low memory (32-bit) */
 
@@ -1563,7 +1560,9 @@
 
 struct drm_i915_private {
 	struct drm_device *dev;
-	struct kmem_cache *slab;
+	struct kmem_cache *objects;
+	struct kmem_cache *vmas;
+	struct kmem_cache *requests;
 
 	const struct intel_device_info info;
 
@@ -1575,8 +1574,7 @@
 
 	struct i915_virtual_gpu vgpu;
 
-	struct intel_gmbus gmbus[GMBUS_NUM_PORTS];
-
+	struct intel_gmbus gmbus[GMBUS_NUM_PINS];
 
 	/** gmbus_mutex protects against concurrent usage of the single hw gmbus
 	 * controller on different i2c buses. */
@@ -1661,7 +1659,7 @@
 	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
 
 	unsigned int fsb_freq, mem_freq, is_ddr3;
-	unsigned int vlv_cdclk_freq;
+	unsigned int cdclk_freq;
 	unsigned int hpll_freq;
 
 	/**
@@ -1815,20 +1813,18 @@
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
-		int (*do_execbuf)(struct drm_device *dev, struct drm_file *file,
-				  struct intel_engine_cs *ring,
-				  struct intel_context *ctx,
-				  struct drm_i915_gem_execbuffer2 *args,
-				  struct list_head *vmas,
-				  struct drm_i915_gem_object *batch_obj,
-				  u64 exec_start, u32 flags);
+		int (*execbuf_submit)(struct drm_device *dev, struct drm_file *file,
+				      struct intel_engine_cs *ring,
+				      struct intel_context *ctx,
+				      struct drm_i915_gem_execbuffer2 *args,
+				      struct list_head *vmas,
+				      struct drm_i915_gem_object *batch_obj,
+				      u64 exec_start, u32 flags);
 		int (*init_rings)(struct drm_device *dev);
 		void (*cleanup_ring)(struct intel_engine_cs *ring);
 		void (*stop_ring)(struct intel_engine_cs *ring);
 	} gt;
 
-	uint32_t request_uniq;
-
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
@@ -1917,7 +1913,7 @@
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
-	struct list_head batch_pool_list;
+	struct list_head batch_pool_link;
 
 	/**
 	 * This is set if the object is on the active lists (has pending
@@ -1969,8 +1965,6 @@
 	 * accurate mappable working set.
 	 */
 	unsigned int fault_mappable:1;
-	unsigned int pin_mappable:1;
-	unsigned int pin_display:1;
 
 	/*
 	 * Is the object to be mapped as read-only to the GPU
@@ -1984,8 +1978,14 @@
 
 	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
 
+	unsigned int pin_display;
+
 	struct sg_table *pages;
 	int pages_pin_count;
+	struct get_page {
+		struct scatterlist *sg;
+		int last;
+	} get_page;
 
 	/* prime dma-buf support */
 	void *dma_buf_vmapping;
@@ -2046,6 +2046,7 @@
 	struct kref ref;
 
 	/** On Which ring this request was generated */
+	struct drm_i915_private *i915;
 	struct intel_engine_cs *ring;
 
 	/** GEM sequence number associated with this request. */
@@ -2093,8 +2094,6 @@
 	/** process identifier submitting this request */
 	struct pid *pid;
 
-	uint32_t uniq;
-
 	/**
 	 * The ELSP only accepts two elements at a time, so we queue
 	 * context/tail pairs on a given queue (ring->execlist_queue) until the
@@ -2116,6 +2115,8 @@
 
 };
 
+int i915_gem_request_alloc(struct intel_engine_cs *ring,
+			   struct intel_context *ctx);
 void i915_gem_request_free(struct kref *req_ref);
 
 static inline uint32_t
@@ -2143,6 +2144,19 @@
 	kref_put(&req->ref, i915_gem_request_free);
 }
 
+static inline void
+i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
+{
+	struct drm_device *dev;
+
+	if (!req)
+		return;
+
+	dev = req->ring->dev;
+	if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex))
+		mutex_unlock(&dev->struct_mutex);
+}
+
 static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 					   struct drm_i915_gem_request *src)
 {
@@ -2168,12 +2182,13 @@
 	struct {
 		spinlock_t lock;
 		struct list_head request_list;
-		struct delayed_work idle_work;
 	} mm;
 	struct idr context_idr;
 
-	atomic_t rps_wait_boost;
-	struct  intel_engine_cs *bsd_ring;
+	struct list_head rps_boost;
+	struct intel_engine_cs *bsd_ring;
+
+	unsigned rps_boosts;
 };
 
 /*
@@ -2307,6 +2322,7 @@
 #define IS_HASWELL(dev)	(INTEL_INFO(dev)->is_haswell)
 #define IS_BROADWELL(dev)	(!INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev))
 #define IS_SKYLAKE(dev)	(INTEL_INFO(dev)->is_skylake)
+#define IS_BROXTON(dev)	(!INTEL_INFO(dev)->is_skylake && IS_GEN9(dev))
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
 #define IS_HSW_EARLY_SDV(dev)	(IS_HASWELL(dev) && \
 				 (INTEL_DEVID(dev) & 0xFF00) == 0x0C00)
@@ -2331,6 +2347,10 @@
 #define SKL_REVID_D0		(0x3)
 #define SKL_REVID_E0		(0x4)
 
+#define BXT_REVID_A0		(0x0)
+#define BXT_REVID_B0		(0x3)
+#define BXT_REVID_C0		(0x6)
+
 /*
  * The genX designation typically refers to the render engine, so render
  * capability related checks should use IS_GEN, while display and other checks
@@ -2520,6 +2540,13 @@
 				enum forcewake_domains domains);
 void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
 				enum forcewake_domains domains);
+/* Like above but the caller must manage the uncore.lock itself.
+ * Must be used with I915_READ_FW and friends.
+ */
+void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv,
+					enum forcewake_domains domains);
+void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv,
+					enum forcewake_domains domains);
 void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
 static inline bool intel_vgpu_active(struct drm_device *dev)
 {
@@ -2614,10 +2641,13 @@
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
-#define PIN_MAPPABLE 0x1
-#define PIN_NONBLOCK 0x2
-#define PIN_GLOBAL 0x4
-#define PIN_OFFSET_BIAS 0x8
+/* Flags used by pin/bind&friends. */
+#define PIN_MAPPABLE	(1<<0)
+#define PIN_NONBLOCK	(1<<1)
+#define PIN_GLOBAL	(1<<2)
+#define PIN_OFFSET_BIAS	(1<<3)
+#define PIN_USER	(1<<4)
+#define PIN_UPDATE	(1<<5)
 #define PIN_OFFSET_MASK (~4095)
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
@@ -2641,15 +2671,32 @@
 				    int *needs_clflush);
 
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
-static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
+
+static inline int __sg_page_count(struct scatterlist *sg)
 {
-	struct sg_page_iter sg_iter;
-
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, n)
-		return sg_page_iter_page(&sg_iter);
-
-	return NULL;
+	return sg->length >> PAGE_SHIFT;
 }
+
+static inline struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
+{
+	if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
+		return NULL;
+
+	if (n < obj->get_page.last) {
+		obj->get_page.sg = obj->pages->sgl;
+		obj->get_page.last = 0;
+	}
+
+	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
+		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
+		if (unlikely(sg_is_chain(obj->get_page.sg)))
+			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
+	}
+
+	return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
+}
+
 static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
 	BUG_ON(obj->pages == NULL);
@@ -2993,8 +3040,10 @@
 int i915_debugfs_init(struct drm_minor *minor);
 void i915_debugfs_cleanup(struct drm_minor *minor);
 #ifdef CONFIG_DEBUG_FS
+int i915_debugfs_connector_add(struct drm_connector *connector);
 void intel_display_crc_init(struct drm_device *dev);
 #else
+static inline int i915_debugfs_connector_add(struct drm_connector *connector) { return 0; }
 static inline void intel_display_crc_init(struct drm_device *dev) {}
 #endif
 
@@ -3021,13 +3070,6 @@
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
-/* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
-			      struct i915_gem_batch_pool *pool);
-void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
-struct drm_i915_gem_object*
-i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
-
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
@@ -3051,13 +3093,11 @@
 /* intel_i2c.c */
 extern int intel_setup_gmbus(struct drm_device *dev);
 extern void intel_teardown_gmbus(struct drm_device *dev);
-static inline bool intel_gmbus_is_port_valid(unsigned port)
-{
-	return (port >= GMBUS_PORT_SSC && port <= GMBUS_PORT_DPD);
-}
+extern bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
+				     unsigned int pin);
 
-extern struct i2c_adapter *intel_gmbus_get_adapter(
-		struct drm_i915_private *dev_priv, unsigned port);
+extern struct i2c_adapter *
+intel_gmbus_get_adapter(struct drm_i915_private *dev_priv, unsigned int pin);
 extern void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed);
 extern void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit);
 static inline bool intel_gmbus_is_forced_bit(struct i2c_adapter *adapter)
@@ -3203,6 +3243,17 @@
 #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
+/* These are untraced mmio-accessors that are only valid to be used inside
+ * critical sections inside IRQ handlers where forcewake is explicitly
+ * controlled.
+ * Think twice, and think again, before using these.
+ * Note: Should only be used between intel_uncore_forcewake_irqlock() and
+ * intel_uncore_forcewake_irqunlock().
+ */
+#define I915_READ_FW(reg__) readl(dev_priv->regs + (reg__))
+#define I915_WRITE_FW(reg__, val__) writel(val__, dev_priv->regs + (reg__))
+#define POSTING_READ_FW(reg__) (void)I915_READ_FW(reg__)
+
 /* "Broadcast RGB" property */
 #define INTEL_BROADCAST_RGB_AUTO 0
 #define INTEL_BROADCAST_RGB_FULL 1
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53394f9..e8f6f4c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -378,13 +378,13 @@
 void *i915_gem_object_alloc(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL);
+	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
 }
 
 void i915_gem_object_free(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	kmem_cache_free(dev_priv->slab, obj);
+	kmem_cache_free(dev_priv->objects, obj);
 }
 
 static int
@@ -1181,12 +1181,27 @@
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static bool can_wait_boost(struct drm_i915_file_private *file_priv)
+static int __i915_spin_request(struct drm_i915_gem_request *rq)
 {
-	if (file_priv == NULL)
-		return true;
+	unsigned long timeout;
 
-	return !atomic_xchg(&file_priv->rps_wait_boost, true);
+	if (i915_gem_request_get_ring(rq)->irq_refcount)
+		return -EBUSY;
+
+	timeout = jiffies + 1;
+	while (!need_resched()) {
+		if (i915_gem_request_completed(rq, true))
+			return 0;
+
+		if (time_after_eq(jiffies, timeout))
+			break;
+
+		cpu_relax_lowlatency();
+	}
+	if (i915_gem_request_completed(rq, false))
+		return 0;
+
+	return -EAGAIN;
 }
 
 /**
@@ -1230,20 +1245,23 @@
 	timeout_expire = timeout ?
 		jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
 
-	if (INTEL_INFO(dev)->gen >= 6 && ring->id == RCS && can_wait_boost(file_priv)) {
-		gen6_rps_boost(dev_priv);
-		if (file_priv)
-			mod_delayed_work(dev_priv->wq,
-					 &file_priv->mm.idle_work,
-					 msecs_to_jiffies(100));
-	}
-
-	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
-		return -ENODEV;
+	if (INTEL_INFO(dev)->gen >= 6)
+		gen6_rps_boost(dev_priv, file_priv);
 
 	/* Record current time in case interrupted by signal, or wedged */
 	trace_i915_gem_request_wait_begin(req);
 	before = ktime_get_raw_ns();
+
+	/* Optimistic spin for the next jiffie before touching IRQs */
+	ret = __i915_spin_request(req);
+	if (ret == 0)
+		goto out;
+
+	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
+		ret = -ENODEV;
+		goto out;
+	}
+
 	for (;;) {
 		struct timer_list timer;
 
@@ -1292,14 +1310,15 @@
 			destroy_timer_on_stack(&timer);
 		}
 	}
-	now = ktime_get_raw_ns();
-	trace_i915_gem_request_wait_end(req);
-
 	if (!irq_test_in_progress)
 		ring->irq_put(ring);
 
 	finish_wait(&ring->irq_queue, &wait);
 
+out:
+	now = ktime_get_raw_ns();
+	trace_i915_gem_request_wait_end(req);
+
 	if (timeout) {
 		s64 tres = *timeout - (now - before);
 
@@ -2178,6 +2197,10 @@
 		return ret;
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+
+	obj->get_page.sg = obj->pages->sgl;
+	obj->get_page.last = 0;
+
 	return 0;
 }
 
@@ -2421,7 +2444,6 @@
 
 	i915_queue_hangcheck(ring->dev);
 
-	cancel_delayed_work_sync(&dev_priv->mm.idle_work);
 	queue_delayed_work(dev_priv->wq,
 			   &dev_priv->mm.retire_work,
 			   round_jiffies_up_relative(HZ));
@@ -2516,7 +2538,45 @@
 		i915_gem_context_unreference(ctx);
 	}
 
-	kfree(req);
+	kmem_cache_free(req->i915->requests, req);
+}
+
+int i915_gem_request_alloc(struct intel_engine_cs *ring,
+			   struct intel_context *ctx)
+{
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	struct drm_i915_gem_request *rq;
+	int ret;
+
+	if (ring->outstanding_lazy_request)
+		return 0;
+
+	rq = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+	if (rq == NULL)
+		return -ENOMEM;
+
+	kref_init(&rq->ref);
+	rq->i915 = dev_priv;
+
+	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
+	if (ret) {
+		kfree(rq);
+		return ret;
+	}
+
+	rq->ring = ring;
+
+	if (i915.enable_execlists)
+		ret = intel_logical_ring_alloc_request_extras(rq, ctx);
+	else
+		ret = intel_ring_alloc_request_extras(rq);
+	if (ret) {
+		kfree(rq);
+		return ret;
+	}
+
+	ring->outstanding_lazy_request = rq;
+	return 0;
 }
 
 struct drm_i915_gem_request *
@@ -2578,7 +2638,6 @@
 				struct drm_i915_gem_request,
 				execlist_link);
 		list_del(&submit_req->execlist_link);
-		intel_runtime_pm_put(dev_priv);
 
 		if (submit_req->ctx != ring->default_context)
 			intel_lr_context_unpin(ring, submit_req->ctx);
@@ -2768,8 +2827,25 @@
 {
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), mm.idle_work.work);
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_engine_cs *ring;
+	int i;
 
-	intel_mark_idle(dev_priv->dev);
+	for_each_ring(ring, dev_priv, i)
+		if (!list_empty(&ring->request_list))
+			return;
+
+	intel_mark_idle(dev);
+
+	if (mutex_trylock(&dev->struct_mutex)) {
+		struct intel_engine_cs *ring;
+		int i;
+
+		for_each_ring(ring, dev_priv, i)
+			i915_gem_batch_pool_fini(&ring->batch_pool);
+
+		mutex_unlock(&dev->struct_mutex);
+	}
 }
 
 /**
@@ -2867,9 +2943,7 @@
 	ret = __i915_wait_request(req, reset_counter, true,
 				  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
 				  file->driver_priv);
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_request_unreference(req);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_request_unreference__unlocked(req);
 	return ret;
 
 out:
@@ -2994,7 +3068,7 @@
 
 	trace_i915_vma_unbind(vma);
 
-	vma->unbind_vma(vma);
+	vma->vm->unbind_vma(vma);
 
 	list_del_init(&vma->mm_list);
 	if (i915_is_ggtt(vma->vm)) {
@@ -3515,20 +3589,8 @@
 	if (ret)
 		goto err_remove_node;
 
-	/*  allocate before insert / bind */
-	if (vma->vm->allocate_va_range) {
-		trace_i915_va_alloc(vma->vm, vma->node.start, vma->node.size,
-				VM_TO_TRACE_NAME(vma->vm));
-		ret = vma->vm->allocate_va_range(vma->vm,
-						vma->node.start,
-						vma->node.size);
-		if (ret)
-			goto err_remove_node;
-	}
-
 	trace_i915_vma_bind(vma, flags);
-	ret = i915_vma_bind(vma, obj->cache_level,
-			    flags & PIN_GLOBAL ? GLOBAL_BIND : 0);
+	ret = i915_vma_bind(vma, obj->cache_level, flags);
 	if (ret)
 		goto err_finish_gtt;
 
@@ -3754,7 +3816,7 @@
 		list_for_each_entry(vma, &obj->vma_list, vma_link)
 			if (drm_mm_node_allocated(&vma->node)) {
 				ret = i915_vma_bind(vma, cache_level,
-						    vma->bound & GLOBAL_BIND);
+						    PIN_UPDATE);
 				if (ret)
 					return ret;
 			}
@@ -3852,24 +3914,6 @@
 	return ret;
 }
 
-static bool is_pin_display(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-
-	vma = i915_gem_obj_to_ggtt(obj);
-	if (!vma)
-		return false;
-
-	/* There are 2 sources that pin objects:
-	 *   1. The display engine (scanouts, sprites, cursors);
-	 *   2. Reservations for execbuffer;
-	 *
-	 * We can ignore reservations as we hold the struct_mutex and
-	 * are only called outside of the reservation path.
-	 */
-	return vma->pin_count;
-}
-
 /*
  * Prepare buffer for display plane (scanout, cursors, etc).
  * Can be called from an uninterruptible phase (modesetting) and allows
@@ -3882,7 +3926,6 @@
 				     const struct i915_ggtt_view *view)
 {
 	u32 old_read_domains, old_write_domain;
-	bool was_pin_display;
 	int ret;
 
 	if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
@@ -3894,8 +3937,7 @@
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
 	 */
-	was_pin_display = obj->pin_display;
-	obj->pin_display = true;
+	obj->pin_display++;
 
 	/* The display engine is not coherent with the LLC cache on gen6.  As
 	 * a result, we make sure that the pinning that is about to occur is
@@ -3939,8 +3981,7 @@
 	return 0;
 
 err_unpin_display:
-	WARN_ON(was_pin_display != is_pin_display(obj));
-	obj->pin_display = was_pin_display;
+	obj->pin_display--;
 	return ret;
 }
 
@@ -3948,9 +3989,12 @@
 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
 					 const struct i915_ggtt_view *view)
 {
+	if (WARN_ON(obj->pin_display == 0))
+		return;
+
 	i915_gem_object_ggtt_unpin_view(obj, view);
 
-	obj->pin_display = is_pin_display(obj);
+	obj->pin_display--;
 }
 
 int
@@ -4072,9 +4116,7 @@
 	if (ret == 0)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_request_unreference(target);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_request_unreference__unlocked(target);
 
 	return ret;
 }
@@ -4155,18 +4197,12 @@
 
 	bound = vma ? vma->bound : 0;
 	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		/* In true PPGTT, bind has possibly changed PDEs, which
-		 * means we must do a context switch before the GPU can
-		 * accurately read some of the VMAs.
-		 */
 		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
 						 flags);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
-	}
-
-	if (flags & PIN_GLOBAL && !(vma->bound & GLOBAL_BIND)) {
-		ret = i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
+	} else {
+		ret = i915_vma_bind(vma, obj->cache_level, flags);
 		if (ret)
 			return ret;
 	}
@@ -4195,9 +4231,6 @@
 	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 
 	vma->pin_count++;
-	if (flags & PIN_MAPPABLE)
-		obj->pin_mappable |= true;
-
 	return 0;
 }
 
@@ -4235,8 +4268,7 @@
 	WARN_ON(vma->pin_count == 0);
 	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
 
-	if (--vma->pin_count == 0 && view->type == I915_GGTT_VIEW_NORMAL)
-		obj->pin_mappable = false;
+	--vma->pin_count;
 }
 
 bool
@@ -4375,7 +4407,7 @@
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
-	INIT_LIST_HEAD(&obj->batch_pool_list);
+	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
@@ -4577,7 +4609,7 @@
 
 	list_del(&vma->vma_link);
 
-	kfree(vma);
+	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 }
 
 static void
@@ -4864,12 +4896,12 @@
 	}
 
 	if (!i915.enable_execlists) {
-		dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission;
+		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
 		dev_priv->gt.init_rings = i915_gem_init_rings;
 		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
 		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
 	} else {
-		dev_priv->gt.do_execbuf = intel_execlists_submission;
+		dev_priv->gt.execbuf_submit = intel_execlists_submission;
 		dev_priv->gt.init_rings = intel_logical_rings_init;
 		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
 		dev_priv->gt.stop_ring = intel_logical_ring_stop;
@@ -4951,11 +4983,21 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int i;
 
-	dev_priv->slab =
+	dev_priv->objects =
 		kmem_cache_create("i915_gem_object",
 				  sizeof(struct drm_i915_gem_object), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
+	dev_priv->vmas =
+		kmem_cache_create("i915_gem_vma",
+				  sizeof(struct i915_vma), 0,
+				  SLAB_HWCACHE_ALIGN,
+				  NULL);
+	dev_priv->requests =
+		kmem_cache_create("i915_gem_request",
+				  sizeof(struct drm_i915_gem_request), 0,
+				  SLAB_HWCACHE_ALIGN,
+				  NULL);
 
 	INIT_LIST_HEAD(&dev_priv->vm_list);
 	i915_init_vm(dev_priv, &dev_priv->gtt.base);
@@ -4998,8 +5040,6 @@
 
 	i915_gem_shrinker_init(dev_priv);
 
-	i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool);
-
 	mutex_init(&dev_priv->fb_tracking.lock);
 }
 
@@ -5007,8 +5047,6 @@
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	cancel_delayed_work_sync(&file_priv->mm.idle_work);
-
 	/* Clean up our request list when the client is going away, so that
 	 * later retire_requests won't dereference our soon-to-be-gone
 	 * file_priv.
@@ -5024,15 +5062,12 @@
 		request->file_priv = NULL;
 	}
 	spin_unlock(&file_priv->mm.lock);
-}
 
-static void
-i915_gem_file_idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_file_private *file_priv =
-		container_of(work, typeof(*file_priv), mm.idle_work.work);
-
-	atomic_set(&file_priv->rps_wait_boost, false);
+	if (!list_empty(&file_priv->rps_boost)) {
+		mutex_lock(&to_i915(dev)->rps.hw_lock);
+		list_del(&file_priv->rps_boost);
+		mutex_unlock(&to_i915(dev)->rps.hw_lock);
+	}
 }
 
 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
@@ -5049,11 +5084,10 @@
 	file->driver_priv = file_priv;
 	file_priv->dev_priv = dev->dev_private;
 	file_priv->file = file;
+	INIT_LIST_HEAD(&file_priv->rps_boost);
 
 	spin_lock_init(&file_priv->mm.lock);
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
-	INIT_DELAYED_WORK(&file_priv->mm.idle_work,
-			  i915_gem_file_idle_work_handler);
 
 	ret = i915_gem_context_open(dev, file);
 	if (ret)
@@ -5123,7 +5157,7 @@
 		    i915_ggtt_view_equal(&vma->ggtt_view, view))
 			return vma->node.start;
 
-	WARN(1, "global vma for this object not found.\n");
+	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
 	return -1;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index c690170..7bf2f3f 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -23,6 +23,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_gem_batch_pool.h"
 
 /**
  * DOC: batch pool
@@ -46,8 +47,12 @@
 void i915_gem_batch_pool_init(struct drm_device *dev,
 			      struct i915_gem_batch_pool *pool)
 {
+	int n;
+
 	pool->dev = dev;
-	INIT_LIST_HEAD(&pool->cache_list);
+
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
+		INIT_LIST_HEAD(&pool->cache_list[n]);
 }
 
 /**
@@ -58,33 +63,35 @@
  */
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 {
+	int n;
+
 	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
 
-	while (!list_empty(&pool->cache_list)) {
-		struct drm_i915_gem_object *obj =
-			list_first_entry(&pool->cache_list,
-					 struct drm_i915_gem_object,
-					 batch_pool_list);
+	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+		while (!list_empty(&pool->cache_list[n])) {
+			struct drm_i915_gem_object *obj =
+				list_first_entry(&pool->cache_list[n],
+						 struct drm_i915_gem_object,
+						 batch_pool_link);
 
-		WARN_ON(obj->active);
-
-		list_del_init(&obj->batch_pool_list);
-		drm_gem_object_unreference(&obj->base);
+			list_del(&obj->batch_pool_link);
+			drm_gem_object_unreference(&obj->base);
+		}
 	}
 }
 
 /**
- * i915_gem_batch_pool_get() - select a buffer from the pool
+ * i915_gem_batch_pool_get() - allocate a buffer from the pool
  * @pool: the batch buffer pool
  * @size: the minimum desired size of the returned buffer
  *
- * Finds or allocates a batch buffer in the pool with at least the requested
- * size. The caller is responsible for any domain, active/inactive, or
- * purgeability management for the returned buffer.
+ * Returns an inactive buffer from @pool with at least @size bytes,
+ * with the pages pinned. The caller must call
+ * i915_gem_object_unpin_pages() on the returned object.
  *
  * Note: Callers must hold the struct_mutex
  *
- * Return: the selected batch buffer object
+ * Return: the buffer object or an error pointer
  */
 struct drm_i915_gem_object *
 i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
@@ -92,46 +99,53 @@
 {
 	struct drm_i915_gem_object *obj = NULL;
 	struct drm_i915_gem_object *tmp, *next;
+	struct list_head *list;
+	int n;
 
 	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
 
-	list_for_each_entry_safe(tmp, next,
-			&pool->cache_list, batch_pool_list) {
+	/* Compute a power-of-two bucket, but throw everything greater than
+	 * 16KiB into the same bucket: i.e. the buckets hold objects of
+	 * (1 page, 2 pages, 4 pages, 8+ pages).
+	 */
+	n = fls(size >> PAGE_SHIFT) - 1;
+	if (n >= ARRAY_SIZE(pool->cache_list))
+		n = ARRAY_SIZE(pool->cache_list) - 1;
+	list = &pool->cache_list[n];
 
+	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
+		/* The batches are strictly LRU ordered */
 		if (tmp->active)
-			continue;
+			break;
 
 		/* While we're looping, do some clean up */
 		if (tmp->madv == __I915_MADV_PURGED) {
-			list_del(&tmp->batch_pool_list);
+			list_del(&tmp->batch_pool_link);
 			drm_gem_object_unreference(&tmp->base);
 			continue;
 		}
 
-		/*
-		 * Select a buffer that is at least as big as needed
-		 * but not 'too much' bigger. A better way to do this
-		 * might be to bucket the pool objects based on size.
-		 */
-		if (tmp->base.size >= size &&
-		    tmp->base.size <= (2 * size)) {
+		if (tmp->base.size >= size) {
 			obj = tmp;
 			break;
 		}
 	}
 
-	if (!obj) {
+	if (obj == NULL) {
+		int ret;
+
 		obj = i915_gem_alloc_object(pool->dev, size);
-		if (!obj)
+		if (obj == NULL)
 			return ERR_PTR(-ENOMEM);
 
-		list_add_tail(&obj->batch_pool_list, &pool->cache_list);
+		ret = i915_gem_object_get_pages(obj);
+		if (ret)
+			return ERR_PTR(ret);
+
+		obj->madv = I915_MADV_DONTNEED;
 	}
-	else
-		/* Keep list in LRU order */
-		list_move_tail(&obj->batch_pool_list, &pool->cache_list);
 
-	obj->madv = I915_MADV_WILLNEED;
-
+	list_move_tail(&obj->batch_pool_link, list);
+	i915_gem_object_pin_pages(obj);
 	return obj;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
new file mode 100644
index 0000000..848e907
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_GEM_BATCH_POOL_H
+#define I915_GEM_BATCH_POOL_H
+
+#include "i915_drv.h"
+
+struct i915_gem_batch_pool {
+	struct drm_device *dev;
+	struct list_head cache_list[4];
+};
+
+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+			      struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+
+#endif /* I915_GEM_BATCH_POOL_H */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f3e84c4..5a47eb5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -157,7 +157,9 @@
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	obj = i915_gem_alloc_object(dev, size);
+	obj = i915_gem_object_create_stolen(dev, size);
+	if (obj == NULL)
+		obj = i915_gem_alloc_object(dev, size);
 	if (obj == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -573,20 +575,12 @@
 				      struct intel_context *from,
 				      struct intel_context *to)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-
 	if (to->remap_slice)
 		return false;
 
-	if (to->ppgtt) {
-		if (from == to && !test_bit(ring->id,
-				&to->ppgtt->pd_dirty_rings))
-			return true;
-	} else if (dev_priv->mm.aliasing_ppgtt) {
-		if (from == to && !test_bit(ring->id,
-				&dev_priv->mm.aliasing_ppgtt->pd_dirty_rings))
-			return true;
-	}
+	if (to->ppgtt && from == to &&
+	    !(intel_ring_flag(ring) & to->ppgtt->pd_dirty_rings))
+		return true;
 
 	return false;
 }
@@ -636,7 +630,6 @@
 	struct intel_context *from = ring->last_context;
 	u32 hw_flags = 0;
 	bool uninitialized = false;
-	struct i915_vma *vma;
 	int ret, i;
 
 	if (from != NULL && ring == &dev_priv->ring[RCS]) {
@@ -673,7 +666,7 @@
 			goto unpin_out;
 
 		/* Doing a PD load always reloads the page dirs */
-		clear_bit(ring->id, &to->ppgtt->pd_dirty_rings);
+		to->ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
 	}
 
 	if (ring != &dev_priv->ring[RCS]) {
@@ -694,16 +687,6 @@
 	if (ret)
 		goto unpin_out;
 
-	vma = i915_gem_obj_to_ggtt(to->legacy_hw_ctx.rcs_state);
-	if (!(vma->bound & GLOBAL_BIND)) {
-		ret = i915_vma_bind(vma,
-				    to->legacy_hw_ctx.rcs_state->cache_level,
-				    GLOBAL_BIND);
-		/* This shouldn't ever fail. */
-		if (WARN_ONCE(ret, "GGTT context bind failed!"))
-			goto unpin_out;
-	}
-
 	if (!to->legacy_hw_ctx.initialized) {
 		hw_flags |= MI_RESTORE_INHIBIT;
 		/* NB: If we inhibit the restore, the context is not allowed to
@@ -711,12 +694,14 @@
 		 * space. This means we must enforce that a page table load
 		 * occur when this occurs. */
 	} else if (to->ppgtt &&
-			test_and_clear_bit(ring->id, &to->ppgtt->pd_dirty_rings))
+		   (intel_ring_flag(ring) & to->ppgtt->pd_dirty_rings)) {
 		hw_flags |= MI_FORCE_RESTORE;
+		to->ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
+	}
 
 	/* We should never emit switch_mm more than once */
 	WARN_ON(needs_pd_load_pre(ring, to) &&
-			needs_pd_load_post(ring, to, hw_flags));
+		needs_pd_load_post(ring, to, hw_flags));
 
 	ret = mi_set_context(ring, to, hw_flags);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3190e79..d2e21c5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,7 +37,6 @@
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
-#define  __EXEC_OBJECT_PURGEABLE (1<<27)
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -224,12 +223,7 @@
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 		vma->pin_count--;
 
-	if (entry->flags & __EXEC_OBJECT_PURGEABLE)
-		obj->madv = I915_MADV_DONTNEED;
-
-	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
-			  __EXEC_OBJECT_HAS_PIN |
-			  __EXEC_OBJECT_PURGEABLE);
+	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
 
 static void eb_destroy(struct eb_vmas *eb)
@@ -406,10 +400,9 @@
 	 * pipe_control writes because the gpu doesn't properly redirect them
 	 * through the ppgtt for non_secure batchbuffers. */
 	if (unlikely(IS_GEN6(dev) &&
-	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
-	    !(target_vma->bound & GLOBAL_BIND))) {
+	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
 		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
-				    GLOBAL_BIND);
+				    PIN_GLOBAL);
 		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
 			return ret;
 	}
@@ -591,12 +584,13 @@
 	uint64_t flags;
 	int ret;
 
-	flags = 0;
+	flags = PIN_USER;
+	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
+		flags |= PIN_GLOBAL;
+
 	if (!drm_mm_node_allocated(&vma->node)) {
 		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 			flags |= PIN_GLOBAL | PIN_MAPPABLE;
-		if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
-			flags |= PIN_GLOBAL;
 		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 	}
@@ -606,7 +600,7 @@
 	    only_mappable_for_reloc(entry->flags))
 		ret = i915_gem_object_pin(obj, vma->vm,
 					  entry->alignment,
-					  flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
+					  flags & ~PIN_MAPPABLE);
 	if (ret)
 		return ret;
 
@@ -1142,12 +1136,11 @@
 			  u32 batch_len,
 			  bool is_master)
 {
-	struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
 	struct drm_i915_gem_object *shadow_batch_obj;
 	struct i915_vma *vma;
 	int ret;
 
-	shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
 						   PAGE_ALIGN(batch_len));
 	if (IS_ERR(shadow_batch_obj))
 		return shadow_batch_obj;
@@ -1165,11 +1158,13 @@
 	if (ret)
 		goto err;
 
+	i915_gem_object_unpin_pages(shadow_batch_obj);
+
 	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
 
 	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
 	vma->exec_entry = shadow_exec_entry;
-	vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE | __EXEC_OBJECT_HAS_PIN;
+	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
 	drm_gem_object_reference(&shadow_batch_obj->base);
 	list_add_tail(&vma->exec_list, &eb->vmas);
 
@@ -1178,6 +1173,7 @@
 	return shadow_batch_obj;
 
 err:
+	i915_gem_object_unpin_pages(shadow_batch_obj);
 	if (ret == -EACCES) /* unhandled chained batch */
 		return batch_obj;
 	else
@@ -1251,12 +1247,8 @@
 	if (ret)
 		goto error;
 
-	if (ctx->ppgtt)
-		WARN(ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
-			"%s didn't clear reload\n", ring->name);
-	else if (dev_priv->mm.aliasing_ppgtt)
-		WARN(dev_priv->mm.aliasing_ppgtt->pd_dirty_rings &
-			(1<<ring->id), "%s didn't clear reload\n", ring->name);
+	WARN(ctx->ppgtt && ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
+	     "%s didn't clear reload\n", ring->name);
 
 	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
 	instp_mask = I915_EXEC_CONSTANTS_MASK;
@@ -1566,12 +1558,8 @@
 		 * dispatch_execbuffer implementations. We specifically
 		 * don't want that set when the command parser is
 		 * enabled.
-		 *
-		 * FIXME: with aliasing ppgtt, buffers that should only
-		 * be in ggtt still end up in the aliasing ppgtt. remove
-		 * this check when that is fixed.
 		 */
-		if (USES_FULL_PPGTT(dev))
+		if (USES_PPGTT(dev))
 			dispatch_flags |= I915_DISPATCH_SECURE;
 
 		exec_start = 0;
@@ -1601,9 +1589,9 @@
 	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
-	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
-				      &eb->vmas, batch_obj, exec_start,
-				      dispatch_flags);
+	ret = dev_priv->gt.execbuf_submit(dev, file, ring, ctx, args,
+					  &eb->vmas, batch_obj, exec_start,
+					  dispatch_flags);
 
 	/*
 	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0239fbf..9d3852c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -92,14 +92,14 @@
  *
  */
 
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma);
+
 const struct i915_ggtt_view i915_ggtt_view_normal;
 const struct i915_ggtt_view i915_ggtt_view_rotated = {
         .type = I915_GGTT_VIEW_ROTATED
 };
 
-static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
-static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);
-
 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
 {
 	bool has_aliasing_ppgtt;
@@ -146,14 +146,33 @@
 		return has_aliasing_ppgtt ? 1 : 0;
 }
 
-static void ppgtt_bind_vma(struct i915_vma *vma,
-			   enum i915_cache_level cache_level,
-			   u32 flags);
-static void ppgtt_unbind_vma(struct i915_vma *vma);
+static int ppgtt_bind_vma(struct i915_vma *vma,
+			  enum i915_cache_level cache_level,
+			  u32 unused)
+{
+	u32 pte_flags = 0;
 
-static inline gen8_pte_t gen8_pte_encode(dma_addr_t addr,
-					 enum i915_cache_level level,
-					 bool valid)
+	/* Currently applicable only to VLV */
+	if (vma->obj->gt_ro)
+		pte_flags |= PTE_READ_ONLY;
+
+	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
+				cache_level, pte_flags);
+
+	return 0;
+}
+
+static void ppgtt_unbind_vma(struct i915_vma *vma)
+{
+	vma->vm->clear_range(vma->vm,
+			     vma->node.start,
+			     vma->obj->base.size,
+			     true);
+}
+
+static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
+				  enum i915_cache_level level,
+				  bool valid)
 {
 	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
 	pte |= addr;
@@ -173,9 +192,9 @@
 	return pte;
 }
 
-static inline gen8_pde_t gen8_pde_encode(struct drm_device *dev,
-					  dma_addr_t addr,
-					  enum i915_cache_level level)
+static gen8_pde_t gen8_pde_encode(struct drm_device *dev,
+				  dma_addr_t addr,
+				  enum i915_cache_level level)
 {
 	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
 	pde |= addr;
@@ -285,8 +304,8 @@
 #define i915_dma_unmap_single(px, dev) \
 	__i915_dma_unmap_single((px)->daddr, dev)
 
-static inline void __i915_dma_unmap_single(dma_addr_t daddr,
-					struct drm_device *dev)
+static void __i915_dma_unmap_single(dma_addr_t daddr,
+				    struct drm_device *dev)
 {
 	struct device *device = &dev->pdev->dev;
 
@@ -307,9 +326,9 @@
 #define i915_dma_map_single(px, dev) \
 	i915_dma_map_page_single((px)->page, (dev), &(px)->daddr)
 
-static inline int i915_dma_map_page_single(struct page *page,
-					   struct drm_device *dev,
-					   dma_addr_t *daddr)
+static int i915_dma_map_page_single(struct page *page,
+				    struct drm_device *dev,
+				    dma_addr_t *daddr)
 {
 	struct device *device = &dev->pdev->dev;
 
@@ -320,7 +339,7 @@
 	return 0;
 }
 
-static void unmap_and_free_pt(struct i915_page_table_entry *pt,
+static void unmap_and_free_pt(struct i915_page_table *pt,
 			       struct drm_device *dev)
 {
 	if (WARN_ON(!pt->page))
@@ -332,9 +351,27 @@
 	kfree(pt);
 }
 
-static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev)
+static void gen8_initialize_pt(struct i915_address_space *vm,
+			       struct i915_page_table *pt)
 {
-	struct i915_page_table_entry *pt;
+	gen8_pte_t *pt_vaddr, scratch_pte;
+	int i;
+
+	pt_vaddr = kmap_atomic(pt->page);
+	scratch_pte = gen8_pte_encode(vm->scratch.addr,
+				      I915_CACHE_LLC, true);
+
+	for (i = 0; i < GEN8_PTES; i++)
+		pt_vaddr[i] = scratch_pte;
+
+	if (!HAS_LLC(vm->dev))
+		drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
+	kunmap_atomic(pt_vaddr);
+}
+
+static struct i915_page_table *alloc_pt_single(struct drm_device *dev)
+{
+	struct i915_page_table *pt;
 	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
 		GEN8_PTES : GEN6_PTES;
 	int ret = -ENOMEM;
@@ -369,78 +406,55 @@
 	return ERR_PTR(ret);
 }
 
-/**
- * alloc_pt_range() - Allocate a multiple page tables
- * @pd:		The page directory which will have at least @count entries
- *		available to point to the allocated page tables.
- * @pde:	First page directory entry for which we are allocating.
- * @count:	Number of pages to allocate.
- * @dev:	DRM device.
- *
- * Allocates multiple page table pages and sets the appropriate entries in the
- * page table structure within the page directory. Function cleans up after
- * itself on any failures.
- *
- * Return: 0 if allocation succeeded.
- */
-static int alloc_pt_range(struct i915_page_directory_entry *pd, uint16_t pde, size_t count,
-			  struct drm_device *dev)
-{
-	int i, ret;
-
-	/* 512 is the max page tables per page_directory on any platform. */
-	if (WARN_ON(pde + count > I915_PDES))
-		return -EINVAL;
-
-	for (i = pde; i < pde + count; i++) {
-		struct i915_page_table_entry *pt = alloc_pt_single(dev);
-
-		if (IS_ERR(pt)) {
-			ret = PTR_ERR(pt);
-			goto err_out;
-		}
-		WARN(pd->page_table[i],
-		     "Leaking page directory entry %d (%p)\n",
-		     i, pd->page_table[i]);
-		pd->page_table[i] = pt;
-	}
-
-	return 0;
-
-err_out:
-	while (i-- > pde)
-		unmap_and_free_pt(pd->page_table[i], dev);
-	return ret;
-}
-
-static void unmap_and_free_pd(struct i915_page_directory_entry *pd)
+static void unmap_and_free_pd(struct i915_page_directory *pd,
+			      struct drm_device *dev)
 {
 	if (pd->page) {
+		i915_dma_unmap_single(pd, dev);
 		__free_page(pd->page);
+		kfree(pd->used_pdes);
 		kfree(pd);
 	}
 }
 
-static struct i915_page_directory_entry *alloc_pd_single(void)
+static struct i915_page_directory *alloc_pd_single(struct drm_device *dev)
 {
-	struct i915_page_directory_entry *pd;
+	struct i915_page_directory *pd;
+	int ret = -ENOMEM;
 
 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
 	if (!pd)
 		return ERR_PTR(-ENOMEM);
 
-	pd->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!pd->page) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
-	}
+	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
+				sizeof(*pd->used_pdes), GFP_KERNEL);
+	if (!pd->used_pdes)
+		goto free_pd;
+
+	pd->page = alloc_page(GFP_KERNEL);
+	if (!pd->page)
+		goto free_bitmap;
+
+	ret = i915_dma_map_single(pd, dev);
+	if (ret)
+		goto free_page;
 
 	return pd;
+
+free_page:
+	__free_page(pd->page);
+free_bitmap:
+	kfree(pd->used_pdes);
+free_pd:
+	kfree(pd);
+
+	return ERR_PTR(ret);
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
-static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
-			   uint64_t val)
+static int gen8_write_pdp(struct intel_engine_cs *ring,
+			  unsigned entry,
+			  dma_addr_t addr)
 {
 	int ret;
 
@@ -452,10 +466,10 @@
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
-	intel_ring_emit(ring, (u32)(val >> 32));
+	intel_ring_emit(ring, upper_32_bits(addr));
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
-	intel_ring_emit(ring, (u32)(val));
+	intel_ring_emit(ring, lower_32_bits(addr));
 	intel_ring_advance(ring);
 
 	return 0;
@@ -466,12 +480,12 @@
 {
 	int i, ret;
 
-	/* bit of a hack to find the actual last used pd */
-	int used_pd = ppgtt->num_pd_entries / I915_PDES;
-
-	for (i = used_pd - 1; i >= 0; i--) {
-		dma_addr_t addr = ppgtt->pdp.page_directory[i]->daddr;
-		ret = gen8_write_pdp(ring, i, addr);
+	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
+		struct i915_page_directory *pd = ppgtt->pdp.page_directory[i];
+		dma_addr_t pd_daddr = pd ? pd->daddr : ppgtt->scratch_pd->daddr;
+		/* The page directory might be NULL, but we need to clear out
+		 * whatever the previous context might have used. */
+		ret = gen8_write_pdp(ring, i, pd_daddr);
 		if (ret)
 			return ret;
 	}
@@ -497,8 +511,8 @@
 				      I915_CACHE_LLC, use_scratch);
 
 	while (num_entries) {
-		struct i915_page_directory_entry *pd;
-		struct i915_page_table_entry *pt;
+		struct i915_page_directory *pd;
+		struct i915_page_table *pt;
 		struct page *page_table;
 
 		if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
@@ -559,8 +573,8 @@
 			break;
 
 		if (pt_vaddr == NULL) {
-			struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[pdpe];
-			struct i915_page_table_entry *pt = pd->page_table[pde];
+			struct i915_page_directory *pd = ppgtt->pdp.page_directory[pdpe];
+			struct i915_page_table *pt = pd->page_table[pde];
 			struct page *page_table = pt->page;
 
 			pt_vaddr = kmap_atomic(page_table);
@@ -588,14 +602,43 @@
 	}
 }
 
-static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct drm_device *dev)
+static void __gen8_do_map_pt(gen8_pde_t * const pde,
+			     struct i915_page_table *pt,
+			     struct drm_device *dev)
+{
+	gen8_pde_t entry =
+		gen8_pde_encode(dev, pt->daddr, I915_CACHE_LLC);
+	*pde = entry;
+}
+
+static void gen8_initialize_pd(struct i915_address_space *vm,
+			       struct i915_page_directory *pd)
+{
+	struct i915_hw_ppgtt *ppgtt =
+			container_of(vm, struct i915_hw_ppgtt, base);
+	gen8_pde_t *page_directory;
+	struct i915_page_table *pt;
+	int i;
+
+	page_directory = kmap_atomic(pd->page);
+	pt = ppgtt->scratch_pt;
+	for (i = 0; i < I915_PDES; i++)
+		/* Map the PDE to the page table */
+		__gen8_do_map_pt(page_directory + i, pt, vm->dev);
+
+	if (!HAS_LLC(vm->dev))
+		drm_clflush_virt_range(page_directory, PAGE_SIZE);
+	kunmap_atomic(page_directory);
+}
+
+static void gen8_free_page_tables(struct i915_page_directory *pd, struct drm_device *dev)
 {
 	int i;
 
 	if (!pd->page)
 		return;
 
-	for (i = 0; i < I915_PDES; i++) {
+	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
 		if (WARN_ON(!pd->page_table[i]))
 			continue;
 
@@ -604,280 +647,348 @@
 	}
 }
 
-static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
-{
-	int i;
-
-	for (i = 0; i < ppgtt->num_pd_pages; i++) {
-		if (WARN_ON(!ppgtt->pdp.page_directory[i]))
-			continue;
-
-		gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
-		unmap_and_free_pd(ppgtt->pdp.page_directory[i]);
-	}
-}
-
-static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
-{
-	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
-	int i, j;
-
-	for (i = 0; i < ppgtt->num_pd_pages; i++) {
-		/* TODO: In the future we'll support sparse mappings, so this
-		 * will have to change. */
-		if (!ppgtt->pdp.page_directory[i]->daddr)
-			continue;
-
-		pci_unmap_page(hwdev, ppgtt->pdp.page_directory[i]->daddr, PAGE_SIZE,
-			       PCI_DMA_BIDIRECTIONAL);
-
-		for (j = 0; j < I915_PDES; j++) {
-			struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i];
-			struct i915_page_table_entry *pt;
-			dma_addr_t addr;
-
-			if (WARN_ON(!pd->page_table[j]))
-				continue;
-
-			pt = pd->page_table[j];
-			addr = pt->daddr;
-
-			if (addr)
-				pci_unmap_page(hwdev, addr, PAGE_SIZE,
-					       PCI_DMA_BIDIRECTIONAL);
-		}
-	}
-}
-
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
+	int i;
 
-	gen8_ppgtt_unmap_pages(ppgtt);
-	gen8_ppgtt_free(ppgtt);
+	for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) {
+		if (WARN_ON(!ppgtt->pdp.page_directory[i]))
+			continue;
+
+		gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
+		unmap_and_free_pd(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
+	}
+
+	unmap_and_free_pd(ppgtt->scratch_pd, ppgtt->base.dev);
+	unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
 }
 
-static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
+/**
+ * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
+ * @ppgtt:	Master ppgtt structure.
+ * @pd:		Page directory for this address range.
+ * @start:	Starting virtual address to begin allocations.
+ * @length:	Size of the allocations.
+ * @new_pts:	Bitmap set by function with new allocations. Likely used by the
+ *		caller to free on error.
+ *
+ * Allocate the required number of page tables. Extremely similar to
+ * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
+ * the page directory boundary (instead of the page directory pointer). That
+ * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
+ * possible, and likely, that the caller will need to use multiple calls of this
+ * function to achieve the appropriate allocation.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
+				     struct i915_page_directory *pd,
+				     uint64_t start,
+				     uint64_t length,
+				     unsigned long *new_pts)
 {
-	int i, ret;
+	struct drm_device *dev = ppgtt->base.dev;
+	struct i915_page_table *pt;
+	uint64_t temp;
+	uint32_t pde;
 
-	for (i = 0; i < ppgtt->num_pd_pages; i++) {
-		ret = alloc_pt_range(ppgtt->pdp.page_directory[i],
-				     0, I915_PDES, ppgtt->base.dev);
-		if (ret)
+	gen8_for_each_pde(pt, pd, start, length, temp, pde) {
+		/* Don't reallocate page tables */
+		if (pt) {
+			/* Scratch is never allocated this way */
+			WARN_ON(pt == ppgtt->scratch_pt);
+			continue;
+		}
+
+		pt = alloc_pt_single(dev);
+		if (IS_ERR(pt))
 			goto unwind_out;
+
+		gen8_initialize_pt(&ppgtt->base, pt);
+		pd->page_table[pde] = pt;
+		set_bit(pde, new_pts);
 	}
 
 	return 0;
 
 unwind_out:
-	while (i--)
-		gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
+	for_each_set_bit(pde, new_pts, I915_PDES)
+		unmap_and_free_pt(pd->page_table[pde], dev);
 
 	return -ENOMEM;
 }
 
-static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
-						const int max_pdp)
+/**
+ * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
+ * @ppgtt:	Master ppgtt structure.
+ * @pdp:	Page directory pointer for this address range.
+ * @start:	Starting virtual address to begin allocations.
+ * @length:	Size of the allocations.
+ * @new_pds:	Bitmap set by function with new allocations. Likely used by the
+ *		caller to free on error.
+ *
+ * Allocate the required number of page directories starting at the pdpe index of
+ * @start, and ending at the pdpe index of @start + @length. This function will skip
+ * over already allocated page directories within the range, and only allocate
+ * new ones, setting the appropriate pointer within the pdp as well as the
+ * correct position in the bitmap @new_pds.
+ *
+ * The function will only allocate the pages within the range for a given page
+ * directory pointer. In other words, if @start + @length straddles a virtually
+ * addressed PDP boundary (512GB for 4k pages), there will be more allocations
+ * required by the caller. This is not currently possible, and the BUG in the
+ * code will prevent it.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
+				     struct i915_page_directory_pointer *pdp,
+				     uint64_t start,
+				     uint64_t length,
+				     unsigned long *new_pds)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct i915_page_directory *pd;
+	uint64_t temp;
+	uint32_t pdpe;
+
+	WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
+
+	/* FIXME: PPGTT container_of won't work for 64b */
+	WARN_ON((start + length) > 0x800000000ULL);
+
+	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+		if (pd)
+			continue;
+
+		pd = alloc_pd_single(dev);
+		if (IS_ERR(pd))
+			goto unwind_out;
+
+		gen8_initialize_pd(&ppgtt->base, pd);
+		pdp->page_directory[pdpe] = pd;
+		set_bit(pdpe, new_pds);
+	}
+
+	return 0;
+
+unwind_out:
+	for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
+		unmap_and_free_pd(pdp->page_directory[pdpe], dev);
+
+	return -ENOMEM;
+}
+
+static void
+free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
 {
 	int i;
 
-	for (i = 0; i < max_pdp; i++) {
-		ppgtt->pdp.page_directory[i] = alloc_pd_single();
-		if (IS_ERR(ppgtt->pdp.page_directory[i]))
-			goto unwind_out;
-	}
-
-	ppgtt->num_pd_pages = max_pdp;
-	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPES);
-
-	return 0;
-
-unwind_out:
-	while (i--)
-		unmap_and_free_pd(ppgtt->pdp.page_directory[i]);
-
-	return -ENOMEM;
+	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
+		kfree(new_pts[i]);
+	kfree(new_pts);
+	kfree(new_pds);
 }
 
-static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
-			    const int max_pdp)
+/* Fills in the page directory bitmap, and the array of page table bitmaps. Both
+ * of these are based on the number of PDPEs in the system.
+ */
+static
+int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
+					 unsigned long ***new_pts)
 {
-	int ret;
+	int i;
+	unsigned long *pds;
+	unsigned long **pts;
 
-	ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
-	if (ret)
-		return ret;
+	pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
+	if (!pds)
+		return -ENOMEM;
 
-	ret = gen8_ppgtt_allocate_page_tables(ppgtt);
-	if (ret)
-		goto err_out;
+	pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL);
+	if (!pts) {
+		kfree(pds);
+		return -ENOMEM;
+	}
 
-	ppgtt->num_pd_entries = max_pdp * I915_PDES;
+	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
+		pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
+				 sizeof(unsigned long), GFP_KERNEL);
+		if (!pts[i])
+			goto err_out;
+	}
+
+	*new_pds = pds;
+	*new_pts = pts;
 
 	return 0;
 
 err_out:
-	gen8_ppgtt_free(ppgtt);
+	free_gen8_temp_bitmaps(pds, pts);
+	return -ENOMEM;
+}
+
+static int gen8_alloc_va_range(struct i915_address_space *vm,
+			       uint64_t start,
+			       uint64_t length)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+	unsigned long *new_page_dirs, **new_page_tables;
+	struct i915_page_directory *pd;
+	const uint64_t orig_start = start;
+	const uint64_t orig_length = length;
+	uint64_t temp;
+	uint32_t pdpe;
+	int ret;
+
+#ifndef CONFIG_64BIT
+	/* Disallow 64b address on 32b platforms. Nothing is wrong with doing
+	 * this in hardware, but a lot of the drm code is not prepared to handle
+	 * 64b offset on 32b platforms.
+	 * This will be addressed when 48b PPGTT is added */
+	if (start + length > 0x100000000ULL)
+		return -E2BIG;
+#endif
+
+	/* Wrap is never okay since we can only represent 48b, and we don't
+	 * actually use the other side of the canonical address space.
+	 */
+	if (WARN_ON(start + length < start))
+		return -ERANGE;
+
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
+	if (ret)
+		return ret;
+
+	/* Do the allocations first so we can easily bail out */
+	ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length,
+					new_page_dirs);
+	if (ret) {
+		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+		return ret;
+	}
+
+	/* For every page directory referenced, allocate page tables */
+	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
+		ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
+						new_page_tables[pdpe]);
+		if (ret)
+			goto err_out;
+	}
+
+	start = orig_start;
+	length = orig_length;
+
+	/* Allocations have completed successfully, so set the bitmaps, and do
+	 * the mappings. */
+	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
+		gen8_pde_t *const page_directory = kmap_atomic(pd->page);
+		struct i915_page_table *pt;
+		uint64_t pd_len = gen8_clamp_pd(start, length);
+		uint64_t pd_start = start;
+		uint32_t pde;
+
+		/* Every pd should be allocated, we just did that above. */
+		WARN_ON(!pd);
+
+		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+			/* Same reasoning as pd */
+			WARN_ON(!pt);
+			WARN_ON(!pd_len);
+			WARN_ON(!gen8_pte_count(pd_start, pd_len));
+
+			/* Set our used ptes within the page table */
+			bitmap_set(pt->used_ptes,
+				   gen8_pte_index(pd_start),
+				   gen8_pte_count(pd_start, pd_len));
+
+			/* Our pde is now pointing to the pagetable, pt */
+			set_bit(pde, pd->used_pdes);
+
+			/* Map the PDE to the page table */
+			__gen8_do_map_pt(page_directory + pde, pt, vm->dev);
+
+			/* NB: We haven't yet mapped ptes to pages. At this
+			 * point we're still relying on insert_entries() */
+		}
+
+		if (!HAS_LLC(vm->dev))
+			drm_clflush_virt_range(page_directory, PAGE_SIZE);
+
+		kunmap_atomic(page_directory);
+
+		set_bit(pdpe, ppgtt->pdp.used_pdpes);
+	}
+
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	return 0;
+
+err_out:
+	while (pdpe--) {
+		for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
+			unmap_and_free_pt(ppgtt->pdp.page_directory[pdpe]->page_table[temp], vm->dev);
+	}
+
+	for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
+		unmap_and_free_pd(ppgtt->pdp.page_directory[pdpe], vm->dev);
+
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
 	return ret;
 }
 
-static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
-					     const int pd)
-{
-	dma_addr_t pd_addr;
-	int ret;
-
-	pd_addr = pci_map_page(ppgtt->base.dev->pdev,
-			       ppgtt->pdp.page_directory[pd]->page, 0,
-			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-
-	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
-	if (ret)
-		return ret;
-
-	ppgtt->pdp.page_directory[pd]->daddr = pd_addr;
-
-	return 0;
-}
-
-static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
-					const int pd,
-					const int pt)
-{
-	dma_addr_t pt_addr;
-	struct i915_page_directory_entry *pdir = ppgtt->pdp.page_directory[pd];
-	struct i915_page_table_entry *ptab = pdir->page_table[pt];
-	struct page *p = ptab->page;
-	int ret;
-
-	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
-			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
-	if (ret)
-		return ret;
-
-	ptab->daddr = pt_addr;
-
-	return 0;
-}
-
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
  * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
  * space.
  *
- * FIXME: split allocation into smaller pieces. For now we only ever do this
- * once, but with full PPGTT, the multiple contiguous allocations will be bad.
- * TODO: Do something with the size parameter
  */
-static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 {
-	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
-	const int min_pt_pages = I915_PDES * max_pdp;
-	int i, j, ret;
+	ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev);
+	if (IS_ERR(ppgtt->scratch_pt))
+		return PTR_ERR(ppgtt->scratch_pt);
 
-	if (size % (1<<30))
-		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
+	ppgtt->scratch_pd = alloc_pd_single(ppgtt->base.dev);
+	if (IS_ERR(ppgtt->scratch_pd))
+		return PTR_ERR(ppgtt->scratch_pd);
 
-	/* 1. Do all our allocations for page directories and page tables.
-	 * We allocate more than was asked so that we can point the unused parts
-	 * to valid entries that point to scratch page. Dynamic page tables
-	 * will fix this eventually.
-	 */
-	ret = gen8_ppgtt_alloc(ppgtt, GEN8_LEGACY_PDPES);
-	if (ret)
-		return ret;
+	gen8_initialize_pt(&ppgtt->base, ppgtt->scratch_pt);
+	gen8_initialize_pd(&ppgtt->base, ppgtt->scratch_pd);
 
-	/*
-	 * 2. Create DMA mappings for the page directories and page tables.
-	 */
-	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
-		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
-		if (ret)
-			goto bail;
-
-		for (j = 0; j < I915_PDES; j++) {
-			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
-			if (ret)
-				goto bail;
-		}
-	}
-
-	/*
-	 * 3. Map all the page directory entires to point to the page tables
-	 * we've allocated.
-	 *
-	 * For now, the PPGTT helper functions all require that the PDEs are
-	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
-	 * will never need to touch the PDEs again.
-	 */
-	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
-		struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i];
-		gen8_pde_t *pd_vaddr;
-		pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i]->page);
-		for (j = 0; j < I915_PDES; j++) {
-			struct i915_page_table_entry *pt = pd->page_table[j];
-			dma_addr_t addr = pt->daddr;
-			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
-						      I915_CACHE_LLC);
-		}
-		if (!HAS_LLC(ppgtt->base.dev))
-			drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
-		kunmap_atomic(pd_vaddr);
-	}
+	ppgtt->base.start = 0;
+	ppgtt->base.total = 1ULL << 32;
+	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
+	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
+	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
+	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
+	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
+	ppgtt->base.bind_vma = ppgtt_bind_vma;
 
 	ppgtt->switch_mm = gen8_mm_switch;
-	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
-	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
-	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
-	ppgtt->base.start = 0;
 
-	/* This is the area that we advertise as usable for the caller */
-	ppgtt->base.total = max_pdp * I915_PDES * GEN8_PTES * PAGE_SIZE;
-
-	/* Set all ptes to a valid scratch page. Also above requested space */
-	ppgtt->base.clear_range(&ppgtt->base, 0,
-				ppgtt->num_pd_pages * GEN8_PTES * PAGE_SIZE,
-				true);
-
-	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
-			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
-	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
-			 ppgtt->num_pd_entries,
-			 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
 	return 0;
-
-bail:
-	gen8_ppgtt_unmap_pages(ppgtt);
-	gen8_ppgtt_free(ppgtt);
-	return ret;
 }
 
 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 {
-	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
 	struct i915_address_space *vm = &ppgtt->base;
-	gen6_pte_t __iomem *pd_addr;
+	struct i915_page_table *unused;
 	gen6_pte_t scratch_pte;
 	uint32_t pd_entry;
-	int pte, pde;
+	uint32_t  pte, pde, temp;
+	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
 
 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
 
-	pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
-		ppgtt->pd.pd_offset / sizeof(gen6_pte_t);
-
-	seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
-		   ppgtt->pd.pd_offset,
-		   ppgtt->pd.pd_offset + ppgtt->num_pd_entries);
-	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
+	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
 		u32 expected;
 		gen6_pte_t *pt_vaddr;
 		dma_addr_t pt_addr = ppgtt->pd.page_table[pde]->daddr;
-		pd_entry = readl(pd_addr + pde);
+		pd_entry = readl(ppgtt->pd_addr + pde);
 		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
 
 		if (pd_entry != expected)
@@ -914,8 +1025,8 @@
 }
 
 /* Write pde (index) from the page directory @pd to the page table @pt */
-static void gen6_write_pde(struct i915_page_directory_entry *pd,
-			    const int pde, struct i915_page_table_entry *pt)
+static void gen6_write_pde(struct i915_page_directory *pd,
+			    const int pde, struct i915_page_table *pt)
 {
 	/* Caller needs to make sure the write completes if necessary */
 	struct i915_hw_ppgtt *ppgtt =
@@ -931,10 +1042,10 @@
 /* Write all the page tables found in the ppgtt structure to incrementing page
  * directories. */
 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
-				  struct i915_page_directory_entry *pd,
+				  struct i915_page_directory *pd,
 				  uint32_t start, uint32_t length)
 {
-	struct i915_page_table_entry *pt;
+	struct i915_page_table *pt;
 	uint32_t pde, temp;
 
 	gen6_for_each_pde(pt, pd, start, length, temp, pde)
@@ -1162,14 +1273,14 @@
  * are switching between contexts with the same LRCA, we also must do a force
  * restore.
  */
-static inline void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
+static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
 {
 	/* If current vm != vm, */
 	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
 }
 
 static void gen6_initialize_pt(struct i915_address_space *vm,
-		struct i915_page_table_entry *pt)
+		struct i915_page_table *pt)
 {
 	gen6_pte_t *pt_vaddr, scratch_pte;
 	int i;
@@ -1195,7 +1306,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt =
 				container_of(vm, struct i915_hw_ppgtt, base);
-	struct i915_page_table_entry *pt;
+	struct i915_page_table *pt;
 	const uint32_t start_save = start, length_save = length;
 	uint32_t pde, temp;
 	int ret;
@@ -1263,7 +1374,7 @@
 
 unwind_out:
 	for_each_set_bit(pde, new_page_tables, I915_PDES) {
-		struct i915_page_table_entry *pt = ppgtt->pd.page_table[pde];
+		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
 
 		ppgtt->pd.page_table[pde] = ppgtt->scratch_pt;
 		unmap_and_free_pt(pt, vm->dev);
@@ -1273,29 +1384,23 @@
 	return ret;
 }
 
-static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
-{
-	int i;
-
-	for (i = 0; i < ppgtt->num_pd_entries; i++) {
-		struct i915_page_table_entry *pt = ppgtt->pd.page_table[i];
-
-		if (pt != ppgtt->scratch_pt)
-			unmap_and_free_pt(ppgtt->pd.page_table[i], ppgtt->base.dev);
-	}
-
-	unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
-	unmap_and_free_pd(&ppgtt->pd);
-}
-
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
+	struct i915_page_table *pt;
+	uint32_t pde;
+
 
 	drm_mm_remove_node(&ppgtt->node);
 
-	gen6_ppgtt_free(ppgtt);
+	gen6_for_all_pdes(pt, ppgtt, pde) {
+		if (pt != ppgtt->scratch_pt)
+			unmap_and_free_pt(pt, ppgtt->base.dev);
+	}
+
+	unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
+	unmap_and_free_pd(&ppgtt->pd, ppgtt->base.dev);
 }
 
 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
@@ -1342,7 +1447,6 @@
 	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
 		DRM_DEBUG("Forced to use aperture for PDEs\n");
 
-	ppgtt->num_pd_entries = I915_PDES;
 	return 0;
 
 err_out:
@@ -1358,14 +1462,14 @@
 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
 				  uint64_t start, uint64_t length)
 {
-	struct i915_page_table_entry *unused;
+	struct i915_page_table *unused;
 	uint32_t pde, temp;
 
 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
 		ppgtt->pd.page_table[pde] = ppgtt->scratch_pt;
 }
 
-static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
+static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1388,23 +1492,14 @@
 	if (ret)
 		return ret;
 
-	if (aliasing) {
-		/* preallocate all pts */
-		ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries,
-				ppgtt->base.dev);
-
-		if (ret) {
-			gen6_ppgtt_cleanup(&ppgtt->base);
-			return ret;
-		}
-	}
-
 	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
+	ppgtt->base.bind_vma = ppgtt_bind_vma;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.start = 0;
-	ppgtt->base.total = ppgtt->num_pd_entries * GEN6_PTES * PAGE_SIZE;
+	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
 	ppgtt->debug_dump = gen6_dump_ppgtt;
 
 	ppgtt->pd.pd_offset =
@@ -1413,10 +1508,7 @@
 	ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
 		ppgtt->pd.pd_offset / sizeof(gen6_pte_t);
 
-	if (aliasing)
-		ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
-	else
-		gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
+	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
 
 	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
 
@@ -1430,8 +1522,7 @@
 	return 0;
 }
 
-static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt,
-		bool aliasing)
+static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -1439,16 +1530,16 @@
 	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
 
 	if (INTEL_INFO(dev)->gen < 8)
-		return gen6_ppgtt_init(ppgtt, aliasing);
+		return gen6_ppgtt_init(ppgtt);
 	else
-		return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
+		return gen8_ppgtt_init(ppgtt);
 }
 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret = 0;
 
-	ret = __hw_ppgtt_init(dev, ppgtt, false);
+	ret = __hw_ppgtt_init(dev, ppgtt);
 	if (ret == 0) {
 		kref_init(&ppgtt->ref);
 		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
@@ -1535,32 +1626,11 @@
 	kfree(ppgtt);
 }
 
-static void
-ppgtt_bind_vma(struct i915_vma *vma,
-	       enum i915_cache_level cache_level,
-	       u32 flags)
-{
-	/* Currently applicable only to VLV */
-	if (vma->obj->gt_ro)
-		flags |= PTE_READ_ONLY;
-
-	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
-				cache_level, flags);
-}
-
-static void ppgtt_unbind_vma(struct i915_vma *vma)
-{
-	vma->vm->clear_range(vma->vm,
-			     vma->node.start,
-			     vma->obj->base.size,
-			     true);
-}
-
 extern int intel_iommu_gfx_mapped;
 /* Certain Gen5 chipsets require idling the GPU before
  * unmapping anything from the GTT when VT-d is enabled.
  */
-static inline bool needs_idle_maps(struct drm_device *dev)
+static bool needs_idle_maps(struct drm_device *dev)
 {
 #ifdef CONFIG_INTEL_IOMMU
 	/* Query intel_iommu to see if we need the workaround. Presumably that
@@ -1653,67 +1723,6 @@
 	i915_ggtt_flush(dev_priv);
 }
 
-void i915_gem_restore_gtt_mappings(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj;
-	struct i915_address_space *vm;
-
-	i915_check_and_clear_faults(dev);
-
-	/* First fill our portion of the GTT with scratch pages */
-	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
-				       dev_priv->gtt.base.start,
-				       dev_priv->gtt.base.total,
-				       true);
-
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
-							   &dev_priv->gtt.base);
-		if (!vma)
-			continue;
-
-		i915_gem_clflush_object(obj, obj->pin_display);
-		/* The bind_vma code tries to be smart about tracking mappings.
-		 * Unfortunately above, we've just wiped out the mappings
-		 * without telling our object about it. So we need to fake it.
-		 *
-		 * Bind is not expected to fail since this is only called on
-		 * resume and assumption is all requirements exist already.
-		 */
-		vma->bound &= ~GLOBAL_BIND;
-		WARN_ON(i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND));
-	}
-
-
-	if (INTEL_INFO(dev)->gen >= 8) {
-		if (IS_CHERRYVIEW(dev))
-			chv_setup_private_ppat(dev_priv);
-		else
-			bdw_setup_private_ppat(dev_priv);
-
-		return;
-	}
-
-	if (USES_PPGTT(dev)) {
-		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
-			/* TODO: Perhaps it shouldn't be gen6 specific */
-
-			struct i915_hw_ppgtt *ppgtt =
-					container_of(vm, struct i915_hw_ppgtt,
-						     base);
-
-			if (i915_is_ggtt(vm))
-				ppgtt = dev_priv->mm.aliasing_ppgtt;
-
-			gen6_write_page_range(dev_priv, &ppgtt->pd,
-					      0, ppgtt->base.total);
-		}
-	}
-
-	i915_ggtt_flush(dev_priv);
-}
-
 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
 {
 	if (obj->has_dma_mapping)
@@ -1727,7 +1736,7 @@
 	return 0;
 }
 
-static inline void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 {
 #ifdef writeq
 	writeq(pte, addr);
@@ -1872,18 +1881,16 @@
 	readl(gtt_base);
 }
 
-
-static void i915_ggtt_bind_vma(struct i915_vma *vma,
-			       enum i915_cache_level cache_level,
-			       u32 unused)
+static void i915_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct sg_table *pages,
+				     uint64_t start,
+				     enum i915_cache_level cache_level, u32 unused)
 {
-	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
-	BUG_ON(!i915_is_ggtt(vma->vm));
-	intel_gtt_insert_sg_entries(vma->ggtt_view.pages, entry, flags);
-	vma->bound = GLOBAL_BIND;
+	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
+
 }
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
@@ -1896,62 +1903,41 @@
 	intel_gtt_clear_range(first_entry, num_entries);
 }
 
-static void i915_ggtt_unbind_vma(struct i915_vma *vma)
-{
-	const unsigned int first = vma->node.start >> PAGE_SHIFT;
-	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
-
-	BUG_ON(!i915_is_ggtt(vma->vm));
-	vma->bound = 0;
-	intel_gtt_clear_range(first, size);
-}
-
-static void ggtt_bind_vma(struct i915_vma *vma,
-			  enum i915_cache_level cache_level,
-			  u32 flags)
+static int ggtt_bind_vma(struct i915_vma *vma,
+			 enum i915_cache_level cache_level,
+			 u32 flags)
 {
 	struct drm_device *dev = vma->vm->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct sg_table *pages = obj->pages;
+	u32 pte_flags = 0;
+	int ret;
+
+	ret = i915_get_ggtt_vma_pages(vma);
+	if (ret)
+		return ret;
+	pages = vma->ggtt_view.pages;
 
 	/* Currently applicable only to VLV */
 	if (obj->gt_ro)
-		flags |= PTE_READ_ONLY;
+		pte_flags |= PTE_READ_ONLY;
 
-	if (i915_is_ggtt(vma->vm))
-		pages = vma->ggtt_view.pages;
 
-	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
-	 * or we have a global mapping already but the cacheability flags have
-	 * changed, set the global PTEs.
-	 *
-	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
-	 * instead if none of the above hold true.
-	 *
-	 * NB: A global mapping should only be needed for special regions like
-	 * "gtt mappable", SNB errata, or if specified via special execbuf
-	 * flags. At all other times, the GPU will use the aliasing PPGTT.
-	 */
 	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
-		if (!(vma->bound & GLOBAL_BIND) ||
-		    (cache_level != obj->cache_level)) {
-			vma->vm->insert_entries(vma->vm, pages,
-						vma->node.start,
-						cache_level, flags);
-			vma->bound |= GLOBAL_BIND;
-		}
+		vma->vm->insert_entries(vma->vm, pages,
+					vma->node.start,
+					cache_level, pte_flags);
 	}
 
-	if (dev_priv->mm.aliasing_ppgtt &&
-	    (!(vma->bound & LOCAL_BIND) ||
-	     (cache_level != obj->cache_level))) {
+	if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base, pages,
 					    vma->node.start,
-					    cache_level, flags);
-		vma->bound |= LOCAL_BIND;
+					    cache_level, pte_flags);
 	}
+
+	return 0;
 }
 
 static void ggtt_unbind_vma(struct i915_vma *vma)
@@ -1965,16 +1951,14 @@
 				     vma->node.start,
 				     obj->base.size,
 				     true);
-		vma->bound &= ~GLOBAL_BIND;
 	}
 
-	if (vma->bound & LOCAL_BIND) {
+	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.clear_range(&appgtt->base,
 					 vma->node.start,
 					 obj->base.size,
 					 true);
-		vma->bound &= ~LOCAL_BIND;
 	}
 }
 
@@ -2083,12 +2067,27 @@
 		if (!ppgtt)
 			return -ENOMEM;
 
-		ret = __hw_ppgtt_init(dev, ppgtt, true);
+		ret = __hw_ppgtt_init(dev, ppgtt);
 		if (ret) {
+			ppgtt->base.cleanup(&ppgtt->base);
 			kfree(ppgtt);
 			return ret;
 		}
 
+		if (ppgtt->base.allocate_va_range)
+			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
+							    ppgtt->base.total);
+		if (ret) {
+			ppgtt->base.cleanup(&ppgtt->base);
+			kfree(ppgtt);
+			return ret;
+		}
+
+		ppgtt->base.clear_range(&ppgtt->base,
+					ppgtt->base.start,
+					ppgtt->base.total,
+					true);
+
 		dev_priv->mm.aliasing_ppgtt = ppgtt;
 	}
 
@@ -2164,14 +2163,14 @@
 	__free_page(page);
 }
 
-static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
 {
 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
 	return snb_gmch_ctl << 20;
 }
 
-static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
 {
 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
@@ -2187,7 +2186,7 @@
 	return bdw_gmch_ctl << 20;
 }
 
-static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
 {
 	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
 	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
@@ -2198,14 +2197,14 @@
 	return 0;
 }
 
-static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
+static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
 {
 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
 	return snb_gmch_ctl << 25; /* 32 MB units */
 }
 
-static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
+static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
 {
 	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
 	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
@@ -2253,7 +2252,17 @@
 	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
 		(pci_resource_len(dev->pdev, 0) / 2);
 
-	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
+	/*
+	 * On BXT, writes larger than 64 bit to the GTT pagetable range will be
+	 * dropped. For WC mappings in general we have 64 byte burst writes
+	 * when the WC buffer is flushed, so we can't use it, but have to
+	 * resort to an uncached mapping. The WC issue is easily caught by the
+	 * readback check when writing GTT PTE entries.
+	 */
+	if (IS_BROXTON(dev))
+		dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
+	else
+		dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
 	if (!dev_priv->gtt.gsm) {
 		DRM_ERROR("Failed to map the gtt page table\n");
 		return -ENOMEM;
@@ -2375,7 +2384,7 @@
 
 	*gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
 
-	if (IS_CHERRYVIEW(dev))
+	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
 		chv_setup_private_ppat(dev_priv);
 	else
 		bdw_setup_private_ppat(dev_priv);
@@ -2384,6 +2393,8 @@
 
 	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
 	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
+	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
+	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
 
 	return ret;
 }
@@ -2424,6 +2435,8 @@
 
 	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
 	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
+	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
+	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
 
 	return ret;
 }
@@ -2455,7 +2468,10 @@
 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
+	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
+	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
+	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
 
 	if (unlikely(dev_priv->gtt.do_idle_maps))
 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
@@ -2523,6 +2539,59 @@
 	return 0;
 }
 
+void i915_gem_restore_gtt_mappings(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
+
+	i915_check_and_clear_faults(dev);
+
+	/* First fill our portion of the GTT with scratch pages */
+	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				       dev_priv->gtt.base.start,
+				       dev_priv->gtt.base.total,
+				       true);
+
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
+							   &dev_priv->gtt.base);
+		if (!vma)
+			continue;
+
+		i915_gem_clflush_object(obj, obj->pin_display);
+		WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE));
+	}
+
+
+	if (INTEL_INFO(dev)->gen >= 8) {
+		if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
+			chv_setup_private_ppat(dev_priv);
+		else
+			bdw_setup_private_ppat(dev_priv);
+
+		return;
+	}
+
+	if (USES_PPGTT(dev)) {
+		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+			/* TODO: Perhaps it shouldn't be gen6 specific */
+
+			struct i915_hw_ppgtt *ppgtt =
+					container_of(vm, struct i915_hw_ppgtt,
+						     base);
+
+			if (i915_is_ggtt(vm))
+				ppgtt = dev_priv->mm.aliasing_ppgtt;
+
+			gen6_write_page_range(dev_priv, &ppgtt->pd,
+					      0, ppgtt->base.total);
+		}
+	}
+
+	i915_ggtt_flush(dev_priv);
+}
+
 static struct i915_vma *
 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 		      struct i915_address_space *vm,
@@ -2532,7 +2601,8 @@
 
 	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
 		return ERR_PTR(-EINVAL);
-	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+
+	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -2542,22 +2612,8 @@
 	vma->vm = vm;
 	vma->obj = obj;
 
-	if (INTEL_INFO(vm->dev)->gen >= 6) {
-		if (i915_is_ggtt(vm)) {
-			vma->ggtt_view = *ggtt_view;
-
-			vma->unbind_vma = ggtt_unbind_vma;
-			vma->bind_vma = ggtt_bind_vma;
-		} else {
-			vma->unbind_vma = ppgtt_unbind_vma;
-			vma->bind_vma = ppgtt_bind_vma;
-		}
-	} else {
-		BUG_ON(!i915_is_ggtt(vm));
+	if (i915_is_ggtt(vm))
 		vma->ggtt_view = *ggtt_view;
-		vma->unbind_vma = i915_ggtt_unbind_vma;
-		vma->bind_vma = i915_ggtt_bind_vma;
-	}
 
 	list_add_tail(&vma->vma_link, &obj->vma_list);
 	if (!i915_is_ggtt(vm))
@@ -2702,7 +2758,7 @@
 	return ERR_PTR(ret);
 }
 
-static inline int
+static int
 i915_get_ggtt_vma_pages(struct i915_vma *vma)
 {
 	int ret = 0;
@@ -2746,14 +2802,44 @@
 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		  u32 flags)
 {
-	if (i915_is_ggtt(vma->vm)) {
-		int ret = i915_get_ggtt_vma_pages(vma);
+	int ret;
+	u32 bind_flags;
 
+	if (WARN_ON(flags == 0))
+		return -EINVAL;
+
+	bind_flags = 0;
+	if (flags & PIN_GLOBAL)
+		bind_flags |= GLOBAL_BIND;
+	if (flags & PIN_USER)
+		bind_flags |= LOCAL_BIND;
+
+	if (flags & PIN_UPDATE)
+		bind_flags |= vma->bound;
+	else
+		bind_flags &= ~vma->bound;
+
+	if (bind_flags == 0)
+		return 0;
+
+	if (vma->bound == 0 && vma->vm->allocate_va_range) {
+		trace_i915_va_alloc(vma->vm,
+				    vma->node.start,
+				    vma->node.size,
+				    VM_TO_TRACE_NAME(vma->vm));
+
+		ret = vma->vm->allocate_va_range(vma->vm,
+						 vma->node.start,
+						 vma->node.size);
 		if (ret)
 			return ret;
 	}
 
-	vma->bind_vma(vma, cache_level, flags);
+	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
+	if (ret)
+		return ret;
+
+	vma->bound |= bind_flags;
 
 	return 0;
 }
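
A note on the reworked bind path in i915_vma_bind() above: PIN_GLOBAL/PIN_USER are translated into GLOBAL_BIND/LOCAL_BIND, PIN_UPDATE re-applies flags that are already bound, the VA range is allocated lazily through the address space's allocate_va_range hook on the very first bind, and only then is bind_vma called and vma->bound updated. The following is a minimal, self-contained userspace sketch of that flow; every name and flag value in it (toy_vm, toy_vma, toy_vma_bind, the PIN_*/BIND constants) is an illustrative placeholder, not the driver's definition.

/*
 * Illustrative userspace analogue of the new bind flow (not driver code).
 * Build with: cc -o bind_sketch bind_sketch.c
 */
#include <stdio.h>
#include <stdint.h>

#define PIN_GLOBAL	(1u << 0)	/* placeholder values, not the driver's */
#define PIN_USER	(1u << 1)
#define PIN_UPDATE	(1u << 2)
#define GLOBAL_BIND	(1u << 0)
#define LOCAL_BIND	(1u << 1)

struct toy_vm {
	/* Optional hook, present for full/aliasing PPGTT after this patch. */
	int (*allocate_va_range)(struct toy_vm *vm, uint64_t start, uint64_t length);
	int (*bind_vma)(struct toy_vm *vm, uint64_t start, uint64_t length,
			uint32_t bind_flags);
};

struct toy_vma {
	struct toy_vm *vm;
	uint64_t start, size;
	uint32_t bound;		/* GLOBAL_BIND/LOCAL_BIND already applied */
};

static int toy_vma_bind(struct toy_vma *vma, uint32_t flags)
{
	uint32_t bind_flags = 0;
	int ret;

	if (flags & PIN_GLOBAL)
		bind_flags |= GLOBAL_BIND;
	if (flags & PIN_USER)
		bind_flags |= LOCAL_BIND;

	/* PIN_UPDATE rewrites existing bindings; otherwise skip what is bound. */
	if (flags & PIN_UPDATE)
		bind_flags |= vma->bound;
	else
		bind_flags &= ~vma->bound;
	if (bind_flags == 0)
		return 0;

	/* Page tables are allocated lazily, on the first bind only. */
	if (vma->bound == 0 && vma->vm->allocate_va_range) {
		ret = vma->vm->allocate_va_range(vma->vm, vma->start, vma->size);
		if (ret)
			return ret;
	}

	ret = vma->vm->bind_vma(vma->vm, vma->start, vma->size, bind_flags);
	if (ret)
		return ret;

	vma->bound |= bind_flags;
	return 0;
}

static int toy_alloc(struct toy_vm *vm, uint64_t start, uint64_t length)
{
	(void)vm;
	printf("alloc va [0x%llx + 0x%llx]\n",
	       (unsigned long long)start, (unsigned long long)length);
	return 0;
}

static int toy_bind(struct toy_vm *vm, uint64_t start, uint64_t length,
		    uint32_t bind_flags)
{
	(void)vm;
	printf("bind  va [0x%llx + 0x%llx] flags 0x%x\n",
	       (unsigned long long)start, (unsigned long long)length, bind_flags);
	return 0;
}

int main(void)
{
	struct toy_vm vm = { toy_alloc, toy_bind };
	struct toy_vma vma = { &vm, 0x100000, 0x4000, 0 };

	toy_vma_bind(&vma, PIN_USER);	/* allocates the range, then binds LOCAL */
	toy_vma_bind(&vma, PIN_USER);	/* already bound: no-op */
	toy_vma_bind(&vma, PIN_GLOBAL);	/* adds the GLOBAL binding only */
	return 0;
}
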
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index fc03c99..4e6cac5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -158,7 +158,6 @@
 	/** Flags and address space this VMA is bound to */
 #define GLOBAL_BIND	(1<<0)
 #define LOCAL_BIND	(1<<1)
-#define PTE_READ_ONLY	(1<<2)
 	unsigned int bound : 4;
 
 	/**
@@ -196,36 +195,30 @@
 	 * bits with absolutely no headroom. So use 4 bits. */
 	unsigned int pin_count:4;
 #define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
-
-	/** Unmap an object from an address space. This usually consists of
-	 * setting the valid PTE entries to a reserved scratch page. */
-	void (*unbind_vma)(struct i915_vma *vma);
-	/* Map an object into an address space with the given cache flags. */
-	void (*bind_vma)(struct i915_vma *vma,
-			 enum i915_cache_level cache_level,
-			 u32 flags);
 };
 
-struct i915_page_table_entry {
+struct i915_page_table {
 	struct page *page;
 	dma_addr_t daddr;
 
 	unsigned long *used_ptes;
 };
 
-struct i915_page_directory_entry {
+struct i915_page_directory {
 	struct page *page; /* NULL for GEN6-GEN7 */
 	union {
 		uint32_t pd_offset;
 		dma_addr_t daddr;
 	};
 
-	struct i915_page_table_entry *page_table[I915_PDES]; /* PDEs */
+	unsigned long *used_pdes;
+	struct i915_page_table *page_table[I915_PDES]; /* PDEs */
 };
 
-struct i915_page_directory_pointer_entry {
+struct i915_page_directory_pointer {
 	/* struct page *page; */
-	struct i915_page_directory_entry *page_directory[GEN8_LEGACY_PDPES];
+	DECLARE_BITMAP(used_pdpes, GEN8_LEGACY_PDPES);
+	struct i915_page_directory *page_directory[GEN8_LEGACY_PDPES];
 };
 
 struct i915_address_space {
@@ -267,6 +260,8 @@
 	gen6_pte_t (*pte_encode)(dma_addr_t addr,
 				 enum i915_cache_level level,
 				 bool valid, u32 flags); /* Create a valid PTE */
+	/* flags for pte_encode */
+#define PTE_READ_ONLY	(1<<0)
 	int (*allocate_va_range)(struct i915_address_space *vm,
 				 uint64_t start,
 				 uint64_t length);
@@ -279,6 +274,13 @@
 			       uint64_t start,
 			       enum i915_cache_level cache_level, u32 flags);
 	void (*cleanup)(struct i915_address_space *vm);
+	/** Unmap an object from an address space. This usually consists of
+	 * setting the valid PTE entries to a reserved scratch page. */
+	void (*unbind_vma)(struct i915_vma *vma);
+	/* Map an object into an address space with the given cache flags. */
+	int (*bind_vma)(struct i915_vma *vma,
+			enum i915_cache_level cache_level,
+			u32 flags);
 };
 
 /* The Graphics Translation Table is the way in which GEN hardware translates a
@@ -314,14 +316,13 @@
 	struct kref ref;
 	struct drm_mm_node node;
 	unsigned long pd_dirty_rings;
-	unsigned num_pd_entries;
-	unsigned num_pd_pages; /* gen8+ */
 	union {
-		struct i915_page_directory_pointer_entry pdp;
-		struct i915_page_directory_entry pd;
+		struct i915_page_directory_pointer pdp;
+		struct i915_page_directory pd;
 	};
 
-	struct i915_page_table_entry *scratch_pt;
+	struct i915_page_table *scratch_pt;
+	struct i915_page_directory *scratch_pd;
 
 	struct drm_i915_file_private *file_priv;
 
@@ -349,6 +350,11 @@
 	     temp = min_t(unsigned, temp, length), \
 	     start += temp, length -= temp)
 
+#define gen6_for_all_pdes(pt, ppgtt, iter)  \
+	for (iter = 0;		\
+	     pt = ppgtt->pd.page_table[iter], iter < I915_PDES;	\
+	     iter++)
+
 static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
 {
 	const uint32_t mask = NUM_PTE(pde_shift) - 1;
@@ -397,6 +403,63 @@
 	return i915_pde_index(addr, GEN6_PDE_SHIFT);
 }
 
+/* Equivalent to the gen6 version. For each pde, iterates over every pde
+ * between start and start + length. On gen8+ it simply iterates
+ * over every page directory entry in a page directory.
+ */
+#define gen8_for_each_pde(pt, pd, start, length, temp, iter)		\
+	for (iter = gen8_pde_index(start); \
+	     pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES;	\
+	     iter++,				\
+	     temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT) - start,	\
+	     temp = min(temp, length),					\
+	     start += temp, length -= temp)
+
+#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)		\
+	for (iter = gen8_pdpe_index(start);	\
+	     pd = (pdp)->page_directory[iter], length > 0 && iter < GEN8_LEGACY_PDPES;	\
+	     iter++,				\
+	     temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,	\
+	     temp = min(temp, length),					\
+	     start += temp, length -= temp)
+
+/* Clamp length to the next page_directory boundary */
+static inline uint64_t gen8_clamp_pd(uint64_t start, uint64_t length)
+{
+	uint64_t next_pd = ALIGN(start + 1, 1 << GEN8_PDPE_SHIFT);
+
+	if (next_pd > (start + length))
+		return length;
+
+	return next_pd - start;
+}
+
+static inline uint32_t gen8_pte_index(uint64_t address)
+{
+	return i915_pte_index(address, GEN8_PDE_SHIFT);
+}
+
+static inline uint32_t gen8_pde_index(uint64_t address)
+{
+	return i915_pde_index(address, GEN8_PDE_SHIFT);
+}
+
+static inline uint32_t gen8_pdpe_index(uint64_t address)
+{
+	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
+}
+
+static inline uint32_t gen8_pml4e_index(uint64_t address)
+{
+	WARN_ON(1); /* For 64B */
+	return 0;
+}
+
+static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
+{
+	return i915_pte_count(address, length, GEN8_PDE_SHIFT);
+}
+
 int i915_gem_gtt_init(struct drm_device *dev);
 void i915_gem_init_global_gtt(struct drm_device *dev);
 void i915_global_gtt_cleanup(struct drm_device *dev);
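
The gen8_pte/pde/pdpe_index() helpers and gen8_clamp_pd() added above are plain bit arithmetic over the 32b gen8 address layout. A standalone sketch of the same decomposition follows; it assumes the usual layout (4 KiB pages, 512 entries per page table and per page directory, hence a 21-bit PDE shift, a 30-bit PDPE shift and 4 legacy PDPEs), and all names and constants in it are illustrative placeholders rather than the driver's.

/*
 * Illustrative decomposition of a gen8 32b PPGTT address (not driver code),
 * assuming 4 KiB pages and 512 entries per page table / page directory.
 */
#include <stdio.h>
#include <stdint.h>

#define TOY_PAGE_SHIFT	12
#define TOY_PDE_SHIFT	21	/* one PDE maps 2 MiB */
#define TOY_PDPE_SHIFT	30	/* one PDPE (page directory) maps 1 GiB */
#define TOY_ENTRIES	512

static uint32_t toy_pte_index(uint64_t addr)
{
	return (addr >> TOY_PAGE_SHIFT) & (TOY_ENTRIES - 1);
}

static uint32_t toy_pde_index(uint64_t addr)
{
	return (addr >> TOY_PDE_SHIFT) & (TOY_ENTRIES - 1);
}

static uint32_t toy_pdpe_index(uint64_t addr)
{
	return (addr >> TOY_PDPE_SHIFT) & 0x3;	/* 4 legacy PDPEs */
}

/* Clamp a length so it stops at the next 1 GiB page-directory boundary,
 * the same idea as gen8_clamp_pd() above. */
static uint64_t toy_clamp_pd(uint64_t start, uint64_t length)
{
	uint64_t next_pd = (start + (1ULL << TOY_PDPE_SHIFT)) &
			   ~((1ULL << TOY_PDPE_SHIFT) - 1);

	return next_pd > start + length ? length : next_pd - start;
}

int main(void)
{
	uint64_t addr = 0x7fe01000ULL;	/* arbitrary example address */

	printf("pdpe %u, pde %u, pte %u\n",
	       toy_pdpe_index(addr), toy_pde_index(addr), toy_pte_index(addr));
	printf("length clamped at 0x%llx: 0x%llx\n",
	       (unsigned long long)addr,
	       (unsigned long long)toy_clamp_pd(addr, 3ULL << 30));
	return 0;
}
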
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index f7929e7..f6ecbda 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -184,9 +184,12 @@
 	struct i915_vma *vma;
 	int count = 0;
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link)
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
 		if (drm_mm_node_allocated(&vma->node))
 			count++;
+		if (vma->pin_count)
+			count++;
+	}
 
 	return count;
 }
@@ -210,8 +213,7 @@
 			count += obj->base.size >> PAGE_SHIFT;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (!i915_gem_obj_is_pinned(obj) &&
-		    obj->pages_pin_count == num_vma_bound(obj))
+		if (obj->pages_pin_count == num_vma_bound(obj))
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index f8da716..348ed5a 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -209,7 +209,7 @@
 
 	dev_priv->fbc.threshold = ret;
 
-	if (HAS_PCH_SPLIT(dev))
+	if (INTEL_INFO(dev_priv)->gen >= 5)
 		I915_WRITE(ILK_DPFC_CB_BASE, dev_priv->fbc.compressed_fb.start);
 	else if (IS_GM45(dev)) {
 		I915_WRITE(DPFC_CB_BASE, dev_priv->fbc.compressed_fb.start);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 6377b22..633bd1f 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -336,7 +336,7 @@
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	if (i915_gem_obj_is_pinned(obj) || obj->framebuffer_references) {
+	if (obj->pin_display || obj->framebuffer_references) {
 		ret = -EBUSY;
 		goto err;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 1d4e60d..ac22614 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -251,10 +251,11 @@
 		return;
 
 	err_printf(m, "%s command stream:\n", ring_str(ring_idx));
-	err_printf(m, "  HEAD: 0x%08x\n", ring->head);
-	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
-	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
-	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
+	err_printf(m, "  START: 0x%08x\n", ring->start);
+	err_printf(m, "  HEAD:  0x%08x\n", ring->head);
+	err_printf(m, "  TAIL:  0x%08x\n", ring->tail);
+	err_printf(m, "  CTL:   0x%08x\n", ring->ctl);
+	err_printf(m, "  HWS:   0x%08x\n", ring->hws);
 	err_printf(m, "  ACTHD: 0x%08x %08x\n", (u32)(ring->acthd>>32), (u32)ring->acthd);
 	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
 	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
@@ -883,6 +884,7 @@
 	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
 	ering->seqno = ring->get_seqno(ring, false);
 	ering->acthd = intel_ring_get_active_head(ring);
+	ering->start = I915_READ_START(ring);
 	ering->head = I915_READ_HEAD(ring);
 	ering->tail = I915_READ_TAIL(ring);
 	ering->ctl = I915_READ_CTL(ring);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6d49443..9da955e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -88,6 +88,12 @@
 	[HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS
 };
 
+/* BXT hpd list */
+static const u32 hpd_bxt[HPD_NUM_PINS] = {
+	[HPD_PORT_B] = BXT_DE_PORT_HP_DDIB,
+	[HPD_PORT_C] = BXT_DE_PORT_HP_DDIC
+};
+
 /* IIR can theoretically queue up two events. Be paranoid. */
 #define GEN8_IRQ_RESET_NDX(type, which) do { \
 	I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \
@@ -985,8 +991,7 @@
 	return;
 }
 
-static void notify_ring(struct drm_device *dev,
-			struct intel_engine_cs *ring)
+static void notify_ring(struct intel_engine_cs *ring)
 {
 	if (!intel_ring_initialized(ring))
 		return;
@@ -1049,7 +1054,7 @@
 	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
 		if (!vlv_c0_above(dev_priv,
 				  &dev_priv->rps.down_ei, &now,
-				  VLV_RP_DOWN_EI_THRESHOLD))
+				  dev_priv->rps.down_threshold))
 			events |= GEN6_PM_RP_DOWN_THRESHOLD;
 		dev_priv->rps.down_ei = now;
 	}
@@ -1057,7 +1062,7 @@
 	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
 		if (vlv_c0_above(dev_priv,
 				 &dev_priv->rps.up_ei, &now,
-				 VLV_RP_UP_EI_THRESHOLD))
+				 dev_priv->rps.up_threshold))
 			events |= GEN6_PM_RP_UP_THRESHOLD;
 		dev_priv->rps.up_ei = now;
 	}
@@ -1095,21 +1100,20 @@
 	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
 
 	adj = dev_priv->rps.last_adj;
+	new_delay = dev_priv->rps.cur_freq;
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
 		if (adj > 0)
 			adj *= 2;
-		else {
-			/* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv->dev) ? 2 : 1;
-		}
-		new_delay = dev_priv->rps.cur_freq + adj;
-
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
 		/*
 		 * For better performance, jump directly
 		 * to RPe if we're below it.
 		 */
-		if (new_delay < dev_priv->rps.efficient_freq)
+		if (new_delay < dev_priv->rps.efficient_freq - adj) {
 			new_delay = dev_priv->rps.efficient_freq;
+			adj = 0;
+		}
 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
 		if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
 			new_delay = dev_priv->rps.efficient_freq;
@@ -1119,24 +1123,22 @@
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
-		else {
-			/* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv->dev) ? -2 : -1;
-		}
-		new_delay = dev_priv->rps.cur_freq + adj;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
 	} else { /* unknown event */
-		new_delay = dev_priv->rps.cur_freq;
+		adj = 0;
 	}
 
+	dev_priv->rps.last_adj = adj;
+
 	/* sysfs frequency interfaces may have snuck in while servicing the
 	 * interrupt
 	 */
+	new_delay += adj;
 	new_delay = clamp_t(int, new_delay,
 			    dev_priv->rps.min_freq_softlimit,
 			    dev_priv->rps.max_freq_softlimit);
 
-	dev_priv->rps.last_adj = new_delay - dev_priv->rps.cur_freq;
-
 	intel_set_rps(dev_priv->dev, new_delay);
 
 	mutex_unlock(&dev_priv->rps.hw_lock);
@@ -1251,9 +1253,9 @@
 {
 	if (gt_iir &
 	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-		notify_ring(dev, &dev_priv->ring[RCS]);
+		notify_ring(&dev_priv->ring[RCS]);
 	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		notify_ring(dev, &dev_priv->ring[VCS]);
+		notify_ring(&dev_priv->ring[VCS]);
 }
 
 static void snb_gt_irq_handler(struct drm_device *dev,
@@ -1263,11 +1265,11 @@
 
 	if (gt_iir &
 	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-		notify_ring(dev, &dev_priv->ring[RCS]);
+		notify_ring(&dev_priv->ring[RCS]);
 	if (gt_iir & GT_BSD_USER_INTERRUPT)
-		notify_ring(dev, &dev_priv->ring[VCS]);
+		notify_ring(&dev_priv->ring[VCS]);
 	if (gt_iir & GT_BLT_USER_INTERRUPT)
-		notify_ring(dev, &dev_priv->ring[BCS]);
+		notify_ring(&dev_priv->ring[BCS]);
 
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
@@ -1278,88 +1280,74 @@
 		ivybridge_parity_error_irq_handler(dev, gt_iir);
 }
 
-static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
-				       struct drm_i915_private *dev_priv,
+static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
 				       u32 master_ctl)
 {
-	struct intel_engine_cs *ring;
-	u32 rcs, bcs, vcs;
-	uint32_t tmp = 0;
 	irqreturn_t ret = IRQ_NONE;
 
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		tmp = I915_READ(GEN8_GT_IIR(0));
+		u32 tmp = I915_READ_FW(GEN8_GT_IIR(0));
 		if (tmp) {
-			I915_WRITE(GEN8_GT_IIR(0), tmp);
+			I915_WRITE_FW(GEN8_GT_IIR(0), tmp);
 			ret = IRQ_HANDLED;
 
-			rcs = tmp >> GEN8_RCS_IRQ_SHIFT;
-			ring = &dev_priv->ring[RCS];
-			if (rcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (rcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[RCS]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[RCS]);
 
-			bcs = tmp >> GEN8_BCS_IRQ_SHIFT;
-			ring = &dev_priv->ring[BCS];
-			if (bcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (bcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[BCS]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[BCS]);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT0)!\n");
 	}
 
 	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
-		tmp = I915_READ(GEN8_GT_IIR(1));
+		u32 tmp = I915_READ_FW(GEN8_GT_IIR(1));
 		if (tmp) {
-			I915_WRITE(GEN8_GT_IIR(1), tmp);
+			I915_WRITE_FW(GEN8_GT_IIR(1), tmp);
 			ret = IRQ_HANDLED;
 
-			vcs = tmp >> GEN8_VCS1_IRQ_SHIFT;
-			ring = &dev_priv->ring[VCS];
-			if (vcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[VCS]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[VCS]);
 
-			vcs = tmp >> GEN8_VCS2_IRQ_SHIFT;
-			ring = &dev_priv->ring[VCS2];
-			if (vcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[VCS2]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[VCS2]);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT1)!\n");
 	}
 
+	if (master_ctl & GEN8_GT_VECS_IRQ) {
+		u32 tmp = I915_READ_FW(GEN8_GT_IIR(3));
+		if (tmp) {
+			I915_WRITE_FW(GEN8_GT_IIR(3), tmp);
+			ret = IRQ_HANDLED;
+
+			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT))
+				intel_lrc_irq_handler(&dev_priv->ring[VECS]);
+			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT))
+				notify_ring(&dev_priv->ring[VECS]);
+		} else
+			DRM_ERROR("The master control interrupt lied (GT3)!\n");
+	}
+
 	if (master_ctl & GEN8_GT_PM_IRQ) {
-		tmp = I915_READ(GEN8_GT_IIR(2));
+		u32 tmp = I915_READ_FW(GEN8_GT_IIR(2));
 		if (tmp & dev_priv->pm_rps_events) {
-			I915_WRITE(GEN8_GT_IIR(2),
-				   tmp & dev_priv->pm_rps_events);
+			I915_WRITE_FW(GEN8_GT_IIR(2),
+				      tmp & dev_priv->pm_rps_events);
 			ret = IRQ_HANDLED;
 			gen6_rps_irq_handler(dev_priv, tmp);
 		} else
 			DRM_ERROR("The master control interrupt lied (PM)!\n");
 	}
 
-	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		tmp = I915_READ(GEN8_GT_IIR(3));
-		if (tmp) {
-			I915_WRITE(GEN8_GT_IIR(3), tmp);
-			ret = IRQ_HANDLED;
-
-			vcs = tmp >> GEN8_VECS_IRQ_SHIFT;
-			ring = &dev_priv->ring[VECS];
-			if (vcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, ring);
-			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_lrc_irq_handler(ring);
-		} else
-			DRM_ERROR("The master control interrupt lied (GT3)!\n");
-	}
-
 	return ret;
 }
 
@@ -1440,7 +1428,7 @@
 		if (port && dev_priv->hpd_irq_port[port]) {
 			bool long_hpd;
 
-			if (HAS_PCH_SPLIT(dev)) {
+			if (!HAS_GMCH_DISPLAY(dev_priv)) {
 				dig_shift = pch_port_to_hotplug_shift(port);
 				long_hpd = (dig_hotplug_reg >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT;
 			} else {
@@ -1654,7 +1642,7 @@
 
 	if (HAS_VEBOX(dev_priv->dev)) {
 		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
+			notify_ring(&dev_priv->ring[VECS]);
 
 		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
 			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
@@ -1848,7 +1836,7 @@
 			I915_WRITE(VLV_IIR, iir);
 		}
 
-		gen8_gt_irq_handler(dev, dev_priv, master_ctl);
+		gen8_gt_irq_handler(dev_priv, master_ctl);
 
 		/* Call regardless, as some status bits might not be
 		 * signalled in iir */
@@ -2164,6 +2152,38 @@
 	return ret;
 }
 
+static void bxt_hpd_handler(struct drm_device *dev, uint32_t iir_status)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t hp_control;
+	uint32_t hp_trigger;
+
+	/* Get the status */
+	hp_trigger = iir_status & BXT_DE_PORT_HOTPLUG_MASK;
+	hp_control = I915_READ(BXT_HOTPLUG_CTL);
+
+	/* Hotplug not enabled? */
+	if (!(hp_control & BXT_HOTPLUG_CTL_MASK)) {
+		DRM_ERROR("Interrupt when HPD disabled\n");
+		return;
+	}
+
+	DRM_DEBUG_DRIVER("hotplug event received, stat 0x%08x\n",
+		hp_control & BXT_HOTPLUG_CTL_MASK);
+
+	/* Check for HPD storm and schedule bottom half */
+	intel_hpd_irq_handler(dev, hp_trigger, hp_control, hpd_bxt);
+
+	/*
+	 * FIXME: Save the hot plug status for bottom half before
+	 * clearing the sticky status bits, else the status will be
+	 * lost.
+	 */
+
+	/* Clear sticky bits in hpd status */
+	I915_WRITE(BXT_HOTPLUG_CTL, hp_control);
+}
+
 static irqreturn_t gen8_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -2181,17 +2201,16 @@
 		aux_mask |=  GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
 			GEN9_AUX_CHANNEL_D;
 
-	master_ctl = I915_READ(GEN8_MASTER_IRQ);
+	master_ctl = I915_READ_FW(GEN8_MASTER_IRQ);
 	master_ctl &= ~GEN8_MASTER_IRQ_CONTROL;
 	if (!master_ctl)
 		return IRQ_NONE;
 
-	I915_WRITE(GEN8_MASTER_IRQ, 0);
-	POSTING_READ(GEN8_MASTER_IRQ);
+	I915_WRITE_FW(GEN8_MASTER_IRQ, 0);
 
 	/* Find, clear, then process each source of interrupt */
 
-	ret = gen8_gt_irq_handler(dev, dev_priv, master_ctl);
+	ret = gen8_gt_irq_handler(dev_priv, master_ctl);
 
 	if (master_ctl & GEN8_DE_MISC_IRQ) {
 		tmp = I915_READ(GEN8_DE_MISC_IIR);
@@ -2210,12 +2229,27 @@
 	if (master_ctl & GEN8_DE_PORT_IRQ) {
 		tmp = I915_READ(GEN8_DE_PORT_IIR);
 		if (tmp) {
+			bool found = false;
+
 			I915_WRITE(GEN8_DE_PORT_IIR, tmp);
 			ret = IRQ_HANDLED;
 
-			if (tmp & aux_mask)
+			if (tmp & aux_mask) {
 				dp_aux_irq_handler(dev);
-			else
+				found = true;
+			}
+
+			if (IS_BROXTON(dev) && tmp & BXT_DE_PORT_HOTPLUG_MASK) {
+				bxt_hpd_handler(dev, tmp);
+				found = true;
+			}
+
+			if (IS_BROXTON(dev) && (tmp & BXT_DE_PORT_GMBUS)) {
+				gmbus_irq_handler(dev);
+				found = true;
+			}
+
+			if (!found)
 				DRM_ERROR("Unexpected DE Port interrupt\n");
 		}
 		else
@@ -2268,7 +2302,8 @@
 			DRM_ERROR("The master control interrupt lied (DE PIPE)!\n");
 	}
 
-	if (!HAS_PCH_NOP(dev) && master_ctl & GEN8_DE_PCH_IRQ) {
+	if (HAS_PCH_SPLIT(dev) && !HAS_PCH_NOP(dev) &&
+	    master_ctl & GEN8_DE_PCH_IRQ) {
 		/*
 		 * FIXME(BDW): Assume for now that the new interrupt handling
 		 * scheme also closed the SDE interrupt handling race we've seen
@@ -2284,8 +2319,8 @@
 
 	}
 
-	I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
-	POSTING_READ(GEN8_MASTER_IRQ);
+	I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
+	POSTING_READ_FW(GEN8_MASTER_IRQ);
 
 	return ret;
 }
@@ -3104,7 +3139,8 @@
 	GEN5_IRQ_RESET(GEN8_DE_MISC_);
 	GEN5_IRQ_RESET(GEN8_PCU_);
 
-	ibx_irq_reset(dev);
+	if (HAS_PCH_SPLIT(dev))
+		ibx_irq_reset(dev);
 }
 
 void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,
@@ -3178,6 +3214,42 @@
 	I915_WRITE(PCH_PORT_HOTPLUG, hotplug);
 }
 
+static void bxt_hpd_irq_setup(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_encoder *intel_encoder;
+	u32 hotplug_port = 0;
+	u32 hotplug_ctrl;
+
+	/* Now, enable HPD */
+	for_each_intel_encoder(dev, intel_encoder) {
+		if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark
+				== HPD_ENABLED)
+			hotplug_port |= hpd_bxt[intel_encoder->hpd_pin];
+	}
+
+	/* Mask all HPD control bits */
+	hotplug_ctrl = I915_READ(BXT_HOTPLUG_CTL) & ~BXT_HOTPLUG_CTL_MASK;
+
+	/* Enable requested port in hotplug control */
+	/* TODO: implement (short) HPD support on port A */
+	WARN_ON_ONCE(hotplug_port & BXT_DE_PORT_HP_DDIA);
+	if (hotplug_port & BXT_DE_PORT_HP_DDIB)
+		hotplug_ctrl |= BXT_DDIB_HPD_ENABLE;
+	if (hotplug_port & BXT_DE_PORT_HP_DDIC)
+		hotplug_ctrl |= BXT_DDIC_HPD_ENABLE;
+	I915_WRITE(BXT_HOTPLUG_CTL, hotplug_ctrl);
+
+	/* Unmask DDI hotplug in IMR */
+	hotplug_ctrl = I915_READ(GEN8_DE_PORT_IMR) & ~hotplug_port;
+	I915_WRITE(GEN8_DE_PORT_IMR, hotplug_ctrl);
+
+	/* Enable DDI hotplug in IER */
+	hotplug_ctrl = I915_READ(GEN8_DE_PORT_IER) | hotplug_port;
+	I915_WRITE(GEN8_DE_PORT_IER, hotplug_ctrl);
+	POSTING_READ(GEN8_DE_PORT_IER);
+}
+
 static void ibx_irq_postinstall(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3448,13 +3520,16 @@
 	uint32_t de_pipe_masked = GEN8_PIPE_CDCLK_CRC_DONE;
 	uint32_t de_pipe_enables;
 	int pipe;
-	u32 aux_en = GEN8_AUX_CHANNEL_A;
+	u32 de_port_en = GEN8_AUX_CHANNEL_A;
 
 	if (IS_GEN9(dev_priv)) {
 		de_pipe_masked |= GEN9_PIPE_PLANE1_FLIP_DONE |
 				  GEN9_DE_PIPE_IRQ_FAULT_ERRORS;
-		aux_en |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
+		de_port_en |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
 			GEN9_AUX_CHANNEL_D;
+
+		if (IS_BROXTON(dev_priv))
+			de_port_en |= BXT_DE_PORT_GMBUS;
 	} else
 		de_pipe_masked |= GEN8_PIPE_PRIMARY_FLIP_DONE |
 				  GEN8_DE_PIPE_IRQ_FAULT_ERRORS;
@@ -3473,19 +3548,21 @@
 					  dev_priv->de_irq_mask[pipe],
 					  de_pipe_enables);
 
-	GEN5_IRQ_INIT(GEN8_DE_PORT_, ~aux_en, aux_en);
+	GEN5_IRQ_INIT(GEN8_DE_PORT_, ~de_port_en, de_port_en);
 }
 
 static int gen8_irq_postinstall(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	ibx_irq_pre_postinstall(dev);
+	if (HAS_PCH_SPLIT(dev))
+		ibx_irq_pre_postinstall(dev);
 
 	gen8_gt_irq_postinstall(dev_priv);
 	gen8_de_irq_postinstall(dev_priv);
 
-	ibx_irq_postinstall(dev);
+	if (HAS_PCH_SPLIT(dev))
+		ibx_irq_postinstall(dev);
 
 	I915_WRITE(GEN8_MASTER_IRQ, DE_MASTER_IRQ_CONTROL);
 	POSTING_READ(GEN8_MASTER_IRQ);
@@ -3694,7 +3771,7 @@
 		new_iir = I915_READ16(IIR); /* Flush posted writes */
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(dev, &dev_priv->ring[RCS]);
+			notify_ring(&dev_priv->ring[RCS]);
 
 		for_each_pipe(dev_priv, pipe) {
 			int plane = pipe;
@@ -3883,7 +3960,7 @@
 		new_iir = I915_READ(IIR); /* Flush posted writes */
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(dev, &dev_priv->ring[RCS]);
+			notify_ring(&dev_priv->ring[RCS]);
 
 		for_each_pipe(dev_priv, pipe) {
 			int plane = pipe;
@@ -4110,9 +4187,9 @@
 		new_iir = I915_READ(IIR); /* Flush posted writes */
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(dev, &dev_priv->ring[RCS]);
+			notify_ring(&dev_priv->ring[RCS]);
 		if (iir & I915_BSD_USER_INTERRUPT)
-			notify_ring(dev, &dev_priv->ring[VCS]);
+			notify_ring(&dev_priv->ring[VCS]);
 
 		for_each_pipe(dev_priv, pipe) {
 			if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS &&
@@ -4294,7 +4371,10 @@
 		dev->driver->irq_uninstall = gen8_irq_uninstall;
 		dev->driver->enable_vblank = gen8_enable_vblank;
 		dev->driver->disable_vblank = gen8_disable_vblank;
-		dev_priv->display.hpd_irq_setup = ibx_hpd_irq_setup;
+		if (HAS_PCH_SPLIT(dev))
+			dev_priv->display.hpd_irq_setup = ibx_hpd_irq_setup;
+		else
+			dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup;
 	} else if (HAS_PCH_SPLIT(dev)) {
 		dev->driver->irq_handler = ironlake_irq_handler;
 		dev->driver->irq_preinstall = ironlake_irq_reset;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 773d1d2..e35d7f29 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -671,8 +671,6 @@
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
 #define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
-#define VLV_RP_UP_EI_THRESHOLD			90
-#define VLV_RP_DOWN_EI_THRESHOLD		70
 
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
@@ -717,7 +715,7 @@
 /**
  * DOC: DPIO
  *
- * VLV and CHV have slightly peculiar display PHYs for driving DP/HDMI
+ * VLV, CHV and BXT have slightly peculiar display PHYs for driving DP/HDMI
  * ports. DPIO is the name given to such a display PHY. These PHYs
  * don't follow the standard programming model using direct MMIO
  * registers, and instead their registers must be accessed through IOSF
@@ -748,7 +746,7 @@
  * controlled from the display controller side. No DPIO registers
  * need to be accessed during AUX communication,
  *
- * Generally the common lane corresponds to the pipe and
+ * Generally on VLV/CHV the common lane corresponds to the pipe and
  * the spline (PCS/TX) corresponds to the port.
  *
  * For dual channel PHY (VLV/CHV):
@@ -770,11 +768,17 @@
  *
  *  port D == PCS/TX CH0
  *
- * Note: digital port B is DDI0, digital port C is DDI1,
- * digital port D is DDI2
+ * On BXT the entire PHY channel corresponds to the port. That means
+ * the PLL is also now associated with the port rather than the pipe,
+ * and so the clock needs to be routed to the appropriate transcoder.
+ * Port A PLL is directly connected to transcoder EDP and port B/C
+ * PLLs can be routed to any transcoder A/B/C.
+ *
+ * Note: DDI0 is digital port B, DDI1 is digital port C, and DDI2 is
+ * digital port D (CHV) or port A (BXT).
  */
 /*
- * Dual channel PHY (VLV/CHV)
+ * Dual channel PHY (VLV/CHV/BXT)
  * ---------------------------------
  * |      CH0      |      CH1      |
  * |  CMN/PLL/REF  |  CMN/PLL/REF  |
@@ -786,7 +790,7 @@
  * |     DDI0      |     DDI1      | DP/HDMI ports
  * ---------------------------------
  *
- * Single channel PHY (CHV)
+ * Single channel PHY (CHV/BXT)
  * -----------------
  * |      CH0      |
  * |  CMN/PLL/REF  |
@@ -1119,6 +1123,239 @@
 #define   DPIO_FRC_LATENCY_SHFIT	8
 #define CHV_TX_DW14(ch, lane) _TXLANE(ch, lane, 0xb8)
 #define   DPIO_UPAR_SHIFT		30
+
+/* BXT PHY registers */
+#define _BXT_PHY(phy, a, b)		_PIPE((phy), (a), (b))
+
+#define BXT_P_CR_GT_DISP_PWRON		0x138090
+#define   GT_DISPLAY_POWER_ON(phy)	(1 << (phy))
+
+#define _PHY_CTL_FAMILY_EDP		0x64C80
+#define _PHY_CTL_FAMILY_DDI		0x64C90
+#define   COMMON_RESET_DIS		(1 << 31)
+#define BXT_PHY_CTL_FAMILY(phy)		_BXT_PHY((phy), _PHY_CTL_FAMILY_DDI, \
+							_PHY_CTL_FAMILY_EDP)
+
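+
+The _BXT_PHY() selector above reuses the _PIPE() base-plus-stride arithmetic. A minimal
+standalone sketch (constants copied from the defines above, macro re-derived here, not part
+of the patch) shows that PHY index 0 resolves to the DDI (ports B/C) register copy and PHY
+index 1 to the EDP (port A) copy:
+
+#include <stdio.h>
+
+/* Sketch only: re-derive the _BXT_PHY()/_PIPE() selection arithmetic. */
+#define SKETCH_PIPE(idx, a, b)	((a) + (idx) * ((b) - (a)))
+#define SKETCH_CTL_FAMILY_EDP	0x64C80
+#define SKETCH_CTL_FAMILY_DDI	0x64C90
+
+int main(void)
+{
+	int phy;
+
+	for (phy = 0; phy < 2; phy++)
+		printf("BXT_PHY_CTL_FAMILY(%d) = 0x%X\n", phy,
+		       SKETCH_PIPE(phy, SKETCH_CTL_FAMILY_DDI, SKETCH_CTL_FAMILY_EDP));
+	return 0;	/* prints 0x64C90 (DDI, PHY 0) then 0x64C80 (EDP, PHY 1) */
+}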
+/* BXT PHY PLL registers */
+#define _PORT_PLL_A			0x46074
+#define _PORT_PLL_B			0x46078
+#define _PORT_PLL_C			0x4607c
+#define   PORT_PLL_ENABLE		(1 << 31)
+#define   PORT_PLL_LOCK			(1 << 30)
+#define   PORT_PLL_REF_SEL		(1 << 27)
+#define BXT_PORT_PLL_ENABLE(port)	_PORT(port, _PORT_PLL_A, _PORT_PLL_B)
+
+#define _PORT_PLL_EBB_0_A		0x162034
+#define _PORT_PLL_EBB_0_B		0x6C034
+#define _PORT_PLL_EBB_0_C		0x6C340
+#define   PORT_PLL_P1_MASK		(0x07 << 13)
+#define   PORT_PLL_P1(x)		((x)  << 13)
+#define   PORT_PLL_P2_MASK		(0x1f << 8)
+#define   PORT_PLL_P2(x)		((x)  << 8)
+#define BXT_PORT_PLL_EBB_0(port)	_PORT3(port, _PORT_PLL_EBB_0_A, \
+						_PORT_PLL_EBB_0_B,	\
+						_PORT_PLL_EBB_0_C)
+
+#define _PORT_PLL_EBB_4_A		0x162038
+#define _PORT_PLL_EBB_4_B		0x6C038
+#define _PORT_PLL_EBB_4_C		0x6C344
+#define   PORT_PLL_10BIT_CLK_ENABLE	(1 << 13)
+#define   PORT_PLL_RECALIBRATE		(1 << 14)
+#define BXT_PORT_PLL_EBB_4(port)	_PORT3(port, _PORT_PLL_EBB_4_A, \
+						_PORT_PLL_EBB_4_B,	\
+						_PORT_PLL_EBB_4_C)
+
+#define _PORT_PLL_0_A			0x162100
+#define _PORT_PLL_0_B			0x6C100
+#define _PORT_PLL_0_C			0x6C380
+/* PORT_PLL_0_A */
+#define   PORT_PLL_M2_MASK		0xFF
+/* PORT_PLL_1_A */
+#define   PORT_PLL_N_MASK		(0x0F << 8)
+#define   PORT_PLL_N(x)			((x) << 8)
+/* PORT_PLL_2_A */
+#define   PORT_PLL_M2_FRAC_MASK		0x3FFFFF
+/* PORT_PLL_3_A */
+#define   PORT_PLL_M2_FRAC_ENABLE	(1 << 16)
+/* PORT_PLL_6_A */
+#define   PORT_PLL_PROP_COEFF_MASK	0xF
+#define   PORT_PLL_INT_COEFF_MASK	(0x1F << 8)
+#define   PORT_PLL_INT_COEFF(x)		((x)  << 8)
+#define   PORT_PLL_GAIN_CTL_MASK	(0x07 << 16)
+#define   PORT_PLL_GAIN_CTL(x)		((x)  << 16)
+/* PORT_PLL_8_A */
+#define   PORT_PLL_TARGET_CNT_MASK	0x3FF
+#define _PORT_PLL_BASE(port)		_PORT3(port, _PORT_PLL_0_A,	\
+						_PORT_PLL_0_B,		\
+						_PORT_PLL_0_C)
+#define BXT_PORT_PLL(port, idx)		(_PORT_PLL_BASE(port) + (idx) * 4)
+
+/* BXT PHY common lane registers */
+#define _PORT_CL1CM_DW0_A		0x162000
+#define _PORT_CL1CM_DW0_BC		0x6C000
+#define   PHY_POWER_GOOD		(1 << 16)
+#define BXT_PORT_CL1CM_DW0(phy)		_BXT_PHY((phy), _PORT_CL1CM_DW0_BC, \
+							_PORT_CL1CM_DW0_A)
+
+#define _PORT_CL1CM_DW9_A		0x162024
+#define _PORT_CL1CM_DW9_BC		0x6C024
+#define   IREF0RC_OFFSET_SHIFT		8
+#define   IREF0RC_OFFSET_MASK		(0xFF << IREF0RC_OFFSET_SHIFT)
+#define BXT_PORT_CL1CM_DW9(phy)		_BXT_PHY((phy), _PORT_CL1CM_DW9_BC, \
+							_PORT_CL1CM_DW9_A)
+
+#define _PORT_CL1CM_DW10_A		0x162028
+#define _PORT_CL1CM_DW10_BC		0x6C028
+#define   IREF1RC_OFFSET_SHIFT		8
+#define   IREF1RC_OFFSET_MASK		(0xFF << IREF1RC_OFFSET_SHIFT)
+#define BXT_PORT_CL1CM_DW10(phy)	_BXT_PHY((phy), _PORT_CL1CM_DW10_BC, \
+							_PORT_CL1CM_DW10_A)
+
+#define _PORT_CL1CM_DW28_A		0x162070
+#define _PORT_CL1CM_DW28_BC		0x6C070
+#define   OCL1_POWER_DOWN_EN		(1 << 23)
+#define   DW28_OLDO_DYN_PWR_DOWN_EN	(1 << 22)
+#define   SUS_CLK_CONFIG		0x3
+#define BXT_PORT_CL1CM_DW28(phy)	_BXT_PHY((phy), _PORT_CL1CM_DW28_BC, \
+							_PORT_CL1CM_DW28_A)
+
+#define _PORT_CL1CM_DW30_A		0x162078
+#define _PORT_CL1CM_DW30_BC		0x6C078
+#define   OCL2_LDOFUSE_PWR_DIS		(1 << 6)
+#define BXT_PORT_CL1CM_DW30(phy)	_BXT_PHY((phy), _PORT_CL1CM_DW30_BC, \
+							_PORT_CL1CM_DW30_A)
+
+/* Defined for PHY0 only */
+#define BXT_PORT_CL2CM_DW6_BC		0x6C358
+#define   DW6_OLDO_DYN_PWR_DOWN_EN	(1 << 28)
+
+/* BXT PHY Ref registers */
+#define _PORT_REF_DW3_A			0x16218C
+#define _PORT_REF_DW3_BC		0x6C18C
+#define   GRC_DONE			(1 << 22)
+#define BXT_PORT_REF_DW3(phy)		_BXT_PHY((phy), _PORT_REF_DW3_BC, \
+							_PORT_REF_DW3_A)
+
+#define _PORT_REF_DW6_A			0x162198
+#define _PORT_REF_DW6_BC		0x6C198
+/*
+ * FIXME: BSpec/CHV ConfigDB disagrees on the following two fields, fix them
+ * after testing.
+ */
+#define   GRC_CODE_SHIFT		23
+#define   GRC_CODE_MASK			(0x1FF << GRC_CODE_SHIFT)
+#define   GRC_CODE_FAST_SHIFT		16
+#define   GRC_CODE_FAST_MASK		(0x7F << GRC_CODE_FAST_SHIFT)
+#define   GRC_CODE_SLOW_SHIFT		8
+#define   GRC_CODE_SLOW_MASK		(0xFF << GRC_CODE_SLOW_SHIFT)
+#define   GRC_CODE_NOM_MASK		0xFF
+#define BXT_PORT_REF_DW6(phy)		_BXT_PHY((phy), _PORT_REF_DW6_BC,	\
+						      _PORT_REF_DW6_A)
+
+#define _PORT_REF_DW8_A			0x1621A0
+#define _PORT_REF_DW8_BC		0x6C1A0
+#define   GRC_DIS			(1 << 15)
+#define   GRC_RDY_OVRD			(1 << 1)
+#define BXT_PORT_REF_DW8(phy)		_BXT_PHY((phy), _PORT_REF_DW8_BC,	\
+						      _PORT_REF_DW8_A)
+
+/* BXT PHY PCS registers */
+#define _PORT_PCS_DW10_LN01_A		0x162428
+#define _PORT_PCS_DW10_LN01_B		0x6C428
+#define _PORT_PCS_DW10_LN01_C		0x6C828
+#define _PORT_PCS_DW10_GRP_A		0x162C28
+#define _PORT_PCS_DW10_GRP_B		0x6CC28
+#define _PORT_PCS_DW10_GRP_C		0x6CE28
+#define BXT_PORT_PCS_DW10_LN01(port)	_PORT3(port, _PORT_PCS_DW10_LN01_A, \
+						     _PORT_PCS_DW10_LN01_B, \
+						     _PORT_PCS_DW10_LN01_C)
+#define BXT_PORT_PCS_DW10_GRP(port)	_PORT3(port, _PORT_PCS_DW10_GRP_A,  \
+						     _PORT_PCS_DW10_GRP_B,  \
+						     _PORT_PCS_DW10_GRP_C)
+#define   TX2_SWING_CALC_INIT		(1 << 31)
+#define   TX1_SWING_CALC_INIT		(1 << 30)
+
+#define _PORT_PCS_DW12_LN01_A		0x162430
+#define _PORT_PCS_DW12_LN01_B		0x6C430
+#define _PORT_PCS_DW12_LN01_C		0x6C830
+#define _PORT_PCS_DW12_LN23_A		0x162630
+#define _PORT_PCS_DW12_LN23_B		0x6C630
+#define _PORT_PCS_DW12_LN23_C		0x6CA30
+#define _PORT_PCS_DW12_GRP_A		0x162c30
+#define _PORT_PCS_DW12_GRP_B		0x6CC30
+#define _PORT_PCS_DW12_GRP_C		0x6CE30
+#define   LANESTAGGER_STRAP_OVRD	(1 << 6)
+#define   LANE_STAGGER_MASK		0x1F
+#define BXT_PORT_PCS_DW12_LN01(port)	_PORT3(port, _PORT_PCS_DW12_LN01_A, \
+						     _PORT_PCS_DW12_LN01_B, \
+						     _PORT_PCS_DW12_LN01_C)
+#define BXT_PORT_PCS_DW12_LN23(port)	_PORT3(port, _PORT_PCS_DW12_LN23_A, \
+						     _PORT_PCS_DW12_LN23_B, \
+						     _PORT_PCS_DW12_LN23_C)
+#define BXT_PORT_PCS_DW12_GRP(port)	_PORT3(port, _PORT_PCS_DW12_GRP_A, \
+						     _PORT_PCS_DW12_GRP_B, \
+						     _PORT_PCS_DW12_GRP_C)
+
+/* BXT PHY TX registers */
+#define _BXT_LANE_OFFSET(lane)           (((lane) >> 1) * 0x200 +	\
+					  ((lane) & 1) * 0x80)
+
+#define _PORT_TX_DW2_LN0_A		0x162508
+#define _PORT_TX_DW2_LN0_B		0x6C508
+#define _PORT_TX_DW2_LN0_C		0x6C908
+#define _PORT_TX_DW2_GRP_A		0x162D08
+#define _PORT_TX_DW2_GRP_B		0x6CD08
+#define _PORT_TX_DW2_GRP_C		0x6CF08
+#define BXT_PORT_TX_DW2_GRP(port)	_PORT3(port, _PORT_TX_DW2_GRP_A,  \
+						     _PORT_TX_DW2_GRP_B,  \
+						     _PORT_TX_DW2_GRP_C)
+#define BXT_PORT_TX_DW2_LN0(port)	_PORT3(port, _PORT_TX_DW2_LN0_A,  \
+						     _PORT_TX_DW2_LN0_B,  \
+						     _PORT_TX_DW2_LN0_C)
+#define   MARGIN_000_SHIFT		16
+#define   MARGIN_000			(0xFF << MARGIN_000_SHIFT)
+#define   UNIQ_TRANS_SCALE_SHIFT	8
+#define   UNIQ_TRANS_SCALE		(0xFF << UNIQ_TRANS_SCALE_SHIFT)
+
+#define _PORT_TX_DW3_LN0_A		0x16250C
+#define _PORT_TX_DW3_LN0_B		0x6C50C
+#define _PORT_TX_DW3_LN0_C		0x6C90C
+#define _PORT_TX_DW3_GRP_A		0x162D0C
+#define _PORT_TX_DW3_GRP_B		0x6CD0C
+#define _PORT_TX_DW3_GRP_C		0x6CF0C
+#define BXT_PORT_TX_DW3_GRP(port)	_PORT3(port, _PORT_TX_DW3_GRP_A,  \
+						     _PORT_TX_DW3_GRP_B,  \
+						     _PORT_TX_DW3_GRP_C)
+#define BXT_PORT_TX_DW3_LN0(port)	_PORT3(port, _PORT_TX_DW3_LN0_A,  \
+						     _PORT_TX_DW3_LN0_B,  \
+						     _PORT_TX_DW3_LN0_C)
+#define   UNIQE_TRANGE_EN_METHOD	(1 << 27)
+
+#define _PORT_TX_DW4_LN0_A		0x162510
+#define _PORT_TX_DW4_LN0_B		0x6C510
+#define _PORT_TX_DW4_LN0_C		0x6C910
+#define _PORT_TX_DW4_GRP_A		0x162D10
+#define _PORT_TX_DW4_GRP_B		0x6CD10
+#define _PORT_TX_DW4_GRP_C		0x6CF10
+#define BXT_PORT_TX_DW4_LN0(port)	_PORT3(port, _PORT_TX_DW4_LN0_A,  \
+						     _PORT_TX_DW4_LN0_B,  \
+						     _PORT_TX_DW4_LN0_C)
+#define BXT_PORT_TX_DW4_GRP(port)	_PORT3(port, _PORT_TX_DW4_GRP_A,  \
+						     _PORT_TX_DW4_GRP_B,  \
+						     _PORT_TX_DW4_GRP_C)
+#define   DEEMPH_SHIFT			24
+#define   DE_EMPHASIS			(0xFF << DEEMPH_SHIFT)
+
+#define _PORT_TX_DW14_LN0_A		0x162538
+#define _PORT_TX_DW14_LN0_B		0x6C538
+#define _PORT_TX_DW14_LN0_C		0x6C938
+#define   LATENCY_OPTIM_SHIFT		30
+#define   LATENCY_OPTIM			(1 << LATENCY_OPTIM_SHIFT)
+#define BXT_PORT_TX_DW14_LN(port, lane)	(_PORT3((port), _PORT_TX_DW14_LN0_A,   \
+							_PORT_TX_DW14_LN0_B,   \
+							_PORT_TX_DW14_LN0_C) + \
+					 _BXT_LANE_OFFSET(lane))
+
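+
+_BXT_LANE_OFFSET() groups two lanes per 0x200 stride with the odd lane a further 0x80 up.
+A small standalone sketch (the _PORT_TX_DW14_LN0_B base is copied from above and the macro
+re-derived here; it is an illustration, not part of the patch) makes the resulting per-lane
+addresses explicit:
+
+#include <stdio.h>
+
+/* Sketch: reproduce the per-lane offset formula from _BXT_LANE_OFFSET(). */
+#define SKETCH_LANE_OFFSET(lane)	(((lane) >> 1) * 0x200 + ((lane) & 1) * 0x80)
+
+int main(void)
+{
+	unsigned int base = 0x6C538;	/* _PORT_TX_DW14_LN0_B, from above */
+	unsigned int lane;
+
+	for (lane = 0; lane < 4; lane++)
+		printf("lane %u -> 0x%X\n", lane, base + SKETCH_LANE_OFFSET(lane));
+	return 0;	/* prints 0x6C538, 0x6C5B8, 0x6C738, 0x6C7B8 */
+}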
 /*
  * Fence registers
  */
@@ -1150,6 +1387,7 @@
 /* control register for cpu gtt access */
 #define TILECTL				0x101000
 #define   TILECTL_SWZCTL			(1 << 0)
+#define   TILECTL_TLBPF			(1 << 1)
 #define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
 #define   TILECTL_BACKSNOOP_DIS		(1 << 3)
 
@@ -1554,9 +1792,7 @@
 #define   GEN9_F2_SS_DIS_SHIFT		20
 #define   GEN9_F2_SS_DIS_MASK		(0xf << GEN9_F2_SS_DIS_SHIFT)
 
-#define GEN8_EU_DISABLE0		0x9134
-#define GEN8_EU_DISABLE1		0x9138
-#define GEN8_EU_DISABLE2		0x913c
+#define GEN9_EU_DISABLE(slice)		(0x9134 + (slice)*0x4)
 
 #define GEN6_BSD_SLEEP_PSMI_CONTROL	0x12050
 #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
@@ -1788,16 +2024,19 @@
 #define   GMBUS_RATE_400KHZ	(2<<8) /* reserved on Pineview */
 #define   GMBUS_RATE_1MHZ	(3<<8) /* reserved on Pineview */
 #define   GMBUS_HOLD_EXT	(1<<7) /* 300ns hold time, rsvd on Pineview */
-#define   GMBUS_PORT_DISABLED	0
-#define   GMBUS_PORT_SSC	1
-#define   GMBUS_PORT_VGADDC	2
-#define   GMBUS_PORT_PANEL	3
-#define   GMBUS_PORT_DPD_CHV	3 /* HDMID_CHV */
-#define   GMBUS_PORT_DPC	4 /* HDMIC */
-#define   GMBUS_PORT_DPB	5 /* SDVO, HDMIB */
-#define   GMBUS_PORT_DPD	6 /* HDMID */
-#define   GMBUS_PORT_RESERVED	7 /* 7 reserved */
-#define   GMBUS_NUM_PORTS	(GMBUS_PORT_DPD - GMBUS_PORT_SSC + 1)
+#define   GMBUS_PIN_DISABLED	0
+#define   GMBUS_PIN_SSC		1
+#define   GMBUS_PIN_VGADDC	2
+#define   GMBUS_PIN_PANEL	3
+#define   GMBUS_PIN_DPD_CHV	3 /* HDMID_CHV */
+#define   GMBUS_PIN_DPC		4 /* HDMIC */
+#define   GMBUS_PIN_DPB		5 /* SDVO, HDMIB */
+#define   GMBUS_PIN_DPD		6 /* HDMID */
+#define   GMBUS_PIN_RESERVED	7 /* 7 reserved */
+#define   GMBUS_PIN_1_BXT	1
+#define   GMBUS_PIN_2_BXT	2
+#define   GMBUS_PIN_3_BXT	3
+#define   GMBUS_NUM_PINS	7 /* including 0 */
 #define GMBUS1			0x5104 /* command/status */
 #define   GMBUS_SW_CLR_INT	(1<<31)
 #define   GMBUS_SW_RDY		(1<<30)
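
With the port-to-pin rename above, BXT only exposes pins 1-3 while older platforms use pins
1-6 (pin 0 meaning "disabled"), so pin validation becomes platform dependent. The following
is a hypothetical sketch of such a check, not the driver's actual intel_gmbus_is_valid_pin()
implementation; all SKETCH_* names are illustrative only:

#include <stdbool.h>

#define SKETCH_PIN_DISABLED	0
#define SKETCH_PIN_DPD		6	/* last usable pin on pre-BXT platforms */
#define SKETCH_PIN_3_BXT	3	/* last usable pin on BXT */

/* Hypothetical shape of a platform-dependent GMBUS pin validity check. */
static bool sketch_gmbus_pin_is_valid(bool is_broxton, unsigned int pin)
{
	if (pin == SKETCH_PIN_DISABLED)
		return false;
	return pin <= (is_broxton ? SKETCH_PIN_3_BXT : SKETCH_PIN_DPD);
}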
@@ -2689,7 +2928,6 @@
 #define EDP_PSR_CTL(dev)			(EDP_PSR_BASE(dev) + 0)
 #define   EDP_PSR_ENABLE			(1<<31)
 #define   BDW_PSR_SINGLE_FRAME			(1<<30)
-#define   EDP_PSR_LINK_DISABLE			(0<<27)
 #define   EDP_PSR_LINK_STANDBY			(1<<27)
 #define   EDP_PSR_MIN_LINK_ENTRY_TIME_MASK	(3<<25)
 #define   EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES	(0<<25)
@@ -2749,6 +2987,20 @@
 #define   EDP_PSR_DEBUG_MASK_MEMUP	(1<<26)
 #define   EDP_PSR_DEBUG_MASK_HPD	(1<<25)
 
+#define EDP_PSR2_CTL			0x6f900
+#define   EDP_PSR2_ENABLE		(1<<31)
+#define   EDP_SU_TRACK_ENABLE		(1<<30)
+#define   EDP_MAX_SU_DISABLE_TIME(t)	((t)<<20)
+#define   EDP_MAX_SU_DISABLE_TIME_MASK	(0x1f<<20)
+#define   EDP_PSR2_TP2_TIME_500		(0<<8)
+#define   EDP_PSR2_TP2_TIME_100		(1<<8)
+#define   EDP_PSR2_TP2_TIME_2500	(2<<8)
+#define   EDP_PSR2_TP2_TIME_50		(3<<8)
+#define   EDP_PSR2_TP2_TIME_MASK	(3<<8)
+#define   EDP_PSR2_FRAME_BEFORE_SU_SHIFT 4
+#define   EDP_PSR2_FRAME_BEFORE_SU_MASK	(0xf<<4)
+#define   EDP_PSR2_IDLE_MASK		0xf
+
 /* VGA port control */
 #define ADPA			0x61100
 #define PCH_ADPA                0xe1100
@@ -4855,7 +5107,9 @@
 #define   PLANE_CTL_ALPHA_HW_PREMULTIPLY	(  3 << 4)
 #define   PLANE_CTL_ROTATE_MASK			0x3
 #define   PLANE_CTL_ROTATE_0			0x0
+#define   PLANE_CTL_ROTATE_90			0x1
 #define   PLANE_CTL_ROTATE_180			0x2
+#define   PLANE_CTL_ROTATE_270			0x3
 #define _PLANE_STRIDE_1_A			0x70188
 #define _PLANE_STRIDE_2_A			0x70288
 #define _PLANE_STRIDE_3_A			0x70388
@@ -5097,6 +5351,121 @@
 #define PS_WIN_SZ(pipe)		_PIPE(pipe, _PSA_WIN_SZ, _PSB_WIN_SZ)
 #define PS_WIN_POS(pipe)	_PIPE(pipe, _PSA_WIN_POS, _PSB_WIN_POS)
 
+/*
+ * Skylake scalers
+ */
+#define _PS_1A_CTRL      0x68180
+#define _PS_2A_CTRL      0x68280
+#define _PS_1B_CTRL      0x68980
+#define _PS_2B_CTRL      0x68A80
+#define _PS_1C_CTRL      0x69180
+#define PS_SCALER_EN        (1 << 31)
+#define PS_SCALER_MODE_MASK (3 << 28)
+#define PS_SCALER_MODE_DYN  (0 << 28)
+#define PS_SCALER_MODE_HQ  (1 << 28)
+#define PS_PLANE_SEL_MASK  (7 << 25)
+#define PS_PLANE_SEL(plane) ((plane + 1) << 25)
+#define PS_FILTER_MASK         (3 << 23)
+#define PS_FILTER_MEDIUM       (0 << 23)
+#define PS_FILTER_EDGE_ENHANCE (2 << 23)
+#define PS_FILTER_BILINEAR     (3 << 23)
+#define PS_VERT3TAP            (1 << 21)
+#define PS_VERT_INT_INVERT_FIELD1 (0 << 20)
+#define PS_VERT_INT_INVERT_FIELD0 (1 << 20)
+#define PS_PWRUP_PROGRESS         (1 << 17)
+#define PS_V_FILTER_BYPASS        (1 << 8)
+#define PS_VADAPT_EN              (1 << 7)
+#define PS_VADAPT_MODE_MASK        (3 << 5)
+#define PS_VADAPT_MODE_LEAST_ADAPT (0 << 5)
+#define PS_VADAPT_MODE_MOD_ADAPT   (1 << 5)
+#define PS_VADAPT_MODE_MOST_ADAPT  (3 << 5)
+
+#define _PS_PWR_GATE_1A     0x68160
+#define _PS_PWR_GATE_2A     0x68260
+#define _PS_PWR_GATE_1B     0x68960
+#define _PS_PWR_GATE_2B     0x68A60
+#define _PS_PWR_GATE_1C     0x69160
+#define PS_PWR_GATE_DIS_OVERRIDE       (1 << 31)
+#define PS_PWR_GATE_SETTLING_TIME_32   (0 << 3)
+#define PS_PWR_GATE_SETTLING_TIME_64   (1 << 3)
+#define PS_PWR_GATE_SETTLING_TIME_96   (2 << 3)
+#define PS_PWR_GATE_SETTLING_TIME_128  (3 << 3)
+#define PS_PWR_GATE_SLPEN_8             0
+#define PS_PWR_GATE_SLPEN_16            1
+#define PS_PWR_GATE_SLPEN_24            2
+#define PS_PWR_GATE_SLPEN_32            3
+
+#define _PS_WIN_POS_1A      0x68170
+#define _PS_WIN_POS_2A      0x68270
+#define _PS_WIN_POS_1B      0x68970
+#define _PS_WIN_POS_2B      0x68A70
+#define _PS_WIN_POS_1C      0x69170
+
+#define _PS_WIN_SZ_1A       0x68174
+#define _PS_WIN_SZ_2A       0x68274
+#define _PS_WIN_SZ_1B       0x68974
+#define _PS_WIN_SZ_2B       0x68A74
+#define _PS_WIN_SZ_1C       0x69174
+
+#define _PS_VSCALE_1A       0x68184
+#define _PS_VSCALE_2A       0x68284
+#define _PS_VSCALE_1B       0x68984
+#define _PS_VSCALE_2B       0x68A84
+#define _PS_VSCALE_1C       0x69184
+
+#define _PS_HSCALE_1A       0x68190
+#define _PS_HSCALE_2A       0x68290
+#define _PS_HSCALE_1B       0x68990
+#define _PS_HSCALE_2B       0x68A90
+#define _PS_HSCALE_1C       0x69190
+
+#define _PS_VPHASE_1A       0x68188
+#define _PS_VPHASE_2A       0x68288
+#define _PS_VPHASE_1B       0x68988
+#define _PS_VPHASE_2B       0x68A88
+#define _PS_VPHASE_1C       0x69188
+
+#define _PS_HPHASE_1A       0x68194
+#define _PS_HPHASE_2A       0x68294
+#define _PS_HPHASE_1B       0x68994
+#define _PS_HPHASE_2B       0x68A94
+#define _PS_HPHASE_1C       0x69194
+
+#define _PS_ECC_STAT_1A     0x681D0
+#define _PS_ECC_STAT_2A     0x682D0
+#define _PS_ECC_STAT_1B     0x689D0
+#define _PS_ECC_STAT_2B     0x68AD0
+#define _PS_ECC_STAT_1C     0x691D0
+
+#define _ID(id, a, b) ((a) + (id)*((b)-(a)))
+#define SKL_PS_CTRL(pipe, id) _PIPE(pipe,        \
+			_ID(id, _PS_1A_CTRL, _PS_2A_CTRL),       \
+			_ID(id, _PS_1B_CTRL, _PS_2B_CTRL))
+#define SKL_PS_PWR_GATE(pipe, id) _PIPE(pipe,    \
+			_ID(id, _PS_PWR_GATE_1A, _PS_PWR_GATE_2A), \
+			_ID(id, _PS_PWR_GATE_1B, _PS_PWR_GATE_2B))
+#define SKL_PS_WIN_POS(pipe, id) _PIPE(pipe,     \
+			_ID(id, _PS_WIN_POS_1A, _PS_WIN_POS_2A), \
+			_ID(id, _PS_WIN_POS_1B, _PS_WIN_POS_2B))
+#define SKL_PS_WIN_SZ(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_WIN_SZ_1A, _PS_WIN_SZ_2A),   \
+			_ID(id, _PS_WIN_SZ_1B, _PS_WIN_SZ_2B))
+#define SKL_PS_VSCALE(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_VSCALE_1A, _PS_VSCALE_2A),   \
+			_ID(id, _PS_VSCALE_1B, _PS_VSCALE_2B))
+#define SKL_PS_HSCALE(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_HSCALE_1A, _PS_HSCALE_2A),   \
+			_ID(id, _PS_HSCALE_1B, _PS_HSCALE_2B))
+#define SKL_PS_VPHASE(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_VPHASE_1A, _PS_VPHASE_2A),   \
+			_ID(id, _PS_VPHASE_1B, _PS_VPHASE_2B))
+#define SKL_PS_HPHASE(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_HPHASE_1A, _PS_HPHASE_2A),   \
+			_ID(id, _PS_HPHASE_1B, _PS_HPHASE_2B))
+#define SKL_PS_ECC_STAT(pipe, id)  _PIPE(pipe,     \
+			_ID(id, _PS_ECC_STAT_1A, _PS_ECC_STAT_2A),   \
+			_ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B))
+
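+
+Both _ID() and _PIPE() follow a base + index * stride form, which is what lets one
+SKL_PS_*() macro cover scalers 1/2 on pipes A/B. A short standalone sketch (assuming that
+form for _PIPE(), with register offsets copied from the defines above) resolves one
+(pipe, scaler) pair:
+
+#include <stdio.h>
+
+/* Sketch: reproduce the _ID()/_PIPE() arithmetic used by SKL_PS_CTRL() above. */
+#define SKETCH_ID(id, a, b)	((a) + (id) * ((b) - (a)))
+#define SKETCH_PIPE(pipe, a, b)	((a) + (pipe) * ((b) - (a)))
+
+int main(void)
+{
+	/* SKL_PS_CTRL(pipe B = 1, scaler id = 1) */
+	unsigned int reg = SKETCH_PIPE(1, SKETCH_ID(1, 0x68180, 0x68280),
+					  SKETCH_ID(1, 0x68980, 0x68A80));
+
+	printf("0x%X\n", reg);	/* prints 0x68A80, i.e. _PS_2B_CTRL */
+	return 0;
+}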
 /* legacy palette */
 #define _LGC_PALETTE_A           0x4a000
 #define _LGC_PALETTE_B           0x4a800
@@ -5219,9 +5588,11 @@
 #define  GEN8_PIPE_VSYNC		(1 << 1)
 #define  GEN8_PIPE_VBLANK		(1 << 0)
 #define  GEN9_PIPE_CURSOR_FAULT		(1 << 11)
+#define  GEN9_PIPE_PLANE4_FAULT		(1 << 10)
 #define  GEN9_PIPE_PLANE3_FAULT		(1 << 9)
 #define  GEN9_PIPE_PLANE2_FAULT		(1 << 8)
 #define  GEN9_PIPE_PLANE1_FAULT		(1 << 7)
+#define  GEN9_PIPE_PLANE4_FLIP_DONE	(1 << 6)
 #define  GEN9_PIPE_PLANE3_FLIP_DONE	(1 << 5)
 #define  GEN9_PIPE_PLANE2_FLIP_DONE	(1 << 4)
 #define  GEN9_PIPE_PLANE1_FLIP_DONE	(1 << 3)
@@ -5232,6 +5603,7 @@
 	 GEN8_PIPE_PRIMARY_FAULT)
 #define GEN9_DE_PIPE_IRQ_FAULT_ERRORS \
 	(GEN9_PIPE_CURSOR_FAULT | \
+	 GEN9_PIPE_PLANE4_FAULT | \
 	 GEN9_PIPE_PLANE3_FAULT | \
 	 GEN9_PIPE_PLANE2_FAULT | \
 	 GEN9_PIPE_PLANE1_FAULT)
@@ -5240,10 +5612,17 @@
 #define GEN8_DE_PORT_IMR 0x44444
 #define GEN8_DE_PORT_IIR 0x44448
 #define GEN8_DE_PORT_IER 0x4444c
-#define  GEN8_PORT_DP_A_HOTPLUG		(1 << 3)
 #define  GEN9_AUX_CHANNEL_D		(1 << 27)
 #define  GEN9_AUX_CHANNEL_C		(1 << 26)
 #define  GEN9_AUX_CHANNEL_B		(1 << 25)
+#define  BXT_DE_PORT_HP_DDIC		(1 << 5)
+#define  BXT_DE_PORT_HP_DDIB		(1 << 4)
+#define  BXT_DE_PORT_HP_DDIA		(1 << 3)
+#define  BXT_DE_PORT_HOTPLUG_MASK	(BXT_DE_PORT_HP_DDIA | \
+					 BXT_DE_PORT_HP_DDIB | \
+					 BXT_DE_PORT_HP_DDIC)
+#define  GEN8_PORT_DP_A_HOTPLUG		(1 << 3)
+#define  BXT_DE_PORT_GMBUS		(1 << 1)
 #define  GEN8_AUX_CHANNEL_A		(1 << 0)
 
 #define GEN8_DE_MISC_ISR 0x44460
@@ -5257,6 +5636,21 @@
 #define GEN8_PCU_IIR 0x444e8
 #define GEN8_PCU_IER 0x444ec
 
+/* BXT hotplug control */
+#define BXT_HOTPLUG_CTL			0xC4030
+#define   BXT_DDIA_HPD_ENABLE		(1 << 28)
+#define   BXT_DDIA_HPD_STATUS		(3 << 24)
+#define   BXT_DDIC_HPD_ENABLE		(1 << 12)
+#define   BXT_DDIC_HPD_STATUS		(3 << 8)
+#define   BXT_DDIB_HPD_ENABLE		(1 << 4)
+#define   BXT_DDIB_HPD_STATUS		(3 << 0)
+#define   BXT_HOTPLUG_CTL_MASK		(BXT_DDIA_HPD_ENABLE | \
+					 BXT_DDIB_HPD_ENABLE | \
+					 BXT_DDIC_HPD_ENABLE)
+#define   BXT_HPD_STATUS_MASK		(BXT_DDIA_HPD_STATUS | \
+					 BXT_DDIB_HPD_STATUS | \
+					 BXT_DDIC_HPD_STATUS)
+
 #define ILK_DISPLAY_CHICKEN2	0x42004
 /* Required on all Ironlake and Sandybridge according to the B-Spec. */
 #define  ILK_ELPIN_409_SELECT	(1 << 25)
@@ -5297,6 +5691,9 @@
 #define  DISP_FBC_WM_DIS		(1<<15)
 #define DISP_ARB_CTL2	0x45004
 #define  DISP_DATA_PARTITION_5_6	(1<<6)
+#define DBUF_CTL	0x45008
+#define  DBUF_POWER_REQUEST		(1<<31)
+#define  DBUF_POWER_STATE		(1<<30)
 #define GEN7_MSG_CTL	0x45010
 #define  WAIT_FOR_PCH_RESET_ACK		(1<<1)
 #define  WAIT_FOR_PCH_FLR_ACK		(1<<0)
@@ -5323,6 +5720,9 @@
 #define GEN7_L3SQCREG1				0xB010
 #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
 
+#define GEN8_L3SQCREG1				0xB100
+#define  BDW_WA_L3SQCREG1_DEFAULT		0x784000
+
 #define GEN7_L3CNTLREG1				0xB01C
 #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47FF8C
 #define  GEN7_L3AGDIS				(1<<19)
@@ -5346,6 +5746,10 @@
 #define  HDC_FORCE_NON_COHERENT			(1<<4)
 #define  HDC_BARRIER_PERFORMANCE_DISABLE	(1<<10)
 
+/* GEN9 chicken */
+#define SLICE_ECO_CHICKEN0			0x7308
+#define   PIXEL_MASK_CAMMING_DISABLE		(1 << 14)
+
 /* WaCatErrorRejectionIssue */
 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
 #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1<<11)
@@ -6088,6 +6492,7 @@
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
 
 #define GEN6_UCGCTL2				0x9404
+# define GEN6_VFUNIT_CLOCK_GATE_DISABLE			(1 << 31)
 # define GEN7_VDSUNIT_CLOCK_GATE_DISABLE		(1 << 30)
 # define GEN7_TDLUNIT_CLOCK_GATE_DISABLE		(1 << 22)
 # define GEN6_RCZUNIT_CLOCK_GATE_DISABLE		(1 << 13)
@@ -6106,6 +6511,7 @@
 #define GEN8_UCGCTL6				0x9430
 #define   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE	(1<<24)
 #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE	(1<<14)
+#define   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1<<28)
 
 #define GEN6_GFXPAUSE				0xA000
 #define GEN6_RPNSWREQ				0xA008
@@ -6185,6 +6591,8 @@
 #define GEN9_MEDIA_PG_IDLE_HYSTERESIS		0xA0C4
 #define GEN9_RENDER_PG_IDLE_HYSTERESIS		0xA0C8
 #define GEN9_PG_ENABLE				0xA210
+#define GEN9_RENDER_PG_ENABLE			(1<<0)
+#define GEN9_MEDIA_PG_ENABLE			(1<<1)
 
 #define VLV_CHICKEN_3				(VLV_DISPLAY_BASE + 0x7040C)
 #define  PIXEL_OVERLAP_CNT_MASK			(3 << 30)
@@ -6239,6 +6647,7 @@
 #define   GEN6_PCODE_WRITE_D_COMP		0x11
 #define   GEN6_ENCODE_RC6_VID(mv)		(((mv) - 245) / 5)
 #define   GEN6_DECODE_RC6_VID(vids)		(((vids) * 5) + 245)
+#define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
 #define   DISPLAY_IPS_CONTROL			0x19
 #define	  HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL	0x1A
 #define GEN6_PCODE_DATA				0x138128
@@ -6271,17 +6680,12 @@
 #define CHV_POWER_SS1_SIG2		0xa72c
 #define   CHV_EU311_PG_ENABLE		(1<<1)
 
-#define GEN9_SLICE0_PGCTL_ACK		0x804c
-#define GEN9_SLICE1_PGCTL_ACK		0x8050
-#define GEN9_SLICE2_PGCTL_ACK		0x8054
+#define GEN9_SLICE_PGCTL_ACK(slice)	(0x804c + (slice)*0x4)
 #define   GEN9_PGCTL_SLICE_ACK		(1 << 0)
+#define   GEN9_PGCTL_SS_ACK(subslice)	(1 << (2 + (subslice)*2))
 
-#define GEN9_SLICE0_SS01_EU_PGCTL_ACK	0x805c
-#define GEN9_SLICE0_SS23_EU_PGCTL_ACK	0x8060
-#define GEN9_SLICE1_SS01_EU_PGCTL_ACK	0x8064
-#define GEN9_SLICE1_SS23_EU_PGCTL_ACK	0x8068
-#define GEN9_SLICE2_SS01_EU_PGCTL_ACK	0x806c
-#define GEN9_SLICE2_SS23_EU_PGCTL_ACK	0x8070
+#define GEN9_SS01_EU_PGCTL_ACK(slice)	(0x805c + (slice)*0x8)
+#define GEN9_SS23_EU_PGCTL_ACK(slice)	(0x8060 + (slice)*0x8)
 #define   GEN9_PGCTL_SSA_EU08_ACK	(1 << 0)
 #define   GEN9_PGCTL_SSA_EU19_ACK	(1 << 2)
 #define   GEN9_PGCTL_SSA_EU210_ACK	(1 << 4)
@@ -6715,6 +7119,13 @@
 #define  CDCLK_FREQ_675_617		(3<<26)
 #define  CDCLK_FREQ_DECIMAL_MASK	(0x7ff)
 
+#define  BXT_CDCLK_CD2X_DIV_SEL_MASK	(3<<22)
+#define  BXT_CDCLK_CD2X_DIV_SEL_1	(0<<22)
+#define  BXT_CDCLK_CD2X_DIV_SEL_1_5	(1<<22)
+#define  BXT_CDCLK_CD2X_DIV_SEL_2	(2<<22)
+#define  BXT_CDCLK_CD2X_DIV_SEL_4	(3<<22)
+#define  BXT_CDCLK_SSA_PRECHARGE_ENABLE	(1<<16)
+
 /* LCPLL_CTL */
 #define LCPLL1_CTL		0x46010
 #define LCPLL2_CTL		0x46014
@@ -6779,6 +7190,20 @@
 #define GET_CFG_CR1_REG(id) (DPLL1_CFGCR1 + (id - SKL_DPLL1) * 8)
 #define GET_CFG_CR2_REG(id) (DPLL1_CFGCR2 + (id - SKL_DPLL1) * 8)
 
+/* BXT display engine PLL */
+#define BXT_DE_PLL_CTL			0x6d000
+#define   BXT_DE_PLL_RATIO(x)		(x)	/* {60,65,100} * 19.2MHz */
+#define   BXT_DE_PLL_RATIO_MASK		0xff
+
+#define BXT_DE_PLL_ENABLE		0x46070
+#define   BXT_DE_PLL_PLL_ENABLE		(1 << 31)
+#define   BXT_DE_PLL_LOCK		(1 << 30)
+
+/* GEN9 DC */
+#define DC_STATE_EN			0x45504
+#define  DC_STATE_EN_UPTO_DC5		(1<<0)
+#define  DC_STATE_EN_DC9		(1<<3)
+
 /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register,
  * since on HSW we can't write to it using I915_WRITE. */
 #define D_COMP_HSW			(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 5fda6c7..497cba5 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -220,7 +220,7 @@
 
 DECLARE_EVENT_CLASS(i915_page_table_entry_update,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table_entry *pt, u32 first, u32 count, u32 bits),
+		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
 	TP_ARGS(vm, pde, pt, first, count, bits),
 
 	TP_STRUCT__entry(
@@ -250,7 +250,7 @@
 
 DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table_entry *pt, u32 first, u32 count, u32 bits),
+		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
 	TP_ARGS(vm, pde, pt, first, count, bits)
 );
 
@@ -504,7 +504,6 @@
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
-			     __field(u32, uniq)
 			     __field(u32, seqno)
 			     ),
 
@@ -513,13 +512,11 @@
 						i915_gem_request_get_ring(req);
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->uniq = req ? req->uniq : 0;
 			   __entry->seqno = i915_gem_request_get_seqno(req);
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, uniq=%u, seqno=%u",
-		      __entry->dev, __entry->ring, __entry->uniq,
-		      __entry->seqno)
+	    TP_printk("dev=%u, ring=%u, seqno=%u",
+		      __entry->dev, __entry->ring, __entry->seqno)
 );
 
 DEFINE_EVENT(i915_gem_request, i915_gem_request_add,
@@ -564,7 +561,6 @@
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
-			     __field(u32, uniq)
 			     __field(u32, seqno)
 			     __field(bool, blocking)
 			     ),
@@ -580,14 +576,13 @@
 						i915_gem_request_get_ring(req);
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->uniq = req ? req->uniq : 0;
 			   __entry->seqno = i915_gem_request_get_seqno(req);
 			   __entry->blocking =
 				     mutex_is_locked(&ring->dev->struct_mutex);
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, uniq=%u, seqno=%u, blocking=%s",
-		      __entry->dev, __entry->ring, __entry->uniq,
+	    TP_printk("dev=%u, ring=%u, seqno=%u, blocking=%s",
+		      __entry->dev, __entry->ring,
 		      __entry->seqno, __entry->blocking ?  "yes (NB)" : "no")
 );
 
@@ -596,33 +591,6 @@
 	    TP_ARGS(req)
 );
 
-DECLARE_EVENT_CLASS(i915_ring,
-	    TP_PROTO(struct intel_engine_cs *ring),
-	    TP_ARGS(ring),
-
-	    TP_STRUCT__entry(
-			     __field(u32, dev)
-			     __field(u32, ring)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->dev = ring->dev->primary->index;
-			   __entry->ring = ring->id;
-			   ),
-
-	    TP_printk("dev=%u, ring=%u", __entry->dev, __entry->ring)
-);
-
-DEFINE_EVENT(i915_ring, i915_ring_wait_begin,
-	    TP_PROTO(struct intel_engine_cs *ring),
-	    TP_ARGS(ring)
-);
-
-DEFINE_EVENT(i915_ring, i915_ring_wait_end,
-	    TP_PROTO(struct intel_engine_cs *ring),
-	    TP_ARGS(ring)
-);
-
 TRACE_EVENT(i915_flip_request,
 	    TP_PROTO(int plane, struct drm_i915_gem_object *obj),
 
diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
index 3903b90..3c4b7cd 100644
--- a/drivers/gpu/drm/i915/intel_atomic.c
+++ b/drivers/gpu/drm/i915/intel_atomic.c
@@ -48,6 +48,8 @@
 	int ncrtcs = dev->mode_config.num_crtc;
 	int nconnectors = dev->mode_config.num_connector;
 	enum pipe nuclear_pipe = INVALID_PIPE;
+	struct intel_crtc *nuclear_crtc = NULL;
+	struct intel_crtc_state *crtc_state = NULL;
 	int ret;
 	int i;
 	bool not_nuclear = false;
@@ -76,8 +78,14 @@
 	state->allow_modeset = false;
 	for (i = 0; i < ncrtcs; i++) {
 		struct intel_crtc *crtc = to_intel_crtc(state->crtcs[i]);
+		if (crtc)
+			memset(&crtc->atomic, 0, sizeof(crtc->atomic));
 		if (crtc && crtc->pipe != nuclear_pipe)
 			not_nuclear = true;
+		if (crtc && crtc->pipe == nuclear_pipe) {
+			nuclear_crtc = crtc;
+			crtc_state = to_intel_crtc_state(state->crtc_states[i]);
+		}
 	}
 	for (i = 0; i < nconnectors; i++)
 		if (state->connectors[i] != NULL)
@@ -92,6 +100,11 @@
 	if (ret)
 		return ret;
 
+	/* FIXME: move to crtc atomic check function once it is ready */
+	ret = intel_atomic_setup_scalers(dev, nuclear_crtc, crtc_state);
+	if (ret)
+		return ret;
+
 	return ret;
 }
 
@@ -155,6 +168,18 @@
 		swap(state->plane_states[i], plane->state);
 		plane->state->state = NULL;
 	}
+
+	/* swap crtc_state */
+	for (i = 0; i < dev->mode_config.num_crtc; i++) {
+		struct drm_crtc *crtc = state->crtcs[i];
+		if (!crtc) {
+			continue;
+		}
+
+		to_intel_crtc(crtc)->config->scaler_state =
+			to_intel_crtc_state(state->crtc_states[i])->scaler_state;
+	}
+
 	drm_atomic_helper_commit_planes(dev, state);
 	drm_atomic_helper_wait_for_vblanks(dev, state);
 	drm_atomic_helper_cleanup_planes(dev, state);
@@ -241,3 +266,151 @@
 {
 	drm_atomic_helper_crtc_destroy_state(crtc, state);
 }
+
+/**
+ * intel_atomic_setup_scalers() - setup scalers for crtc per staged requests
+ * @dev: DRM device
+ * @intel_crtc: intel crtc
+ * @crtc_state: incoming crtc_state to validate and setup scalers
+ *
+ * This function sets up scalers based on staged scaling requests for
+ * @intel_crtc and its planes. It is called from the crtc level check path. If
+ * the request is supportable, it attaches scalers to the requested planes and crtc.
+ *
+ * This function takes into account the current scaler(s) in use by any planes
+ * that are not part of this atomic state.
+ *
+ *  Returns:
+ *         0 - scalers were setup successfully
+ *         error code - otherwise
+ */
+int intel_atomic_setup_scalers(struct drm_device *dev,
+	struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state)
+{
+	struct drm_plane *plane = NULL;
+	struct intel_plane *intel_plane;
+	struct intel_plane_state *plane_state = NULL;
+	struct intel_crtc_scaler_state *scaler_state;
+	struct drm_atomic_state *drm_state;
+	int num_scalers_need;
+	int i, j;
+
+	if (INTEL_INFO(dev)->gen < 9 || !intel_crtc || !crtc_state)
+		return 0;
+
+	scaler_state = &crtc_state->scaler_state;
+	drm_state = crtc_state->base.state;
+
+	num_scalers_need = hweight32(scaler_state->scaler_users);
+	DRM_DEBUG_KMS("crtc_state = %p need = %d avail = %d scaler_users = 0x%x\n",
+		crtc_state, num_scalers_need, intel_crtc->num_scalers,
+		scaler_state->scaler_users);
+
+	/*
+	 * High level flow:
+	 * - staged scaler requests are already in scaler_state->scaler_users
+	 * - check whether staged scaling requests can be supported
+	 * - add planes using scalers that aren't in current transaction
+	 * - assign scalers to requested users
+	 * - as part of plane commit, scalers will be committed
+	 *   (i.e., either attached or detached) to respective planes in hw
+	 * - as part of crtc_commit, scaler will be either attached or detached
+	 *   to crtc in hw
+	 */
+
+	/* fail if required scalers > available scalers */
+	if (num_scalers_need > intel_crtc->num_scalers){
+		DRM_DEBUG_KMS("Too many scaling requests %d > %d\n",
+			num_scalers_need, intel_crtc->num_scalers);
+		return -EINVAL;
+	}
+
+	/* walk through scaler_users bits and start assigning scalers */
+	for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) {
+		int *scaler_id;
+
+		/* skip if scaler not required */
+		if (!(scaler_state->scaler_users & (1 << i)))
+			continue;
+
+		if (i == SKL_CRTC_INDEX) {
+			/* panel fitter case: assign as a crtc scaler */
+			scaler_id = &scaler_state->scaler_id;
+		} else {
+			if (!drm_state)
+				continue;
+
+			/* plane scaler case: assign as a plane scaler */
+			/* find the plane that set the bit as scaler_user */
+			plane = drm_state->planes[i];
+
+			/*
+			 * to enable/disable hq mode, add planes that are using a scaler
+			 * into this transaction
+			 */
+			if (!plane) {
+				struct drm_plane_state *state;
+				plane = drm_plane_from_index(dev, i);
+				state = drm_atomic_get_plane_state(drm_state, plane);
+				if (IS_ERR(state)) {
+					DRM_DEBUG_KMS("Failed to add [PLANE:%d] to drm_state\n",
+						plane->base.id);
+					return PTR_ERR(state);
+				}
+			}
+
+			intel_plane = to_intel_plane(plane);
+
+			/* plane on different crtc cannot be a scaler user of this crtc */
+			if (WARN_ON(intel_plane->pipe != intel_crtc->pipe)) {
+				continue;
+			}
+
+			plane_state = to_intel_plane_state(drm_state->plane_states[i]);
+			scaler_id = &plane_state->scaler_id;
+		}
+
+		if (*scaler_id < 0) {
+			/* find a free scaler */
+			for (j = 0; j < intel_crtc->num_scalers; j++) {
+				if (!scaler_state->scalers[j].in_use) {
+					scaler_state->scalers[j].in_use = 1;
+					*scaler_id = scaler_state->scalers[j].id;
+					DRM_DEBUG_KMS("Attached scaler id %u.%u to %s:%d\n",
+						intel_crtc->pipe,
+						i == SKL_CRTC_INDEX ? scaler_state->scaler_id :
+							plane_state->scaler_id,
+						i == SKL_CRTC_INDEX ? "CRTC" : "PLANE",
+						i == SKL_CRTC_INDEX ?  intel_crtc->base.base.id :
+						plane->base.id);
+					break;
+				}
+			}
+		}
+
+		if (WARN_ON(*scaler_id < 0)) {
+			DRM_DEBUG_KMS("Cannot find scaler for %s:%d\n",
+				i == SKL_CRTC_INDEX ? "CRTC" : "PLANE",
+				i == SKL_CRTC_INDEX ? intel_crtc->base.base.id:plane->base.id);
+			continue;
+		}
+
+		/* set scaler mode */
+		if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) {
+			/*
+			 * when only 1 scaler is in use on either pipe A or B,
+			 * scaler 0 operates in high quality (HQ) mode.
+			 * In this case use scaler 0 to take advantage of HQ mode
+			 */
+			*scaler_id = 0;
+			scaler_state->scalers[0].in_use = 1;
+			scaler_state->scalers[0].mode = PS_SCALER_MODE_HQ;
+			scaler_state->scalers[1].in_use = 0;
+		} else {
+			scaler_state->scalers[*scaler_id].mode = PS_SCALER_MODE_DYN;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index cb383a0..dc8e136 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -162,6 +162,30 @@
 			(1 << drm_plane_index(plane));
 	}
 
+	if (state->fb && intel_rotation_90_or_270(state->rotation)) {
+		if (!(state->fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
+			state->fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)) {
+			DRM_DEBUG_KMS("Y/Yf tiling required for 90/270!\n");
+			return -EINVAL;
+		}
+
+		/*
+		 * 90/270 is not allowed with RGB64 16:16:16:16,
+		 * RGB 16-bit 5:6:5, and Indexed 8-bit.
+		 * TBD: Add RGB64 case once it's added to the supported format list.
+		 */
+		switch (state->fb->pixel_format) {
+		case DRM_FORMAT_C8:
+		case DRM_FORMAT_RGB565:
+			DRM_DEBUG_KMS("Unsupported pixel format %s for 90/270!\n",
+					drm_get_format_name(state->fb->pixel_format));
+			return -EINVAL;
+
+		default:
+			break;
+		}
+	}
+
 	return intel_plane->check_plane(plane, intel_state);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 2396cc7..f72e93a 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -28,7 +28,6 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_edid.h>
-#include "intel_drv.h"
 #include "i915_drv.h"
 
 /**
@@ -485,7 +484,8 @@
 		return -ENODEV;
 
 	intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
-	ret = intel_ddi_get_cdclk_freq(dev_priv);
+	ret = dev_priv->display.get_display_clock_speed(dev_priv->dev);
+
 	intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index c684085..c08368c 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -438,7 +438,7 @@
 		if (block_size >= sizeof(*general)) {
 			int bus_pin = general->crt_ddc_gmbus_pin;
 			DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin);
-			if (intel_gmbus_is_port_valid(bus_pin))
+			if (intel_gmbus_is_valid_pin(dev_priv, bus_pin))
 				dev_priv->vbt.crt_ddc_pin = bus_pin;
 		} else {
 			DRM_DEBUG_KMS("BDB_GD too small (%d). Invalid.\n",
@@ -447,6 +447,12 @@
 	}
 }
 
+static union child_device_config *
+child_device_ptr(struct bdb_general_definitions *p_defs, int i)
+{
+	return (void *) &p_defs->devices[i * p_defs->child_dev_size];
+}
+
 static void
 parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
 			  struct bdb_header *bdb)
@@ -476,10 +482,10 @@
 	block_size = get_blocksize(p_defs);
 	/* get the number of child device */
 	child_device_num = (block_size - sizeof(*p_defs)) /
-				sizeof(*p_child);
+		p_defs->child_dev_size;
 	count = 0;
 	for (i = 0; i < child_device_num; i++) {
-		p_child = &(p_defs->devices[i]);
+		p_child = child_device_ptr(p_defs, i);
 		if (!p_child->old.device_type) {
 			/* skip the device block if device type is invalid */
 			continue;
@@ -1067,25 +1073,19 @@
 		DRM_DEBUG_KMS("No general definition block is found, no devices defined.\n");
 		return;
 	}
-	/* judge whether the size of child device meets the requirements.
-	 * If the child device size obtained from general definition block
-	 * is different with sizeof(struct child_device_config), skip the
-	 * parsing of sdvo device info
-	 */
-	if (p_defs->child_dev_size != sizeof(*p_child)) {
-		/* different child dev size . Ignore it */
-		DRM_DEBUG_KMS("different child size is found. Invalid.\n");
+	if (p_defs->child_dev_size < sizeof(*p_child)) {
+		DRM_ERROR("General definition block child device size is too small.\n");
 		return;
 	}
 	/* get the block size of general definitions */
 	block_size = get_blocksize(p_defs);
 	/* get the number of child device */
 	child_device_num = (block_size - sizeof(*p_defs)) /
-				sizeof(*p_child);
+				p_defs->child_dev_size;
 	count = 0;
 	/* get the number of child device that is present */
 	for (i = 0; i < child_device_num; i++) {
-		p_child = &(p_defs->devices[i]);
+		p_child = child_device_ptr(p_defs, i);
 		if (!p_child->common.device_type) {
 			/* skip the device block if device type is invalid */
 			continue;
@@ -1105,7 +1105,7 @@
 	dev_priv->vbt.child_dev_num = count;
 	count = 0;
 	for (i = 0; i < child_device_num; i++) {
-		p_child = &(p_defs->devices[i]);
+		p_child = child_device_ptr(p_defs, i);
 		if (!p_child->common.device_type) {
 			/* skip the device block if device type is invalid */
 			continue;
@@ -1133,7 +1133,7 @@
 	struct drm_device *dev = dev_priv->dev;
 	enum port port;
 
-	dev_priv->vbt.crt_ddc_pin = GMBUS_PORT_VGADDC;
+	dev_priv->vbt.crt_ddc_pin = GMBUS_PIN_VGADDC;
 
 	/* Default to having backlight */
 	dev_priv->vbt.backlight.present = true;
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index 6afd5be..af0b476 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -277,9 +277,9 @@
 	 * And the device num is related with the size of general definition
 	 * block. It is obtained by using the following formula:
 	 * number = (block_size - sizeof(bdb_general_definitions))/
-	 *	     sizeof(child_device_config);
+	 *	     defs->child_dev_size;
 	 */
-	union child_device_config devices[0];
+	uint8_t devices[0];
 } __packed;
 
 /* Mask for DRRS / Panel Channel / SSC / BLT control bits extraction */
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 515d712..93bb515 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -747,7 +747,7 @@
 		goto out;
 
 	/* Try to probe digital port for output in DVI-I -> VGA mode. */
-	i2c = intel_gmbus_get_adapter(dev_priv, GMBUS_PORT_DPB);
+	i2c = intel_gmbus_get_adapter(dev_priv, GMBUS_PIN_DPB);
 	ret = intel_crt_ddc_get_modes(connector, i2c);
 
 out:
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 3eb0efc..9c1e74a 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -155,33 +155,100 @@
 
 
 static const struct ddi_buf_trans skl_ddi_translations_hdmi[] = {
-					/* Idx	NT mV   T mV    db  */
-	{ 0x00004014, 0x00000087 },	/* 0:	800	1000	2   */
+	{ 0x00000018, 0x000000ac },
+	{ 0x00005012, 0x0000009d },
+	{ 0x00007011, 0x00000088 },
+	{ 0x00000018, 0x000000a1 },
+	{ 0x00000018, 0x00000098 },
+	{ 0x00004013, 0x00000088 },
+	{ 0x00006012, 0x00000087 },
+	{ 0x00000018, 0x000000df },
+	{ 0x00003015, 0x00000087 },
+	{ 0x00003015, 0x000000c7 },
+	{ 0x00000018, 0x000000c7 },
 };
 
-enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder)
+struct bxt_ddi_buf_trans {
+	u32 margin;	/* swing value */
+	u32 scale;	/* scale value */
+	u32 enable;	/* scale enable */
+	u32 deemphasis;
+	bool default_index; /* true if the entry represents default value */
+};
+
+/* BSpec does not define separate vswing/pre-emphasis values for eDP.
+ * Using DP values for eDP as well.
+ */
+static const struct bxt_ddi_buf_trans bxt_ddi_translations_dp[] = {
+					/* Idx	NT mV diff	db  */
+	{ 52,  0,    0, 128, true  },	/* 0:	400		0   */
+	{ 78,  0,    0, 85,  false },	/* 1:	400		3.5 */
+	{ 104, 0,    0, 64,  false },	/* 2:	400		6   */
+	{ 154, 0,    0, 43,  false },	/* 3:	400		9.5 */
+	{ 77,  0,    0, 128, false },	/* 4:	600		0   */
+	{ 116, 0,    0, 85,  false },	/* 5:	600		3.5 */
+	{ 154, 0,    0, 64,  false },	/* 6:	600		6   */
+	{ 102, 0,    0, 128, false },	/* 7:	800		0   */
+	{ 154, 0,    0, 85,  false },	/* 8:	800		3.5 */
+	{ 154, 0x9A, 1, 128, false },  /* 9:	1200		0   */
+};
+
+/* BSpec has 2 recommended values - entries 0 and 8.
+ * Using the entry with higher vswing.
+ */
+static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = {
+					/* Idx	NT mV diff	db  */
+	{ 52,  0,    0, 128, false },	/* 0:	400		0   */
+	{ 52,  0,    0, 85,  false },	/* 1:	400		3.5 */
+	{ 52,  0,    0, 64,  false },	/* 2:	400		6   */
+	{ 42,  0,    0, 43,  false },	/* 3:	400		9.5 */
+	{ 77,  0,    0, 128, false },	/* 4:	600		0   */
+	{ 77,  0,    0, 85,  false },	/* 5:	600		3.5 */
+	{ 77,  0,    0, 64,  false },	/* 6:	600		6   */
+	{ 102, 0,    0, 128, false },	/* 7:	800		0   */
+	{ 102, 0,    0, 85,  false },	/* 8:	800		3.5 */
+	{ 154, 0x9A, 1, 128, true },	/* 9:	1200		0   */
+};
+
+static void ddi_get_encoder_port(struct intel_encoder *intel_encoder,
+				 struct intel_digital_port **dig_port,
+				 enum port *port)
 {
 	struct drm_encoder *encoder = &intel_encoder->base;
 	int type = intel_encoder->type;
 
 	if (type == INTEL_OUTPUT_DP_MST) {
-		struct intel_digital_port *intel_dig_port = enc_to_mst(encoder)->primary;
-		return intel_dig_port->port;
+		*dig_port = enc_to_mst(encoder)->primary;
+		*port = (*dig_port)->port;
 	} else if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP ||
 	    type == INTEL_OUTPUT_HDMI || type == INTEL_OUTPUT_UNKNOWN) {
-		struct intel_digital_port *intel_dig_port =
-			enc_to_dig_port(encoder);
-		return intel_dig_port->port;
-
+		*dig_port = enc_to_dig_port(encoder);
+		*port = (*dig_port)->port;
 	} else if (type == INTEL_OUTPUT_ANALOG) {
-		return PORT_E;
-
+		*dig_port = NULL;
+		*port = PORT_E;
 	} else {
 		DRM_ERROR("Invalid DDI encoder type %d\n", type);
 		BUG();
 	}
 }
 
+enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder)
+{
+	struct intel_digital_port *dig_port;
+	enum port port;
+
+	ddi_get_encoder_port(intel_encoder, &dig_port, &port);
+
+	return port;
+}
+
+static bool
+intel_dig_port_supports_hdmi(const struct intel_digital_port *intel_dig_port)
+{
+	return intel_dig_port->hdmi.hdmi_reg;
+}
+
 /*
  * Starting with Haswell, DDI port buffers must be programmed with correct
  * values in advance. The buffer values are different for FDI and DP modes,
@@ -189,7 +256,8 @@
  * in either FDI or DP modes only, as HDMI connections will work with both
  * of those
  */
-static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port)
+static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port,
+				      bool supports_hdmi)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 reg;
@@ -202,7 +270,15 @@
 	const struct ddi_buf_trans *ddi_translations_hdmi;
 	const struct ddi_buf_trans *ddi_translations;
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_BROXTON(dev)) {
+		if (!supports_hdmi)
+			return;
+
+		/* Vswing programming for HDMI */
+		bxt_ddi_vswing_sequence(dev, hdmi_level, port,
+					INTEL_OUTPUT_HDMI);
+		return;
+	} else if (IS_SKYLAKE(dev)) {
 		ddi_translations_fdi = NULL;
 		ddi_translations_dp = skl_ddi_translations_dp;
 		n_dp_entries = ARRAY_SIZE(skl_ddi_translations_dp);
@@ -214,16 +290,9 @@
 			n_edp_entries = ARRAY_SIZE(skl_ddi_translations_dp);
 		}
 
-		/*
-		 * On SKL, the recommendation from the hw team is to always use
-		 * a certain type of level shifter (and thus the corresponding
-		 * 800mV+2dB entry). Given that's the only validated entry, we
-		 * override what is in the VBT, at least until further notice.
-		 */
-		hdmi_level = 0;
 		ddi_translations_hdmi = skl_ddi_translations_hdmi;
 		n_hdmi_entries = ARRAY_SIZE(skl_ddi_translations_hdmi);
-		hdmi_default_entry = 0;
+		hdmi_default_entry = 7;
 	} else if (IS_BROADWELL(dev)) {
 		ddi_translations_fdi = bdw_ddi_translations_fdi;
 		ddi_translations_dp = bdw_ddi_translations_dp;
@@ -290,6 +359,9 @@
 		reg += 4;
 	}
 
+	if (!supports_hdmi)
+		return;
+
 	/* Choose a good default if VBT is badly populated */
 	if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
 	    hdmi_level >= n_hdmi_entries)
@@ -307,13 +379,28 @@
  */
 void intel_prepare_ddi(struct drm_device *dev)
 {
-	int port;
+	struct intel_encoder *intel_encoder;
+	bool visited[I915_MAX_PORTS] = { 0, };
 
 	if (!HAS_DDI(dev))
 		return;
 
-	for (port = PORT_A; port <= PORT_E; port++)
-		intel_prepare_ddi_buffers(dev, port);
+	for_each_intel_encoder(dev, intel_encoder) {
+		struct intel_digital_port *intel_dig_port;
+		enum port port;
+		bool supports_hdmi;
+
+		ddi_get_encoder_port(intel_encoder, &intel_dig_port, &port);
+
+		if (visited[port])
+			continue;
+
+		supports_hdmi = intel_dig_port &&
+				intel_dig_port_supports_hdmi(intel_dig_port);
+
+		intel_prepare_ddi_buffers(dev, port, supports_hdmi);
+		visited[port] = true;
+	}
 }
 
 static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv,
@@ -322,7 +409,7 @@
 	uint32_t reg = DDI_BUF_CTL(port);
 	int i;
 
-	for (i = 0; i < 8; i++) {
+	for (i = 0; i < 16; i++) {
 		udelay(1);
 		if (I915_READ(reg) & DDI_BUF_IS_IDLE)
 			return;
@@ -491,7 +578,7 @@
 	return ret;
 }
 
-static struct intel_encoder *
+struct intel_encoder *
 intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
@@ -878,6 +965,32 @@
 		pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock;
 }
 
+static int bxt_calc_pll_link(struct drm_i915_private *dev_priv,
+				enum intel_dpll_id dpll)
+{
+	/* FIXME formula not available in bspec */
+	return 0;
+}
+
+static void bxt_ddi_clock_get(struct intel_encoder *encoder,
+				struct intel_crtc_state *pipe_config)
+{
+	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
+	enum port port = intel_ddi_get_encoder_port(encoder);
+	uint32_t dpll = port;
+
+	pipe_config->port_clock =
+		bxt_calc_pll_link(dev_priv, dpll);
+
+	if (pipe_config->has_dp_encoder)
+		pipe_config->base.adjusted_mode.crtc_clock =
+			intel_dotclock_calculate(pipe_config->port_clock,
+							&pipe_config->dp_m_n);
+	else
+		pipe_config->base.adjusted_mode.crtc_clock =
+							pipe_config->port_clock;
+}
+
 void intel_ddi_clock_get(struct intel_encoder *encoder,
 			 struct intel_crtc_state *pipe_config)
 {
@@ -885,8 +998,10 @@
 
 	if (INTEL_INFO(dev)->gen <= 8)
 		hsw_ddi_clock_get(encoder, pipe_config);
-	else
+	else if (IS_SKYLAKE(dev))
 		skl_ddi_clock_get(encoder, pipe_config);
+	else if (IS_BROXTON(dev))
+		bxt_ddi_clock_get(encoder, pipe_config);
 }
 
 static void
@@ -1210,6 +1325,132 @@
 	return true;
 }
 
+/* bxt clock parameters */
+struct bxt_clk_div {
+	uint32_t p1;
+	uint32_t p2;
+	uint32_t m2_int;
+	uint32_t m2_frac;
+	bool m2_frac_en;
+	uint32_t n;
+	uint32_t prop_coef;
+	uint32_t int_coef;
+	uint32_t gain_ctl;
+	uint32_t targ_cnt;
+	uint32_t lanestagger;
+};
+
+/* pre-calculated values for DP linkrates */
+static struct bxt_clk_div bxt_dp_clk_val[7] = {
+	/* 162 */ {4, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
+	/* 270 */ {4, 1, 27,       0, 0, 1, 3,  8, 1, 9, 0xd},
+	/* 540 */ {2, 1, 27,       0, 0, 1, 3,  8, 1, 9, 0x18},
+	/* 216 */ {3, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
+	/* 243 */ {4, 1, 24, 1258291, 1, 1, 5, 11, 2, 9, 0xd},
+	/* 324 */ {4, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
+	/* 432 */ {3, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0x18}
+};
+
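+
+In bxt_dp_clk_val, m2_frac is the 22-bit fractional part of the feedback divider (matching
+PORT_PLL_M2_FRAC_MASK and the ">> 22" split done in bxt_ddi_pll_select() below), so the
+effective divider is m2_int + m2_frac / 2^22. A tiny sketch (illustration only) decodes the
+value shared by several rows of the table:
+
+#include <stdio.h>
+
+/* Sketch: decode the fixed-point m2 value from the 162 MHz row above
+ * (m2_frac is in units of 1/2^22, per PORT_PLL_M2_FRAC_MASK). */
+int main(void)
+{
+	unsigned int m2_int = 32, m2_frac = 1677722;
+
+	printf("effective m2 = %.4f\n", m2_int + m2_frac / (double)(1 << 22));
+	return 0;	/* prints 32.4000 */
+}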
+static bool
+bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
+		   struct intel_crtc_state *crtc_state,
+		   struct intel_encoder *intel_encoder,
+		   int clock)
+{
+	struct intel_shared_dpll *pll;
+	struct bxt_clk_div clk_div = {0};
+
+	if (intel_encoder->type == INTEL_OUTPUT_HDMI) {
+		intel_clock_t best_clock;
+
+		/* Calculate HDMI div */
+		/*
+		 * FIXME: tie the following calculation into
+		 * i9xx_crtc_compute_clock
+		 */
+		if (!bxt_find_best_dpll(crtc_state, clock, &best_clock)) {
+			DRM_DEBUG_DRIVER("no PLL dividers found for clock %d pipe %c\n",
+					 clock, pipe_name(intel_crtc->pipe));
+			return false;
+		}
+
+		clk_div.p1 = best_clock.p1;
+		clk_div.p2 = best_clock.p2;
+		WARN_ON(best_clock.m1 != 2);
+		clk_div.n = best_clock.n;
+		clk_div.m2_int = best_clock.m2 >> 22;
+		clk_div.m2_frac = best_clock.m2 & ((1 << 22) - 1);
+		clk_div.m2_frac_en = clk_div.m2_frac != 0;
+
+		/* FIXME: set coef, gain, targcnt based on freq band */
+		clk_div.prop_coef = 5;
+		clk_div.int_coef = 11;
+		clk_div.gain_ctl = 2;
+		clk_div.targ_cnt = 9;
+		if (clock > 270000)
+			clk_div.lanestagger = 0x18;
+		else if (clock > 135000)
+			clk_div.lanestagger = 0x0d;
+		else if (clock > 67000)
+			clk_div.lanestagger = 0x07;
+		else if (clock > 33000)
+			clk_div.lanestagger = 0x04;
+		else
+			clk_div.lanestagger = 0x02;
+	} else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
+			intel_encoder->type == INTEL_OUTPUT_EDP) {
+		struct drm_encoder *encoder = &intel_encoder->base;
+		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+		switch (intel_dp->link_bw) {
+		case DP_LINK_BW_1_62:
+			clk_div = bxt_dp_clk_val[0];
+			break;
+		case DP_LINK_BW_2_7:
+			clk_div = bxt_dp_clk_val[1];
+			break;
+		case DP_LINK_BW_5_4:
+			clk_div = bxt_dp_clk_val[2];
+			break;
+		default:
+			clk_div = bxt_dp_clk_val[0];
+			DRM_ERROR("Unknown link rate\n");
+		}
+	}
+
+	crtc_state->dpll_hw_state.ebb0 =
+		PORT_PLL_P1(clk_div.p1) | PORT_PLL_P2(clk_div.p2);
+	crtc_state->dpll_hw_state.pll0 = clk_div.m2_int;
+	crtc_state->dpll_hw_state.pll1 = PORT_PLL_N(clk_div.n);
+	crtc_state->dpll_hw_state.pll2 = clk_div.m2_frac;
+
+	if (clk_div.m2_frac_en)
+		crtc_state->dpll_hw_state.pll3 =
+			PORT_PLL_M2_FRAC_ENABLE;
+
+	crtc_state->dpll_hw_state.pll6 =
+		clk_div.prop_coef | PORT_PLL_INT_COEFF(clk_div.int_coef);
+	crtc_state->dpll_hw_state.pll6 |=
+		PORT_PLL_GAIN_CTL(clk_div.gain_ctl);
+
+	crtc_state->dpll_hw_state.pll8 = clk_div.targ_cnt;
+
+	crtc_state->dpll_hw_state.pcsdw12 =
+		LANESTAGGER_STRAP_OVRD | clk_div.lanestagger;
+
+	pll = intel_get_shared_dpll(intel_crtc, crtc_state);
+	if (pll == NULL) {
+		DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
+			pipe_name(intel_crtc->pipe));
+		return false;
+	}
+
+	/* shared DPLL id 0 is DPLL A */
+	crtc_state->ddi_pll_sel = pll->id;
+
+	return true;
+}
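
For reference, the m2 value split above uses a 22-bit fractional fixed-point format (the same encoding as the precomputed DP table). A small standalone sketch, not part of the patch, showing how the integer and fractional fields recombine for the 162 MHz entry of bxt_dp_clk_val:

#include <stdio.h>

int main(void)
{
	/* values from the 162 MHz entry of bxt_dp_clk_val above */
	unsigned int m2_int = 32, m2_frac = 1677722;
	/* recombine: integer part in the upper bits, 22 fractional bits below */
	unsigned int m2 = (m2_int << 22) | m2_frac;

	printf("m2 = %u, effective multiplier ~= %f\n",
	       m2, m2 / (double)(1 << 22));	/* ~32.4 */
	return 0;
}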
+
 /*
  * Tries to find a *shared* PLL for the CRTC and store it in
  * intel_crtc->ddi_pll_sel.
@@ -1228,6 +1469,9 @@
 	if (IS_SKYLAKE(dev))
 		return skl_ddi_pll_select(intel_crtc, crtc_state,
 					  intel_encoder, clock);
+	else if (IS_BROXTON(dev))
+		return bxt_ddi_pll_select(intel_crtc, crtc_state,
+					  intel_encoder, clock);
 	else
 		return hsw_ddi_pll_select(intel_crtc, crtc_state,
 					  intel_encoder, clock);
@@ -1519,6 +1763,67 @@
 			   TRANS_CLK_SEL_DISABLED);
 }
 
+void bxt_ddi_vswing_sequence(struct drm_device *dev, u32 level,
+			     enum port port, int type)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	const struct bxt_ddi_buf_trans *ddi_translations;
+	u32 n_entries, i;
+	uint32_t val;
+
+	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
+		n_entries = ARRAY_SIZE(bxt_ddi_translations_dp);
+		ddi_translations = bxt_ddi_translations_dp;
+	} else if (type == INTEL_OUTPUT_HDMI) {
+		n_entries = ARRAY_SIZE(bxt_ddi_translations_hdmi);
+		ddi_translations = bxt_ddi_translations_hdmi;
+	} else {
+		DRM_DEBUG_KMS("Vswing programming not done for encoder %d\n",
+				type);
+		return;
+	}
+
+	/* Check if default value has to be used */
+	if (level >= n_entries ||
+	    (type == INTEL_OUTPUT_HDMI && level == HDMI_LEVEL_SHIFT_UNKNOWN)) {
+		for (i = 0; i < n_entries; i++) {
+			if (ddi_translations[i].default_index) {
+				level = i;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * While we write to the group register to program all lanes at once we
+	 * can read only lane registers and we pick lanes 0/1 for that.
+	 */
+	val = I915_READ(BXT_PORT_PCS_DW10_LN01(port));
+	val &= ~(TX2_SWING_CALC_INIT | TX1_SWING_CALC_INIT);
+	I915_WRITE(BXT_PORT_PCS_DW10_GRP(port), val);
+
+	val = I915_READ(BXT_PORT_TX_DW2_LN0(port));
+	val &= ~(MARGIN_000 | UNIQ_TRANS_SCALE);
+	val |= ddi_translations[level].margin << MARGIN_000_SHIFT |
+	       ddi_translations[level].scale << UNIQ_TRANS_SCALE_SHIFT;
+	I915_WRITE(BXT_PORT_TX_DW2_GRP(port), val);
+
+	val = I915_READ(BXT_PORT_TX_DW3_LN0(port));
+	val &= ~UNIQE_TRANGE_EN_METHOD;
+	if (ddi_translations[level].enable)
+		val |= UNIQE_TRANGE_EN_METHOD;
+	I915_WRITE(BXT_PORT_TX_DW3_GRP(port), val);
+
+	val = I915_READ(BXT_PORT_TX_DW4_LN0(port));
+	val &= ~DE_EMPHASIS;
+	val |= ddi_translations[level].deemphasis << DEEMPH_SHIFT;
+	I915_WRITE(BXT_PORT_TX_DW4_GRP(port), val);
+
+	val = I915_READ(BXT_PORT_PCS_DW10_LN01(port));
+	val |= TX2_SWING_CALC_INIT | TX1_SWING_CALC_INIT;
+	I915_WRITE(BXT_PORT_PCS_DW10_GRP(port), val);
+}
+
 static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
 {
 	struct drm_encoder *encoder = &intel_encoder->base;
@@ -1527,6 +1832,7 @@
 	struct intel_crtc *crtc = to_intel_crtc(encoder->crtc);
 	enum port port = intel_ddi_get_encoder_port(intel_encoder);
 	int type = intel_encoder->type;
+	int hdmi_level;
 
 	if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
@@ -1565,7 +1871,7 @@
 
 		I915_WRITE(DPLL_CTRL2, val);
 
-	} else {
+	} else if (INTEL_INFO(dev)->gen < 9) {
 		WARN_ON(crtc->config->ddi_pll_sel == PORT_CLK_SEL_NONE);
 		I915_WRITE(PORT_CLK_SEL(port), crtc->config->ddi_pll_sel);
 	}
@@ -1583,6 +1889,12 @@
 	} else if (type == INTEL_OUTPUT_HDMI) {
 		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 
+		if (IS_BROXTON(dev)) {
+			hdmi_level = dev_priv->vbt.
+				ddi_port_info[port].hdmi_level_shift;
+			bxt_ddi_vswing_sequence(dev, hdmi_level, port,
+					INTEL_OUTPUT_HDMI);
+		}
 		intel_hdmi->set_infoframes(encoder,
 					   crtc->config->has_hdmi_sink,
 					   &crtc->config->base.adjusted_mode);
@@ -1624,7 +1936,7 @@
 	if (IS_SKYLAKE(dev))
 		I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) |
 					DPLL_CTRL2_DDI_CLK_OFF(port)));
-	else
+	else if (INTEL_INFO(dev)->gen < 9)
 		I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE);
 }
 
@@ -1689,105 +2001,6 @@
 	}
 }
 
-static int skl_get_cdclk_freq(struct drm_i915_private *dev_priv)
-{
-	uint32_t lcpll1 = I915_READ(LCPLL1_CTL);
-	uint32_t cdctl = I915_READ(CDCLK_CTL);
-	uint32_t linkrate;
-
-	if (!(lcpll1 & LCPLL_PLL_ENABLE)) {
-		WARN(1, "LCPLL1 not enabled\n");
-		return 24000; /* 24MHz is the cd freq with NSSC ref */
-	}
-
-	if ((cdctl & CDCLK_FREQ_SEL_MASK) == CDCLK_FREQ_540)
-		return 540000;
-
-	linkrate = (I915_READ(DPLL_CTRL1) &
-		    DPLL_CRTL1_LINK_RATE_MASK(SKL_DPLL0)) >> 1;
-
-	if (linkrate == DPLL_CRTL1_LINK_RATE_2160 ||
-	    linkrate == DPLL_CRTL1_LINK_RATE_1080) {
-		/* vco 8640 */
-		switch (cdctl & CDCLK_FREQ_SEL_MASK) {
-		case CDCLK_FREQ_450_432:
-			return 432000;
-		case CDCLK_FREQ_337_308:
-			return 308570;
-		case CDCLK_FREQ_675_617:
-			return 617140;
-		default:
-			WARN(1, "Unknown cd freq selection\n");
-		}
-	} else {
-		/* vco 8100 */
-		switch (cdctl & CDCLK_FREQ_SEL_MASK) {
-		case CDCLK_FREQ_450_432:
-			return 450000;
-		case CDCLK_FREQ_337_308:
-			return 337500;
-		case CDCLK_FREQ_675_617:
-			return 675000;
-		default:
-			WARN(1, "Unknown cd freq selection\n");
-		}
-	}
-
-	/* error case, do as if DPLL0 isn't enabled */
-	return 24000;
-}
-
-static int bdw_get_cdclk_freq(struct drm_i915_private *dev_priv)
-{
-	uint32_t lcpll = I915_READ(LCPLL_CTL);
-	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
-
-	if (lcpll & LCPLL_CD_SOURCE_FCLK)
-		return 800000;
-	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
-		return 450000;
-	else if (freq == LCPLL_CLK_FREQ_450)
-		return 450000;
-	else if (freq == LCPLL_CLK_FREQ_54O_BDW)
-		return 540000;
-	else if (freq == LCPLL_CLK_FREQ_337_5_BDW)
-		return 337500;
-	else
-		return 675000;
-}
-
-static int hsw_get_cdclk_freq(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-	uint32_t lcpll = I915_READ(LCPLL_CTL);
-	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
-
-	if (lcpll & LCPLL_CD_SOURCE_FCLK)
-		return 800000;
-	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
-		return 450000;
-	else if (freq == LCPLL_CLK_FREQ_450)
-		return 450000;
-	else if (IS_HSW_ULT(dev))
-		return 337500;
-	else
-		return 540000;
-}
-
-int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-
-	if (IS_SKYLAKE(dev))
-		return skl_get_cdclk_freq(dev_priv);
-
-	if (IS_BROADWELL(dev))
-		return bdw_get_cdclk_freq(dev_priv);
-
-	/* Haswell */
-	return hsw_get_cdclk_freq(dev_priv);
-}
-
 static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv,
 			       struct intel_shared_dpll *pll)
 {
@@ -1963,6 +2176,293 @@
 	}
 }
 
+static void broxton_phy_init(struct drm_i915_private *dev_priv,
+			     enum dpio_phy phy)
+{
+	enum port port;
+	uint32_t val;
+
+	val = I915_READ(BXT_P_CR_GT_DISP_PWRON);
+	val |= GT_DISPLAY_POWER_ON(phy);
+	I915_WRITE(BXT_P_CR_GT_DISP_PWRON, val);
+
+	/* Considering 10ms timeout until BSpec is updated */
+	if (wait_for(I915_READ(BXT_PORT_CL1CM_DW0(phy)) & PHY_POWER_GOOD, 10))
+		DRM_ERROR("timeout during PHY%d power on\n", phy);
+
+	for (port =  (phy == DPIO_PHY0 ? PORT_B : PORT_A);
+	     port <= (phy == DPIO_PHY0 ? PORT_C : PORT_A); port++) {
+		int lane;
+
+		for (lane = 0; lane < 4; lane++) {
+			val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane));
+			/*
+			 * Note that on CHV this flag is called UPAR, but has
+			 * the same function.
+			 */
+			val &= ~LATENCY_OPTIM;
+			if (lane != 1)
+				val |= LATENCY_OPTIM;
+
+			I915_WRITE(BXT_PORT_TX_DW14_LN(port, lane), val);
+		}
+	}
+
+	/* Program PLL Rcomp code offset */
+	val = I915_READ(BXT_PORT_CL1CM_DW9(phy));
+	val &= ~IREF0RC_OFFSET_MASK;
+	val |= 0xE4 << IREF0RC_OFFSET_SHIFT;
+	I915_WRITE(BXT_PORT_CL1CM_DW9(phy), val);
+
+	val = I915_READ(BXT_PORT_CL1CM_DW10(phy));
+	val &= ~IREF1RC_OFFSET_MASK;
+	val |= 0xE4 << IREF1RC_OFFSET_SHIFT;
+	I915_WRITE(BXT_PORT_CL1CM_DW10(phy), val);
+
+	/* Program power gating */
+	val = I915_READ(BXT_PORT_CL1CM_DW28(phy));
+	val |= OCL1_POWER_DOWN_EN | DW28_OLDO_DYN_PWR_DOWN_EN |
+		SUS_CLK_CONFIG;
+	I915_WRITE(BXT_PORT_CL1CM_DW28(phy), val);
+
+	if (phy == DPIO_PHY0) {
+		val = I915_READ(BXT_PORT_CL2CM_DW6_BC);
+		val |= DW6_OLDO_DYN_PWR_DOWN_EN;
+		I915_WRITE(BXT_PORT_CL2CM_DW6_BC, val);
+	}
+
+	val = I915_READ(BXT_PORT_CL1CM_DW30(phy));
+	val &= ~OCL2_LDOFUSE_PWR_DIS;
+	/*
+	 * On PHY1 disable power on the second channel, since no port is
+	 * connected there. On PHY0 both channels have a port, so leave it
+	 * enabled.
+	 * TODO: port C is only connected on BXT-P, so on BXT0/1 we should
+	 * power down the second channel on PHY0 as well.
+	 */
+	if (phy == DPIO_PHY1)
+		val |= OCL2_LDOFUSE_PWR_DIS;
+	I915_WRITE(BXT_PORT_CL1CM_DW30(phy), val);
+
+	if (phy == DPIO_PHY0) {
+		uint32_t grc_code;
+		/*
+		 * PHY0 isn't connected to an RCOMP resistor so copy over
+		 * the corresponding calibrated value from PHY1, and disable
+		 * the automatic calibration on PHY0.
+		 */
+		if (wait_for(I915_READ(BXT_PORT_REF_DW3(DPIO_PHY1)) & GRC_DONE,
+			     10))
+			DRM_ERROR("timeout waiting for PHY1 GRC\n");
+
+		val = I915_READ(BXT_PORT_REF_DW6(DPIO_PHY1));
+		val = (val & GRC_CODE_MASK) >> GRC_CODE_SHIFT;
+		grc_code = val << GRC_CODE_FAST_SHIFT |
+			   val << GRC_CODE_SLOW_SHIFT |
+			   val;
+		I915_WRITE(BXT_PORT_REF_DW6(DPIO_PHY0), grc_code);
+
+		val = I915_READ(BXT_PORT_REF_DW8(DPIO_PHY0));
+		val |= GRC_DIS | GRC_RDY_OVRD;
+		I915_WRITE(BXT_PORT_REF_DW8(DPIO_PHY0), val);
+	}
+
+	val = I915_READ(BXT_PHY_CTL_FAMILY(phy));
+	val |= COMMON_RESET_DIS;
+	I915_WRITE(BXT_PHY_CTL_FAMILY(phy), val);
+}
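
The PHY0 GRC programming above copies one calibrated value into three fields of the same register. A standalone sketch, not part of the patch, of that replication; the shift values below are placeholders, not the real BXT_PORT_REF_DW6 layout:

#include <stdio.h>

/* placeholder field offsets, for illustration only -- the real definitions
 * live in i915_reg.h */
#define GRC_CODE_FAST_SHIFT	16
#define GRC_CODE_SLOW_SHIFT	8

int main(void)
{
	unsigned int calibrated = 0x2a;	/* hypothetical value read from PHY1 */
	unsigned int grc_code = calibrated << GRC_CODE_FAST_SHIFT |
				calibrated << GRC_CODE_SLOW_SHIFT |
				calibrated;

	printf("grc_code = 0x%08x\n", grc_code);	/* 0x002a2a2a */
	return 0;
}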
+
+void broxton_ddi_phy_init(struct drm_device *dev)
+{
+	/* Enable PHY1 first since it provides Rcomp for PHY0 */
+	broxton_phy_init(dev->dev_private, DPIO_PHY1);
+	broxton_phy_init(dev->dev_private, DPIO_PHY0);
+}
+
+static void broxton_phy_uninit(struct drm_i915_private *dev_priv,
+			       enum dpio_phy phy)
+{
+	uint32_t val;
+
+	val = I915_READ(BXT_PHY_CTL_FAMILY(phy));
+	val &= ~COMMON_RESET_DIS;
+	I915_WRITE(BXT_PHY_CTL_FAMILY(phy), val);
+}
+
+void broxton_ddi_phy_uninit(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	broxton_phy_uninit(dev_priv, DPIO_PHY1);
+	broxton_phy_uninit(dev_priv, DPIO_PHY0);
+
+	/* FIXME: do this in broxton_phy_uninit per phy */
+	I915_WRITE(BXT_P_CR_GT_DISP_PWRON, 0);
+}
+
+static const char * const bxt_ddi_pll_names[] = {
+	"PORT PLL A",
+	"PORT PLL B",
+	"PORT PLL C",
+};
+
+static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv,
+				struct intel_shared_dpll *pll)
+{
+	uint32_t temp;
+	enum port port = (enum port)pll->id;	/* 1:1 port->PLL mapping */
+
+	temp = I915_READ(BXT_PORT_PLL_ENABLE(port));
+	temp &= ~PORT_PLL_REF_SEL;
+	/* Non-SSC reference */
+	I915_WRITE(BXT_PORT_PLL_ENABLE(port), temp);
+
+	/* Disable 10 bit clock */
+	temp = I915_READ(BXT_PORT_PLL_EBB_4(port));
+	temp &= ~PORT_PLL_10BIT_CLK_ENABLE;
+	I915_WRITE(BXT_PORT_PLL_EBB_4(port), temp);
+
+	/* Write P1 & P2 */
+	temp = I915_READ(BXT_PORT_PLL_EBB_0(port));
+	temp &= ~(PORT_PLL_P1_MASK | PORT_PLL_P2_MASK);
+	temp |= pll->config.hw_state.ebb0;
+	I915_WRITE(BXT_PORT_PLL_EBB_0(port), temp);
+
+	/* Write M2 integer */
+	temp = I915_READ(BXT_PORT_PLL(port, 0));
+	temp &= ~PORT_PLL_M2_MASK;
+	temp |= pll->config.hw_state.pll0;
+	I915_WRITE(BXT_PORT_PLL(port, 0), temp);
+
+	/* Write N */
+	temp = I915_READ(BXT_PORT_PLL(port, 1));
+	temp &= ~PORT_PLL_N_MASK;
+	temp |= pll->config.hw_state.pll1;
+	I915_WRITE(BXT_PORT_PLL(port, 1), temp);
+
+	/* Write M2 fraction */
+	temp = I915_READ(BXT_PORT_PLL(port, 2));
+	temp &= ~PORT_PLL_M2_FRAC_MASK;
+	temp |= pll->config.hw_state.pll2;
+	I915_WRITE(BXT_PORT_PLL(port, 2), temp);
+
+	/* Write M2 fraction enable */
+	temp = I915_READ(BXT_PORT_PLL(port, 3));
+	temp &= ~PORT_PLL_M2_FRAC_ENABLE;
+	temp |= pll->config.hw_state.pll3;
+	I915_WRITE(BXT_PORT_PLL(port, 3), temp);
+
+	/* Write coeff */
+	temp = I915_READ(BXT_PORT_PLL(port, 6));
+	temp &= ~PORT_PLL_PROP_COEFF_MASK;
+	temp &= ~PORT_PLL_INT_COEFF_MASK;
+	temp &= ~PORT_PLL_GAIN_CTL_MASK;
+	temp |= pll->config.hw_state.pll6;
+	I915_WRITE(BXT_PORT_PLL(port, 6), temp);
+
+	/* Write calibration val */
+	temp = I915_READ(BXT_PORT_PLL(port, 8));
+	temp &= ~PORT_PLL_TARGET_CNT_MASK;
+	temp |= pll->config.hw_state.pll8;
+	I915_WRITE(BXT_PORT_PLL(port, 8), temp);
+
+	/*
+	 * FIXME: program PORT_PLL_9/i_lockthresh according to the latest
+	 * specification update.
+	 */
+
+	/* Recalibrate with new settings */
+	temp = I915_READ(BXT_PORT_PLL_EBB_4(port));
+	temp |= PORT_PLL_RECALIBRATE;
+	I915_WRITE(BXT_PORT_PLL_EBB_4(port), temp);
+	/* Enable 10 bit clock */
+	temp |= PORT_PLL_10BIT_CLK_ENABLE;
+	I915_WRITE(BXT_PORT_PLL_EBB_4(port), temp);
+
+	/* Enable PLL */
+	temp = I915_READ(BXT_PORT_PLL_ENABLE(port));
+	temp |= PORT_PLL_ENABLE;
+	I915_WRITE(BXT_PORT_PLL_ENABLE(port), temp);
+	POSTING_READ(BXT_PORT_PLL_ENABLE(port));
+
+	if (wait_for_atomic_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) &
+			PORT_PLL_LOCK), 200))
+		DRM_ERROR("PLL %d not locked\n", port);
+
+	/*
+	 * While we write to the group register to program all lanes at once we
+	 * can read only lane registers and we pick lanes 0/1 for that.
+	 */
+	temp = I915_READ(BXT_PORT_PCS_DW12_LN01(port));
+	temp &= ~LANE_STAGGER_MASK;
+	temp &= ~LANESTAGGER_STRAP_OVRD;
+	temp |= pll->config.hw_state.pcsdw12;
+	I915_WRITE(BXT_PORT_PCS_DW12_GRP(port), temp);
+}
+
+static void bxt_ddi_pll_disable(struct drm_i915_private *dev_priv,
+					struct intel_shared_dpll *pll)
+{
+	enum port port = (enum port)pll->id;	/* 1:1 port->PLL mapping */
+	uint32_t temp;
+
+	temp = I915_READ(BXT_PORT_PLL_ENABLE(port));
+	temp &= ~PORT_PLL_ENABLE;
+	I915_WRITE(BXT_PORT_PLL_ENABLE(port), temp);
+	POSTING_READ(BXT_PORT_PLL_ENABLE(port));
+}
+
+static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
+					struct intel_shared_dpll *pll,
+					struct intel_dpll_hw_state *hw_state)
+{
+	enum port port = (enum port)pll->id;	/* 1:1 port->PLL mapping */
+	uint32_t val;
+
+	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+		return false;
+
+	val = I915_READ(BXT_PORT_PLL_ENABLE(port));
+	if (!(val & PORT_PLL_ENABLE))
+		return false;
+
+	hw_state->ebb0 = I915_READ(BXT_PORT_PLL_EBB_0(port));
+	hw_state->pll0 = I915_READ(BXT_PORT_PLL(port, 0));
+	hw_state->pll1 = I915_READ(BXT_PORT_PLL(port, 1));
+	hw_state->pll2 = I915_READ(BXT_PORT_PLL(port, 2));
+	hw_state->pll3 = I915_READ(BXT_PORT_PLL(port, 3));
+	hw_state->pll6 = I915_READ(BXT_PORT_PLL(port, 6));
+	hw_state->pll8 = I915_READ(BXT_PORT_PLL(port, 8));
+	/*
+	 * While we write to the group register to program all lanes at once we
+	 * can read only lane registers. We configure all lanes the same way, so
+	 * here just read out lanes 0/1 and output a note if lanes 2/3 differ.
+	 */
+	hw_state->pcsdw12 = I915_READ(BXT_PORT_PCS_DW12_LN01(port));
+	if (I915_READ(BXT_PORT_PCS_DW12_LN23(port)) != hw_state->pcsdw12)
+		DRM_DEBUG_DRIVER("lane stagger config different for lane 01 (%08x) and 23 (%08x)\n",
+				 hw_state->pcsdw12,
+				 I915_READ(BXT_PORT_PCS_DW12_LN23(port)));
+
+	return true;
+}
+
+static void bxt_shared_dplls_init(struct drm_i915_private *dev_priv)
+{
+	int i;
+
+	dev_priv->num_shared_dpll = 3;
+
+	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+		dev_priv->shared_dplls[i].id = i;
+		dev_priv->shared_dplls[i].name = bxt_ddi_pll_names[i];
+		dev_priv->shared_dplls[i].disable = bxt_ddi_pll_disable;
+		dev_priv->shared_dplls[i].enable = bxt_ddi_pll_enable;
+		dev_priv->shared_dplls[i].get_hw_state =
+			bxt_ddi_pll_get_hw_state;
+	}
+}
+
 void intel_ddi_pll_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1970,15 +2470,20 @@
 
 	if (IS_SKYLAKE(dev))
 		skl_shared_dplls_init(dev_priv);
+	else if (IS_BROXTON(dev))
+		bxt_shared_dplls_init(dev_priv);
 	else
 		hsw_shared_dplls_init(dev_priv);
 
 	DRM_DEBUG_KMS("CDCLK running at %dKHz\n",
-		      intel_ddi_get_cdclk_freq(dev_priv));
+		      dev_priv->display.get_display_clock_speed(dev));
 
 	if (IS_SKYLAKE(dev)) {
 		if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE))
 			DRM_ERROR("LCPLL1 is disabled\n");
+	} else if (IS_BROXTON(dev)) {
+		broxton_init_cdclk(dev);
+		broxton_ddi_phy_init(dev);
 	} else {
 		/*
 		 * The LCPLL register should be turned on by the BIOS. For now
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d547d9c8..3094b08 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -103,6 +103,10 @@
 			    const struct intel_crtc_state *pipe_config);
 static void intel_begin_crtc_commit(struct drm_crtc *crtc);
 static void intel_finish_crtc_commit(struct drm_crtc *crtc);
+static void skl_init_scalers(struct drm_device *dev, struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state);
+static int i9xx_get_refclk(const struct intel_crtc_state *crtc_state,
+			   int num_connectors);
 
 static struct intel_encoder *intel_find_encoder(struct intel_connector *connector, int pipe)
 {
@@ -400,6 +404,18 @@
 	.p2 = {	.p2_slow = 1, .p2_fast = 14 },
 };
 
+static const intel_limit_t intel_limits_bxt = {
+	/* FIXME: find real dot limits */
+	.dot = { .min = 0, .max = INT_MAX },
+	.vco = { .min = 4800000, .max = 6480000 },
+	.n = { .min = 1, .max = 1 },
+	.m1 = { .min = 2, .max = 2 },
+	/* FIXME: find real m2 limits */
+	.m2 = { .min = 2 << 22, .max = 255 << 22 },
+	.p1 = { .min = 2, .max = 4 },
+	.p2 = { .p2_slow = 1, .p2_fast = 20 },
+};
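
The m2 limits above appear to use the same 22-bit fractional fixed-point encoding as the BXT PLL code in intel_ddi.c; a tiny sketch (an illustration, not something the patch states) of what those bounds work out to:

#include <stdio.h>

int main(void)
{
	/* .m2 = { .min = 2 << 22, .max = 255 << 22 } from intel_limits_bxt */
	printf("m2 min = %d (%.1f), max = %d (%.1f)\n",
	       2 << 22, (2 << 22) / (double)(1 << 22),
	       255 << 22, (255 << 22) / (double)(1 << 22));	/* 2.0 .. 255.0 */
	return 0;
}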
+
 static void vlv_clock(int refclk, intel_clock_t *clock)
 {
 	clock->m = clock->m1 * clock->m2;
@@ -511,7 +527,9 @@
 	struct drm_device *dev = crtc_state->base.crtc->dev;
 	const intel_limit_t *limit;
 
-	if (HAS_PCH_SPLIT(dev))
+	if (IS_BROXTON(dev))
+		limit = &intel_limits_bxt;
+	else if (HAS_PCH_SPLIT(dev))
 		limit = intel_ironlake_limit(crtc_state, refclk);
 	else if (IS_G4X(dev)) {
 		limit = intel_g4x_limit(crtc_state);
@@ -596,11 +614,11 @@
 	if (clock->m1  < limit->m1.min  || limit->m1.max  < clock->m1)
 		INTELPllInvalid("m1 out of range\n");
 
-	if (!IS_PINEVIEW(dev) && !IS_VALLEYVIEW(dev))
+	if (!IS_PINEVIEW(dev) && !IS_VALLEYVIEW(dev) && !IS_BROXTON(dev))
 		if (clock->m1 <= clock->m2)
 			INTELPllInvalid("m1 <= m2\n");
 
-	if (!IS_VALLEYVIEW(dev)) {
+	if (!IS_VALLEYVIEW(dev) && !IS_BROXTON(dev)) {
 		if (clock->p < limit->p.min || limit->p.max < clock->p)
 			INTELPllInvalid("p out of range\n");
 		if (clock->m < limit->m.min || limit->m.max < clock->m)
@@ -953,6 +971,15 @@
 	return found;
 }
 
+bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, int target_clock,
+			intel_clock_t *best_clock)
+{
+	int refclk = i9xx_get_refclk(crtc_state, 0);
+
+	return chv_find_best_dpll(intel_limit(crtc_state, refclk), crtc_state,
+				  target_clock, refclk, NULL, best_clock);
+}
+
 bool intel_crtc_active(struct drm_crtc *crtc)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -2104,7 +2131,7 @@
 	 * a plane.  On ILK+ the pipe PLLs are integrated, so we don't
 	 * need the check.
 	 */
-	if (!HAS_PCH_SPLIT(dev_priv->dev))
+	if (HAS_GMCH_DISPLAY(dev_priv->dev))
 		if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DSI))
 			assert_dsi_pll_enabled(dev_priv);
 		else
@@ -2338,13 +2365,6 @@
 	info->pitch = fb->pitches[0];
 	info->fb_modifier = fb->modifier[0];
 
-	if (!(info->fb_modifier == I915_FORMAT_MOD_Y_TILED ||
-	      info->fb_modifier == I915_FORMAT_MOD_Yf_TILED)) {
-		DRM_DEBUG_KMS(
-			      "Y or Yf tiling is needed for 90/270 rotation!\n");
-		return -EINVAL;
-	}
-
 	return 0;
 }
 
@@ -2936,6 +2956,35 @@
 	return i915_gem_obj_ggtt_offset_view(obj, view);
 }
 
+/*
+ * This function detaches (i.e. unbinds) any unused scalers in hardware
+ */
+void skl_detach_scalers(struct intel_crtc *intel_crtc)
+{
+	struct drm_device *dev;
+	struct drm_i915_private *dev_priv;
+	struct intel_crtc_scaler_state *scaler_state;
+	int i;
+
+	if (!intel_crtc || !intel_crtc->config)
+		return;
+
+	dev = intel_crtc->base.dev;
+	dev_priv = dev->dev_private;
+	scaler_state = &intel_crtc->config->scaler_state;
+
+	/* loop through and disable scalers that aren't in use */
+	for (i = 0; i < intel_crtc->num_scalers; i++) {
+		if (!scaler_state->scalers[i].in_use) {
+			I915_WRITE(SKL_PS_CTRL(intel_crtc->pipe, i), 0);
+			I915_WRITE(SKL_PS_WIN_POS(intel_crtc->pipe, i), 0);
+			I915_WRITE(SKL_PS_WIN_SZ(intel_crtc->pipe, i), 0);
+			DRM_DEBUG_KMS("CRTC:%d Disabled scaler id %u.%u\n",
+				intel_crtc->base.base.id, intel_crtc->pipe, i);
+		}
+	}
+}
+
 static void skylake_update_primary_plane(struct drm_crtc *crtc,
 					 struct drm_framebuffer *fb,
 					 int x, int y)
@@ -2945,8 +2994,12 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_i915_gem_object *obj;
 	int pipe = intel_crtc->pipe;
-	u32 plane_ctl, stride_div;
+	u32 plane_ctl, stride_div, stride;
+	u32 tile_height, plane_offset, plane_size;
+	unsigned int rotation;
+	int x_offset, y_offset;
 	unsigned long surf_addr;
+	struct drm_plane *plane;
 
 	if (!intel_crtc->primary_enabled) {
 		I915_WRITE(PLANE_CTL(pipe, 0), 0);
@@ -3007,21 +3060,51 @@
 	}
 
 	plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE;
-	if (crtc->primary->state->rotation == BIT(DRM_ROTATE_180))
+
+	plane = crtc->primary;
+	rotation = plane->state->rotation;
+	switch (rotation) {
+	case BIT(DRM_ROTATE_90):
+		plane_ctl |= PLANE_CTL_ROTATE_90;
+		break;
+
+	case BIT(DRM_ROTATE_180):
 		plane_ctl |= PLANE_CTL_ROTATE_180;
+		break;
+
+	case BIT(DRM_ROTATE_270):
+		plane_ctl |= PLANE_CTL_ROTATE_270;
+		break;
+	}
 
 	obj = intel_fb_obj(fb);
 	stride_div = intel_fb_stride_alignment(dev, fb->modifier[0],
 					       fb->pixel_format);
-	surf_addr = intel_plane_obj_offset(to_intel_plane(crtc->primary), obj);
+	surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj);
+
+	if (intel_rotation_90_or_270(rotation)) {
+		/* stride = Surface height in tiles */
+		tile_height = intel_tile_height(dev, fb->bits_per_pixel,
+						fb->modifier[0]);
+		stride = DIV_ROUND_UP(fb->height, tile_height);
+		x_offset = stride * tile_height - y - (plane->state->src_h >> 16);
+		y_offset = x;
+		plane_size = ((plane->state->src_w >> 16) - 1) << 16 |
+					((plane->state->src_h >> 16) - 1);
+	} else {
+		stride = fb->pitches[0] / stride_div;
+		x_offset = x;
+		y_offset = y;
+		plane_size = ((plane->state->src_h >> 16) - 1) << 16 |
+			((plane->state->src_w >> 16) - 1);
+	}
+	plane_offset = y_offset << 16 | x_offset;
 
 	I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
 	I915_WRITE(PLANE_POS(pipe, 0), 0);
-	I915_WRITE(PLANE_OFFSET(pipe, 0), (y << 16) | x);
-	I915_WRITE(PLANE_SIZE(pipe, 0),
-		   (intel_crtc->config->pipe_src_h - 1) << 16 |
-		   (intel_crtc->config->pipe_src_w - 1));
-	I915_WRITE(PLANE_STRIDE(pipe, 0), fb->pitches[0] / stride_div);
+	I915_WRITE(PLANE_OFFSET(pipe, 0), plane_offset);
+	I915_WRITE(PLANE_SIZE(pipe, 0), plane_size);
+	I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
 	I915_WRITE(PLANE_SURF(pipe, 0), surf_addr);
 
 	POSTING_READ(PLANE_SURF(pipe, 0));
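
A standalone sketch, not part of the patch, of the 90/270 rotation arithmetic introduced above, using made-up framebuffer numbers; the 32-row tile height is an assumption for illustration, not a claim about the hardware:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	/* hypothetical 1920x1080 fb scanned out rotated, assumed 32-row tiles */
	int fb_height = 1080, tile_height = 32;
	int x = 0, y = 0, src_w = 1920, src_h = 1080;

	/* rotated case: stride is the surface height in tiles */
	int stride = DIV_ROUND_UP(fb_height, tile_height);	/* 34 */
	int x_offset = stride * tile_height - y - src_h;	/* 1088 - 1080 = 8 */
	int y_offset = x;
	unsigned int plane_size = (src_w - 1) << 16 | (src_h - 1);

	printf("stride=%d x_off=%d y_off=%d size=0x%08x\n",
	       stride, x_offset, y_offset, plane_size);
	return 0;
}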
@@ -4126,6 +4209,26 @@
 		goto found;
 	}
 
+	if (IS_BROXTON(dev_priv->dev)) {
+		/* PLL is attached to port in bxt */
+		struct intel_encoder *encoder;
+		struct intel_digital_port *intel_dig_port;
+
+		encoder = intel_ddi_get_crtc_new_encoder(crtc_state);
+		if (WARN_ON(!encoder))
+			return NULL;
+
+		intel_dig_port = enc_to_dig_port(&encoder->base);
+		/* 1:1 mapping between ports and PLLs */
+		i = (enum intel_dpll_id)intel_dig_port->port;
+		pll = &dev_priv->shared_dplls[i];
+		DRM_DEBUG_KMS("CRTC:%d using pre-allocated %s\n",
+			crtc->base.base.id, pll->name);
+		WARN_ON(pll->new_config->crtc_mask);
+
+		goto found;
+	}
+
 	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
 		pll = &dev_priv->shared_dplls[i];
 
@@ -4251,16 +4354,175 @@
 	}
 }
 
-static void skylake_pfit_enable(struct intel_crtc *crtc)
+/**
+ * skl_update_scaler_users - Stages update to crtc's scaler state
+ * @intel_crtc: crtc
+ * @crtc_state: crtc_state
+ * @intel_plane: plane (NULL indicates the crtc is requesting the update)
+ * @plane_state: plane's state
+ * @force_detach: request unconditional detachment of scaler
+ *
+ * This function updates the scaler state for the requested plane or crtc.
+ * To request a scaler usage update for a plane, the caller passes its plane
+ * pointer; to request a scaler usage update for the crtc itself, the caller
+ * passes NULL as the plane pointer.
+ *
+ * Return:
+ *     0 - scaler_usage updated successfully
+ *     error - requested scaling cannot be supported or other error condition
+ */
+int
+skl_update_scaler_users(
+	struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state,
+	struct intel_plane *intel_plane, struct intel_plane_state *plane_state,
+	int force_detach)
+{
+	int need_scaling;
+	int idx;
+	int src_w, src_h, dst_w, dst_h;
+	int *scaler_id;
+	struct drm_framebuffer *fb;
+	struct intel_crtc_scaler_state *scaler_state;
+
+	if (!intel_crtc || !crtc_state)
+		return 0;
+
+	scaler_state = &crtc_state->scaler_state;
+
+	idx = intel_plane ? drm_plane_index(&intel_plane->base) : SKL_CRTC_INDEX;
+	fb = intel_plane ? plane_state->base.fb : NULL;
+
+	if (intel_plane) {
+		src_w = drm_rect_width(&plane_state->src) >> 16;
+		src_h = drm_rect_height(&plane_state->src) >> 16;
+		dst_w = drm_rect_width(&plane_state->dst);
+		dst_h = drm_rect_height(&plane_state->dst);
+		scaler_id = &plane_state->scaler_id;
+	} else {
+		struct drm_display_mode *adjusted_mode =
+			&crtc_state->base.adjusted_mode;
+		src_w = crtc_state->pipe_src_w;
+		src_h = crtc_state->pipe_src_h;
+		dst_w = adjusted_mode->hdisplay;
+		dst_h = adjusted_mode->vdisplay;
+		scaler_id = &scaler_state->scaler_id;
+	}
+	need_scaling = (src_w != dst_w || src_h != dst_h);
+
+	/*
+	 * if the plane is being disabled, scaling is no longer required, or a
+	 * detach is forced:
+	 *  - free the scaler bound to this plane/crtc
+	 *  - in order to do this, update crtc->scaler_usage
+	 *
+	 * Here the scaler state in crtc_state is freed so that the scaler can
+	 * be assigned to another user. The actual register
+	 * update to free the scaler is done in plane/panel-fit programming.
+	 * For this purpose crtc/plane_state->scaler_id isn't reset here.
+	 */
+	if (force_detach || !need_scaling || (intel_plane &&
+		(!fb || !plane_state->visible))) {
+		if (*scaler_id >= 0) {
+			scaler_state->scaler_users &= ~(1 << idx);
+			scaler_state->scalers[*scaler_id].in_use = 0;
+
+			DRM_DEBUG_KMS("Staged freeing scaler id %d.%d from %s:%d "
+				"crtc_state = %p scaler_users = 0x%x\n",
+				intel_crtc->pipe, *scaler_id, intel_plane ? "PLANE" : "CRTC",
+				intel_plane ? intel_plane->base.base.id :
+				intel_crtc->base.base.id, crtc_state,
+				scaler_state->scaler_users);
+			*scaler_id = -1;
+		}
+		return 0;
+	}
+
+	/* range checks */
+	if (src_w < SKL_MIN_SRC_W || src_h < SKL_MIN_SRC_H ||
+		dst_w < SKL_MIN_DST_W || dst_h < SKL_MIN_DST_H ||
+
+		src_w > SKL_MAX_SRC_W || src_h > SKL_MAX_SRC_H ||
+		dst_w > SKL_MAX_DST_W || dst_h > SKL_MAX_DST_H) {
+		DRM_DEBUG_KMS("%s:%d scaler_user index %u.%u: src %ux%u dst %ux%u "
+			"size is out of scaler range\n",
+			intel_plane ? "PLANE" : "CRTC",
+			intel_plane ? intel_plane->base.base.id : intel_crtc->base.base.id,
+			intel_crtc->pipe, idx, src_w, src_h, dst_w, dst_h);
+		return -EINVAL;
+	}
+
+	/* check colorkey */
+	if (intel_plane && intel_plane->ckey.flags != I915_SET_COLORKEY_NONE) {
+		DRM_DEBUG_KMS("PLANE:%d scaling with color key not allowed",
+			intel_plane->base.base.id);
+		return -EINVAL;
+	}
+
+	/* Check src format */
+	if (intel_plane) {
+		switch (fb->pixel_format) {
+		case DRM_FORMAT_RGB565:
+		case DRM_FORMAT_XBGR8888:
+		case DRM_FORMAT_XRGB8888:
+		case DRM_FORMAT_ABGR8888:
+		case DRM_FORMAT_ARGB8888:
+		case DRM_FORMAT_XRGB2101010:
+		case DRM_FORMAT_ARGB2101010:
+		case DRM_FORMAT_XBGR2101010:
+		case DRM_FORMAT_ABGR2101010:
+		case DRM_FORMAT_YUYV:
+		case DRM_FORMAT_YVYU:
+		case DRM_FORMAT_UYVY:
+		case DRM_FORMAT_VYUY:
+			break;
+		default:
+			DRM_DEBUG_KMS("PLANE:%d FB:%d unsupported scaling format 0x%x\n",
+				intel_plane->base.base.id, fb->base.id, fb->pixel_format);
+			return -EINVAL;
+		}
+	}
+
+	/* mark this plane as a scaler user in crtc_state */
+	scaler_state->scaler_users |= (1 << idx);
+	DRM_DEBUG_KMS("%s:%d staged scaling request for %ux%u->%ux%u "
+		"crtc_state = %p scaler_users = 0x%x\n",
+		intel_plane ? "PLANE" : "CRTC",
+		intel_plane ? intel_plane->base.base.id : intel_crtc->base.base.id,
+		src_w, src_h, dst_w, dst_h, crtc_state, scaler_state->scaler_users);
+	return 0;
+}
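
A minimal standalone sketch, not part of the patch, of the scaler_users bookkeeping above: each plane claims the bit for its drm_plane_index(), the crtc claims SKL_CRTC_INDEX, and freeing a scaler clears the bit. The index values here are placeholders for illustration:

#include <stdio.h>

#define SKL_CRTC_INDEX	31	/* placeholder, not the value from intel_drv.h */

int main(void)
{
	unsigned int scaler_users = 0;
	int plane_idx = 3;	/* hypothetical drm_plane_index() result */

	scaler_users |= 1u << plane_idx;	/* plane stages a scaler request */
	scaler_users |= 1u << SKL_CRTC_INDEX;	/* crtc (panel fitter) does too */
	printf("scaler_users = 0x%08x\n", scaler_users);

	scaler_users &= ~(1u << plane_idx);	/* plane no longer needs scaling */
	printf("scaler_users = 0x%08x\n", scaler_users);
	return 0;
}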
+
+static void skylake_pfit_update(struct intel_crtc *crtc, int enable)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = crtc->pipe;
+	struct intel_crtc_scaler_state *scaler_state =
+		&crtc->config->scaler_state;
+
+	DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config);
+
+	/* To update pfit, first update scaler state */
+	skl_update_scaler_users(crtc, crtc->config, NULL, NULL, !enable);
+	intel_atomic_setup_scalers(crtc->base.dev, crtc, crtc->config);
+	skl_detach_scalers(crtc);
+	if (!enable)
+		return;
 
 	if (crtc->config->pch_pfit.enabled) {
-		I915_WRITE(PS_CTL(pipe), PS_ENABLE);
-		I915_WRITE(PS_WIN_POS(pipe), crtc->config->pch_pfit.pos);
-		I915_WRITE(PS_WIN_SZ(pipe), crtc->config->pch_pfit.size);
+		int id;
+
+		if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) {
+			DRM_ERROR("Requesting pfit without getting a scaler first\n");
+			return;
+		}
+
+		id = scaler_state->scaler_id;
+		I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
+			PS_FILTER_MEDIUM | scaler_state->scalers[id].mode);
+		I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos);
+		I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size);
+
+		DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id);
 	}
 }
 
@@ -4404,7 +4666,7 @@
 	if (!crtc->state->enable || !intel_crtc->active)
 		return;
 
-	if (!HAS_PCH_SPLIT(dev_priv->dev)) {
+	if (HAS_GMCH_DISPLAY(dev_priv->dev)) {
 		if (intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI))
 			assert_dsi_pll_enabled(dev_priv);
 		else
@@ -4664,10 +4926,12 @@
 
 	intel_ddi_enable_pipe_clock(intel_crtc);
 
-	if (IS_SKYLAKE(dev))
-		skylake_pfit_enable(intel_crtc);
-	else
+	if (INTEL_INFO(dev)->gen == 9)
+		skylake_pfit_update(intel_crtc, 1);
+	else if (INTEL_INFO(dev)->gen < 9)
 		ironlake_pfit_enable(intel_crtc);
+	else
+		MISSING_CASE(INTEL_INFO(dev)->gen);
 
 	/*
 	 * On ILK+ LUT must be loaded before the pipe is running but with
@@ -4701,21 +4965,6 @@
 	intel_crtc_enable_planes(crtc);
 }
 
-static void skylake_pfit_disable(struct intel_crtc *crtc)
-{
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int pipe = crtc->pipe;
-
-	/* To avoid upsetting the power well on haswell only disable the pfit if
-	 * it's in use. The hw state code will make sure we get this right. */
-	if (crtc->config->pch_pfit.enabled) {
-		I915_WRITE(PS_CTL(pipe), 0);
-		I915_WRITE(PS_WIN_POS(pipe), 0);
-		I915_WRITE(PS_WIN_SZ(pipe), 0);
-	}
-}
-
 static void ironlake_pfit_disable(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -4827,10 +5076,12 @@
 
 	intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder);
 
-	if (IS_SKYLAKE(dev))
-		skylake_pfit_disable(intel_crtc);
-	else
+	if (INTEL_INFO(dev)->gen == 9)
+		skylake_pfit_update(intel_crtc, 0);
+	else if (INTEL_INFO(dev)->gen < 9)
 		ironlake_pfit_disable(intel_crtc);
+	else
+		MISSING_CASE(INTEL_INFO(dev)->gen);
 
 	intel_ddi_disable_pipe_clock(intel_crtc);
 
@@ -4994,6 +5245,181 @@
 	intel_display_set_init_power(dev_priv, false);
 }
 
+void broxton_set_cdclk(struct drm_device *dev, int frequency)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t divider;
+	uint32_t ratio;
+	uint32_t current_freq;
+	int ret;
+
+	/* frequency = 19.2MHz * ratio / 2 / div{1,1.5,2,4} */
+	switch (frequency) {
+	case 144000:
+		divider = BXT_CDCLK_CD2X_DIV_SEL_4;
+		ratio = BXT_DE_PLL_RATIO(60);
+		break;
+	case 288000:
+		divider = BXT_CDCLK_CD2X_DIV_SEL_2;
+		ratio = BXT_DE_PLL_RATIO(60);
+		break;
+	case 384000:
+		divider = BXT_CDCLK_CD2X_DIV_SEL_1_5;
+		ratio = BXT_DE_PLL_RATIO(60);
+		break;
+	case 576000:
+		divider = BXT_CDCLK_CD2X_DIV_SEL_1;
+		ratio = BXT_DE_PLL_RATIO(60);
+		break;
+	case 624000:
+		divider = BXT_CDCLK_CD2X_DIV_SEL_1;
+		ratio = BXT_DE_PLL_RATIO(65);
+		break;
+	case 19200:
+		/*
+		 * Bypass frequency with DE PLL disabled. Init ratio, divider
+		 * to suppress GCC warning.
+		 */
+		ratio = 0;
+		divider = 0;
+		break;
+	default:
+		DRM_ERROR("unsupported CDCLK freq %d", frequency);
+
+		return;
+	}
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+	/* Inform power controller of upcoming frequency change */
+	ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
+				      0x80000000);
+	mutex_unlock(&dev_priv->rps.hw_lock);
+
+	if (ret) {
+		DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n",
+			  ret, frequency);
+		return;
+	}
+
+	current_freq = I915_READ(CDCLK_CTL) & CDCLK_FREQ_DECIMAL_MASK;
+	/* convert from .1 fixpoint MHz with -1MHz offset to kHz */
+	current_freq = current_freq * 500 + 1000;
+
+	/*
+	 * DE PLL has to be disabled when
+	 * - setting to 19.2MHz (bypass, PLL isn't used)
+	 * - before setting to 624MHz (PLL needs toggling)
+	 * - before setting to any frequency from 624MHz (PLL needs toggling)
+	 */
+	if (frequency == 19200 || frequency == 624000 ||
+	    current_freq == 624000) {
+		I915_WRITE(BXT_DE_PLL_ENABLE, ~BXT_DE_PLL_PLL_ENABLE);
+		/* Timeout 200us */
+		if (wait_for(!(I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK),
+			     1))
+			DRM_ERROR("timeout waiting for DE PLL unlock\n");
+	}
+
+	if (frequency != 19200) {
+		uint32_t val;
+
+		val = I915_READ(BXT_DE_PLL_CTL);
+		val &= ~BXT_DE_PLL_RATIO_MASK;
+		val |= ratio;
+		I915_WRITE(BXT_DE_PLL_CTL, val);
+
+		I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE);
+		/* Timeout 200us */
+		if (wait_for(I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK, 1))
+			DRM_ERROR("timeout waiting for DE PLL lock\n");
+
+		val = I915_READ(CDCLK_CTL);
+		val &= ~BXT_CDCLK_CD2X_DIV_SEL_MASK;
+		val |= divider;
+		/*
+		 * Disable SSA Precharge when CD clock frequency < 500 MHz,
+		 * enable otherwise.
+		 */
+		val &= ~BXT_CDCLK_SSA_PRECHARGE_ENABLE;
+		if (frequency >= 500000)
+			val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
+
+		val &= ~CDCLK_FREQ_DECIMAL_MASK;
+		/* convert from kHz to .1 fixpoint MHz with -1MHz offset */
+		val |= (frequency - 1000) / 500;
+		I915_WRITE(CDCLK_CTL, val);
+	}
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+	ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
+				      DIV_ROUND_UP(frequency, 25000));
+	mutex_unlock(&dev_priv->rps.hw_lock);
+
+	if (ret) {
+		DRM_ERROR("PCode CDCLK freq set failed (err %d, freq %d)\n",
+			  ret, frequency);
+		return;
+	}
+
+	dev_priv->cdclk_freq = frequency;
+}
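
As a sanity check on the arithmetic in broxton_set_cdclk() above, a standalone sketch, not part of the patch, reproducing the 19.2 MHz * ratio / 2 / divider formula and the round trip through the CDCLK_CTL decimal field; the helper names here are made up for illustration:

#include <stdio.h>

/* frequency = 19.2 MHz * ratio / 2 / div, in kHz; div is passed scaled by 2
 * so that the 1.5 divider stays an integer */
static int bxt_cdclk_khz(int ratio, int div_times_2)
{
	return 19200 * ratio / div_times_2;
}

/* CDCLK_CTL decimal field <-> kHz, mirroring the "* 500 + 1000" conversion */
static int decimal_to_khz(unsigned int decimal) { return decimal * 500 + 1000; }
static unsigned int khz_to_decimal(int khz) { return (khz - 1000) / 500; }

int main(void)
{
	printf("%d kHz\n", bxt_cdclk_khz(65, 2));	/* 624000: ratio 65, div 1 */
	printf("%d kHz\n", bxt_cdclk_khz(60, 8));	/* 144000: ratio 60, div 4 */
	printf("decimal(624000) = %u\n", khz_to_decimal(624000));	/* 1246 */
	printf("round trip = %d kHz\n", decimal_to_khz(khz_to_decimal(624000)));
	return 0;
}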
+
+void broxton_init_cdclk(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t val;
+
+	/*
+	 * NDE_RSTWRN_OPT RST PCH Handshake En must always be 0b on BXT
+	 * or else the reset will hang because there is no PCH to respond.
+	 * Move the handshake programming to the initialization sequence;
+	 * previously it was left up to the BIOS.
+	 */
+	val = I915_READ(HSW_NDE_RSTWRN_OPT);
+	val &= ~RESET_PCH_HANDSHAKE_ENABLE;
+	I915_WRITE(HSW_NDE_RSTWRN_OPT, val);
+
+	/* Enable PG1 for cdclk */
+	intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
+
+	/* check if cd clock is enabled */
+	if (I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_PLL_ENABLE) {
+		DRM_DEBUG_KMS("Display already initialized\n");
+		return;
+	}
+
+	/*
+	 * FIXME:
+	 * - The initial CDCLK needs to be read from VBT.
+	 *   This change can be made once the VBT gains the relevant BXT fields.
+	 * - check if setting the max (or any) cdclk freq is really necessary
+	 *   here; it belongs in modeset time
+	 */
+	broxton_set_cdclk(dev, 624000);
+
+	I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST);
+	udelay(10);
+
+	if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE))
+		DRM_ERROR("DBuf power enable timeout!\n");
+}
+
+void broxton_uninit_cdclk(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST);
+	udelay(10);
+
+	if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
+		DRM_ERROR("DBuf power disable timeout!\n");
+
+	/* Set minimum (bypass) frequency, in effect turning off the DE PLL */
+	broxton_set_cdclk(dev, 19200);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+}
+
 /* returns HPLL frequency in kHz */
 static int valleyview_get_vco(struct drm_i915_private *dev_priv)
 {
@@ -5012,16 +5438,16 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	dev_priv->vlv_cdclk_freq = dev_priv->display.get_display_clock_speed(dev);
+	dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(dev);
 	DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n",
-			 dev_priv->vlv_cdclk_freq);
+			 dev_priv->cdclk_freq);
 
 	/*
 	 * Program the gmbus_freq based on the cdclk frequency.
 	 * BSpec erroneously claims we should aim for 4MHz, but
 	 * in fact 1MHz is the correct frequency.
 	 */
-	I915_WRITE(GMBUSFREQ_VLV, DIV_ROUND_UP(dev_priv->vlv_cdclk_freq, 1000));
+	I915_WRITE(GMBUSFREQ_VLV, DIV_ROUND_UP(dev_priv->cdclk_freq, 1000));
 }
 
 /* Adjust CDclk dividers to allow high res or save power if possible */
@@ -5030,7 +5456,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 val, cmd;
 
-	WARN_ON(dev_priv->display.get_display_clock_speed(dev) != dev_priv->vlv_cdclk_freq);
+	WARN_ON(dev_priv->display.get_display_clock_speed(dev)
+					!= dev_priv->cdclk_freq);
 
 	if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */
 		cmd = 2;
@@ -5094,7 +5521,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 val, cmd;
 
-	WARN_ON(dev_priv->display.get_display_clock_speed(dev) != dev_priv->vlv_cdclk_freq);
+	WARN_ON(dev_priv->display.get_display_clock_speed(dev)
+						!= dev_priv->cdclk_freq);
 
 	switch (cdclk) {
 	case 333333:
@@ -5159,37 +5587,74 @@
 		return 200000;
 }
 
-/* compute the max pixel clock for new configuration */
-static int intel_mode_max_pixclk(struct drm_i915_private *dev_priv)
+static int broxton_calc_cdclk(struct drm_i915_private *dev_priv,
+			      int max_pixclk)
 {
-	struct drm_device *dev = dev_priv->dev;
+	/*
+	 * FIXME:
+	 * - remove the guardband, it's not needed on BXT
+	 * - set 19.2MHz bypass frequency if there are no active pipes
+	 */
+	if (max_pixclk > 576000*9/10)
+		return 624000;
+	else if (max_pixclk > 384000*9/10)
+		return 576000;
+	else if (max_pixclk > 288000*9/10)
+		return 384000;
+	else if (max_pixclk > 144000*9/10)
+		return 288000;
+	else
+		return 144000;
+}
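
A standalone sketch, not part of the patch, mirroring the guardband selection above: the lowest cdclk whose 90% still exceeds the max pixel clock is chosen.

#include <stdio.h>

/* mirror of the thresholds in broxton_calc_cdclk() above */
static int calc_cdclk(int max_pixclk)
{
	if (max_pixclk > 576000 * 9 / 10)
		return 624000;
	else if (max_pixclk > 384000 * 9 / 10)
		return 576000;
	else if (max_pixclk > 288000 * 9 / 10)
		return 384000;
	else if (max_pixclk > 144000 * 9 / 10)
		return 288000;
	else
		return 144000;
}

int main(void)
{
	/* ~533 MHz (roughly a 4k@60 mode) exceeds 518400 kHz, so 624 MHz */
	printf("%d\n", calc_cdclk(533000));	/* 624000 */
	/* 148.5 MHz (1080p60) only exceeds the 129600 kHz threshold */
	printf("%d\n", calc_cdclk(148500));	/* 288000 */
	return 0;
}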
+
+/* compute the max pixel clock for new configuration */
+static int intel_mode_max_pixclk(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
 	struct intel_crtc *intel_crtc;
+	struct intel_crtc_state *crtc_state;
 	int max_pixclk = 0;
 
 	for_each_intel_crtc(dev, intel_crtc) {
-		if (intel_crtc->new_enabled)
-			max_pixclk = max(max_pixclk,
-					 intel_crtc->new_config->base.adjusted_mode.crtc_clock);
+		crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
+
+		if (!crtc_state->base.enable)
+			continue;
+
+		max_pixclk = max(max_pixclk,
+				 crtc_state->base.adjusted_mode.crtc_clock);
 	}
 
 	return max_pixclk;
 }
 
-static void valleyview_modeset_global_pipes(struct drm_device *dev,
+static int valleyview_modeset_global_pipes(struct drm_atomic_state *state,
 					    unsigned *prepare_pipes)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(state->dev);
 	struct intel_crtc *intel_crtc;
-	int max_pixclk = intel_mode_max_pixclk(dev_priv);
+	int max_pixclk = intel_mode_max_pixclk(state);
+	int cdclk;
 
-	if (valleyview_calc_cdclk(dev_priv, max_pixclk) ==
-	    dev_priv->vlv_cdclk_freq)
-		return;
+	if (max_pixclk < 0)
+		return max_pixclk;
+
+	if (IS_VALLEYVIEW(dev_priv))
+		cdclk = valleyview_calc_cdclk(dev_priv, max_pixclk);
+	else
+		cdclk = broxton_calc_cdclk(dev_priv, max_pixclk);
+
+	if (cdclk == dev_priv->cdclk_freq)
+		return 0;
 
 	/* disable/enable all currently active pipes while we change cdclk */
-	for_each_intel_crtc(dev, intel_crtc)
+	for_each_intel_crtc(state->dev, intel_crtc)
 		if (intel_crtc->base.state->enable)
 			*prepare_pipes |= (1 << intel_crtc->pipe);
+
+	return 0;
 }
 
 static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv)
@@ -5201,7 +5666,7 @@
 	else
 		default_credits = PFI_CREDIT(8);
 
-	if (DIV_ROUND_CLOSEST(dev_priv->vlv_cdclk_freq, 1000) >= dev_priv->rps.cz_freq) {
+	if (DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 1000) >= dev_priv->rps.cz_freq) {
 		/* CHV suggested value is 31 or 63 */
 		if (IS_CHERRYVIEW(dev_priv))
 			credits = PFI_CREDIT_31;
@@ -5232,10 +5697,20 @@
 {
 	struct drm_device *dev = state->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int max_pixclk = intel_mode_max_pixclk(dev_priv);
-	int req_cdclk = valleyview_calc_cdclk(dev_priv, max_pixclk);
+	int max_pixclk = intel_mode_max_pixclk(state);
+	int req_cdclk;
 
-	if (req_cdclk != dev_priv->vlv_cdclk_freq) {
+	/* The only way this can fail is if we fail to add the crtc_state to
+	 * the atomic state. That cannot happen here, because the call to
+	 * intel_mode_max_pixclk() in valleyview_modeset_global_pipes() (which
+	 * must have succeeded, or the mode set would have been aborted) has
+	 * already added all the needed states. */
+	if (WARN_ON(max_pixclk < 0))
+		return;
+
+	req_cdclk = valleyview_calc_cdclk(dev_priv, max_pixclk);
+
+	if (req_cdclk != dev_priv->cdclk_freq) {
 		/*
 		 * FIXME: We can end up here with all power domains off, yet
 		 * with a CDCLK frequency other than the minimum. To account
@@ -5554,7 +6029,7 @@
 	dev_priv->display.crtc_disable(crtc);
 	dev_priv->display.off(crtc);
 
-	crtc->primary->funcs->disable_plane(crtc->primary);
+	drm_plane_helper_disable(crtc->primary);
 
 	/* Update computed state. */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
@@ -5695,65 +6170,80 @@
 	return encoder->get_hw_state(encoder, &pipe);
 }
 
-static int pipe_required_fdi_lanes(struct drm_device *dev, enum pipe pipe)
+static int pipe_required_fdi_lanes(struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *crtc =
-		to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe));
-
-	if (crtc->base.state->enable &&
-	    crtc->config->has_pch_encoder)
-		return crtc->config->fdi_lanes;
+	if (crtc_state->base.enable && crtc_state->has_pch_encoder)
+		return crtc_state->fdi_lanes;
 
 	return 0;
 }
 
-static bool ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
+static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe,
 				     struct intel_crtc_state *pipe_config)
 {
+	struct drm_atomic_state *state = pipe_config->base.state;
+	struct intel_crtc *other_crtc;
+	struct intel_crtc_state *other_crtc_state;
+
 	DRM_DEBUG_KMS("checking fdi config on pipe %c, lanes %i\n",
 		      pipe_name(pipe), pipe_config->fdi_lanes);
 	if (pipe_config->fdi_lanes > 4) {
 		DRM_DEBUG_KMS("invalid fdi lane config on pipe %c: %i lanes\n",
 			      pipe_name(pipe), pipe_config->fdi_lanes);
-		return false;
+		return -EINVAL;
 	}
 
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
 		if (pipe_config->fdi_lanes > 2) {
 			DRM_DEBUG_KMS("only 2 lanes on haswell, required: %i lanes\n",
 				      pipe_config->fdi_lanes);
-			return false;
+			return -EINVAL;
 		} else {
-			return true;
+			return 0;
 		}
 	}
 
 	if (INTEL_INFO(dev)->num_pipes == 2)
-		return true;
+		return 0;
 
 	/* Ivybridge 3 pipe is really complicated */
 	switch (pipe) {
 	case PIPE_A:
-		return true;
+		return 0;
 	case PIPE_B:
-		if (pipe_config->fdi_lanes > 2 &&
-		    pipe_required_fdi_lanes(dev, PIPE_C) > 0) {
+		if (pipe_config->fdi_lanes <= 2)
+			return 0;
+
+		other_crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, PIPE_C));
+		other_crtc_state =
+			intel_atomic_get_crtc_state(state, other_crtc);
+		if (IS_ERR(other_crtc_state))
+			return PTR_ERR(other_crtc_state);
+
+		if (pipe_required_fdi_lanes(other_crtc_state) > 0) {
 			DRM_DEBUG_KMS("invalid shared fdi lane config on pipe %c: %i lanes\n",
 				      pipe_name(pipe), pipe_config->fdi_lanes);
-			return false;
+			return -EINVAL;
 		}
-		return true;
+		return 0;
 	case PIPE_C:
 		if (pipe_config->fdi_lanes > 2) {
 			DRM_DEBUG_KMS("only 2 lanes on pipe %c: required %i lanes\n",
 				      pipe_name(pipe), pipe_config->fdi_lanes);
-			return false;
+			return -EINVAL;
 		}
-		if (pipe_required_fdi_lanes(dev, PIPE_B) > 2) {
+
+		other_crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, PIPE_B));
+		other_crtc_state =
+			intel_atomic_get_crtc_state(state, other_crtc);
+		if (IS_ERR(other_crtc_state))
+			return PTR_ERR(other_crtc_state);
+
+		if (pipe_required_fdi_lanes(other_crtc_state) > 2) {
 			DRM_DEBUG_KMS("fdi link B uses too many lanes to enable link C\n");
-			return false;
+			return -EINVAL;
 		}
-		return true;
+		return 0;
 	default:
 		BUG();
 	}
@@ -5765,8 +6255,8 @@
 {
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
-	int lane, link_bw, fdi_dotclock;
-	bool setup_ok, needs_recompute = false;
+	int lane, link_bw, fdi_dotclock, ret;
+	bool needs_recompute = false;
 
 retry:
 	/* FDI is a binary signal running at ~2.7GHz, encoding
@@ -5788,9 +6278,9 @@
 	intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock,
 			       link_bw, &pipe_config->fdi_m_n);
 
-	setup_ok = ironlake_check_fdi_lanes(intel_crtc->base.dev,
-					    intel_crtc->pipe, pipe_config);
-	if (!setup_ok && pipe_config->pipe_bpp > 6*3) {
+	ret = ironlake_check_fdi_lanes(intel_crtc->base.dev,
+				       intel_crtc->pipe, pipe_config);
+	if (ret == -EINVAL && pipe_config->pipe_bpp > 6*3) {
 		pipe_config->pipe_bpp -= 2*3;
 		DRM_DEBUG_KMS("fdi link bw constraint, reducing pipe bpp to %i\n",
 			      pipe_config->pipe_bpp);
@@ -5803,7 +6293,7 @@
 	if (needs_recompute)
 		return RETRY;
 
-	return setup_ok ? 0 : -EINVAL;
+	return ret;
 }
 
 static void hsw_compute_ips_config(struct intel_crtc *crtc,
@@ -5820,6 +6310,7 @@
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	int ret;
 
 	/* FIXME should check pixel clock limits on all platforms */
 	if (INTEL_INFO(dev)->gen < 4) {
@@ -5860,21 +6351,107 @@
 		adjusted_mode->hsync_start == adjusted_mode->hdisplay)
 		return -EINVAL;
 
-	if ((IS_G4X(dev) || IS_VALLEYVIEW(dev)) && pipe_config->pipe_bpp > 10*3) {
-		pipe_config->pipe_bpp = 10*3; /* 12bpc is gen5+ */
-	} else if (INTEL_INFO(dev)->gen <= 4 && pipe_config->pipe_bpp > 8*3) {
-		/* only a 8bpc pipe, with 6bpc dither through the panel fitter
-		 * for lvds. */
-		pipe_config->pipe_bpp = 8*3;
-	}
-
 	if (HAS_IPS(dev))
 		hsw_compute_ips_config(crtc, pipe_config);
 
 	if (pipe_config->has_pch_encoder)
 		return ironlake_fdi_compute_config(crtc, pipe_config);
 
-	return 0;
+	/* FIXME: remove the call below once atomic modeset is in place and all
+	 * crtc-related checks are called from the atomic_crtc_check function */
+	ret = 0;
+	DRM_DEBUG_KMS("intel_crtc = %p drm_state (pipe_config->base.state) = %p\n",
+		crtc, pipe_config->base.state);
+	ret = intel_atomic_setup_scalers(dev, crtc, pipe_config);
+
+	return ret;
+}
+
+static int skylake_get_display_clock_speed(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	uint32_t lcpll1 = I915_READ(LCPLL1_CTL);
+	uint32_t cdctl = I915_READ(CDCLK_CTL);
+	uint32_t linkrate;
+
+	if (!(lcpll1 & LCPLL_PLL_ENABLE)) {
+		WARN(1, "LCPLL1 not enabled\n");
+		return 24000; /* 24MHz is the cd freq with NSSC ref */
+	}
+
+	if ((cdctl & CDCLK_FREQ_SEL_MASK) == CDCLK_FREQ_540)
+		return 540000;
+
+	linkrate = (I915_READ(DPLL_CTRL1) &
+		    DPLL_CRTL1_LINK_RATE_MASK(SKL_DPLL0)) >> 1;
+
+	if (linkrate == DPLL_CRTL1_LINK_RATE_2160 ||
+	    linkrate == DPLL_CRTL1_LINK_RATE_1080) {
+		/* vco 8640 */
+		switch (cdctl & CDCLK_FREQ_SEL_MASK) {
+		case CDCLK_FREQ_450_432:
+			return 432000;
+		case CDCLK_FREQ_337_308:
+			return 308570;
+		case CDCLK_FREQ_675_617:
+			return 617140;
+		default:
+			WARN(1, "Unknown cd freq selection\n");
+		}
+	} else {
+		/* vco 8100 */
+		switch (cdctl & CDCLK_FREQ_SEL_MASK) {
+		case CDCLK_FREQ_450_432:
+			return 450000;
+		case CDCLK_FREQ_337_308:
+			return 337500;
+		case CDCLK_FREQ_675_617:
+			return 675000;
+		default:
+			WARN(1, "Unknown cd freq selection\n");
+		}
+	}
+
+	/* error case, do as if DPLL0 isn't enabled */
+	return 24000;
+}
+
+static int broadwell_get_display_clock_speed(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t lcpll = I915_READ(LCPLL_CTL);
+	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
+
+	if (lcpll & LCPLL_CD_SOURCE_FCLK)
+		return 800000;
+	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
+		return 450000;
+	else if (freq == LCPLL_CLK_FREQ_450)
+		return 450000;
+	else if (freq == LCPLL_CLK_FREQ_54O_BDW)
+		return 540000;
+	else if (freq == LCPLL_CLK_FREQ_337_5_BDW)
+		return 337500;
+	else
+		return 675000;
+}
+
+static int haswell_get_display_clock_speed(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t lcpll = I915_READ(LCPLL_CTL);
+	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
+
+	if (lcpll & LCPLL_CD_SOURCE_FCLK)
+		return 800000;
+	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
+		return 450000;
+	else if (freq == LCPLL_CLK_FREQ_450)
+		return 450000;
+	else if (IS_HSW_ULT(dev))
+		return 337500;
+	else
+		return 540000;
 }
 
 static int valleyview_get_display_clock_speed(struct drm_device *dev)
@@ -5899,6 +6476,11 @@
 	return DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, divider + 1);
 }
 
+static int ilk_get_display_clock_speed(struct drm_device *dev)
+{
+	return 450000;
+}
+
 static int i945_get_display_clock_speed(struct drm_device *dev)
 {
 	return 400000;
@@ -5906,7 +6488,7 @@
 
 static int i915_get_display_clock_speed(struct drm_device *dev)
 {
-	return 333000;
+	return 333333;
 }
 
 static int i9xx_misc_get_display_clock_speed(struct drm_device *dev)
@@ -5922,19 +6504,19 @@
 
 	switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
 	case GC_DISPLAY_CLOCK_267_MHZ_PNV:
-		return 267000;
+		return 266667;
 	case GC_DISPLAY_CLOCK_333_MHZ_PNV:
-		return 333000;
+		return 333333;
 	case GC_DISPLAY_CLOCK_444_MHZ_PNV:
-		return 444000;
+		return 444444;
 	case GC_DISPLAY_CLOCK_200_MHZ_PNV:
 		return 200000;
 	default:
 		DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc);
 	case GC_DISPLAY_CLOCK_133_MHZ_PNV:
-		return 133000;
+		return 133333;
 	case GC_DISPLAY_CLOCK_167_MHZ_PNV:
-		return 167000;
+		return 166667;
 	}
 }
 
@@ -5945,11 +6527,11 @@
 	pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
 
 	if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
-		return 133000;
+		return 133333;
 	else {
 		switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
 		case GC_DISPLAY_CLOCK_333_MHZ:
-			return 333000;
+			return 333333;
 		default:
 		case GC_DISPLAY_CLOCK_190_200_MHZ:
 			return 190000;
@@ -5959,7 +6541,7 @@
 
 static int i865_get_display_clock_speed(struct drm_device *dev)
 {
-	return 266000;
+	return 266667;
 }
 
 static int i855_get_display_clock_speed(struct drm_device *dev)
@@ -5975,7 +6557,7 @@
 	case GC_CLOCK_166_250:
 		return 250000;
 	case GC_CLOCK_100_133:
-		return 133000;
+		return 133333;
 	}
 
 	/* Shouldn't happen */
@@ -5984,7 +6566,7 @@
 
 static int i830_get_display_clock_speed(struct drm_device *dev)
 {
-	return 133000;
+	return 133333;
 }
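
The rounded clock constants introduced above look like exact thirds (and one ninth) of round MHz figures; a tiny sketch showing that reading (an observation, not something the patch states):

#include <stdio.h>

#define DIV_ROUND_CLOSEST(n, d)	(((n) + (d) / 2) / (d))

int main(void)
{
	printf("%d %d %d %d %d\n",
	       DIV_ROUND_CLOSEST(400000, 3),	/* 133333 */
	       DIV_ROUND_CLOSEST(500000, 3),	/* 166667 */
	       DIV_ROUND_CLOSEST(800000, 3),	/* 266667 */
	       DIV_ROUND_CLOSEST(1000000, 3),	/* 333333 */
	       DIV_ROUND_CLOSEST(4000000, 9));	/* 444444 */
	return 0;
}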
 
 static void
@@ -6037,7 +6619,7 @@
 
 	WARN_ON(!crtc_state->base.state);
 
-	if (IS_VALLEYVIEW(dev)) {
+	if (IS_VALLEYVIEW(dev) || IS_BROXTON(dev)) {
 		refclk = 100000;
 	} else if (intel_pipe_will_have_type(crtc_state, INTEL_OUTPUT_LVDS) &&
 	    intel_panel_use_ssc(dev_priv) && num_connectors < 2) {
@@ -7980,14 +8562,28 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t tmp;
+	struct intel_crtc_scaler_state *scaler_state = &pipe_config->scaler_state;
+	uint32_t ps_ctrl = 0;
+	int id = -1;
+	int i;
 
-	tmp = I915_READ(PS_CTL(crtc->pipe));
+	/* find scaler attached to this pipe */
+	for (i = 0; i < crtc->num_scalers; i++) {
+		ps_ctrl = I915_READ(SKL_PS_CTRL(crtc->pipe, i));
+		if (ps_ctrl & PS_SCALER_EN && !(ps_ctrl & PS_PLANE_SEL_MASK)) {
+			id = i;
+			pipe_config->pch_pfit.enabled = true;
+			pipe_config->pch_pfit.pos = I915_READ(SKL_PS_WIN_POS(crtc->pipe, i));
+			pipe_config->pch_pfit.size = I915_READ(SKL_PS_WIN_SZ(crtc->pipe, i));
+			break;
+		}
+	}
 
-	if (tmp & PS_ENABLE) {
-		pipe_config->pch_pfit.enabled = true;
-		pipe_config->pch_pfit.pos = I915_READ(PS_WIN_POS(crtc->pipe));
-		pipe_config->pch_pfit.size = I915_READ(PS_WIN_SZ(crtc->pipe));
+	scaler_state->scaler_id = id;
+	if (id >= 0) {
+		scaler_state->scaler_users |= (1 << SKL_CRTC_INDEX);
+	} else {
+		scaler_state->scaler_users &= ~(1 << SKL_CRTC_INDEX);
 	}
 }
 
@@ -8472,6 +9068,23 @@
 	intel_prepare_ddi(dev);
 }
 
+static void broxton_modeset_global_resources(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int max_pixclk = intel_mode_max_pixclk(state);
+	int req_cdclk;
+
+	/* see the comment in valleyview_modeset_global_resources */
+	if (WARN_ON(max_pixclk < 0))
+		return;
+
+	req_cdclk = broxton_calc_cdclk(dev_priv, max_pixclk);
+
+	if (req_cdclk != dev_priv->cdclk_freq)
+		broxton_set_cdclk(dev, req_cdclk);
+}
+
 static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
 				      struct intel_crtc_state *crtc_state)
 {
@@ -8483,6 +9096,28 @@
 	return 0;
 }
 
+static void bxt_get_ddi_pll(struct drm_i915_private *dev_priv,
+				enum port port,
+				struct intel_crtc_state *pipe_config)
+{
+	switch (port) {
+	case PORT_A:
+		pipe_config->ddi_pll_sel = SKL_DPLL0;
+		pipe_config->shared_dpll = DPLL_ID_SKL_DPLL1;
+		break;
+	case PORT_B:
+		pipe_config->ddi_pll_sel = SKL_DPLL1;
+		pipe_config->shared_dpll = DPLL_ID_SKL_DPLL2;
+		break;
+	case PORT_C:
+		pipe_config->ddi_pll_sel = SKL_DPLL2;
+		pipe_config->shared_dpll = DPLL_ID_SKL_DPLL3;
+		break;
+	default:
+		DRM_ERROR("Incorrect port type\n");
+	}
+}
+
 static void skylake_get_ddi_pll(struct drm_i915_private *dev_priv,
 				enum port port,
 				struct intel_crtc_state *pipe_config)
@@ -8545,6 +9180,8 @@
 
 	if (IS_SKYLAKE(dev))
 		skylake_get_ddi_pll(dev_priv, port, pipe_config);
+	else if (IS_BROXTON(dev))
+		bxt_get_ddi_pll(dev_priv, port, pipe_config);
 	else
 		haswell_get_ddi_pll(dev_priv, port, pipe_config);
 
@@ -8621,12 +9258,22 @@
 
 	intel_get_pipe_timings(crtc, pipe_config);
 
+	if (INTEL_INFO(dev)->gen >= 9) {
+		skl_init_scalers(dev, crtc, pipe_config);
+	}
+
 	pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
 	if (intel_display_power_is_enabled(dev_priv, pfit_domain)) {
-		if (IS_SKYLAKE(dev))
+		if (INTEL_INFO(dev)->gen == 9)
 			skylake_get_pfit_config(crtc, pipe_config);
-		else
+		else if (INTEL_INFO(dev)->gen < 9)
 			ironlake_get_pfit_config(crtc, pipe_config);
+		else
+			MISSING_CASE(INTEL_INFO(dev)->gen);
+
+	} else {
+		pipe_config->scaler_state.scaler_id = -1;
+		pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
 	}
 
 	if (IS_HASWELL(dev))
@@ -9070,7 +9717,6 @@
 
 	intel_crtc = to_intel_crtc(crtc);
 	intel_crtc->new_enabled = true;
-	intel_crtc->new_config = intel_crtc->config;
 	old->dpms_mode = connector->dpms;
 	old->load_detect_temp = true;
 	old->release_fb = NULL;
@@ -9126,10 +9772,6 @@
 
  fail:
 	intel_crtc->new_enabled = crtc->state->enable;
-	if (intel_crtc->new_enabled)
-		intel_crtc->new_config = intel_crtc->config;
-	else
-		intel_crtc->new_config = NULL;
 fail_unlock:
 	if (state) {
 		drm_atomic_state_free(state);
@@ -9175,7 +9817,6 @@
 		to_intel_connector(connector)->new_encoder = NULL;
 		intel_encoder->new_crtc = NULL;
 		intel_crtc->new_enabled = false;
-		intel_crtc->new_config = NULL;
 
 		connector_state->best_encoder = NULL;
 		connector_state->crtc = NULL;
@@ -9915,23 +10556,34 @@
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
-	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
-	struct drm_i915_gem_object *obj = intel_fb->obj;
 	const enum pipe pipe = intel_crtc->pipe;
 	u32 ctl, stride;
 
 	ctl = I915_READ(PLANE_CTL(pipe, 0));
 	ctl &= ~PLANE_CTL_TILED_MASK;
-	if (obj->tiling_mode == I915_TILING_X)
+	switch (fb->modifier[0]) {
+	case DRM_FORMAT_MOD_NONE:
+		break;
+	case I915_FORMAT_MOD_X_TILED:
 		ctl |= PLANE_CTL_TILED_X;
+		break;
+	case I915_FORMAT_MOD_Y_TILED:
+		ctl |= PLANE_CTL_TILED_Y;
+		break;
+	case I915_FORMAT_MOD_Yf_TILED:
+		ctl |= PLANE_CTL_TILED_YF;
+		break;
+	default:
+		MISSING_CASE(fb->modifier[0]);
+	}
 
 	/*
 	 * The stride is either expressed as a multiple of 64 bytes chunks for
 	 * linear buffers or in number of tiles for tiled buffers.
 	 */
-	stride = fb->pitches[0] >> 6;
-	if (obj->tiling_mode == I915_TILING_X)
-		stride = fb->pitches[0] >> 9; /* X tiles are 512 bytes wide */
+	stride = fb->pitches[0] /
+		 intel_fb_stride_alignment(dev, fb->modifier[0],
+					   fb->pixel_format);
 
 	/*
 	 * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
@@ -10085,6 +10737,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_unpin_work *work;
 
 	WARN_ON(!in_interrupt());
 
@@ -10092,12 +10745,16 @@
 		return;
 
 	spin_lock(&dev->event_lock);
-	if (intel_crtc->unpin_work && __intel_pageflip_stall_check(dev, crtc)) {
+	work = intel_crtc->unpin_work;
+	if (work != NULL && __intel_pageflip_stall_check(dev, crtc)) {
 		WARN_ONCE(1, "Kicking stuck page flip: queued at %d, now %d\n",
-			 intel_crtc->unpin_work->flip_queued_vblank,
-			 drm_vblank_count(dev, pipe));
+			 work->flip_queued_vblank, drm_vblank_count(dev, pipe));
 		page_flip_completed(intel_crtc);
+		work = NULL;
 	}
+	if (work != NULL &&
+	    drm_vblank_count(dev, pipe) - work->flip_queued_vblank > 1)
+		intel_queue_rps_boost_for_request(dev, work->flip_queued_req);
 	spin_unlock(&dev->event_lock);
 }
 
@@ -10115,6 +10772,7 @@
 	enum pipe pipe = intel_crtc->pipe;
 	struct intel_unpin_work *work;
 	struct intel_engine_cs *ring;
+	bool mmio_flip;
 	int ret;
 
 	/*
@@ -10212,15 +10870,23 @@
 		ring = &dev_priv->ring[RCS];
 	}
 
+	mmio_flip = use_mmio_flip(ring, obj);
+
+	/* When using CS flips, we want to emit semaphores between rings.
+	 * However, when using mmio flips we will create a task to do the
+	 * synchronisation, so all we want here is to pin the framebuffer
+	 * into the display plane and skip any waits.
+	 */
 	ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
-					 crtc->primary->state, ring);
+					 crtc->primary->state,
+					 mmio_flip ? i915_gem_request_get_ring(obj->last_read_req) : ring);
 	if (ret)
 		goto cleanup_pending;
 
 	work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary), obj)
 						  + intel_crtc->dspaddr_offset;
 
-	if (use_mmio_flip(ring, obj)) {
+	if (mmio_flip) {
 		ret = intel_queue_mmio_flip(dev, crtc, fb, obj, ring,
 					    page_flip_flags);
 		if (ret)
@@ -10315,11 +10981,6 @@
 
 	for_each_intel_crtc(dev, crtc) {
 		crtc->new_enabled = crtc->base.state->enable;
-
-		if (crtc->new_enabled)
-			crtc->new_config = crtc->config;
-		else
-			crtc->new_config = NULL;
 	}
 }
 
@@ -10399,7 +11060,6 @@
 
 static int
 compute_baseline_pipe_bpp(struct intel_crtc *crtc,
-			  struct drm_framebuffer *fb,
 			  struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -10407,41 +11067,13 @@
 	struct intel_connector *connector;
 	int bpp, i;
 
-	switch (fb->pixel_format) {
-	case DRM_FORMAT_C8:
-		bpp = 8*3; /* since we go through a colormap */
-		break;
-	case DRM_FORMAT_XRGB1555:
-	case DRM_FORMAT_ARGB1555:
-		/* checked in intel_framebuffer_init already */
-		if (WARN_ON(INTEL_INFO(dev)->gen > 3))
-			return -EINVAL;
-	case DRM_FORMAT_RGB565:
-		bpp = 6*3; /* min is 18bpp */
-		break;
-	case DRM_FORMAT_XBGR8888:
-	case DRM_FORMAT_ABGR8888:
-		/* checked in intel_framebuffer_init already */
-		if (WARN_ON(INTEL_INFO(dev)->gen < 4))
-			return -EINVAL;
-	case DRM_FORMAT_XRGB8888:
-	case DRM_FORMAT_ARGB8888:
-		bpp = 8*3;
-		break;
-	case DRM_FORMAT_XRGB2101010:
-	case DRM_FORMAT_ARGB2101010:
-	case DRM_FORMAT_XBGR2101010:
-	case DRM_FORMAT_ABGR2101010:
-		/* checked in intel_framebuffer_init already */
-		if (WARN_ON(INTEL_INFO(dev)->gen < 4))
-			return -EINVAL;
+	if ((IS_G4X(dev) || IS_VALLEYVIEW(dev)))
 		bpp = 10*3;
-		break;
-	/* TODO: gen4+ supports 16 bpc floating point, too. */
-	default:
-		DRM_DEBUG_KMS("unsupported depth\n");
-		return -EINVAL;
-	}
+	else if (INTEL_INFO(dev)->gen >= 5)
+		bpp = 12*3;
+	else
+		bpp = 8*3;
+
 
 	pipe_config->pipe_bpp = bpp;
 
@@ -10477,8 +11109,14 @@
 				   struct intel_crtc_state *pipe_config,
 				   const char *context)
 {
-	DRM_DEBUG_KMS("[CRTC:%d]%s config for pipe %c\n", crtc->base.base.id,
-		      context, pipe_name(crtc->pipe));
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_plane *plane;
+	struct intel_plane *intel_plane;
+	struct intel_plane_state *state;
+	struct drm_framebuffer *fb;
+
+	DRM_DEBUG_KMS("[CRTC:%d]%s config %p for pipe %c\n", crtc->base.base.id,
+		      context, pipe_config, pipe_name(crtc->pipe));
 
 	DRM_DEBUG_KMS("cpu_transcoder: %c\n", transcoder_name(pipe_config->cpu_transcoder));
 	DRM_DEBUG_KMS("pipe bpp: %i, dithering: %i\n",
@@ -10515,6 +11153,9 @@
 	DRM_DEBUG_KMS("port clock: %d\n", pipe_config->port_clock);
 	DRM_DEBUG_KMS("pipe src size: %dx%d\n",
 		      pipe_config->pipe_src_w, pipe_config->pipe_src_h);
+	DRM_DEBUG_KMS("num_scalers: %d\n", crtc->num_scalers);
+	DRM_DEBUG_KMS("scaler_users: 0x%x\n", pipe_config->scaler_state.scaler_users);
+	DRM_DEBUG_KMS("scaler id: %d\n", pipe_config->scaler_state.scaler_id);
 	DRM_DEBUG_KMS("gmch pfit: control: 0x%08x, ratios: 0x%08x, lvds border: 0x%08x\n",
 		      pipe_config->gmch_pfit.control,
 		      pipe_config->gmch_pfit.pgm_ratios,
@@ -10525,6 +11166,40 @@
 		      pipe_config->pch_pfit.enabled ? "enabled" : "disabled");
 	DRM_DEBUG_KMS("ips: %i\n", pipe_config->ips_enabled);
 	DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide);
+
+	DRM_DEBUG_KMS("planes on this crtc\n");
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+		intel_plane = to_intel_plane(plane);
+		if (intel_plane->pipe != crtc->pipe)
+			continue;
+
+		state = to_intel_plane_state(plane->state);
+		fb = state->base.fb;
+		if (!fb) {
+			DRM_DEBUG_KMS("%s PLANE:%d plane: %u.%u idx: %d "
+				"disabled, scaler_id = %d\n",
+				plane->type == DRM_PLANE_TYPE_CURSOR ? "CURSOR" : "STANDARD",
+				plane->base.id, intel_plane->pipe,
+				(crtc->base.primary == plane) ? 0 : intel_plane->plane + 1,
+				drm_plane_index(plane), state->scaler_id);
+			continue;
+		}
+
+		DRM_DEBUG_KMS("%s PLANE:%d plane: %u.%u idx: %d enabled",
+			plane->type == DRM_PLANE_TYPE_CURSOR ? "CURSOR" : "STANDARD",
+			plane->base.id, intel_plane->pipe,
+			crtc->base.primary == plane ? 0 : intel_plane->plane + 1,
+			drm_plane_index(plane));
+		DRM_DEBUG_KMS("\tFB:%d, fb = %ux%u format = 0x%x",
+			fb->base.id, fb->width, fb->height, fb->pixel_format);
+		DRM_DEBUG_KMS("\tscaler:%d src (%u, %u) %ux%u dst (%u, %u) %ux%u\n",
+			state->scaler_id,
+			state->src.x1 >> 16, state->src.y1 >> 16,
+			drm_rect_width(&state->src) >> 16,
+			drm_rect_height(&state->src) >> 16,
+			state->dst.x1, state->dst.y1,
+			drm_rect_width(&state->dst), drm_rect_height(&state->dst));
+	}
 }
 
 static bool encoders_cloneable(const struct intel_encoder *a,
@@ -10535,16 +11210,24 @@
 			  b->cloneable & (1 << a->type));
 }
 
-static bool check_single_encoder_cloning(struct intel_crtc *crtc,
+static bool check_single_encoder_cloning(struct drm_atomic_state *state,
+					 struct intel_crtc *crtc,
 					 struct intel_encoder *encoder)
 {
-	struct drm_device *dev = crtc->base.dev;
 	struct intel_encoder *source_encoder;
+	struct drm_connector_state *connector_state;
+	int i;
 
-	for_each_intel_encoder(dev, source_encoder) {
-		if (source_encoder->new_crtc != crtc)
+	for (i = 0; i < state->num_connector; i++) {
+		if (!state->connectors[i])
 			continue;
 
+		connector_state = state->connector_states[i];
+		if (connector_state->crtc != &crtc->base)
+			continue;
+
+		source_encoder =
+			to_intel_encoder(connector_state->best_encoder);
 		if (!encoders_cloneable(encoder, source_encoder))
 			return false;
 	}
@@ -10552,39 +11235,53 @@
 	return true;
 }
 
-static bool check_encoder_cloning(struct intel_crtc *crtc)
+static bool check_encoder_cloning(struct drm_atomic_state *state,
+				  struct intel_crtc *crtc)
 {
-	struct drm_device *dev = crtc->base.dev;
 	struct intel_encoder *encoder;
+	struct drm_connector_state *connector_state;
+	int i;
 
-	for_each_intel_encoder(dev, encoder) {
-		if (encoder->new_crtc != crtc)
+	for (i = 0; i < state->num_connector; i++) {
+		if (!state->connectors[i])
 			continue;
 
-		if (!check_single_encoder_cloning(crtc, encoder))
+		connector_state = state->connector_states[i];
+		if (connector_state->crtc != &crtc->base)
+			continue;
+
+		encoder = to_intel_encoder(connector_state->best_encoder);
+		if (!check_single_encoder_cloning(state, crtc, encoder))
 			return false;
 	}
 
 	return true;
 }
 
-static bool check_digital_port_conflicts(struct drm_device *dev)
+static bool check_digital_port_conflicts(struct drm_atomic_state *state)
 {
-	struct intel_connector *connector;
+	struct drm_device *dev = state->dev;
+	struct intel_encoder *encoder;
+	struct drm_connector_state *connector_state;
 	unsigned int used_ports = 0;
+	int i;
 
 	/*
 	 * Walk the connector list instead of the encoder
 	 * list to detect the problem on ddi platforms
 	 * where there's just one encoder per digital port.
 	 */
-	for_each_intel_connector(dev, connector) {
-		struct intel_encoder *encoder = connector->new_encoder;
-
-		if (!encoder)
+	for (i = 0; i < state->num_connector; i++) {
+		if (!state->connectors[i])
 			continue;
 
-		WARN_ON(!encoder->new_crtc);
+		connector_state = state->connector_states[i];
+		if (!connector_state->best_encoder)
+			continue;
+
+		encoder = to_intel_encoder(connector_state->best_encoder);
+
+		WARN_ON(!connector_state->crtc);
 
 		switch (encoder->type) {
 			unsigned int port_mask;
@@ -10613,34 +11310,35 @@
 clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
 {
 	struct drm_crtc_state tmp_state;
+	struct intel_crtc_scaler_state scaler_state;
 
-	/* Clear only the intel specific part of the crtc state */
+	/* Clear only the intel specific part of the crtc state excluding scalers */
 	tmp_state = crtc_state->base;
+	scaler_state = crtc_state->scaler_state;
 	memset(crtc_state, 0, sizeof *crtc_state);
 	crtc_state->base = tmp_state;
+	crtc_state->scaler_state = scaler_state;
 }
 
 static struct intel_crtc_state *
 intel_modeset_pipe_config(struct drm_crtc *crtc,
-			  struct drm_framebuffer *fb,
 			  struct drm_display_mode *mode,
 			  struct drm_atomic_state *state)
 {
-	struct drm_device *dev = crtc->dev;
 	struct intel_encoder *encoder;
 	struct intel_connector *connector;
 	struct drm_connector_state *connector_state;
 	struct intel_crtc_state *pipe_config;
-	int plane_bpp, ret = -EINVAL;
+	int base_bpp, ret = -EINVAL;
 	int i;
 	bool retry = true;
 
-	if (!check_encoder_cloning(to_intel_crtc(crtc))) {
+	if (!check_encoder_cloning(state, to_intel_crtc(crtc))) {
 		DRM_DEBUG_KMS("rejecting invalid cloning configuration\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (!check_digital_port_conflicts(dev)) {
+	if (!check_digital_port_conflicts(state)) {
 		DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n");
 		return ERR_PTR(-EINVAL);
 	}
@@ -10676,9 +11374,9 @@
 	 * plane pixel format and any sink constraints into account. Returns the
 	 * source plane bpp so that dithering can be selected on mismatches
 	 * after encoders and crtc also have had their say. */
-	plane_bpp = compute_baseline_pipe_bpp(to_intel_crtc(crtc),
-					      fb, pipe_config);
-	if (plane_bpp < 0)
+	base_bpp = compute_baseline_pipe_bpp(to_intel_crtc(crtc),
+					     pipe_config);
+	if (base_bpp < 0)
 		goto fail;
 
 	/*
@@ -10746,9 +11444,9 @@
 		goto encoder_retry;
 	}
 
-	pipe_config->dither = pipe_config->pipe_bpp != plane_bpp;
+	pipe_config->dither = pipe_config->pipe_bpp != base_bpp;
 	DRM_DEBUG_KMS("plane bpp: %i, pipe bpp: %i, dithering: %i\n",
-		      plane_bpp, pipe_config->pipe_bpp, pipe_config->dither);
+		      base_bpp, pipe_config->pipe_bpp, pipe_config->dither);
 
 	return pipe_config;
 fail:
@@ -10880,9 +11578,6 @@
 	/* Double check state. */
 	for_each_intel_crtc(dev, intel_crtc) {
 		WARN_ON(intel_crtc->base.state->enable != intel_crtc_in_use(&intel_crtc->base));
-		WARN_ON(intel_crtc->new_config &&
-			intel_crtc->new_config != intel_crtc->config);
-		WARN_ON(intel_crtc->base.state->enable != !!intel_crtc->new_config);
 	}
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
@@ -11087,6 +11782,8 @@
 		PIPE_CONF_CHECK_I(pch_pfit.size);
 	}
 
+	PIPE_CONF_CHECK_I(scaler_state.scaler_id);
+
 	/* BDW+ don't expose a synchronous way to read the state */
 	if (IS_HASWELL(dev))
 		PIPE_CONF_CHECK_I(ips_enabled);
@@ -11429,7 +12126,6 @@
 static struct intel_crtc_state *
 intel_modeset_compute_config(struct drm_crtc *crtc,
 			     struct drm_display_mode *mode,
-			     struct drm_framebuffer *fb,
 			     struct drm_atomic_state *state,
 			     unsigned *modeset_pipes,
 			     unsigned *prepare_pipes,
@@ -11467,10 +12163,12 @@
 		if (WARN_ON(&intel_crtc->base != crtc))
 			continue;
 
-		pipe_config = intel_modeset_pipe_config(crtc, fb, mode, state);
+		pipe_config = intel_modeset_pipe_config(crtc, mode, state);
 		if (IS_ERR(pipe_config))
 			return pipe_config;
 
+		pipe_config->base.enable = true;
+
 		intel_dump_pipe_config(to_intel_crtc(crtc), pipe_config,
 				       "[modeset]");
 	}
@@ -11478,10 +12176,11 @@
 	return intel_atomic_get_crtc_state(state, to_intel_crtc(crtc));
 }
 
-static int __intel_set_mode_setup_plls(struct drm_device *dev,
+static int __intel_set_mode_setup_plls(struct drm_atomic_state *state,
 				       unsigned modeset_pipes,
 				       unsigned disable_pipes)
 {
+	struct drm_device *dev = state->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	unsigned clear_pipes = modeset_pipes | disable_pipes;
 	struct intel_crtc *intel_crtc;
@@ -11495,9 +12194,15 @@
 		goto done;
 
 	for_each_intel_crtc_masked(dev, modeset_pipes, intel_crtc) {
-		struct intel_crtc_state *state = intel_crtc->new_config;
+		struct intel_crtc_state *crtc_state =
+			intel_atomic_get_crtc_state(state, intel_crtc);
+
+		/* Modeset pipes should have a new state by now */
+		if (WARN_ON(IS_ERR(crtc_state)))
+			continue;
+
 		ret = dev_priv->display.crtc_compute_clock(intel_crtc,
-							   state);
+							   crtc_state);
 		if (ret) {
 			intel_shared_dpll_abort_config(dev_priv);
 			goto done;
@@ -11519,6 +12224,7 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_display_mode *saved_mode;
+	struct drm_atomic_state *state = pipe_config->base.state;
 	struct intel_crtc_state *crtc_state_copy = NULL;
 	struct intel_crtc *intel_crtc;
 	int ret = 0;
@@ -11535,9 +12241,6 @@
 
 	*saved_mode = crtc->mode;
 
-	if (modeset_pipes)
-		to_intel_crtc(crtc)->new_config = pipe_config;
-
 	/*
 	 * See if the config requires any additional preparation, e.g.
 	 * to adjust global state with pipes off.  We need to do this
@@ -11545,14 +12248,16 @@
 	 * mode set on this crtc.  For other crtcs we need to use the
 	 * adjusted_mode bits in the crtc directly.
 	 */
-	if (IS_VALLEYVIEW(dev)) {
-		valleyview_modeset_global_pipes(dev, &prepare_pipes);
+	if (IS_VALLEYVIEW(dev) || IS_BROXTON(dev)) {
+		ret = valleyview_modeset_global_pipes(state, &prepare_pipes);
+		if (ret)
+			goto done;
 
 		/* may have added more to prepare_pipes than we should */
 		prepare_pipes &= ~disable_pipes;
 	}
 
-	ret = __intel_set_mode_setup_plls(dev, modeset_pipes, disable_pipes);
+	ret = __intel_set_mode_setup_plls(state, modeset_pipes, disable_pipes);
 	if (ret)
 		goto done;
 
@@ -11590,21 +12295,18 @@
 	 * update the output configuration. */
 	intel_modeset_update_state(dev, prepare_pipes);
 
-	modeset_update_crtc_power_domains(pipe_config->base.state);
+	modeset_update_crtc_power_domains(state);
 
-	/* Set up the DPLL and any encoders state that needs to adjust or depend
-	 * on the DPLL.
-	 */
 	for_each_intel_crtc_masked(dev, modeset_pipes, intel_crtc) {
 		struct drm_plane *primary = intel_crtc->base.primary;
 		int vdisplay, hdisplay;
 
 		drm_crtc_get_hv_timing(mode, &hdisplay, &vdisplay);
-		ret = primary->funcs->update_plane(primary, &intel_crtc->base,
-						   fb, 0, 0,
-						   hdisplay, vdisplay,
-						   x << 16, y << 16,
-						   hdisplay << 16, vdisplay << 16);
+		ret = drm_plane_helper_update(primary, &intel_crtc->base,
+					      fb, 0, 0,
+					      hdisplay, vdisplay,
+					      x << 16, y << 16,
+					      hdisplay << 16, vdisplay << 16);
 	}
 
 	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
@@ -11628,9 +12330,6 @@
 		       sizeof *crtc_state_copy);
 		intel_crtc->config = crtc_state_copy;
 		intel_crtc->base.state = &crtc_state_copy->base;
-
-		if (modeset_pipes)
-			intel_crtc->new_config = intel_crtc->config;
 	} else {
 		kfree(crtc_state_copy);
 	}
@@ -11667,7 +12366,7 @@
 	unsigned modeset_pipes, prepare_pipes, disable_pipes;
 	int ret = 0;
 
-	pipe_config = intel_modeset_compute_config(crtc, mode, fb, state,
+	pipe_config = intel_modeset_compute_config(crtc, mode, state,
 						   &modeset_pipes,
 						   &prepare_pipes,
 						   &disable_pipes);
@@ -11809,11 +12508,6 @@
 	count = 0;
 	for_each_intel_crtc(dev, crtc) {
 		crtc->new_enabled = config->save_crtc_enabled[count++];
-
-		if (crtc->new_enabled)
-			crtc->new_config = crtc->config;
-		else
-			crtc->new_config = NULL;
 	}
 
 	count = 0;
@@ -12021,6 +12715,7 @@
 				connector->encoder = connector->new_encoder;
 		} else {
 			connector_state->crtc = NULL;
+			connector_state->best_encoder = NULL;
 		}
 	}
 	for_each_intel_crtc(dev, crtc) {
@@ -12039,11 +12734,6 @@
 				      crtc->new_enabled ? "en" : "dis");
 			config->mode_changed = true;
 		}
-
-		if (crtc->new_enabled)
-			crtc->new_config = crtc->config;
-		else
-			crtc->new_config = NULL;
 	}
 
 	return 0;
@@ -12070,7 +12760,6 @@
 	}
 
 	crtc->new_enabled = false;
-	crtc->new_config = NULL;
 }
 
 static int intel_crtc_set_config(struct drm_mode_set *set)
@@ -12135,7 +12824,7 @@
 		goto fail;
 
 	pipe_config = intel_modeset_compute_config(set->crtc, set->mode,
-						   set->fb, state,
+						   state,
 						   &modeset_pipes,
 						   &prepare_pipes,
 						   &disable_pipes);
@@ -12168,10 +12857,10 @@
 		int vdisplay, hdisplay;
 
 		drm_crtc_get_hv_timing(set->mode, &hdisplay, &vdisplay);
-		ret = primary->funcs->update_plane(primary, set->crtc, set->fb,
-						   0, 0, hdisplay, vdisplay,
-						   set->x << 16, set->y << 16,
-						   hdisplay << 16, vdisplay << 16);
+		ret = drm_plane_helper_update(primary, set->crtc, set->fb,
+					      0, 0, hdisplay, vdisplay,
+					      set->x << 16, set->y << 16,
+					      hdisplay << 16, vdisplay << 16);
 
 		/*
 		 * We need to make sure the primary plane is re-enabled if it
@@ -12456,16 +13145,21 @@
 	struct drm_rect *dest = &state->dst;
 	struct drm_rect *src = &state->src;
 	const struct drm_rect *clip = &state->clip;
+	bool can_position = false;
 	int ret;
 
 	crtc = crtc ? crtc : plane->crtc;
 	intel_crtc = to_intel_crtc(crtc);
 
+	if (INTEL_INFO(dev)->gen >= 9)
+		can_position = true;
+
 	ret = drm_plane_helper_check_update(plane, crtc, fb,
 					    src, dest, clip,
 					    DRM_PLANE_HELPER_NO_SCALING,
 					    DRM_PLANE_HELPER_NO_SCALING,
-					    false, true, &state->visible);
+					    can_position, true,
+					    &state->visible);
 	if (ret)
 		return ret;
 
@@ -12655,8 +13349,8 @@
 }
 
 const struct drm_plane_funcs intel_plane_funcs = {
-	.update_plane = drm_plane_helper_update,
-	.disable_plane = drm_plane_helper_disable,
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = intel_plane_destroy,
 	.set_property = drm_atomic_helper_plane_set_property,
 	.atomic_get_property = intel_plane_atomic_get_property,
@@ -12687,10 +13381,12 @@
 
 	primary->can_scale = false;
 	primary->max_downscale = 1;
+	state->scaler_id = -1;
 	primary->pipe = pipe;
 	primary->plane = pipe;
 	primary->check_plane = intel_check_primary_plane;
 	primary->commit_plane = intel_commit_primary_plane;
+	primary->ckey.flags = I915_SET_COLORKEY_NONE;
 	if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4)
 		primary->plane = !pipe;
 
@@ -12707,23 +13403,32 @@
 				 intel_primary_formats, num_formats,
 				 DRM_PLANE_TYPE_PRIMARY);
 
-	if (INTEL_INFO(dev)->gen >= 4) {
-		if (!dev->mode_config.rotation_property)
-			dev->mode_config.rotation_property =
-				drm_mode_create_rotation_property(dev,
-							BIT(DRM_ROTATE_0) |
-							BIT(DRM_ROTATE_180));
-		if (dev->mode_config.rotation_property)
-			drm_object_attach_property(&primary->base.base,
-				dev->mode_config.rotation_property,
-				state->base.rotation);
-	}
+	if (INTEL_INFO(dev)->gen >= 4)
+		intel_create_rotation_property(dev, primary);
 
 	drm_plane_helper_add(&primary->base, &intel_plane_helper_funcs);
 
 	return &primary->base;
 }
 
+void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane)
+{
+	if (!dev->mode_config.rotation_property) {
+		unsigned long flags = BIT(DRM_ROTATE_0) |
+			BIT(DRM_ROTATE_180);
+
+		if (INTEL_INFO(dev)->gen >= 9)
+			flags |= BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270);
+
+		dev->mode_config.rotation_property =
+			drm_mode_create_rotation_property(dev, flags);
+	}
+	if (dev->mode_config.rotation_property)
+		drm_object_attach_property(&plane->base.base,
+				dev->mode_config.rotation_property,
+				plane->base.state->rotation);
+}
+
 static int
 intel_check_cursor_plane(struct drm_plane *plane,
 			 struct intel_plane_state *state)
@@ -12841,6 +13546,7 @@
 	cursor->max_downscale = 1;
 	cursor->pipe = pipe;
 	cursor->plane = pipe;
+	state->scaler_id = -1;
 	cursor->check_plane = intel_check_cursor_plane;
 	cursor->commit_plane = intel_commit_cursor_plane;
 
@@ -12867,6 +13573,24 @@
 	return &cursor->base;
 }
 
+static void skl_init_scalers(struct drm_device *dev, struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state)
+{
+	int i;
+	struct intel_scaler *intel_scaler;
+	struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state;
+
+	for (i = 0; i < intel_crtc->num_scalers; i++) {
+		intel_scaler = &scaler_state->scalers[i];
+		intel_scaler->in_use = 0;
+		intel_scaler->id = i;
+
+		intel_scaler->mode = PS_SCALER_MODE_DYN;
+	}
+
+	scaler_state->scaler_id = -1;
+}
+
 static void intel_crtc_init(struct drm_device *dev, int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -12886,6 +13610,16 @@
 	intel_crtc_set_state(intel_crtc, crtc_state);
 	crtc_state->base.crtc = &intel_crtc->base;
 
+	/* initialize shared scalers */
+	if (INTEL_INFO(dev)->gen >= 9) {
+		if (pipe == PIPE_C)
+			intel_crtc->num_scalers = 1;
+		else
+			intel_crtc->num_scalers = SKL_NUM_SCALERS;
+
+		skl_init_scalers(dev, intel_crtc, crtc_state);
+	}
+
 	primary = intel_primary_plane_create(dev, pipe);
 	if (!primary)
 		goto fail;
@@ -13038,7 +13772,16 @@
 	if (intel_crt_present(dev))
 		intel_crt_init(dev);
 
-	if (HAS_DDI(dev)) {
+	if (IS_BROXTON(dev)) {
+		/*
+		 * FIXME: Broxton doesn't support port detection via the
+		 * DDI_BUF_CTL_A or SFUSE_STRAP registers, find another way to
+		 * detect the ports.
+		 */
+		intel_ddi_init(dev, PORT_A);
+		intel_ddi_init(dev, PORT_B);
+		intel_ddi_init(dev, PORT_C);
+	} else if (HAS_DDI(dev)) {
 		int found;
 
 		/*
@@ -13474,10 +14217,23 @@
 	}
 
 	/* Returns the core display clock speed */
-	if (IS_VALLEYVIEW(dev))
+	if (IS_SKYLAKE(dev))
+		dev_priv->display.get_display_clock_speed =
+			skylake_get_display_clock_speed;
+	else if (IS_BROADWELL(dev))
+		dev_priv->display.get_display_clock_speed =
+			broadwell_get_display_clock_speed;
+	else if (IS_HASWELL(dev))
+		dev_priv->display.get_display_clock_speed =
+			haswell_get_display_clock_speed;
+	else if (IS_VALLEYVIEW(dev))
 		dev_priv->display.get_display_clock_speed =
 			valleyview_get_display_clock_speed;
-	else if (IS_I945G(dev) || (IS_G33(dev) && !IS_PINEVIEW_M(dev)))
+	else if (IS_GEN5(dev))
+		dev_priv->display.get_display_clock_speed =
+			ilk_get_display_clock_speed;
+	else if (IS_I945G(dev) || IS_BROADWATER(dev) ||
+		 IS_GEN6(dev) || IS_IVYBRIDGE(dev) || (IS_G33(dev) && !IS_PINEVIEW_M(dev)))
 		dev_priv->display.get_display_clock_speed =
 			i945_get_display_clock_speed;
 	else if (IS_I915G(dev))
@@ -13514,6 +14270,9 @@
 	} else if (IS_VALLEYVIEW(dev)) {
 		dev_priv->display.modeset_global_resources =
 			valleyview_modeset_global_resources;
+	} else if (IS_BROXTON(dev)) {
+		dev_priv->display.modeset_global_resources =
+			broxton_modeset_global_resources;
 	}
 
 	switch (INTEL_INFO(dev)->gen) {
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index d023710..937ba31 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -696,15 +696,13 @@
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (index)
 		return 0;
 
 	if (intel_dig_port->port == PORT_A) {
-		if (IS_GEN6(dev) || IS_GEN7(dev))
-			return 200; /* SNB & IVB eDP input clock at 400Mhz */
-		else
-			return 225; /* eDP input clock at 450Mhz */
+		return DIV_ROUND_UP(dev_priv->display.get_display_clock_speed(dev), 2000);
 	} else {
 		return DIV_ROUND_UP(intel_pch_rawclk(dev), 2);
 	}
@@ -719,7 +717,7 @@
 	if (intel_dig_port->port == PORT_A) {
 		if (index)
 			return 0;
-		return DIV_ROUND_CLOSEST(intel_ddi_get_cdclk_freq(dev_priv), 2000);
+		return DIV_ROUND_CLOSEST(dev_priv->display.get_display_clock_speed(dev), 2000);
 	} else if (dev_priv->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE) {
 		/* Workaround for non-ULT HSW */
 		switch (index) {
@@ -876,9 +874,18 @@
 				   DP_AUX_CH_CTL_TIME_OUT_ERROR |
 				   DP_AUX_CH_CTL_RECEIVE_ERROR);
 
-			if (status & (DP_AUX_CH_CTL_TIME_OUT_ERROR |
-				      DP_AUX_CH_CTL_RECEIVE_ERROR))
+			if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR)
 				continue;
+
+			/* DP CTS 1.2 Core Rev 1.1, 4.2.1.1 & 4.2.1.2
+			 *   400us delay required for errors and timeouts
+			 *   Timeout errors from the HW already meet this
+			 *   requirement so skip to next iteration
+			 */
+			if (status & DP_AUX_CH_CTL_RECEIVE_ERROR) {
+				usleep_range(400, 500);
+				continue;
+			}
 			if (status & DP_AUX_CH_CTL_DONE)
 				break;
 		}
@@ -1353,6 +1360,14 @@
 	if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) {
 		intel_fixed_panel_mode(intel_connector->panel.fixed_mode,
 				       adjusted_mode);
+
+		if (INTEL_INFO(dev)->gen >= 9) {
+			int ret;
+			ret = skl_update_scaler_users(intel_crtc, pipe_config, NULL, NULL, 0);
+			if (ret)
+				return ret;
+		}
+
 		if (!HAS_PCH_SPLIT(dev))
 			intel_gmch_panel_fitting(intel_crtc, pipe_config,
 						 intel_connector->panel.fitting_mode);
@@ -1465,6 +1480,8 @@
 
 	if (IS_SKYLAKE(dev) && is_edp(intel_dp))
 		skl_edp_set_pll_config(pipe_config, common_rates[clock]);
+	else if (IS_BROXTON(dev))
+		/* handled in ddi */;
 	else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		hsw_dp_set_ddi_pll_sel(pipe_config, intel_dp->link_bw);
 	else
@@ -2874,7 +2891,9 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum port port = dp_to_dig_port(intel_dp)->port;
 
-	if (INTEL_INFO(dev)->gen >= 9) {
+	if (IS_BROXTON(dev))
+		return DP_TRAIN_VOLTAGE_SWING_LEVEL_3;
+	else if (INTEL_INFO(dev)->gen >= 9) {
 		if (dev_priv->vbt.edp_low_vswing && port == PORT_A)
 			return DP_TRAIN_VOLTAGE_SWING_LEVEL_3;
 		return DP_TRAIN_VOLTAGE_SWING_LEVEL_2;
@@ -2956,7 +2975,7 @@
 	}
 }
 
-static uint32_t intel_vlv_signal_levels(struct intel_dp *intel_dp)
+static uint32_t vlv_signal_levels(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3056,7 +3075,7 @@
 	return 0;
 }
 
-static uint32_t intel_chv_signal_levels(struct intel_dp *intel_dp)
+static uint32_t chv_signal_levels(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3263,7 +3282,7 @@
 }
 
 static uint32_t
-intel_gen4_signal_levels(uint8_t train_set)
+gen4_signal_levels(uint8_t train_set)
 {
 	uint32_t	signal_levels = 0;
 
@@ -3302,7 +3321,7 @@
 
 /* Gen6's DP voltage swing and pre-emphasis control */
 static uint32_t
-intel_gen6_edp_signal_levels(uint8_t train_set)
+gen6_edp_signal_levels(uint8_t train_set)
 {
 	int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
 					 DP_TRAIN_PRE_EMPHASIS_MASK);
@@ -3330,7 +3349,7 @@
 
 /* Gen7's DP voltage swing and pre-emphasis control */
 static uint32_t
-intel_gen7_edp_signal_levels(uint8_t train_set)
+gen7_edp_signal_levels(uint8_t train_set)
 {
 	int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
 					 DP_TRAIN_PRE_EMPHASIS_MASK);
@@ -3361,7 +3380,7 @@
 
 /* Gen7.5's (HSW) DP voltage swing and pre-emphasis control */
 static uint32_t
-intel_hsw_signal_levels(uint8_t train_set)
+hsw_signal_levels(uint8_t train_set)
 {
 	int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
 					 DP_TRAIN_PRE_EMPHASIS_MASK);
@@ -3396,6 +3415,55 @@
 	}
 }
 
+static void bxt_signal_levels(struct intel_dp *intel_dp)
+{
+	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
+	enum port port = dport->port;
+	struct drm_device *dev = dport->base.base.dev;
+	struct intel_encoder *encoder = &dport->base;
+	uint8_t train_set = intel_dp->train_set[0];
+	uint32_t level = 0;
+
+	int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
+					 DP_TRAIN_PRE_EMPHASIS_MASK);
+	switch (signal_levels) {
+	default:
+		DRM_DEBUG_KMS("Unsupported voltage swing/pre-emph level\n");
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0:
+		level = 0;
+		break;
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1:
+		level = 1;
+		break;
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2:
+		level = 2;
+		break;
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3:
+		level = 3;
+		break;
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0:
+		level = 4;
+		break;
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1:
+		level = 5;
+		break;
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2:
+		level = 6;
+		break;
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0:
+		level = 7;
+		break;
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1:
+		level = 8;
+		break;
+	case DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0:
+		level = 9;
+		break;
+	}
+
+	bxt_ddi_vswing_sequence(dev, level, port, encoder->type);
+}
+
 /* Properly updates "DP" with the correct signal levels. */
 static void
 intel_dp_set_signal_levels(struct intel_dp *intel_dp, uint32_t *DP)
@@ -3406,27 +3474,38 @@
 	uint32_t signal_levels, mask;
 	uint8_t train_set = intel_dp->train_set[0];
 
-	if (IS_HASWELL(dev) || IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9) {
-		signal_levels = intel_hsw_signal_levels(train_set);
+	if (IS_BROXTON(dev)) {
+		signal_levels = 0;
+		bxt_signal_levels(intel_dp);
+		mask = 0;
+	} else if (HAS_DDI(dev)) {
+		signal_levels = hsw_signal_levels(train_set);
 		mask = DDI_BUF_EMP_MASK;
 	} else if (IS_CHERRYVIEW(dev)) {
-		signal_levels = intel_chv_signal_levels(intel_dp);
+		signal_levels = chv_signal_levels(intel_dp);
 		mask = 0;
 	} else if (IS_VALLEYVIEW(dev)) {
-		signal_levels = intel_vlv_signal_levels(intel_dp);
+		signal_levels = vlv_signal_levels(intel_dp);
 		mask = 0;
 	} else if (IS_GEN7(dev) && port == PORT_A) {
-		signal_levels = intel_gen7_edp_signal_levels(train_set);
+		signal_levels = gen7_edp_signal_levels(train_set);
 		mask = EDP_LINK_TRAIN_VOL_EMP_MASK_IVB;
 	} else if (IS_GEN6(dev) && port == PORT_A) {
-		signal_levels = intel_gen6_edp_signal_levels(train_set);
+		signal_levels = gen6_edp_signal_levels(train_set);
 		mask = EDP_LINK_TRAIN_VOL_EMP_MASK_SNB;
 	} else {
-		signal_levels = intel_gen4_signal_levels(train_set);
+		signal_levels = gen4_signal_levels(train_set);
 		mask = DP_VOLTAGE_MASK | DP_PRE_EMPHASIS_MASK;
 	}
 
-	DRM_DEBUG_KMS("Using signal levels %08x\n", signal_levels);
+	if (mask)
+		DRM_DEBUG_KMS("Using signal levels %08x\n", signal_levels);
+
+	DRM_DEBUG_KMS("Using vswing level %d\n",
+		train_set & DP_TRAIN_VOLTAGE_SWING_MASK);
+	DRM_DEBUG_KMS("Using pre-emphasis level %d\n",
+		(train_set & DP_TRAIN_PRE_EMPHASIS_MASK) >>
+			DP_TRAIN_PRE_EMPHASIS_SHIFT);
 
 	*DP = (*DP & ~mask) | signal_levels;
 }
@@ -3782,6 +3861,21 @@
 			dev_priv->psr.sink_support = true;
 			DRM_DEBUG_KMS("Detected EDP PSR Panel.\n");
 		}
+
+		if (INTEL_INFO(dev)->gen >= 9 &&
+			(intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) {
+			uint8_t frame_sync_cap;
+
+			dev_priv->psr.sink_support = true;
+			intel_dp_dpcd_read_wake(&intel_dp->aux,
+					DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
+					&frame_sync_cap, 1);
+			dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false;
+			/* PSR2 needs frame sync as well */
+			dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
+			DRM_DEBUG_KMS("PSR2 %s on sink",
+				dev_priv->psr.psr2_support ? "supported" : "not supported");
+		}
 	}
 
 	/* Training Pattern 3 support, both source and sink */
@@ -3949,11 +4043,78 @@
 	return true;
 }
 
-static void
-intel_dp_handle_test_request(struct intel_dp *intel_dp)
+static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp)
 {
-	/* NAK by default */
-	drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_RESPONSE, DP_TEST_NAK);
+	uint8_t test_result = DP_TEST_ACK;
+	return test_result;
+}
+
+static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp)
+{
+	uint8_t test_result = DP_TEST_NAK;
+	return test_result;
+}
+
+static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp)
+{
+	uint8_t test_result = DP_TEST_NAK;
+	return test_result;
+}
+
+static uint8_t intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp)
+{
+	uint8_t test_result = DP_TEST_NAK;
+	return test_result;
+}
+
+static void intel_dp_handle_test_request(struct intel_dp *intel_dp)
+{
+	uint8_t response = DP_TEST_NAK;
+	uint8_t rxdata = 0;
+	int status = 0;
+
+	intel_dp->compliance_test_type = 0;
+	intel_dp->aux.i2c_nack_count = 0;
+	intel_dp->aux.i2c_defer_count = 0;
+
+	status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_REQUEST, &rxdata, 1);
+	if (status <= 0) {
+		DRM_DEBUG_KMS("Could not read test request from sink\n");
+		goto update_status;
+	}
+
+	switch (rxdata) {
+	case DP_TEST_LINK_TRAINING:
+		DRM_DEBUG_KMS("LINK_TRAINING test requested\n");
+		intel_dp->compliance_test_type = DP_TEST_LINK_TRAINING;
+		response = intel_dp_autotest_link_training(intel_dp);
+		break;
+	case DP_TEST_LINK_VIDEO_PATTERN:
+		DRM_DEBUG_KMS("TEST_PATTERN test requested\n");
+		intel_dp->compliance_test_type = DP_TEST_LINK_VIDEO_PATTERN;
+		response = intel_dp_autotest_video_pattern(intel_dp);
+		break;
+	case DP_TEST_LINK_EDID_READ:
+		DRM_DEBUG_KMS("EDID test requested\n");
+		intel_dp->compliance_test_type = DP_TEST_LINK_EDID_READ;
+		response = intel_dp_autotest_edid(intel_dp);
+		break;
+	case DP_TEST_LINK_PHY_TEST_PATTERN:
+		DRM_DEBUG_KMS("PHY_PATTERN test requested\n");
+		intel_dp->compliance_test_type = DP_TEST_LINK_PHY_TEST_PATTERN;
+		response = intel_dp_autotest_phy_pattern(intel_dp);
+		break;
+	default:
+		DRM_DEBUG_KMS("Invalid test request '%02x'\n", rxdata);
+		break;
+	}
+
+update_status:
+	status = drm_dp_dpcd_write(&intel_dp->aux,
+				   DP_TEST_RESPONSE,
+				   &response, 1);
+	if (status <= 0)
+		DRM_DEBUG_KMS("Could not write test response to sink\n");
 }
 
 static int
@@ -5574,6 +5735,8 @@
 		I915_WRITE(PEG_BAND_GAP_DATA, (temp & ~0xf) | 0xd);
 	}
 
+	i915_debugfs_connector_add(connector);
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 5cb4748..3945057 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -150,14 +150,14 @@
 	enum port port = intel_dig_port->port;
 	int ret;
 	uint32_t temp;
-	struct intel_connector *found = NULL, *intel_connector;
+	struct intel_connector *found = NULL, *connector;
 	int slots;
 	struct drm_crtc *crtc = encoder->base.crtc;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-	for_each_intel_connector(dev, intel_connector) {
-		if (intel_connector->new_encoder == encoder) {
-			found = intel_connector;
+	for_each_intel_connector(dev, connector) {
+		if (connector->base.state->best_encoder == &encoder->base) {
+			found = connector;
 			break;
 		}
 	}
@@ -173,8 +173,10 @@
 	if (intel_dp->active_mst_links == 0) {
 		enum port port = intel_ddi_get_encoder_port(encoder);
 
-		I915_WRITE(PORT_CLK_SEL(port),
-			   intel_crtc->config->ddi_pll_sel);
+		/* FIXME: add support for SKL */
+		if (INTEL_INFO(dev)->gen < 9)
+			I915_WRITE(PORT_CLK_SEL(port),
+				   intel_crtc->config->ddi_pll_sel);
 
 		intel_ddi_init_dp_buf_reg(&intel_dig_port->base);
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 897f17d..43fe003 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -253,6 +253,26 @@
 	 * enable/disable the primary plane
 	 */
 	bool hides_primary;
+
+	/*
+	 * scaler_id
+	 *    = -1 : not using a scaler
+	 *    >=  0 : using a scaler
+	 *
+	 * plane requiring a scaler:
+	 *   - During check_plane, its bit is set in
+	 *     crtc_state->scaler_state.scaler_users by calling helper function
+	 *     update_scaler_users.
+	 *   - scaler_id indicates the scaler it got assigned.
+	 *
+	 * plane doesn't require a scaler:
+	 *   - this can happen when scaling is no longer required or the plane
+	 *     simply got disabled.
+	 *   - During check_plane, corresponding bit is reset in
+	 *     crtc_state->scaler_state.scaler_users by calling helper function
+	 *     update_scaler_users.
+	 */
+	int scaler_id;
 };
 
 struct intel_initial_plane_config {
@@ -262,6 +282,49 @@
 	u32 base;
 };
 
+#define SKL_MIN_SRC_W 8
+#define SKL_MAX_SRC_W 4096
+#define SKL_MIN_SRC_H 8
+#define SKL_MAX_SRC_H 2304
+#define SKL_MIN_DST_W 8
+#define SKL_MAX_DST_W 4096
+#define SKL_MIN_DST_H 8
+#define SKL_MAX_DST_H 2304
+
+struct intel_scaler {
+	int id;
+	int in_use;
+	uint32_t mode;
+};
+
+struct intel_crtc_scaler_state {
+#define SKL_NUM_SCALERS 2
+	struct intel_scaler scalers[SKL_NUM_SCALERS];
+
+	/*
+	 * scaler_users: keeps track of users requesting scalers on this crtc.
+	 *
+	 *     If a bit is set, a user is using a scaler.
+	 *     Here user can be a plane or crtc as defined below:
+	 *       bits 0-30 - plane (bit position is index from drm_plane_index)
+	 *       bit 31    - crtc
+	 *
+	 * Instead of creating a new index to cover planes and crtc, use the
+	 * existing drm_plane_index for planes, since there are well under 31
+	 * planes, and bit 31 for the crtc. This should be fine to cover all
+	 * our platforms.
+	 *
+	 * intel_atomic_setup_scalers will set up the available scalers for
+	 * users requesting scalers. It will gracefully fail if the request
+	 * exceeds availability.
+	 */
+#define SKL_CRTC_INDEX 31
+	unsigned scaler_users;
+
+	/* scaler used by crtc for panel fitting purpose */
+	int scaler_id;
+};
+
 struct intel_crtc_state {
 	struct drm_crtc_state base;
 
@@ -388,6 +451,8 @@
 
 	bool dp_encoder_is_mst;
 	int pbn;
+
+	struct intel_crtc_scaler_state scaler_state;
 };
 
 struct intel_pipe_wm {
@@ -468,7 +533,6 @@
 
 	struct intel_initial_plane_config plane_config;
 	struct intel_crtc_state *config;
-	struct intel_crtc_state *new_config;
 	bool new_enabled;
 
 	/* reset counter value when the last flip was submitted */
@@ -490,6 +554,9 @@
 	struct intel_mmio_flip mmio_flip;
 
 	struct intel_crtc_atomic_commit atomic;
+
+	/* scalers available on this crtc */
+	int num_scalers;
 };
 
 struct intel_plane_wm_parameters {
@@ -669,6 +736,9 @@
 				     bool has_aux_irq,
 				     int send_bytes,
 				     uint32_t aux_clock_divider);
+
+	/* DisplayPort compliance testing */
+	unsigned long compliance_test_type;
 };
 
 struct intel_digital_port {
@@ -852,7 +922,6 @@
 void intel_ddi_init(struct drm_device *dev, enum port port);
 enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder);
 bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe);
-int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv);
 void intel_ddi_pll_init(struct drm_device *dev);
 void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc);
 void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
@@ -867,11 +936,15 @@
 void intel_ddi_fdi_disable(struct drm_crtc *crtc);
 void intel_ddi_get_config(struct intel_encoder *encoder,
 			  struct intel_crtc_state *pipe_config);
+struct intel_encoder *
+intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state);
 
 void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder);
 void intel_ddi_clock_get(struct intel_encoder *encoder,
 			 struct intel_crtc_state *pipe_config);
 void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state);
+void bxt_ddi_vswing_sequence(struct drm_device *dev, u32 level,
+				enum port port, int type);
 
 /* intel_frontbuffer.c */
 void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
@@ -997,6 +1070,12 @@
 	return rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270));
 }
 
+unsigned int
+intel_tile_height(struct drm_device *dev, uint32_t bits_per_pixel,
+		  uint64_t fb_modifier);
+void intel_create_rotation_property(struct drm_device *dev,
+					struct intel_plane *plane);
+
 bool intel_wm_need_update(struct drm_plane *plane,
 			  struct drm_plane_state *state);
 
@@ -1037,6 +1116,13 @@
 void intel_finish_reset(struct drm_device *dev);
 void hsw_enable_pc8(struct drm_i915_private *dev_priv);
 void hsw_disable_pc8(struct drm_i915_private *dev_priv);
+void broxton_init_cdclk(struct drm_device *dev);
+void broxton_uninit_cdclk(struct drm_device *dev);
+void broxton_set_cdclk(struct drm_device *dev, int frequency);
+void broxton_ddi_phy_init(struct drm_device *dev);
+void broxton_ddi_phy_uninit(struct drm_device *dev);
+void bxt_enable_dc9(struct drm_i915_private *dev_priv);
+void bxt_disable_dc9(struct drm_i915_private *dev_priv);
 void intel_dp_get_m_n(struct intel_crtc *crtc,
 		      struct intel_crtc_state *pipe_config);
 void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n);
@@ -1044,6 +1130,8 @@
 void
 ironlake_check_encoder_dotclock(const struct intel_crtc_state *pipe_config,
 				int dotclock);
+bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, int target_clock,
+			intel_clock_t *best_clock);
 bool intel_crtc_active(struct drm_crtc *crtc);
 void hsw_enable_ips(struct intel_crtc *crtc);
 void hsw_disable_ips(struct intel_crtc *crtc);
@@ -1053,6 +1141,10 @@
 				 struct intel_crtc_state *pipe_config);
 void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc);
 void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file);
+void skl_detach_scalers(struct intel_crtc *intel_crtc);
+int skl_update_scaler_users(struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state, struct intel_plane *intel_plane,
+	struct intel_plane_state *plane_state, int force_detach);
 
 unsigned long intel_plane_obj_offset(struct intel_plane *intel_plane,
 				     struct drm_i915_gem_object *obj);
@@ -1215,6 +1307,7 @@
 void intel_psr_flush(struct drm_device *dev,
 			 unsigned frontbuffer_bits);
 void intel_psr_init(struct drm_device *dev);
+void intel_psr_single_frame_update(struct drm_device *dev);
 
 /* intel_runtime_pm.c */
 int intel_power_domains_init(struct drm_i915_private *);
@@ -1263,7 +1356,10 @@
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct drm_i915_private *dev_priv);
+void gen6_rps_boost(struct drm_i915_private *dev_priv,
+		    struct drm_i915_file_private *file_priv);
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+				       struct drm_i915_gem_request *rq);
 void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
@@ -1314,6 +1410,9 @@
 
 	return to_intel_crtc_state(crtc_state);
 }
+int intel_atomic_setup_scalers(struct drm_device *dev,
+	struct intel_crtc *intel_crtc,
+	struct intel_crtc_state *crtc_state);
 
 /* intel_atomic_plane.c */
 struct intel_plane_state *intel_create_plane_state(struct drm_plane *plane);
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 770040f..ece5bd7 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -80,7 +80,7 @@
 		.name = "ch7017",
 		.dvo_reg = DVOC,
 		.slave_addr = 0x75,
-		.gpio = GMBUS_PORT_DPB,
+		.gpio = GMBUS_PIN_DPB,
 		.dev_ops = &ch7017_ops,
 	},
 	{
@@ -364,7 +364,7 @@
 	 * that's not the case.
 	 */
 	intel_ddc_get_modes(connector,
-			    intel_gmbus_get_adapter(dev_priv, GMBUS_PORT_DPC));
+			    intel_gmbus_get_adapter(dev_priv, GMBUS_PIN_DPC));
 	if (!list_empty(&connector->probed_modes))
 		return 1;
 
@@ -495,17 +495,19 @@
 		struct i2c_adapter *i2c;
 		int gpio;
 		bool dvoinit;
+		enum pipe pipe;
+		uint32_t dpll[I915_MAX_PIPES];
 
 		/* Allow the I2C driver info to specify the GPIO to be used in
 		 * special cases, but otherwise default to what's defined
 		 * in the spec.
 		 */
-		if (intel_gmbus_is_port_valid(dvo->gpio))
+		if (intel_gmbus_is_valid_pin(dev_priv, dvo->gpio))
 			gpio = dvo->gpio;
 		else if (dvo->type == INTEL_DVO_CHIP_LVDS)
-			gpio = GMBUS_PORT_SSC;
+			gpio = GMBUS_PIN_SSC;
 		else
-			gpio = GMBUS_PORT_DPB;
+			gpio = GMBUS_PIN_DPB;
 
 		/* Set up the I2C bus necessary for the chip we're probing.
 		 * It appears that everything is on GPIOE except for panels
@@ -520,8 +522,23 @@
 		 */
 		intel_gmbus_force_bit(i2c, true);
 
+		/* ns2501 requires the DVO 2x clock before it will
+		 * respond to i2c accesses, so make sure we have
+		 * the clock enabled before we attempt to
+		 * initialize the device.
+		 */
+		for_each_pipe(dev_priv, pipe) {
+			dpll[pipe] = I915_READ(DPLL(pipe));
+			I915_WRITE(DPLL(pipe), dpll[pipe] | DPLL_DVO_2X_MODE);
+		}
+
 		dvoinit = dvo->dev_ops->init(&intel_dvo->dev, i2c);
 
+		/* restore the DVO 2x clock state to original */
+		for_each_pipe(dev_priv, pipe) {
+			I915_WRITE(DPLL(pipe), dpll[pipe]);
+		}
+
 		intel_gmbus_force_bit(i2c, false);
 
 		if (!dvoinit)
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index a20cffb..57095f5 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -243,6 +243,8 @@
 	/* Remove stale busy bits due to the old buffer. */
 	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
 	mutex_unlock(&dev_priv->fb_tracking.lock);
+
+	intel_psr_single_frame_update(dev);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index bfbe07b..65bc386 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -223,10 +223,14 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	int reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
-	return val & VIDEO_DIP_ENABLE;
+	if (VIDEO_DIP_PORT(intel_dig_port->port) == (val & VIDEO_DIP_PORT_MASK))
+		return val & VIDEO_DIP_ENABLE;
+
+	return false;
 }
 
 static void cpt_write_infoframe(struct drm_encoder *encoder,
@@ -324,10 +328,14 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	int reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
-	return val & VIDEO_DIP_ENABLE;
+	if (VIDEO_DIP_PORT(intel_dig_port->port) == (val & VIDEO_DIP_PORT_MASK))
+		return val & VIDEO_DIP_ENABLE;
+
+	return false;
 }
 
 static void hsw_write_infoframe(struct drm_encoder *encoder,
@@ -1676,18 +1684,26 @@
 
 	switch (port) {
 	case PORT_B:
-		intel_hdmi->ddc_bus = GMBUS_PORT_DPB;
+		if (IS_BROXTON(dev_priv))
+			intel_hdmi->ddc_bus = GMBUS_PIN_1_BXT;
+		else
+			intel_hdmi->ddc_bus = GMBUS_PIN_DPB;
 		intel_encoder->hpd_pin = HPD_PORT_B;
 		break;
 	case PORT_C:
-		intel_hdmi->ddc_bus = GMBUS_PORT_DPC;
+		if (IS_BROXTON(dev_priv))
+			intel_hdmi->ddc_bus = GMBUS_PIN_2_BXT;
+		else
+			intel_hdmi->ddc_bus = GMBUS_PIN_DPC;
 		intel_encoder->hpd_pin = HPD_PORT_C;
 		break;
 	case PORT_D:
-		if (IS_CHERRYVIEW(dev))
-			intel_hdmi->ddc_bus = GMBUS_PORT_DPD_CHV;
+		if (WARN_ON(IS_BROXTON(dev_priv)))
+			intel_hdmi->ddc_bus = GMBUS_PIN_DISABLED;
+		else if (IS_CHERRYVIEW(dev_priv))
+			intel_hdmi->ddc_bus = GMBUS_PIN_DPD_CHV;
 		else
-			intel_hdmi->ddc_bus = GMBUS_PORT_DPD;
+			intel_hdmi->ddc_bus = GMBUS_PIN_DPD;
 		intel_encoder->hpd_pin = HPD_PORT_D;
 		break;
 	case PORT_A:
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 56e437e..3daa7e3 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -34,20 +34,50 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
-struct gmbus_port {
+struct gmbus_pin {
 	const char *name;
 	int reg;
 };
 
-static const struct gmbus_port gmbus_ports[] = {
-	{ "ssc", GPIOB },
-	{ "vga", GPIOA },
-	{ "panel", GPIOC },
-	{ "dpc", GPIOD },
-	{ "dpb", GPIOE },
-	{ "dpd", GPIOF },
+/* Map gmbus pin pairs to names and registers. */
+static const struct gmbus_pin gmbus_pins[] = {
+	[GMBUS_PIN_SSC] = { "ssc", GPIOB },
+	[GMBUS_PIN_VGADDC] = { "vga", GPIOA },
+	[GMBUS_PIN_PANEL] = { "panel", GPIOC },
+	[GMBUS_PIN_DPC] = { "dpc", GPIOD },
+	[GMBUS_PIN_DPB] = { "dpb", GPIOE },
+	[GMBUS_PIN_DPD] = { "dpd", GPIOF },
 };
 
+static const struct gmbus_pin gmbus_pins_bxt[] = {
+	[GMBUS_PIN_1_BXT] = { "dpb", PCH_GPIOB },
+	[GMBUS_PIN_2_BXT] = { "dpc", PCH_GPIOC },
+	[GMBUS_PIN_3_BXT] = { "misc", PCH_GPIOD },
+};
+
+/* pin is expected to be valid */
+static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv,
+					     unsigned int pin)
+{
+	if (IS_BROXTON(dev_priv))
+		return &gmbus_pins_bxt[pin];
+	else
+		return &gmbus_pins[pin];
+}
+
+bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
+			      unsigned int pin)
+{
+	unsigned int size;
+
+	if (IS_BROXTON(dev_priv))
+		size = ARRAY_SIZE(gmbus_pins_bxt);
+	else
+		size = ARRAY_SIZE(gmbus_pins);
+
+	return pin < size && get_gmbus_pin(dev_priv, pin)->reg;
+}
+
 /* Intel GPIO access functions */
 
 #define I2C_RISEFALL_TIME 10
@@ -182,15 +212,15 @@
 }
 
 static void
-intel_gpio_setup(struct intel_gmbus *bus, u32 pin)
+intel_gpio_setup(struct intel_gmbus *bus, unsigned int pin)
 {
 	struct drm_i915_private *dev_priv = bus->dev_priv;
 	struct i2c_algo_bit_data *algo;
 
 	algo = &bus->bit_algo;
 
-	/* -1 to map pin pair to gmbus index */
-	bus->gpio_reg = dev_priv->gpio_mmio_base + gmbus_ports[pin - 1].reg;
+	bus->gpio_reg = dev_priv->gpio_mmio_base +
+		get_gmbus_pin(dev_priv, pin)->reg;
 
 	bus->adapter.algo_data = algo;
 	algo->setsda = set_data;
@@ -563,7 +593,9 @@
 int intel_setup_gmbus(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret, i;
+	struct intel_gmbus *bus;
+	unsigned int pin;
+	int ret;
 
 	if (HAS_PCH_NOP(dev))
 		return 0;
@@ -577,16 +609,18 @@
 	mutex_init(&dev_priv->gmbus_mutex);
 	init_waitqueue_head(&dev_priv->gmbus_wait_queue);
 
-	for (i = 0; i < GMBUS_NUM_PORTS; i++) {
-		struct intel_gmbus *bus = &dev_priv->gmbus[i];
-		u32 port = i + 1; /* +1 to map gmbus index to pin pair */
+	for (pin = 0; pin < ARRAY_SIZE(dev_priv->gmbus); pin++) {
+		if (!intel_gmbus_is_valid_pin(dev_priv, pin))
+			continue;
+
+		bus = &dev_priv->gmbus[pin];
 
 		bus->adapter.owner = THIS_MODULE;
 		bus->adapter.class = I2C_CLASS_DDC;
 		snprintf(bus->adapter.name,
 			 sizeof(bus->adapter.name),
 			 "i915 gmbus %s",
-			 gmbus_ports[i].name);
+			 get_gmbus_pin(dev_priv, pin)->name);
 
 		bus->adapter.dev.parent = &dev->pdev->dev;
 		bus->dev_priv = dev_priv;
@@ -594,13 +628,13 @@
 		bus->adapter.algo = &gmbus_algorithm;
 
 		/* By default use a conservative clock rate */
-		bus->reg0 = port | GMBUS_RATE_100KHZ;
+		bus->reg0 = pin | GMBUS_RATE_100KHZ;
 
 		/* gmbus seems to be broken on i830 */
 		if (IS_I830(dev))
 			bus->force_bit = 1;
 
-		intel_gpio_setup(bus, port);
+		intel_gpio_setup(bus, pin);
 
 		ret = i2c_add_adapter(&bus->adapter);
 		if (ret)
@@ -612,20 +646,23 @@
 	return 0;
 
 err:
-	while (--i) {
-		struct intel_gmbus *bus = &dev_priv->gmbus[i];
+	while (--pin) {
+		if (!intel_gmbus_is_valid_pin(dev_priv, pin))
+			continue;
+
+		bus = &dev_priv->gmbus[pin];
 		i2c_del_adapter(&bus->adapter);
 	}
 	return ret;
 }
 
 struct i2c_adapter *intel_gmbus_get_adapter(struct drm_i915_private *dev_priv,
-					    unsigned port)
+					    unsigned int pin)
 {
-	WARN_ON(!intel_gmbus_is_port_valid(port));
-	/* -1 to map pin pair to gmbus index */
-	return (intel_gmbus_is_port_valid(port)) ?
-		&dev_priv->gmbus[port - 1].adapter : NULL;
+	if (WARN_ON(!intel_gmbus_is_valid_pin(dev_priv, pin)))
+		return NULL;
+
+	return &dev_priv->gmbus[pin].adapter;
 }
 
 void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed)
@@ -648,10 +685,14 @@
 void intel_teardown_gmbus(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int i;
+	struct intel_gmbus *bus;
+	unsigned int pin;
 
-	for (i = 0; i < GMBUS_NUM_PORTS; i++) {
-		struct intel_gmbus *bus = &dev_priv->gmbus[i];
+	for (pin = 0; pin < ARRAY_SIZE(dev_priv->gmbus); pin++) {
+		if (!intel_gmbus_is_valid_pin(dev_priv, pin))
+			continue;
+
+		bus = &dev_priv->gmbus[pin];
 		i2c_del_adapter(&bus->adapter);
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 09df74b..732fd63 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -188,6 +188,15 @@
 #define GEN8_CTX_FORCE_RESTORE (1<<2)
 #define GEN8_CTX_L3LLC_COHERENT (1<<5)
 #define GEN8_CTX_PRIVILEGE (1<<8)
+
+#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
+	const u64 _addr = test_bit(n, ppgtt->pdp.used_pdpes) ? \
+		ppgtt->pdp.page_directory[n]->daddr : \
+		ppgtt->scratch_pd->daddr; \
+	reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
+	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
+}
+
 enum {
 	ADVANCED_CONTEXT = 0,
 	LEGACY_CONTEXT,
@@ -265,7 +274,8 @@
 
 	desc = GEN8_CTX_VALID;
 	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
-	desc |= GEN8_CTX_L3LLC_COHERENT;
+	if (IS_GEN8(ctx_obj->base.dev))
+		desc |= GEN8_CTX_L3LLC_COHERENT;
 	desc |= GEN8_CTX_PRIVILEGE;
 	desc |= lrca;
 	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
@@ -305,21 +315,24 @@
 	desc[3] = (u32)(temp >> 32);
 	desc[2] = (u32)temp;
 
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	I915_WRITE(RING_ELSP(ring), desc[1]);
-	I915_WRITE(RING_ELSP(ring), desc[0]);
-	I915_WRITE(RING_ELSP(ring), desc[3]);
+	spin_lock(&dev_priv->uncore.lock);
+	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+	I915_WRITE_FW(RING_ELSP(ring), desc[1]);
+	I915_WRITE_FW(RING_ELSP(ring), desc[0]);
+	I915_WRITE_FW(RING_ELSP(ring), desc[3]);
 
 	/* The context is automatically loaded after the following */
-	I915_WRITE(RING_ELSP(ring), desc[2]);
+	I915_WRITE_FW(RING_ELSP(ring), desc[2]);
 
 	/* ELSP is a wo register, so use another nearby reg for posting instead */
-	POSTING_READ(RING_EXECLIST_STATUS(ring));
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	POSTING_READ_FW(RING_EXECLIST_STATUS(ring));
+	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+	spin_unlock(&dev_priv->uncore.lock);
 }
 
 static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
 				    struct drm_i915_gem_object *ring_obj,
+				    struct i915_hw_ppgtt *ppgtt,
 				    u32 tail)
 {
 	struct page *page;
@@ -331,6 +344,16 @@
 	reg_state[CTX_RING_TAIL+1] = tail;
 	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
+	/* True PPGTT with dynamic page allocation: update PDP registers and
+	 * point the unallocated PDPs to the scratch page
+	 */
+	if (ppgtt) {
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
+	}
+
 	kunmap_atomic(reg_state);
 
 	return 0;
@@ -349,7 +372,7 @@
 	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
 	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
 
-	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
+	execlists_update_context(ctx_obj0, ringbuf0->obj, to0->ppgtt, tail0);
 
 	if (to1) {
 		ringbuf1 = to1->engine[ring->id].ringbuf;
@@ -358,7 +381,7 @@
 		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
 		WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
 
-		execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
+		execlists_update_context(ctx_obj1, ringbuf1->obj, to1->ppgtt, tail1);
 	}
 
 	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
@@ -520,8 +543,6 @@
 				   struct drm_i915_gem_request *request)
 {
 	struct drm_i915_gem_request *cursor;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	unsigned long flags;
 	int num_elements = 0;
 
 	if (to != ring->default_context)
@@ -538,7 +559,6 @@
 		request->ring = ring;
 		request->ctx = to;
 		kref_init(&request->ref);
-		request->uniq = dev_priv->request_uniq++;
 		i915_gem_context_reference(request->ctx);
 	} else {
 		i915_gem_request_reference(request);
@@ -546,9 +566,7 @@
 	}
 	request->tail = tail;
 
-	intel_runtime_pm_get(dev_priv);
-
-	spin_lock_irqsave(&ring->execlist_lock, flags);
+	spin_lock_irq(&ring->execlist_lock);
 
 	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
 		if (++num_elements > 2)
@@ -574,7 +592,7 @@
 	if (num_elements == 0)
 		execlists_context_unqueue(ring);
 
-	spin_unlock_irqrestore(&ring->execlist_lock, flags);
+	spin_unlock_irq(&ring->execlist_lock);
 
 	return 0;
 }
@@ -631,6 +649,173 @@
 	return logical_ring_invalidate_all_caches(ringbuf, ctx);
 }
 
+int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request,
+					    struct intel_context *ctx)
+{
+	int ret;
+
+	if (ctx != request->ring->default_context) {
+		ret = intel_lr_context_pin(request->ring, ctx);
+		if (ret)
+			return ret;
+	}
+
+	request->ringbuf = ctx->engine[request->ring->id].ringbuf;
+	request->ctx     = ctx;
+	i915_gem_context_reference(request->ctx);
+
+	return 0;
+}
+
+static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
+				       struct intel_context *ctx,
+				       int bytes)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_i915_gem_request *request;
+	int ret, new_space;
+
+	if (intel_ring_space(ringbuf) >= bytes)
+		return 0;
+
+	list_for_each_entry(request, &ring->request_list, list) {
+		/*
+		 * The request queue is per-engine, so can contain requests
+		 * from multiple ringbuffers. Here, we must ignore any that
+		 * aren't from the ringbuffer we're considering.
+		 */
+		struct intel_context *ctx = request->ctx;
+		if (ctx->engine[ring->id].ringbuf != ringbuf)
+			continue;
+
+		/* Would completion of this request free enough space? */
+		new_space = __intel_ring_space(request->postfix, ringbuf->tail,
+				       ringbuf->size);
+		if (new_space >= bytes)
+			break;
+	}
+
+	if (WARN_ON(&request->list == &ring->request_list))
+		return -ENOSPC;
+
+	ret = i915_wait_request(request);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests_ring(ring);
+
+	WARN_ON(intel_ring_space(ringbuf) < new_space);
+
+	return intel_ring_space(ringbuf) >= bytes ? 0 : -ENOSPC;
+}
+
+/*
+ * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
+ * @ringbuf: Logical Ringbuffer to advance.
+ *
+ * The tail is updated in our logical ringbuffer struct, not in the actual context. What
+ * really happens during submission is that the context and current tail will be placed
+ * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
+ * point, the tail *inside* the context is updated and the ELSP written to.
+ */
+static void
+intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
+				      struct intel_context *ctx,
+				      struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+
+	intel_logical_ring_advance(ringbuf);
+
+	if (intel_ring_stopped(ring))
+		return;
+
+	execlists_context_queue(ring, ctx, ringbuf->tail, request);
+}
+
+static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf,
+				    struct intel_context *ctx)
+{
+	uint32_t __iomem *virt;
+	int rem = ringbuf->size - ringbuf->tail;
+
+	if (ringbuf->space < rem) {
+		int ret = logical_ring_wait_for_space(ringbuf, ctx, rem);
+
+		if (ret)
+			return ret;
+	}
+
+	virt = ringbuf->virtual_start + ringbuf->tail;
+	rem /= 4;
+	while (rem--)
+		iowrite32(MI_NOOP, virt++);
+
+	ringbuf->tail = 0;
+	intel_ring_update_space(ringbuf);
+
+	return 0;
+}
+
+static int logical_ring_prepare(struct intel_ringbuffer *ringbuf,
+				struct intel_context *ctx, int bytes)
+{
+	int ret;
+
+	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
+		ret = logical_ring_wrap_buffer(ringbuf, ctx);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	if (unlikely(ringbuf->space < bytes)) {
+		ret = logical_ring_wait_for_space(ringbuf, ctx, bytes);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
+ *
+ * @ringbuf: Logical ringbuffer.
+ * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
+ *
+ * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
+ * be wrapped, or we need to wait a bit for the tail to be updated). This function takes care of that
+ * and also preallocates a request (every workload submission is still mediated through
+ * requests, same as it did with legacy ringbuffer submission).
+ *
+ * Return: non-zero if the ringbuffer is not ready to be written to.
+ */
+static int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
+				    struct intel_context *ctx, int num_dwords)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible);
+	if (ret)
+		return ret;
+
+	ret = logical_ring_prepare(ringbuf, ctx, num_dwords * sizeof(uint32_t));
+	if (ret)
+		return ret;
+
+	/* Preallocate the olr before touching the ring */
+	ret = i915_gem_request_alloc(ring, ctx);
+	if (ret)
+		return ret;
+
+	ringbuf->space -= num_dwords * sizeof(uint32_t);
+	return 0;
+}
+
 /**
  * execlists_submission() - submit a batchbuffer for execution, Execlists style
  * @dev: DRM device.
@@ -742,8 +927,6 @@
 void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *req, *tmp;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	unsigned long flags;
 	struct list_head retired_list;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
@@ -751,9 +934,9 @@
 		return;
 
 	INIT_LIST_HEAD(&retired_list);
-	spin_lock_irqsave(&ring->execlist_lock, flags);
+	spin_lock_irq(&ring->execlist_lock);
 	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
-	spin_unlock_irqrestore(&ring->execlist_lock, flags);
+	spin_unlock_irq(&ring->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
 		struct intel_context *ctx = req->ctx;
@@ -762,7 +945,6 @@
 
 		if (ctx_obj && (ctx != ring->default_context))
 			intel_lr_context_unpin(ring, ctx);
-		intel_runtime_pm_put(dev_priv);
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
 	}
@@ -807,30 +989,6 @@
 	return 0;
 }
 
-/*
- * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
- * @ringbuf: Logical Ringbuffer to advance.
- *
- * The tail is updated in our logical ringbuffer struct, not in the actual context. What
- * really happens during submission is that the context and current tail will be placed
- * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
- * point, the tail *inside* the context is updated and the ELSP written to.
- */
-static void
-intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
-				      struct intel_context *ctx,
-				      struct drm_i915_gem_request *request)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-
-	intel_logical_ring_advance(ringbuf);
-
-	if (intel_ring_stopped(ring))
-		return;
-
-	execlists_context_queue(ring, ctx, ringbuf->tail, request);
-}
-
 static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		struct intel_context *ctx)
 {
@@ -875,219 +1033,6 @@
 	}
 }
 
-static int logical_ring_alloc_request(struct intel_engine_cs *ring,
-				      struct intel_context *ctx)
-{
-	struct drm_i915_gem_request *request;
-	struct drm_i915_private *dev_private = ring->dev->dev_private;
-	int ret;
-
-	if (ring->outstanding_lazy_request)
-		return 0;
-
-	request = kzalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL)
-		return -ENOMEM;
-
-	if (ctx != ring->default_context) {
-		ret = intel_lr_context_pin(ring, ctx);
-		if (ret) {
-			kfree(request);
-			return ret;
-		}
-	}
-
-	kref_init(&request->ref);
-	request->ring = ring;
-	request->uniq = dev_private->request_uniq++;
-
-	ret = i915_gem_get_seqno(ring->dev, &request->seqno);
-	if (ret) {
-		intel_lr_context_unpin(ring, ctx);
-		kfree(request);
-		return ret;
-	}
-
-	request->ctx = ctx;
-	i915_gem_context_reference(request->ctx);
-	request->ringbuf = ctx->engine[ring->id].ringbuf;
-
-	ring->outstanding_lazy_request = request;
-	return 0;
-}
-
-static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
-				     int bytes)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_i915_gem_request *request;
-	int ret;
-
-	if (intel_ring_space(ringbuf) >= bytes)
-		return 0;
-
-	list_for_each_entry(request, &ring->request_list, list) {
-		/*
-		 * The request queue is per-engine, so can contain requests
-		 * from multiple ringbuffers. Here, we must ignore any that
-		 * aren't from the ringbuffer we're considering.
-		 */
-		struct intel_context *ctx = request->ctx;
-		if (ctx->engine[ring->id].ringbuf != ringbuf)
-			continue;
-
-		/* Would completion of this request free enough space? */
-		if (__intel_ring_space(request->tail, ringbuf->tail,
-				       ringbuf->size) >= bytes) {
-			break;
-		}
-	}
-
-	if (&request->list == &ring->request_list)
-		return -ENOSPC;
-
-	ret = i915_wait_request(request);
-	if (ret)
-		return ret;
-
-	i915_gem_retire_requests_ring(ring);
-
-	return intel_ring_space(ringbuf) >= bytes ? 0 : -ENOSPC;
-}
-
-static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
-				       struct intel_context *ctx,
-				       int bytes)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long end;
-	int ret;
-
-	ret = logical_ring_wait_request(ringbuf, bytes);
-	if (ret != -ENOSPC)
-		return ret;
-
-	/* Force the context submission in case we have been skipping it */
-	intel_logical_ring_advance_and_submit(ringbuf, ctx, NULL);
-
-	/* With GEM the hangcheck timer should kick us out of the loop,
-	 * leaving it early runs the risk of corrupting GEM state (due
-	 * to running on almost untested codepaths). But on resume
-	 * timers don't work yet, so prevent a complete hang in that
-	 * case by choosing an insanely large timeout. */
-	end = jiffies + 60 * HZ;
-
-	ret = 0;
-	do {
-		if (intel_ring_space(ringbuf) >= bytes)
-			break;
-
-		msleep(1);
-
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
-		if (time_after(jiffies, end)) {
-			ret = -EBUSY;
-			break;
-		}
-	} while (1);
-
-	return ret;
-}
-
-static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf,
-				    struct intel_context *ctx)
-{
-	uint32_t __iomem *virt;
-	int rem = ringbuf->size - ringbuf->tail;
-
-	if (ringbuf->space < rem) {
-		int ret = logical_ring_wait_for_space(ringbuf, ctx, rem);
-
-		if (ret)
-			return ret;
-	}
-
-	virt = ringbuf->virtual_start + ringbuf->tail;
-	rem /= 4;
-	while (rem--)
-		iowrite32(MI_NOOP, virt++);
-
-	ringbuf->tail = 0;
-	intel_ring_update_space(ringbuf);
-
-	return 0;
-}
-
-static int logical_ring_prepare(struct intel_ringbuffer *ringbuf,
-				struct intel_context *ctx, int bytes)
-{
-	int ret;
-
-	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
-		ret = logical_ring_wrap_buffer(ringbuf, ctx);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = logical_ring_wait_for_space(ringbuf, ctx, bytes);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	return 0;
-}
-
-/**
- * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
- *
- * @ringbuf: Logical ringbuffer.
- * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
- *
- * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
- * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
- * and also preallocates a request (every workload submission is still mediated through
- * requests, same as it did with legacy ringbuffer submission).
- *
- * Return: non-zero if the ringbuffer is not ready to be written to.
- */
-int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
-			     struct intel_context *ctx, int num_dwords)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-				   dev_priv->mm.interruptible);
-	if (ret)
-		return ret;
-
-	ret = logical_ring_prepare(ringbuf, ctx, num_dwords * sizeof(uint32_t));
-	if (ret)
-		return ret;
-
-	/* Preallocate the olr before touching the ring */
-	ret = logical_ring_alloc_request(ring, ctx);
-	if (ret)
-		return ret;
-
-	ringbuf->space -= num_dwords * sizeof(uint32_t);
-	return 0;
-}
-
 static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
 					       struct intel_context *ctx)
 {
@@ -1282,6 +1227,7 @@
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	bool vf_flush_wa;
 	u32 flags = 0;
 	int ret;
 
@@ -1303,10 +1249,26 @@
 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 	}
 
-	ret = intel_logical_ring_begin(ringbuf, ctx, 6);
+	/*
+	 * On GEN9+, before VF_CACHE_INVALIDATE we need to emit a NULL pipe
+	 * control.
+	 */
+	vf_flush_wa = INTEL_INFO(ring->dev)->gen >= 9 &&
+		      flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
+
+	ret = intel_logical_ring_begin(ringbuf, ctx, vf_flush_wa ? 12 : 6);
 	if (ret)
 		return ret;
 
+	if (vf_flush_wa) {
+		intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+	}
+
 	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
 	intel_logical_ring_emit(ringbuf, flags);
 	intel_logical_ring_emit(ringbuf, scratch_addr);
@@ -1437,6 +1399,7 @@
 		ring->cleanup(ring);
 
 	i915_cmd_parser_fini_ring(ring);
+	i915_gem_batch_pool_fini(&ring->batch_pool);
 
 	if (ring->status_page.obj) {
 		kunmap(sg_page(ring->status_page.obj->pages->sgl));
@@ -1454,6 +1417,7 @@
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
+	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->execlist_queue);
@@ -1806,14 +1770,14 @@
 	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
 	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
 	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
-	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3]->daddr);
-	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3]->daddr);
-	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2]->daddr);
-	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2]->daddr);
-	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1]->daddr);
-	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1]->daddr);
-	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0]->daddr);
-	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0]->daddr);
+
+	/* With dynamic page allocation, PDPs may not be allocated at this point.
+	 * Point the unallocated PDPs to the scratch page.
+	 */
+	ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
+	ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
+	ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
+	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
@@ -1930,7 +1894,7 @@
 
 	context_size = round_up(get_lr_context_size(ring), 4096);
 
-	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
+	ctx_obj = i915_gem_alloc_object(dev, context_size);
 	if (IS_ERR(ctx_obj)) {
 		ret = PTR_ERR(ctx_obj);
 		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
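
Editor's note: the ASSIGN_CTX_PDP() macro introduced above writes each PDP's 64-bit DMA address into the two 32-bit slots of the context image, falling back to the scratch page directory when the PDP has not been allocated. A standalone sketch of that split is below; the reg_state layout, addresses and helper names are simplified assumptions, not the driver's real structures.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define UPPER_32_BITS(x) ((uint32_t)((uint64_t)(x) >> 32))
#define LOWER_32_BITS(x) ((uint32_t)(x))

/* Model of ASSIGN_CTX_PDP(): pick the real PD address if the PDP is
 * allocated, otherwise the scratch PD, then split it across the
 * UDW/LDW slots of the context image. */
static void assign_ctx_pdp(uint32_t *reg_state, int udw, int ldw,
			   bool allocated, uint64_t pd_daddr,
			   uint64_t scratch_daddr)
{
	uint64_t addr = allocated ? pd_daddr : scratch_daddr;

	reg_state[udw] = UPPER_32_BITS(addr);
	reg_state[ldw] = LOWER_32_BITS(addr);
}

int main(void)
{
	uint32_t reg_state[4] = { 0 };

	/* PDP0 is allocated, PDP1 is not and falls back to the scratch PD */
	assign_ctx_pdp(reg_state, 0, 1, true, 0x0000000123456000ULL, 0xfee00000ULL);
	assign_ctx_pdp(reg_state, 2, 3, false, 0, 0xfee00000ULL);

	printf("PDP0: %08" PRIx32 " %08" PRIx32 "\n", reg_state[0], reg_state[1]);
	printf("PDP1: %08" PRIx32 " %08" PRIx32 "\n", reg_state[2], reg_state[3]);
	return 0;
}
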
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index adb731e4..04d3a6d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -36,6 +36,8 @@
 #define RING_CONTEXT_STATUS_PTR(ring)	((ring)->mmio_base+0x3a0)
 
 /* Logical Rings */
+int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request,
+					    struct intel_context *ctx);
 void intel_logical_ring_stop(struct intel_engine_cs *ring);
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_init(struct drm_device *dev);
@@ -63,9 +65,6 @@
 	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
 	ringbuf->tail += 4;
 }
-int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
-			     struct intel_context *ctx,
-			     int num_dwords);
 
 /* Logical Ring Contexts */
 void intel_lr_context_free(struct intel_context *ctx);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 5abda1d..9a9df0f 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -781,7 +781,7 @@
 		    child->device_type != DEVICE_TYPE_LFP)
 			continue;
 
-		if (intel_gmbus_is_port_valid(child->i2c_pin))
+		if (intel_gmbus_is_valid_pin(dev_priv, child->i2c_pin))
 			*i2c_pin = child->i2c_pin;
 
 		/* However, we cannot trust the BIOS writers to populate
@@ -921,7 +921,7 @@
 	if (dmi_check_system(intel_no_lvds))
 		return;
 
-	pin = GMBUS_PORT_PANEL;
+	pin = GMBUS_PIN_PANEL;
 	if (!lvds_is_present_in_vbt(dev, &pin)) {
 		DRM_DEBUG_KMS("LVDS is not present in VBT\n");
 		return;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index dd92122..5fd2d5a 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -172,10 +172,11 @@
 	struct intel_crtc *crtc;
 	struct drm_i915_gem_object *vid_bo;
 	struct drm_i915_gem_object *old_vid_bo;
-	int active;
-	int pfit_active;
+	bool active;
+	bool pfit_active;
 	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
-	u32 color_key;
+	u32 color_key:24;
+	u32 color_key_enabled:1;
 	u32 brightness, contrast, saturation;
 	u32 old_xscale, old_yscale;
 	/* register access */
@@ -216,7 +217,7 @@
 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	BUG_ON(overlay->last_flip_req);
+	WARN_ON(overlay->last_flip_req);
 	i915_gem_request_assign(&overlay->last_flip_req,
 					     ring->outstanding_lazy_request);
 	ret = i915_add_request(ring);
@@ -241,15 +242,15 @@
 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	BUG_ON(overlay->active);
-	overlay->active = 1;
-
+	WARN_ON(overlay->active);
 	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
 
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
 
+	overlay->active = true;
+
 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
@@ -270,7 +271,7 @@
 	u32 tmp;
 	int ret;
 
-	BUG_ON(!overlay->active);
+	WARN_ON(!overlay->active);
 
 	if (load_polyphase_filter)
 		flip_addr |= OFC_UPDATE;
@@ -309,7 +310,8 @@
 	struct drm_i915_gem_object *obj = overlay->vid_bo;
 
 	/* never have the overlay hw on without showing a frame */
-	BUG_ON(!overlay->vid_bo);
+	if (WARN_ON(!obj))
+		return;
 
 	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
@@ -317,7 +319,7 @@
 
 	overlay->crtc->overlay = NULL;
 	overlay->crtc = NULL;
-	overlay->active = 0;
+	overlay->active = false;
 }
 
 /* overlay needs to be disabled in OCMD reg */
@@ -329,7 +331,7 @@
 	u32 flip_addr = overlay->flip_addr;
 	int ret;
 
-	BUG_ON(!overlay->active);
+	WARN_ON(!overlay->active);
 
 	/* According to intel docs the overlay hw may hang (when switching
 	 * off) without loading the filter coeffs. It is however unclear whether
@@ -629,31 +631,36 @@
 			    struct overlay_registers __iomem *regs)
 {
 	u32 key = overlay->color_key;
+	u32 flags;
+
+	flags = 0;
+	if (overlay->color_key_enabled)
+		flags |= DST_KEY_ENABLE;
 
 	switch (overlay->crtc->base.primary->fb->bits_per_pixel) {
 	case 8:
-		iowrite32(0, &regs->DCLRKV);
-		iowrite32(CLK_RGB8I_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
+		key = 0;
+		flags |= CLK_RGB8I_MASK;
 		break;
 
 	case 16:
 		if (overlay->crtc->base.primary->fb->depth == 15) {
-			iowrite32(RGB15_TO_COLORKEY(key), &regs->DCLRKV);
-			iowrite32(CLK_RGB15_MASK | DST_KEY_ENABLE,
-				  &regs->DCLRKM);
+			key = RGB15_TO_COLORKEY(key);
+			flags |= CLK_RGB15_MASK;
 		} else {
-			iowrite32(RGB16_TO_COLORKEY(key), &regs->DCLRKV);
-			iowrite32(CLK_RGB16_MASK | DST_KEY_ENABLE,
-				  &regs->DCLRKM);
+			key = RGB16_TO_COLORKEY(key);
+			flags |= CLK_RGB16_MASK;
 		}
 		break;
 
 	case 24:
 	case 32:
-		iowrite32(key, &regs->DCLRKV);
-		iowrite32(CLK_RGB24_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
+		flags |= CLK_RGB24_MASK;
 		break;
 	}
+
+	iowrite32(key, &regs->DCLRKV);
+	iowrite32(flags, &regs->DCLRKM);
 }
 
 static u32 overlay_cmd_reg(struct put_image_params *params)
@@ -712,9 +719,8 @@
 	u32 swidth, swidthsw, sheight, ostride;
 	enum pipe pipe = overlay->crtc->pipe;
 
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
-	BUG_ON(!overlay);
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	ret = intel_overlay_release_old_vid(overlay);
 	if (ret != 0)
@@ -824,8 +830,8 @@
 	struct drm_device *dev = overlay->dev;
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	ret = intel_overlay_recover_from_interrupt(overlay);
 	if (ret != 0)
@@ -1131,10 +1137,10 @@
 		/* line too wide, i.e. one-line-mode */
 		if (mode->hdisplay > 1024 &&
 		    intel_panel_fitter_pipe(dev) == crtc->pipe) {
-			overlay->pfit_active = 1;
+			overlay->pfit_active = true;
 			update_pfit_vscale_ratio(overlay);
 		} else
-			overlay->pfit_active = 0;
+			overlay->pfit_active = false;
 	}
 
 	ret = check_overlay_dst(overlay, put_image_rec);
@@ -1329,6 +1335,7 @@
 			I915_WRITE(OGAMC5, attrs->gamma5);
 		}
 	}
+	overlay->color_key_enabled = (attrs->flags & I915_OVERLAY_DISABLE_DEST_COLORKEY) == 0;
 
 	ret = 0;
 out_unlock:
@@ -1392,6 +1399,7 @@
 
 	/* init all values */
 	overlay->color_key = 0x0101fe;
+	overlay->color_key_enabled = true;
 	overlay->brightness = -19;
 	overlay->contrast = 75;
 	overlay->saturation = 146;
@@ -1432,7 +1440,7 @@
 	/* The bo's should be free'd by the generic code already.
 	 * Furthermore modesetting teardown happens beforehand so the
 	 * hardware should be off already */
-	BUG_ON(dev_priv->overlay->active);
+	WARN_ON(dev_priv->overlay->active);
 
 	drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base);
 	kfree(dev_priv->overlay);
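
Editor's note: the overlay color-key hunk above computes the key value and the DCLRKM flags once in the switch and issues a single pair of writes at the end, which also makes it easy to honour the new color_key_enabled flag. A rough standalone model follows; the mask constants and conversion macros are placeholders, not the real register definitions from i915_reg.h.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder bit definitions; the real values live in i915_reg.h */
#define DST_KEY_ENABLE	(1u << 31)
#define CLK_RGB8I_MASK	(1u << 0)
#define CLK_RGB15_MASK	(1u << 1)
#define CLK_RGB16_MASK	(1u << 2)
#define CLK_RGB24_MASK	(1u << 3)
#define RGB15_TO_COLORKEY(k)	(k)	/* conversion elided in this sketch */
#define RGB16_TO_COLORKEY(k)	(k)

/* Compute DCLRKV (key) and DCLRKM (mask/flags) in one pass, then write once */
static void update_colorkey(int bpp, int depth, uint32_t key, bool enabled,
			    uint32_t *dclrkv, uint32_t *dclrkm)
{
	uint32_t flags = enabled ? DST_KEY_ENABLE : 0;

	switch (bpp) {
	case 8:
		key = 0;
		flags |= CLK_RGB8I_MASK;
		break;
	case 16:
		if (depth == 15) {
			key = RGB15_TO_COLORKEY(key);
			flags |= CLK_RGB15_MASK;
		} else {
			key = RGB16_TO_COLORKEY(key);
			flags |= CLK_RGB16_MASK;
		}
		break;
	case 24:
	case 32:
		flags |= CLK_RGB24_MASK;
		break;
	}

	*dclrkv = key;		/* single "register" write for the key ... */
	*dclrkm = flags;	/* ... and one for the mask, with or without DST_KEY_ENABLE */
}

int main(void)
{
	uint32_t kv, km;

	update_colorkey(16, 16, 0x0101fe, true, &kv, &km);
	printf("DCLRKV=%08" PRIx32 " DCLRKM=%08" PRIx32 "\n", kv, km);
	return 0;
}
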
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index fa4ccb3..a7516ed 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -67,7 +67,7 @@
 
 	gen9_init_clock_gating(dev);
 
-	if (INTEL_REVID(dev) == SKL_REVID_A0) {
+	if (INTEL_REVID(dev) <= SKL_REVID_B0) {
 		/*
 		 * WaDisableSDEUnitClockGating:skl
 		 * WaSetGAPSunitClckGateDisable:skl
@@ -75,6 +75,10 @@
 		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 			   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |
 			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+		/* WaDisableVFUnitClockGating:skl */
+		I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) |
+			   GEN6_VFUNIT_CLOCK_GATE_DISABLE);
 	}
 
 	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
@@ -94,6 +98,26 @@
 			   GEN8_LQSC_RO_PERF_DIS);
 }
 
+static void bxt_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	gen9_init_clock_gating(dev);
+
+	/*
+	 * FIXME:
+	 * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.
+	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
+	 */
+	 /* WaDisableSDEUnitClockGating:bxt */
+	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
+		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
+
+	/* FIXME: apply on A0 only */
+	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
+}
+
 static void i915_pineview_get_mem_freq(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1792,7 +1816,7 @@
 	linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
 				     mode->crtc_clock);
 	ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
-					 intel_ddi_get_cdclk_freq(dev_priv));
+					 dev_priv->display.get_display_clock_speed(dev_priv->dev));
 
 	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
 	       PIPE_WM_LINETIME_TIME(linetime);
@@ -2538,6 +2562,7 @@
  */
 
 #define SKL_DDB_SIZE		896	/* in blocks */
+#define BXT_DDB_SIZE		512
 
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
@@ -2556,7 +2581,10 @@
 		return;
 	}
 
-	ddb_size = SKL_DDB_SIZE;
+	if (IS_BROXTON(dev))
+		ddb_size = BXT_DDB_SIZE;
+	else
+		ddb_size = SKL_DDB_SIZE;
 
 	ddb_size -= 4; /* 4 blocks for bypass path allocation */
 
@@ -3178,7 +3206,7 @@
 {
 	struct drm_device *dev = dev_priv->dev;
 	struct skl_ddb_allocation *cur_ddb, *new_ddb;
-	bool reallocated[I915_MAX_PIPES] = {false, false, false};
+	bool reallocated[I915_MAX_PIPES] = {};
 	struct intel_crtc *crtc;
 	enum pipe pipe;
 
@@ -3930,6 +3958,8 @@
 		    GEN6_RP_DOWN_IDLE_AVG);
 
 	dev_priv->rps.power = new_power;
+	dev_priv->rps.up_threshold = threshold_up;
+	dev_priv->rps.down_threshold = threshold_down;
 	dev_priv->rps.last_adj = 0;
 }
 
@@ -4001,8 +4031,11 @@
 		      "Odd GPU freq value\n"))
 		val &= ~1;
 
-	if (val != dev_priv->rps.cur_freq)
+	if (val != dev_priv->rps.cur_freq) {
 		vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+		if (!IS_CHERRYVIEW(dev_priv))
+			gen6_set_rps_thresholds(dev_priv, val);
+	}
 
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
@@ -4051,6 +4084,7 @@
 				& GENFREQSTATUS) == 0, 100))
 		DRM_ERROR("timed out waiting for Punit\n");
 
+	gen6_set_rps_thresholds(dev_priv, val);
 	vlv_force_gfx_clock(dev_priv, false);
 
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
@@ -4081,10 +4115,14 @@
 		dev_priv->rps.last_adj = 0;
 		I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 	}
+
+	while (!list_empty(&dev_priv->rps.clients))
+		list_del_init(dev_priv->rps.clients.next);
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
-void gen6_rps_boost(struct drm_i915_private *dev_priv)
+void gen6_rps_boost(struct drm_i915_private *dev_priv,
+		    struct drm_i915_file_private *file_priv)
 {
 	u32 val;
 
@@ -4092,9 +4130,16 @@
 	val = dev_priv->rps.max_freq_softlimit;
 	if (dev_priv->rps.enabled &&
 	    dev_priv->mm.busy &&
-	    dev_priv->rps.cur_freq < val) {
+	    dev_priv->rps.cur_freq < val &&
+	    (file_priv == NULL || list_empty(&file_priv->rps_boost))) {
 		intel_set_rps(dev_priv->dev, val);
 		dev_priv->rps.last_adj = 0;
+
+		if (file_priv != NULL) {
+			list_add(&file_priv->rps_boost, &dev_priv->rps.clients);
+			file_priv->rps_boosts++;
+		} else
+			dev_priv->rps.boosts++;
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
@@ -4325,8 +4370,13 @@
 				   GEN6_RC_CTL_EI_MODE(1) |
 				   rc6_mask);
 
-	/* 3b: Enable Coarse Power Gating only when RC6 is enabled */
-	I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 3 : 0);
+	/*
+	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
+	 * WaDisableRenderPowerGating:skl,bxt - Render PG needs to be disabled with RC6.
+	 */
+	I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
+			GEN9_MEDIA_PG_ENABLE : 0);
+
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
@@ -4996,8 +5046,8 @@
 		I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
 	I915_WRITE(GEN6_RC_SLEEP, 0);
 
-	/* TO threshold set to 1750 us ( 0x557 * 1.28 us) */
-	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
+	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
+	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
 
 	/* allows RC6 residency counter to work */
 	I915_WRITE(VLV_COUNTER_CONTROL,
@@ -6544,7 +6594,12 @@
 	if (INTEL_INFO(dev)->gen >= 9) {
 		skl_setup_wm_latency(dev);
 
-		dev_priv->display.init_clock_gating = skl_init_clock_gating;
+		if (IS_BROXTON(dev))
+			dev_priv->display.init_clock_gating =
+				bxt_init_clock_gating;
+		else if (IS_SKYLAKE(dev))
+			dev_priv->display.init_clock_gating =
+				skl_init_clock_gating;
 		dev_priv->display.update_wm = skl_update_wm;
 		dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
 	} else if (HAS_PCH_SPLIT(dev)) {
@@ -6762,6 +6817,41 @@
 		return val / GT_FREQUENCY_MULTIPLIER;
 }
 
+struct request_boost {
+	struct work_struct work;
+	struct drm_i915_gem_request *rq;
+};
+
+static void __intel_rps_boost_work(struct work_struct *work)
+{
+	struct request_boost *boost = container_of(work, struct request_boost, work);
+
+	if (!i915_gem_request_completed(boost->rq, true))
+		gen6_rps_boost(to_i915(boost->rq->ring->dev), NULL);
+
+	i915_gem_request_unreference__unlocked(boost->rq);
+	kfree(boost);
+}
+
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+				       struct drm_i915_gem_request *rq)
+{
+	struct request_boost *boost;
+
+	if (rq == NULL || INTEL_INFO(dev)->gen < 6)
+		return;
+
+	boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
+	if (boost == NULL)
+		return;
+
+	i915_gem_request_reference(rq);
+	boost->rq = rq;
+
+	INIT_WORK(&boost->work, __intel_rps_boost_work);
+	queue_work(to_i915(dev)->wq, &boost->work);
+}
+
 void intel_pm_setup(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -6770,6 +6860,7 @@
 
 	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
 			  intel_gen6_powersave_work);
+	INIT_LIST_HEAD(&dev_priv->rps.clients);
 
 	dev_priv->pm.suspended = false;
 }
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index a8f9348..5ee0fa5 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -117,6 +117,19 @@
 	I915_WRITE(VLV_VSCSDP(pipe), val);
 }
 
+static void skl_psr_setup_su_vsc(struct intel_dp *intel_dp)
+{
+	struct edp_vsc_psr psr_vsc;
+
+	/* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */
+	memset(&psr_vsc, 0, sizeof(psr_vsc));
+	psr_vsc.sdp_header.HB0 = 0;
+	psr_vsc.sdp_header.HB1 = 0x7;
+	psr_vsc.sdp_header.HB2 = 0x3;
+	psr_vsc.sdp_header.HB3 = 0xb;
+	intel_psr_write_vsc(intel_dp, &psr_vsc);
+}
+
 static void hsw_psr_setup_vsc(struct intel_dp *intel_dp)
 {
 	struct edp_vsc_psr psr_vsc;
@@ -133,7 +146,7 @@
 static void vlv_psr_enable_sink(struct intel_dp *intel_dp)
 {
 	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
-			   DP_PSR_ENABLE);
+			   DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE);
 }
 
 static void hsw_psr_enable_sink(struct intel_dp *intel_dp)
@@ -157,13 +170,14 @@
 
 	aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0);
 
-	/* Enable PSR in sink */
-	if (dev_priv->psr.link_standby)
-		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
-				   DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE);
-	else
-		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
-				   DP_PSR_ENABLE & ~DP_PSR_MAIN_LINK_ACTIVE);
+	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
+			   DP_PSR_ENABLE & ~DP_PSR_MAIN_LINK_ACTIVE);
+
+	/* Enable AUX frame sync at sink */
+	if (dev_priv->psr.aux_frame_sync)
+		drm_dp_dpcd_writeb(&intel_dp->aux,
+				DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF,
+				DP_AUX_FRAME_SYNC_ENABLE);
 
 	aux_data_reg = (INTEL_INFO(dev)->gen >= 9) ?
 				DPA_AUX_CH_DATA1 : EDP_PSR_AUX_DATA1(dev);
@@ -183,8 +197,10 @@
 		val |= DP_AUX_CH_CTL_TIME_OUT_1600us;
 		val &= ~DP_AUX_CH_CTL_MESSAGE_SIZE_MASK;
 		val |= (sizeof(aux_msg) << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT);
-		/* Use hardcoded data values for PSR */
+		/* Use hardcoded data values for PSR, frame sync and GTC */
 		val &= ~DP_AUX_CH_CTL_PSR_DATA_AUX_REG_SKL;
+		val &= ~DP_AUX_CH_CTL_FS_DATA_AUX_REG_SKL;
+		val &= ~DP_AUX_CH_CTL_GTC_DATA_AUX_REG_SKL;
 		I915_WRITE(aux_ctl_reg, val);
 	} else {
 		I915_WRITE(aux_ctl_reg,
@@ -193,6 +209,8 @@
 		   (precharge << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) |
 		   (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT));
 	}
+
+	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, DP_PSR_ENABLE);
 }
 
 static void vlv_psr_enable_source(struct intel_dp *intel_dp)
@@ -232,6 +250,7 @@
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+
 	uint32_t max_sleep_time = 0x1f;
 	/* Lately it was identified that depending on panel idle frame count
 	 * calculated at HW can be off by 1. So let's use what came
@@ -242,19 +261,25 @@
 	uint32_t val = 0x0;
 	const uint32_t link_entry_time = EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES;
 
-	if (dev_priv->psr.link_standby) {
-		val |= EDP_PSR_LINK_STANDBY;
+	if (intel_dp->psr_dpcd[1] & DP_PSR_NO_TRAIN_ON_EXIT) {
+		/* It doesn't mean we shouldn't send TPS patterns, so let's
+		   send the minimal TP1 possible and skip TP2. */
+		val |= EDP_PSR_TP1_TIME_100us;
 		val |= EDP_PSR_TP2_TP3_TIME_0us;
-		val |= EDP_PSR_TP1_TIME_0us;
 		val |= EDP_PSR_SKIP_AUX_EXIT;
-	} else
-		val |= EDP_PSR_LINK_DISABLE;
+		/* Sink should be able to train with the 5 or 6 idle patterns */
+		idle_frames += 4;
+	}
 
 	I915_WRITE(EDP_PSR_CTL(dev), val |
 		   (IS_BROADWELL(dev) ? 0 : link_entry_time) |
 		   max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT |
 		   idle_frames << EDP_PSR_IDLE_FRAME_SHIFT |
 		   EDP_PSR_ENABLE);
+
+	if (dev_priv->psr.psr2_support)
+		I915_WRITE(EDP_PSR2_CTL, EDP_PSR2_ENABLE |
+				EDP_SU_TRACK_ENABLE | EDP_PSR2_TP2_TIME_100);
 }
 
 static bool intel_psr_match_conditions(struct intel_dp *intel_dp)
@@ -294,6 +319,12 @@
 		return false;
 	}
 
+	if (!IS_VALLEYVIEW(dev) && ((dev_priv->vbt.psr.full_link) ||
+				    (dig_port->port != PORT_A))) {
+		DRM_DEBUG_KMS("PSR condition failed: Link Standby requested/needed but not supported on this platform\n");
+		return false;
+	}
+
 	dev_priv->psr.source_ok = true;
 	return true;
 }
@@ -332,6 +363,7 @@
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
 
 	if (!HAS_PSR(dev)) {
 		DRM_DEBUG_KMS("PSR not supported on this platform\n");
@@ -352,18 +384,20 @@
 	if (!intel_psr_match_conditions(intel_dp))
 		goto unlock;
 
-	/* First we check VBT, but we must respect sink and source
-	 * known restrictions */
-	dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link;
-	if ((intel_dp->psr_dpcd[1] & DP_PSR_NO_TRAIN_ON_EXIT) ||
-	    (IS_BROADWELL(dev) && intel_dig_port->port != PORT_A))
-		dev_priv->psr.link_standby = true;
-
 	dev_priv->psr.busy_frontbuffer_bits = 0;
 
 	if (HAS_DDI(dev)) {
 		hsw_psr_setup_vsc(intel_dp);
 
+		if (dev_priv->psr.psr2_support) {
+			/* PSR2 is restricted to work with panel resolutions up to 3200x2000 */
+			if (crtc->config->pipe_src_w > 3200 ||
+				crtc->config->pipe_src_h > 2000)
+				dev_priv->psr.psr2_support = false;
+			else
+				skl_psr_setup_su_vsc(intel_dp);
+		}
+
 		/* Avoid continuous PSR exit by masking memup and hpd */
 		I915_WRITE(EDP_PSR_DEBUG_CTL(dev), EDP_PSR_DEBUG_MASK_MEMUP |
 			   EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP);
@@ -560,6 +594,48 @@
 }
 
 /**
+ * intel_psr_single_frame_update - Single Frame Update
+ * @dev: DRM device
+ *
+ * Some platforms support a single frame update feature that is used to
+ * send and update only one frame on the Remote Frame Buffer.
+ * So far it is only implemented for Valleyview and Cherryview because
+ * hardware requires this to be done before a page flip.
+ */
+void intel_psr_single_frame_update(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_crtc *crtc;
+	enum pipe pipe;
+	u32 val;
+
+	/*
+	 * Single frame update is already supported on BDW+ but it requires
+	 * many W/A and it isn't really needed.
+	 */
+	if (!IS_VALLEYVIEW(dev))
+		return;
+
+	mutex_lock(&dev_priv->psr.lock);
+	if (!dev_priv->psr.enabled) {
+		mutex_unlock(&dev_priv->psr.lock);
+		return;
+	}
+
+	crtc = dp_to_dig_port(dev_priv->psr.enabled)->base.base.crtc;
+	pipe = to_intel_crtc(crtc)->pipe;
+	val = I915_READ(VLV_PSRCTL(pipe));
+
+	/*
+	 * We need to set this bit before writing registers for a flip.
+	 * This bit is self-clearing once the hardware reaches the PSR active state.
+	 */
+	I915_WRITE(VLV_PSRCTL(pipe), val | VLV_EDP_PSR_SINGLE_FRAME_UPDATE);
+
+	mutex_unlock(&dev_priv->psr.lock);
+}
+
+/**
  * intel_psr_invalidate - Invalidate PSR
  * @dev: DRM device
  * @frontbuffer_bits: frontbuffer plane tracking bits
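
Editor's note: skl_psr_setup_su_vsc() above only has to fill the four VSC SDP header bytes used for PSR2 selective update before reusing the existing VSC write helper; the byte values come straight from the hunk (eDP 1.4, Table 6-11). A minimal sketch with a simplified packet struct, which is an assumption and not the driver's real definition:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-ins for the driver's VSC packet structures */
struct vsc_sdp_header {
	uint8_t HB0, HB1, HB2, HB3;
};

struct edp_vsc_psr {
	struct vsc_sdp_header sdp_header;
	uint8_t db[28];
};

static void setup_su_vsc(struct edp_vsc_psr *vsc)
{
	memset(vsc, 0, sizeof(*vsc));

	/* Header byte values taken from the hunk (eDP 1.4, Table 6-11) */
	vsc->sdp_header.HB0 = 0x0;
	vsc->sdp_header.HB1 = 0x7;
	vsc->sdp_header.HB2 = 0x3;
	vsc->sdp_header.HB3 = 0xb;
}

int main(void)
{
	struct edp_vsc_psr vsc;

	setup_su_vsc(&vsc);
	printf("VSC header: %02x %02x %02x %02x\n",
	       vsc.sdp_header.HB0, vsc.sdp_header.HB1,
	       vsc.sdp_header.HB2, vsc.sdp_header.HB3);
	return 0;
}
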
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e250..de8c074 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -853,6 +853,9 @@
 			    GEN6_WIZ_HASHING_MASK,
 			    GEN6_WIZ_HASHING_16x4);
 
+	/* WaProgramL3SqcReg1Default:bdw */
+	WA_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+
 	return 0;
 }
 
@@ -966,6 +969,15 @@
 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 
+	/*
+	 * FIXME: don't apply the following on BXT for stepping C. On BXT A0
+	 * the flag reads back as 0.
+	 */
+	/* WaDisableMaskBasedCammingInRCC:sklC,bxtA */
+	if (INTEL_REVID(dev) == SKL_REVID_C0 || IS_BROXTON(dev))
+		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
+				  PIXEL_MASK_CAMMING_DISABLE);
+
 	return 0;
 }
 
@@ -1027,6 +1039,13 @@
 	return skl_tune_iz_hashing(ring);
 }
 
+static int bxt_init_workarounds(struct intel_engine_cs *ring)
+{
+	gen9_init_workarounds(ring);
+
+	return 0;
+}
+
 int init_workarounds_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -1044,8 +1063,9 @@
 
 	if (IS_SKYLAKE(dev))
 		return skl_init_workarounds(ring);
-	else if (IS_GEN9(dev))
-		return gen9_init_workarounds(ring);
+
+	if (IS_BROXTON(dev))
+		return bxt_init_workarounds(ring);
 
 	return 0;
 }
@@ -1972,6 +1992,7 @@
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
+	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	ringbuf->size = 32 * PAGE_SIZE;
 	ringbuf->ring = ring;
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
@@ -2050,28 +2071,29 @@
 	cleanup_status_page(ring);
 
 	i915_cmd_parser_fini_ring(ring);
+	i915_gem_batch_pool_fini(&ring->batch_pool);
 
 	kfree(ringbuf);
 	ring->buffer = NULL;
 }
 
-static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
+static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
 	struct drm_i915_gem_request *request;
-	int ret;
+	int ret, new_space;
 
 	if (intel_ring_space(ringbuf) >= n)
 		return 0;
 
 	list_for_each_entry(request, &ring->request_list, list) {
-		if (__intel_ring_space(request->postfix, ringbuf->tail,
-				       ringbuf->size) >= n) {
+		new_space = __intel_ring_space(request->postfix, ringbuf->tail,
+				       ringbuf->size);
+		if (new_space >= n)
 			break;
-		}
 	}
 
-	if (&request->list == &ring->request_list)
+	if (WARN_ON(&request->list == &ring->request_list))
 		return -ENOSPC;
 
 	ret = i915_wait_request(request);
@@ -2080,61 +2102,11 @@
 
 	i915_gem_retire_requests_ring(ring);
 
+	WARN_ON(intel_ring_space(ringbuf) < new_space);
+
 	return 0;
 }
 
-static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
-{
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ringbuffer *ringbuf = ring->buffer;
-	unsigned long end;
-	int ret;
-
-	ret = intel_ring_wait_request(ring, n);
-	if (ret != -ENOSPC)
-		return ret;
-
-	/* force the tail write in case we have been skipping them */
-	__intel_ring_advance(ring);
-
-	/* With GEM the hangcheck timer should kick us out of the loop,
-	 * leaving it early runs the risk of corrupting GEM state (due
-	 * to running on almost untested codepaths). But on resume
-	 * timers don't work yet, so prevent a complete hang in that
-	 * case by choosing an insanely large timeout. */
-	end = jiffies + 60 * HZ;
-
-	ret = 0;
-	trace_i915_ring_wait_begin(ring);
-	do {
-		if (intel_ring_space(ringbuf) >= n)
-			break;
-		ringbuf->head = I915_READ_HEAD(ring);
-		if (intel_ring_space(ringbuf) >= n)
-			break;
-
-		msleep(1);
-
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
-		if (time_after(jiffies, end)) {
-			ret = -EBUSY;
-			break;
-		}
-	} while (1);
-	trace_i915_ring_wait_end(ring);
-	return ret;
-}
-
 static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 {
 	uint32_t __iomem *virt;
@@ -2181,32 +2153,9 @@
 	return i915_wait_request(req);
 }
 
-static int
-intel_ring_alloc_request(struct intel_engine_cs *ring)
+int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
-	int ret;
-	struct drm_i915_gem_request *request;
-	struct drm_i915_private *dev_private = ring->dev->dev_private;
-
-	if (ring->outstanding_lazy_request)
-		return 0;
-
-	request = kzalloc(sizeof(*request), GFP_KERNEL);
-	if (request == NULL)
-		return -ENOMEM;
-
-	kref_init(&request->ref);
-	request->ring = ring;
-	request->ringbuf = ring->buffer;
-	request->uniq = dev_private->request_uniq++;
-
-	ret = i915_gem_get_seqno(ring->dev, &request->seqno);
-	if (ret) {
-		kfree(request);
-		return ret;
-	}
-
-	ring->outstanding_lazy_request = request;
+	request->ringbuf = request->ring->buffer;
 	return 0;
 }
 
@@ -2247,7 +2196,7 @@
 		return ret;
 
 	/* Preallocate the olr before touching the ring */
-	ret = intel_ring_alloc_request(ring);
+	ret = i915_gem_request_alloc(ring, ring->default_context);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c761fe0..39f6dfc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -2,6 +2,7 @@
 #define _INTEL_RINGBUFFER_H_
 
 #include <linux/hashtable.h>
+#include "i915_gem_batch_pool.h"
 
 #define I915_CMD_HASH_ORDER 9
 
@@ -133,6 +134,13 @@
 	struct		drm_device *dev;
 	struct intel_ringbuffer *buffer;
 
+	/*
+	 * A pool of objects to use as shadow copies of client batch buffers
+	 * when the command parser is enabled. Prevents the client from
+	 * modifying the batch contents after software parsing.
+	 */
+	struct i915_gem_batch_pool batch_pool;
+
 	struct intel_hw_status_page status_page;
 
 	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
@@ -390,6 +398,8 @@
 void intel_stop_ring_buffer(struct intel_engine_cs *ring);
 void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);
 
+int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
+
 int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
 int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
 static inline void intel_ring_emit(struct intel_engine_cs *ring,
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index ce00e69..8fe2fde 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -319,6 +319,104 @@
 	SKL_DISPLAY_MISC_IO_POWER_DOMAINS)) |		\
 	BIT(POWER_DOMAIN_INIT))
 
+#define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
+	BIT(POWER_DOMAIN_TRANSCODER_A) |		\
+	BIT(POWER_DOMAIN_PIPE_B) |			\
+	BIT(POWER_DOMAIN_TRANSCODER_B) |		\
+	BIT(POWER_DOMAIN_PIPE_C) |			\
+	BIT(POWER_DOMAIN_TRANSCODER_C) |		\
+	BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
+	BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |		\
+	BIT(POWER_DOMAIN_AUX_B) |			\
+	BIT(POWER_DOMAIN_AUX_C) |			\
+	BIT(POWER_DOMAIN_AUDIO) |			\
+	BIT(POWER_DOMAIN_VGA) |				\
+	BIT(POWER_DOMAIN_INIT))
+#define BXT_DISPLAY_POWERWELL_1_POWER_DOMAINS (		\
+	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT(POWER_DOMAIN_PIPE_A) |			\
+	BIT(POWER_DOMAIN_TRANSCODER_EDP) |		\
+	BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |		\
+	BIT(POWER_DOMAIN_AUX_A) |			\
+	BIT(POWER_DOMAIN_PLLS) |			\
+	BIT(POWER_DOMAIN_INIT))
+#define BXT_DISPLAY_ALWAYS_ON_POWER_DOMAINS (		\
+	(POWER_DOMAIN_MASK & ~(BXT_DISPLAY_POWERWELL_1_POWER_DOMAINS |	\
+	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS)) |	\
+	BIT(POWER_DOMAIN_INIT))
+
+static void assert_can_enable_dc9(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+
+	WARN(!IS_BROXTON(dev), "Platform doesn't support DC9.\n");
+	WARN((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC9),
+		"DC9 already programmed to be enabled.\n");
+	WARN(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5,
+		"DC5 still not disabled to enable DC9.\n");
+	WARN(I915_READ(HSW_PWR_WELL_DRIVER), "Power well on.\n");
+	WARN(intel_irqs_enabled(dev_priv), "Interrupts not disabled yet.\n");
+
+	 /*
+	  * TODO: check for the following to verify the conditions to enter DC9
+	  * state are satisfied:
+	  * 1] Check relevant display engine registers to verify if mode set
+	  * disable sequence was followed.
+	  * 2] Check if the display uninitialize sequence was initiated.
+	  */
+}
+
+static void assert_can_disable_dc9(struct drm_i915_private *dev_priv)
+{
+	WARN(intel_irqs_enabled(dev_priv), "Interrupts not disabled yet.\n");
+	WARN(!(I915_READ(DC_STATE_EN) & DC_STATE_EN_DC9),
+		"DC9 already programmed to be disabled.\n");
+	WARN(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5,
+		"DC5 still not disabled.\n");
+
+	 /*
+	  * TODO: check for the following to verify DC9 state was indeed
+	  * entered before programming to disable it:
+	  * 1] Check relevant display engine registers to verify if mode
+	  *  set disable sequence was followed.
+	  * 2] Check if the display uninitialize sequence was initiated.
+	  */
+}
+
+void bxt_enable_dc9(struct drm_i915_private *dev_priv)
+{
+	uint32_t val;
+
+	assert_can_enable_dc9(dev_priv);
+
+	DRM_DEBUG_KMS("Enabling DC9\n");
+
+	val = I915_READ(DC_STATE_EN);
+	val |= DC_STATE_EN_DC9;
+	I915_WRITE(DC_STATE_EN, val);
+	POSTING_READ(DC_STATE_EN);
+}
+
+void bxt_disable_dc9(struct drm_i915_private *dev_priv)
+{
+	uint32_t val;
+
+	assert_can_disable_dc9(dev_priv);
+
+	DRM_DEBUG_KMS("Disabling DC9\n");
+
+	val = I915_READ(DC_STATE_EN);
+	val &= ~DC_STATE_EN_DC9;
+	I915_WRITE(DC_STATE_EN, val);
+	POSTING_READ(DC_STATE_EN);
+}
+
 static void skl_set_power_well(struct drm_i915_private *dev_priv,
 			struct i915_power_well *power_well, bool enable)
 {
@@ -1313,6 +1411,27 @@
 	},
 };
 
+static struct i915_power_well bxt_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = 1,
+		.domains = BXT_DISPLAY_ALWAYS_ON_POWER_DOMAINS,
+		.ops = &i9xx_always_on_power_well_ops,
+	},
+	{
+		.name = "power well 1",
+		.domains = BXT_DISPLAY_POWERWELL_1_POWER_DOMAINS,
+		.ops = &skl_power_well_ops,
+		.data = SKL_DISP_PW_1,
+	},
+	{
+		.name = "power well 2",
+		.domains = BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS,
+		.ops = &skl_power_well_ops,
+		.data = SKL_DISP_PW_2,
+	}
+};
+
 #define set_power_wells(power_domains, __power_wells) ({		\
 	(power_domains)->power_wells = (__power_wells);			\
 	(power_domains)->power_well_count = ARRAY_SIZE(__power_wells);	\
@@ -1341,6 +1460,8 @@
 		set_power_wells(power_domains, bdw_power_wells);
 	} else if (IS_SKYLAKE(dev_priv->dev)) {
 		set_power_wells(power_domains, skl_power_wells);
+	} else if (IS_BROXTON(dev_priv->dev)) {
+		set_power_wells(power_domains, bxt_power_wells);
 	} else if (IS_CHERRYVIEW(dev_priv->dev)) {
 		set_power_wells(power_domains, chv_power_wells);
 	} else if (IS_VALLEYVIEW(dev_priv->dev)) {
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index e87d2f4..10cd332 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -2291,10 +2291,11 @@
 	else
 		mapping = &dev_priv->sdvo_mappings[1];
 
-	if (mapping->initialized && intel_gmbus_is_port_valid(mapping->i2c_pin))
+	if (mapping->initialized &&
+	    intel_gmbus_is_valid_pin(dev_priv, mapping->i2c_pin))
 		pin = mapping->i2c_pin;
 	else
-		pin = GMBUS_PORT_DPB;
+		pin = GMBUS_PIN_DPB;
 
 	sdvo->i2c = intel_gmbus_get_adapter(dev_priv, pin);
 
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index a4c0a04..e3d41c0 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -190,10 +190,13 @@
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	const int pipe = intel_plane->pipe;
 	const int plane = intel_plane->plane + 1;
-	u32 plane_ctl, stride_div;
+	u32 plane_ctl, stride_div, stride;
 	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
 	const struct drm_intel_sprite_colorkey *key = &intel_plane->ckey;
 	unsigned long surf_addr;
+	u32 tile_height, plane_offset, plane_size;
+	unsigned int rotation;
+	int x_offset, y_offset;
 
 	plane_ctl = PLANE_CTL_ENABLE |
 		PLANE_CTL_PIPE_CSC_ENABLE;
@@ -254,8 +257,20 @@
 		MISSING_CASE(fb->modifier[0]);
 	}
 
-	if (drm_plane->state->rotation == BIT(DRM_ROTATE_180))
+	rotation = drm_plane->state->rotation;
+	switch (rotation) {
+	case BIT(DRM_ROTATE_90):
+		plane_ctl |= PLANE_CTL_ROTATE_90;
+		break;
+
+	case BIT(DRM_ROTATE_180):
 		plane_ctl |= PLANE_CTL_ROTATE_180;
+		break;
+
+	case BIT(DRM_ROTATE_270):
+		plane_ctl |= PLANE_CTL_ROTATE_270;
+		break;
+	}
 
 	intel_update_sprite_watermarks(drm_plane, crtc, src_w, src_h,
 				       pixel_size, true,
@@ -283,10 +298,26 @@
 
 	surf_addr = intel_plane_obj_offset(intel_plane, obj);
 
-	I915_WRITE(PLANE_OFFSET(pipe, plane), (y << 16) | x);
-	I915_WRITE(PLANE_STRIDE(pipe, plane), fb->pitches[0] / stride_div);
+	if (intel_rotation_90_or_270(rotation)) {
+		/* stride: Surface height in tiles */
+		tile_height = intel_tile_height(dev, fb->bits_per_pixel,
+							fb->modifier[0]);
+		stride = DIV_ROUND_UP(fb->height, tile_height);
+		plane_size = (src_w << 16) | src_h;
+		x_offset = stride * tile_height - y - (src_h + 1);
+		y_offset = x;
+	} else {
+		stride = fb->pitches[0] / stride_div;
+		plane_size = (src_h << 16) | src_w;
+		x_offset = x;
+		y_offset = y;
+	}
+	plane_offset = y_offset << 16 | x_offset;
+
+	I915_WRITE(PLANE_OFFSET(pipe, plane), plane_offset);
+	I915_WRITE(PLANE_STRIDE(pipe, plane), stride);
 	I915_WRITE(PLANE_POS(pipe, plane), (crtc_y << 16) | crtc_x);
-	I915_WRITE(PLANE_SIZE(pipe, plane), (crtc_h << 16) | crtc_w);
+	I915_WRITE(PLANE_SIZE(pipe, plane), plane_size);
 	I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl);
 	I915_WRITE(PLANE_SURF(pipe, plane), surf_addr);
 	POSTING_READ(PLANE_SURF(pipe, plane));
@@ -1006,10 +1037,10 @@
 	}
 
 	if (state->visible) {
-		src->x1 = src_x;
-		src->x2 = src_x + src_w;
-		src->y1 = src_y;
-		src->y2 = src_y + src_h;
+		src->x1 = src_x << 16;
+		src->x2 = (src_x + src_w) << 16;
+		src->y1 = src_y << 16;
+		src->y2 = (src_y + src_h) << 16;
 	}
 
 	dst->x1 = crtc_x;
@@ -1081,10 +1112,10 @@
 			crtc_y = state->dst.y1;
 			crtc_w = drm_rect_width(&state->dst);
 			crtc_h = drm_rect_height(&state->dst);
-			src_x = state->src.x1;
-			src_y = state->src.y1;
-			src_w = drm_rect_width(&state->src);
-			src_h = drm_rect_height(&state->src);
+			src_x = state->src.x1 >> 16;
+			src_y = state->src.y1 >> 16;
+			src_w = drm_rect_width(&state->src) >> 16;
+			src_h = drm_rect_height(&state->src) >> 16;
 			intel_plane->update_plane(plane, crtc, fb,
 						  crtc_x, crtc_y, crtc_w, crtc_h,
 						  src_x, src_y, src_w, src_h);
@@ -1139,11 +1170,11 @@
 	if (!plane->crtc || !plane->state->fb)
 		return 0;
 
-	return plane->funcs->update_plane(plane, plane->crtc, plane->state->fb,
-				  plane->state->crtc_x, plane->state->crtc_y,
-				  plane->state->crtc_w, plane->state->crtc_h,
-				  plane->state->src_x, plane->state->src_y,
-				  plane->state->src_w, plane->state->src_h);
+	return drm_plane_helper_update(plane, plane->crtc, plane->state->fb,
+				       plane->state->crtc_x, plane->state->crtc_y,
+				       plane->state->crtc_w, plane->state->crtc_h,
+				       plane->state->src_x, plane->state->src_y,
+				       plane->state->src_w, plane->state->src_h);
 }
 
 static uint32_t ilk_plane_formats[] = {
@@ -1263,6 +1294,7 @@
 		intel_plane->max_downscale = 1;
 		intel_plane->update_plane = skl_update_plane;
 		intel_plane->disable_plane = skl_disable_plane;
+		state->scaler_id = -1;
 
 		plane_formats = skl_plane_formats;
 		num_plane_formats = ARRAY_SIZE(skl_plane_formats);
@@ -1276,6 +1308,7 @@
 	intel_plane->plane = plane;
 	intel_plane->check_plane = intel_check_sprite_plane;
 	intel_plane->commit_plane = intel_commit_sprite_plane;
+	intel_plane->ckey.flags = I915_SET_COLORKEY_NONE;
 	possible_crtcs = (1 << pipe);
 	ret = drm_universal_plane_init(dev, &intel_plane->base, possible_crtcs,
 				       &intel_plane_funcs,
@@ -1286,16 +1319,7 @@
 		goto out;
 	}
 
-	if (!dev->mode_config.rotation_property)
-		dev->mode_config.rotation_property =
-			drm_mode_create_rotation_property(dev,
-							  BIT(DRM_ROTATE_0) |
-							  BIT(DRM_ROTATE_180));
-
-	if (dev->mode_config.rotation_property)
-		drm_object_attach_property(&intel_plane->base.base,
-					   dev->mode_config.rotation_property,
-					   state->base.rotation);
+	intel_create_rotation_property(dev, intel_plane);
 
 	drm_plane_helper_add(&intel_plane->base, &intel_plane_helper_funcs);
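
The open-coded rotation property setup removed above is replaced by a call to
intel_create_rotation_property(); its body is added elsewhere in this series
and is not shown in this hunk. Based on the code it replaces, it presumably
looks roughly like the sketch below (the exact rotation mask, e.g. 90/270
support on SKL, may differ):

	static void intel_create_rotation_property(struct drm_device *dev,
						   struct intel_plane *plane)
	{
		if (!dev->mode_config.rotation_property)
			dev->mode_config.rotation_property =
				drm_mode_create_rotation_property(dev,
								  BIT(DRM_ROTATE_0) |
								  BIT(DRM_ROTATE_180));

		if (dev->mode_config.rotation_property)
			drm_object_attach_property(&plane->base.base,
						   dev->mode_config.rotation_property,
						   plane->base.state->rotation);
	}
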
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index ff2a746..a6d8a3e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -383,6 +383,26 @@
 	intel_disable_gt_powersave(dev);
 }
 
+static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
+					 enum forcewake_domains fw_domains)
+{
+	struct intel_uncore_forcewake_domain *domain;
+	enum forcewake_domain_id id;
+
+	if (!dev_priv->uncore.funcs.force_wake_get)
+		return;
+
+	fw_domains &= dev_priv->uncore.fw_domains;
+
+	for_each_fw_domain_mask(domain, fw_domains, dev_priv, id) {
+		if (domain->wake_count++)
+			fw_domains &= ~(1 << id);
+	}
+
+	if (fw_domains)
+		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
+}
+
 /**
  * intel_uncore_forcewake_get - grab forcewake domain references
  * @dev_priv: i915 device instance
@@ -400,27 +420,57 @@
 				enum forcewake_domains fw_domains)
 {
 	unsigned long irqflags;
-	struct intel_uncore_forcewake_domain *domain;
-	enum forcewake_domain_id id;
 
 	if (!dev_priv->uncore.funcs.force_wake_get)
 		return;
 
 	WARN_ON(dev_priv->pm.suspended);
 
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	__intel_uncore_forcewake_get(dev_priv, fw_domains);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+}
+
+/**
+ * intel_uncore_forcewake_get__locked - grab forcewake domain references
+ * @dev_priv: i915 device instance
+ * @fw_domains: forcewake domains to get reference on
+ *
+ * See intel_uncore_forcewake_get(). This variant places the onus
+ * on the caller to explicitly handle the dev_priv->uncore.lock spinlock.
+ */
+void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv,
+					enum forcewake_domains fw_domains)
+{
+	assert_spin_locked(&dev_priv->uncore.lock);
+
+	if (!dev_priv->uncore.funcs.force_wake_get)
+		return;
+
+	__intel_uncore_forcewake_get(dev_priv, fw_domains);
+}
+
+static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
+					 enum forcewake_domains fw_domains)
+{
+	struct intel_uncore_forcewake_domain *domain;
+	enum forcewake_domain_id id;
+
+	if (!dev_priv->uncore.funcs.force_wake_put)
+		return;
+
 	fw_domains &= dev_priv->uncore.fw_domains;
 
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
 	for_each_fw_domain_mask(domain, fw_domains, dev_priv, id) {
-		if (domain->wake_count++)
-			fw_domains &= ~(1 << id);
+		if (WARN_ON(domain->wake_count == 0))
+			continue;
+
+		if (--domain->wake_count)
+			continue;
+
+		domain->wake_count++;
+		fw_domain_arm_timer(domain);
 	}
-
-	if (fw_domains)
-		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
-
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
 /**
@@ -435,30 +485,34 @@
 				enum forcewake_domains fw_domains)
 {
 	unsigned long irqflags;
-	struct intel_uncore_forcewake_domain *domain;
-	enum forcewake_domain_id id;
 
 	if (!dev_priv->uncore.funcs.force_wake_put)
 		return;
 
-	fw_domains &= dev_priv->uncore.fw_domains;
-
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
-	for_each_fw_domain_mask(domain, fw_domains, dev_priv, id) {
-		if (WARN_ON(domain->wake_count == 0))
-			continue;
-
-		if (--domain->wake_count)
-			continue;
-
-		domain->wake_count++;
-		fw_domain_arm_timer(domain);
-	}
-
+	__intel_uncore_forcewake_put(dev_priv, fw_domains);
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+/**
+ * intel_uncore_forcewake_put__locked - release forcewake domain references
+ * @dev_priv: i915 device instance
+ * @fw_domains: forcewake domains to put references
+ *
+ * See intel_uncore_forcewake_put(). This variant places the onus
+ * on the caller to explicitly handle the dev_priv->uncore.lock spinlock.
+ */
+void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv,
+					enum forcewake_domains fw_domains)
+{
+	assert_spin_locked(&dev_priv->uncore.lock);
+
+	if (!dev_priv->uncore.funcs.force_wake_put)
+		return;
+
+	__intel_uncore_forcewake_put(dev_priv, fw_domains);
+}
+
 void assert_forcewakes_inactive(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore_forcewake_domain *domain;
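
The new __locked variants exist for callers that already hold
dev_priv->uncore.lock and therefore must not let the plain get/put helpers
try to take the spinlock again. A hedged usage sketch (the surrounding
function is hypothetical, not part of this patch):

	static void example_locked_sequence(struct drm_i915_private *dev_priv)
	{
		unsigned long irqflags;

		spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);

		/* lock already held: use the __locked variants */
		intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);

		/* ... access registers that require forcewake ... */

		intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);

		spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
	}
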
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index ca71c03..db9a30f 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -1263,6 +1263,7 @@
 			  bool is_primary);
 extern void drm_plane_cleanup(struct drm_plane *plane);
 extern unsigned int drm_plane_index(struct drm_plane *plane);
+extern struct drm_plane *drm_plane_from_index(struct drm_device *dev, int idx);
 extern void drm_plane_force_disable(struct drm_plane *plane);
 extern int drm_plane_check_pixel_format(const struct drm_plane *plane,
 					u32 format);
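
drm_plane_from_index() is the lookup counterpart of drm_plane_index(). A
hedged sketch of the intended round trip (the helper below is hypothetical):

	static struct drm_plane *example_plane_roundtrip(struct drm_device *dev,
							 struct drm_plane *plane)
	{
		unsigned int idx = drm_plane_index(plane);

		/* returns the same plane, or NULL if idx does not match one */
		return drm_plane_from_index(dev, idx);
	}
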
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 6133723..bd0d644 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -287,4 +287,10 @@
 	INTEL_SKL_GT3_IDS(info)
 
 
+#define INTEL_BXT_IDS(info) \
+	INTEL_VGA_DEVICE(0x0A84, info), \
+	INTEL_VGA_DEVICE(0x0A85, info), \
+	INTEL_VGA_DEVICE(0x0A86, info), \
+	INTEL_VGA_DEVICE(0x0A87, info)
+
 #endif /* _I915_PCIIDS_H */
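
INTEL_BXT_IDS() follows the same pattern as the other per-platform macros in
this header: it expands into a run of INTEL_VGA_DEVICE() initializers so an
entire platform can be dropped into a PCI device-ID table in one line. A
hedged sketch of such a table ("example_info" is a placeholder, not a real
intel_device_info instance):

	static const struct intel_device_info example_info = { };

	static const struct pci_device_id example_bxt_ids[] = {
		INTEL_BXT_IDS(&example_info),
		{ }
	};
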
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 551b673..4851d66 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -996,6 +996,7 @@
 /* flags */
 #define I915_OVERLAY_UPDATE_ATTRS	(1<<0)
 #define I915_OVERLAY_UPDATE_GAMMA	(1<<1)
+#define I915_OVERLAY_DISABLE_DEST_COLORKEY	(1<<2)
 struct drm_intel_overlay_attrs {
 	__u32 flags;
 	__u32 color_key;