Merge tag 'drm-intel-next-2016-06-20' of git://anongit.freedesktop.org/drm-intel into drm-next

- Infrastructure for GVT-g (paravirtualized gpu on gen8+), from Zhi Wang
- another attemp at nonblocking atomic plane updates
- bugfixes and refactoring for GuC doorbell code (Dave Gordon)
- GuC command submission enabled by default, if fw available (Dave Gordon)
- more bxt w/a (Arun Siluvery)
- bxt phy improvements (Imre Deak)
- prep work for stolen objects support (Ankitprasa Sharma & Chris Wilson)
- skl/bkl w/a update from Mika Kuoppala
- bunch of small improvements and fixes all over, as usual

* tag 'drm-intel-next-2016-06-20' of git://anongit.freedesktop.org/drm-intel: (81 commits)
  drm/i915: Update DRIVER_DATE to 20160620
  drm/i915: Introduce GVT context creation API
  drm/i915: Support LRC context single submission
  drm/i915: Introduce execlist context status change notification
  drm/i915: Make addressing mode bits in context descriptor configurable
  drm/i915: Make ring buffer size of a LRC context configurable
  drm/i915: gvt: Introduce the basic architecture of GVT-g
  drm/i915: Fold vGPU active check into inner functions
  drm/i915: Use offsetof() to calculate the offset of members in PVINFO page
  drm/i915: Factor out i915_pvinfo.h
  drm/i915: Serialise presentation with imported dmabufs
  drm/i915: Use atomic commits for legacy page_flips
  drm/i915: Move fb_bits updating later in atomic_commit
  drm/i915: nonblocking commit
  Reapply "drm/i915: Pass atomic states to fbc update, functions."
  drm/i915: Roll out the helper nonblock tracking
  drm/i915: Signal drm events for atomic
  drm/i915/ilk: Don't disable SSC source if it's in use
  drm/i915/guc: (re)initialise doorbell h/w when enabling GuC submission
  drm/i915/guc: replace assign_doorbell() with select_doorbell_register()
  ...
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index aef87fd..4431129 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -840,6 +840,14 @@
 	return false;
 }
 
+void intel_gtt_insert_page(dma_addr_t addr,
+			   unsigned int pg,
+			   unsigned int flags)
+{
+	intel_private.driver->write_entry(addr, pg, flags);
+}
+EXPORT_SYMBOL(intel_gtt_insert_page);
+
 void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags)
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 29a32b1..7769e46 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -57,6 +57,28 @@
 
 	  If in doubt, say "Y".
 
+config DRM_I915_GVT
+        bool "Enable Intel GVT-g graphics virtualization host support"
+        depends on DRM_I915
+        default n
+        help
+	  Choose this option if you want to enable Intel GVT-g graphics
+	  virtualization technology host support with integrated graphics.
+	  With GVT-g, it's possible to have one integrated graphics
+	  device shared by multiple VMs under different hypervisors.
+
+	  Note that at least one hypervisor like Xen or KVM is required for
+	  this driver to work, and it only supports newer device from
+	  Broadwell+. For further information and setup guide, you can
+	  visit: http://01.org/igvt-g.
+
+	  Now it's just a stub to support the modifications of i915 for
+	  GVT device model. It requires at least one MPT modules for Xen/KVM
+	  and other components of GVT device model to work. Use it under
+	  you own risk.
+
+	  If in doubt, say "N".
+
 menu "drm/i915 Debugging"
 depends on DRM_I915
 depends on EXPERT
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 7e29444..276abf1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -104,6 +104,11 @@
 # legacy horrors
 i915-y += i915_dma.o
 
+ifeq ($(CONFIG_DRM_I915_GVT),y)
+i915-y += intel_gvt.o
+include $(src)/gvt/Makefile
+endif
+
 obj-$(CONFIG_DRM_I915)  += i915.o
 
 CFLAGS_i915_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile
new file mode 100644
index 0000000..d0f21a6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/Makefile
@@ -0,0 +1,5 @@
+GVT_DIR := gvt
+GVT_SOURCE := gvt.o
+
+ccflags-y                      += -I$(src) -I$(src)/$(GVT_DIR) -Wall
+i915-y			       += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE))
diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h
new file mode 100644
index 0000000..7ef412b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/debug.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __GVT_DEBUG_H__
+#define __GVT_DEBUG_H__
+
+#define gvt_dbg_core(fmt, args...) \
+	DRM_DEBUG_DRIVER("gvt: core: "fmt, ##args)
+
+/*
+ * Other GVT debug stuff will be introduced in the GVT device model patches.
+ */
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
new file mode 100644
index 0000000..927f457
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <xen/xen.h>
+
+#include "i915_drv.h"
+
+struct intel_gvt_host intel_gvt_host;
+
+static const char * const supported_hypervisors[] = {
+	[INTEL_GVT_HYPERVISOR_XEN] = "XEN",
+	[INTEL_GVT_HYPERVISOR_KVM] = "KVM",
+};
+
+/**
+ * intel_gvt_init_host - Load MPT modules and detect if we're running in host
+ * @gvt: intel gvt device
+ *
+ * This function is called at the driver loading stage. If failed to find a
+ * loadable MPT module or detect currently we're running in a VM, then GVT-g
+ * will be disabled
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_gvt_init_host(void)
+{
+	if (intel_gvt_host.initialized)
+		return 0;
+
+	/* Xen DOM U */
+	if (xen_domain() && !xen_initial_domain())
+		return -ENODEV;
+
+	/* Try to load MPT modules for hypervisors */
+	if (xen_initial_domain()) {
+		/* In Xen dom0 */
+		intel_gvt_host.mpt = try_then_request_module(
+				symbol_get(xengt_mpt), "xengt");
+		intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_XEN;
+	} else {
+		/* not in Xen. Try KVMGT */
+		intel_gvt_host.mpt = try_then_request_module(
+				symbol_get(kvmgt_mpt), "kvm");
+		intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_KVM;
+	}
+
+	/* Fail to load MPT modules - bail out */
+	if (!intel_gvt_host.mpt)
+		return -EINVAL;
+
+	/* Try to detect if we're running in host instead of VM. */
+	if (!intel_gvt_hypervisor_detect_host())
+		return -ENODEV;
+
+	gvt_dbg_core("Running with hypervisor %s in host mode\n",
+			supported_hypervisors[intel_gvt_host.hypervisor_type]);
+
+	intel_gvt_host.initialized = true;
+	return 0;
+}
+
+static void init_device_info(struct intel_gvt *gvt)
+{
+	if (IS_BROADWELL(gvt->dev_priv))
+		gvt->device_info.max_support_vgpus = 8;
+	/* This function will grow large in GVT device model patches. */
+}
+
+/**
+ * intel_gvt_clean_device - clean a GVT device
+ * @gvt: intel gvt device
+ *
+ * This function is called at the driver unloading stage, to free the
+ * resources owned by a GVT device.
+ *
+ */
+void intel_gvt_clean_device(struct drm_i915_private *dev_priv)
+{
+	struct intel_gvt *gvt = &dev_priv->gvt;
+
+	if (WARN_ON(!gvt->initialized))
+		return;
+
+	/* Other de-initialization of GVT components will be introduced. */
+
+	gvt->initialized = false;
+}
+
+/**
+ * intel_gvt_init_device - initialize a GVT device
+ * @dev_priv: drm i915 private data
+ *
+ * This function is called at the initialization stage, to initialize
+ * necessary GVT components.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_gvt_init_device(struct drm_i915_private *dev_priv)
+{
+	struct intel_gvt *gvt = &dev_priv->gvt;
+	/*
+	 * Cannot initialize GVT device without intel_gvt_host gets
+	 * initialized first.
+	 */
+	if (WARN_ON(!intel_gvt_host.initialized))
+		return -EINVAL;
+
+	if (WARN_ON(gvt->initialized))
+		return -EEXIST;
+
+	gvt_dbg_core("init gvt device\n");
+
+	init_device_info(gvt);
+	/*
+	 * Other initialization of GVT components will be introduce here.
+	 */
+	gvt_dbg_core("gvt device creation is done\n");
+	gvt->initialized = true;
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
new file mode 100644
index 0000000..fb619a6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _GVT_H_
+#define _GVT_H_
+
+#include "debug.h"
+#include "hypercall.h"
+
+#define GVT_MAX_VGPU 8
+
+enum {
+	INTEL_GVT_HYPERVISOR_XEN = 0,
+	INTEL_GVT_HYPERVISOR_KVM,
+};
+
+struct intel_gvt_host {
+	bool initialized;
+	int hypervisor_type;
+	struct intel_gvt_mpt *mpt;
+};
+
+extern struct intel_gvt_host intel_gvt_host;
+
+/* Describe per-platform limitations. */
+struct intel_gvt_device_info {
+	u32 max_support_vgpus;
+	/* This data structure will grow bigger in GVT device model patches */
+};
+
+struct intel_vgpu {
+	struct intel_gvt *gvt;
+	int id;
+	unsigned long handle; /* vGPU handle used by hypervisor MPT modules */
+};
+
+struct intel_gvt {
+	struct mutex lock;
+	bool initialized;
+
+	struct drm_i915_private *dev_priv;
+	struct idr vgpu_idr;	/* vGPU IDR pool */
+
+	struct intel_gvt_device_info device_info;
+};
+
+#include "mpt.h"
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
new file mode 100644
index 0000000..254df8b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _GVT_HYPERCALL_H_
+#define _GVT_HYPERCALL_H_
+
+/*
+ * Specific GVT-g MPT modules function collections. Currently GVT-g supports
+ * both Xen and KVM by providing dedicated hypervisor-related MPT modules.
+ */
+struct intel_gvt_mpt {
+	int (*detect_host)(void);
+};
+
+extern struct intel_gvt_mpt xengt_mpt;
+extern struct intel_gvt_mpt kvmgt_mpt;
+
+#endif /* _GVT_HYPERCALL_H_ */
diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h
new file mode 100644
index 0000000..03601e3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/mpt.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _GVT_MPT_H_
+#define _GVT_MPT_H_
+
+/**
+ * DOC: Hypervisor Service APIs for GVT-g Core Logic
+ *
+ * This is the glue layer between specific hypervisor MPT modules and GVT-g core
+ * logic. Each kind of hypervisor MPT module provides a collection of function
+ * callbacks and will be attached to GVT host when the driver is loading.
+ * GVT-g core logic will call these APIs to request specific services from
+ * hypervisor.
+ */
+
+/**
+ * intel_gvt_hypervisor_detect_host - check if GVT-g is running within
+ * hypervisor host/privilged domain
+ *
+ * Returns:
+ * Zero on success, -ENODEV if current kernel is running inside a VM
+ */
+static inline int intel_gvt_hypervisor_detect_host(void)
+{
+	return intel_gvt_host.mpt->detect_host();
+}
+
+#endif /* _GVT_MPT_H_ */
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index d97f28b..b0fd6a7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -737,7 +737,7 @@
 
 /**
  * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer
- * @ring: the ringbuffer to initialize
+ * @engine: the engine to initialize
  *
  * Optionally initializes fields related to batch buffer command parsing in the
  * struct intel_engine_cs based on whether the platform requires software
@@ -830,7 +830,7 @@
 
 /**
  * i915_cmd_parser_fini_ring() - clean up cmd parser related fields
- * @ring: the ringbuffer to clean up
+ * @engine: the engine to clean up
  *
  * Releases any resources related to command parsing that may have been
  * initialized for the specified ring.
@@ -1024,7 +1024,7 @@
 
 /**
  * i915_needs_cmd_parser() - should a given ring use software command parsing?
- * @ring: the ring in question
+ * @engine: the engine in question
  *
  * Only certain platforms require software batch buffer command parsing, and
  * only when enabled via module parameter.
@@ -1176,7 +1176,7 @@
 
 /**
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
- * @ring: the ring on which the batch is to execute
+ * @engine: the engine on which the batch is to execute
  * @batch_obj: the batch buffer in question
  * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
@@ -1281,6 +1281,7 @@
 
 /**
  * i915_cmd_parser_get_version() - get the cmd parser version number
+ * @dev_priv: i915 device private
  *
  * The cmd parser maintains a simple increasing integer version number suitable
  * for passing to userspace clients to determine what operations are permitted.
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 614ac08..5b75266 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2574,6 +2574,10 @@
 
 	mutex_unlock(&dev->struct_mutex);
 
+	seq_printf(m, "Doorbell map:\n");
+	seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc.doorbell_bitmap);
+	seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc.db_cacheline);
+
 	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
 	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
 	seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd);
@@ -5306,6 +5310,10 @@
 		   INTEL_INFO(dev)->eu_total);
 	seq_printf(m, "  Available EU Per Subslice: %u\n",
 		   INTEL_INFO(dev)->eu_per_subslice);
+	seq_printf(m, "  Has Pooled EU: %s\n", yesno(HAS_POOLED_EU(dev)));
+	if (HAS_POOLED_EU(dev))
+		seq_printf(m, "  Min EU in pool: %u\n",
+			   INTEL_INFO(dev)->min_eu_in_pool);
 	seq_printf(m, "  Has Slice Power Gating: %s\n",
 		   yesno(INTEL_INFO(dev)->has_slice_pg));
 	seq_printf(m, "  Has Subslice Power Gating: %s\n",
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 07edaed..d15a461f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -764,6 +764,32 @@
 			       (info->slice_total > 1));
 	info->has_subslice_pg = (IS_BROXTON(dev) && (info->subslice_total > 1));
 	info->has_eu_pg = (info->eu_per_subslice > 2);
+
+	if (IS_BROXTON(dev)) {
+#define IS_SS_DISABLED(_ss_disable, ss)    (_ss_disable & (0x1 << ss))
+		/*
+		 * There is a HW issue in 2x6 fused down parts that requires
+		 * Pooled EU to be enabled as a WA. The pool configuration
+		 * changes depending upon which subslice is fused down. This
+		 * doesn't affect if the device has all 3 subslices enabled.
+		 */
+		/* WaEnablePooledEuFor2x6:bxt */
+		info->has_pooled_eu = ((info->subslice_per_slice == 3) ||
+				       (info->subslice_per_slice == 2 &&
+					INTEL_REVID(dev) < BXT_REVID_C0));
+
+		info->min_eu_in_pool = 0;
+		if (info->has_pooled_eu) {
+			if (IS_SS_DISABLED(ss_disable, 0) ||
+			    IS_SS_DISABLED(ss_disable, 2))
+				info->min_eu_in_pool = 3;
+			else if (IS_SS_DISABLED(ss_disable, 1))
+				info->min_eu_in_pool = 6;
+			else
+				info->min_eu_in_pool = 9;
+		}
+#undef IS_SS_DISABLED
+	}
 }
 
 static void broadwell_sseu_info_init(struct drm_device *dev)
@@ -962,6 +988,9 @@
 	DRM_DEBUG_DRIVER("subslice per slice: %u\n", info->subslice_per_slice);
 	DRM_DEBUG_DRIVER("EU total: %u\n", info->eu_total);
 	DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->eu_per_subslice);
+	DRM_DEBUG_DRIVER("Has Pooled EU: %s\n", HAS_POOLED_EU(dev) ? "y" : "n");
+	if (HAS_POOLED_EU(dev))
+		DRM_DEBUG_DRIVER("Min EU in pool: %u\n", info->min_eu_in_pool);
 	DRM_DEBUG_DRIVER("has slice power gating: %s\n",
 			 info->has_slice_pg ? "y" : "n");
 	DRM_DEBUG_DRIVER("has subslice power gating: %s\n",
@@ -1091,6 +1120,10 @@
 	if (ret < 0)
 		return ret;
 
+	ret = intel_gvt_init(dev_priv);
+	if (ret < 0)
+		goto err_workqueues;
+
 	/* This must be called before any calls to HAS_PCH_* */
 	intel_detect_pch(dev);
 
@@ -1116,6 +1149,10 @@
 			 "It may not be fully functional.\n");
 
 	return 0;
+
+err_workqueues:
+	i915_workqueues_cleanup(dev_priv);
+	return ret;
 }
 
 /**
@@ -1487,6 +1524,8 @@
 
 	intel_fbdev_fini(dev);
 
+	intel_gvt_cleanup(dev_priv);
+
 	ret = i915_gem_suspend(dev);
 	if (ret) {
 		DRM_ERROR("failed to idle hardware: %d\n", ret);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 872c6060..3eb47fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -355,6 +355,7 @@
 	.has_ddi = 1,
 	.has_fpga_dbg = 1,
 	.has_fbc = 1,
+	.has_pooled_eu = 0,
 	GEN_DEFAULT_PIPEOFFSETS,
 	IVB_CURSOR_OFFSETS,
 	BDW_COLORS,
@@ -517,8 +518,10 @@
 			} else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
 				   (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) ||
 				   ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&
-				    pch->subsystem_vendor == 0x1af4 &&
-				    pch->subsystem_device == 0x1100)) {
+				    pch->subsystem_vendor ==
+					    PCI_SUBVENDOR_ID_REDHAT_QUMRANET &&
+				    pch->subsystem_device ==
+					    PCI_SUBDEVICE_ID_QEMU)) {
 				dev_priv->pch_type = intel_virt_detect_pch(dev);
 			} else
 				continue;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ee33865..24a86c6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -62,12 +62,14 @@
 #include "i915_gem_gtt.h"
 #include "i915_gem_render_state.h"
 
+#include "intel_gvt.h"
+
 /* General customization:
  */
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20160606"
+#define DRIVER_DATE		"20160620"
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -762,7 +764,8 @@
 	func(has_llc) sep \
 	func(has_snoop) sep \
 	func(has_ddi) sep \
-	func(has_fpga_dbg)
+	func(has_fpga_dbg) sep \
+	func(has_pooled_eu)
 
 #define DEFINE_FLAG(name) u8 name:1
 #define SEP_SEMICOLON ;
@@ -788,6 +791,7 @@
 	u8 subslice_per_slice;
 	u8 eu_total;
 	u8 eu_per_subslice;
+	u8 min_eu_in_pool;
 	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
 	u8 subslice_7eu[3];
 	u8 has_slice_pg:1;
@@ -877,6 +881,10 @@
 		int pin_count;
 		bool initialised;
 	} engine[I915_NUM_ENGINES];
+	u32 ring_size;
+	u32 desc_template;
+	struct atomic_notifier_head status_notifier;
+	bool execlists_force_single_submission;
 
 	struct list_head link;
 
@@ -1740,6 +1748,8 @@
 
 	struct i915_virtual_gpu vgpu;
 
+	struct intel_gvt gvt;
+
 	struct intel_guc guc;
 
 	struct intel_csr csr;
@@ -2718,6 +2728,15 @@
 
 #define IS_BXT_REVID(p, since, until) (IS_BROXTON(p) && IS_REVID(p, since, until))
 
+#define KBL_REVID_A0		0x0
+#define KBL_REVID_B0		0x1
+#define KBL_REVID_C0		0x2
+#define KBL_REVID_D0		0x3
+#define KBL_REVID_E0		0x4
+
+#define IS_KBL_REVID(p, since, until) \
+	(IS_KABYLAKE(p) && IS_REVID(p, since, until))
+
 /*
  * The genX designation typically refers to the render engine, so render
  * capability related checks should use IS_GEN, while display and other checks
@@ -2824,6 +2843,8 @@
 				 !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) && \
 				 !IS_BROXTON(dev))
 
+#define HAS_POOLED_EU(dev)	(INTEL_INFO(dev)->has_pooled_eu)
+
 #define INTEL_PCH_DEVICE_ID_MASK		0xff00
 #define INTEL_PCH_IBX_DEVICE_ID_TYPE		0x3b00
 #define INTEL_PCH_CPT_DEVICE_ID_TYPE		0x1c00
@@ -2941,6 +2962,12 @@
 u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
 
 void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
+
+static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
+{
+	return dev_priv->gvt.initialized;
+}
+
 static inline bool intel_vgpu_active(struct drm_i915_private *dev_priv)
 {
 	return dev_priv->vgpu.active;
@@ -3110,6 +3137,23 @@
 struct page *
 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
 
+static inline dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
+{
+	if (n < obj->get_page.last) {
+		obj->get_page.sg = obj->pages->sgl;
+		obj->get_page.last = 0;
+	}
+
+	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
+		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
+		if (unlikely(sg_is_chain(obj->get_page.sg)))
+			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
+	}
+
+	return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT);
+}
+
 static inline struct page *
 i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
 {
@@ -3432,6 +3476,8 @@
 void i915_gem_context_free(struct kref *ctx_ref);
 struct drm_i915_gem_object *
 i915_gem_alloc_context_obj(struct drm_device *dev, size_t size);
+struct i915_gem_context *
+i915_gem_context_create_gvt(struct drm_device *dev);
 
 static inline struct i915_gem_context *
 i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
@@ -3620,6 +3666,7 @@
 bool intel_bios_is_valid_vbt(const void *buf, size_t size);
 bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv);
 bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin);
+bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port);
 bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port);
 bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port);
 bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 343d881..21d0dea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -54,12 +54,33 @@
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
+	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+		return false;
+
 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
 		return true;
 
 	return obj->pin_display;
 }
 
+static int
+insert_mappable_node(struct drm_i915_private *i915,
+                     struct drm_mm_node *node, u32 size)
+{
+	memset(node, 0, sizeof(*node));
+	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
+						   size, 0, 0, 0,
+						   i915->ggtt.mappable_end,
+						   DRM_MM_SEARCH_DEFAULT,
+						   DRM_MM_CREATE_DEFAULT);
+}
+
+static void
+remove_mappable_node(struct drm_mm_node *node)
+{
+	drm_mm_remove_node(node);
+}
+
 /* some bookkeeping */
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
 				  size_t size)
@@ -409,6 +430,9 @@
 
 /**
  * Creates a new mm object and returns a handle to it.
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
  */
 int
 i915_gem_create_ioctl(struct drm_device *dev, void *data,
@@ -585,6 +609,142 @@
 	return ret ? - EFAULT : 0;
 }
 
+static inline unsigned long
+slow_user_access(struct io_mapping *mapping,
+		 uint64_t page_base, int page_offset,
+		 char __user *user_data,
+		 unsigned long length, bool pwrite)
+{
+	void __iomem *ioaddr;
+	void *vaddr;
+	uint64_t unwritten;
+
+	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
+	/* We can use the cpu mem copy function because this is X86. */
+	vaddr = (void __force *)ioaddr + page_offset;
+	if (pwrite)
+		unwritten = __copy_from_user(vaddr, user_data, length);
+	else
+		unwritten = __copy_to_user(user_data, vaddr, length);
+
+	io_mapping_unmap(ioaddr);
+	return unwritten;
+}
+
+static int
+i915_gem_gtt_pread(struct drm_device *dev,
+		   struct drm_i915_gem_object *obj, uint64_t size,
+		   uint64_t data_offset, uint64_t data_ptr)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct drm_mm_node node;
+	char __user *user_data;
+	uint64_t remain;
+	uint64_t offset;
+	int ret;
+
+	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+	if (ret) {
+		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
+		if (ret)
+			goto out;
+
+		ret = i915_gem_object_get_pages(obj);
+		if (ret) {
+			remove_mappable_node(&node);
+			goto out;
+		}
+
+		i915_gem_object_pin_pages(obj);
+	} else {
+		node.start = i915_gem_obj_ggtt_offset(obj);
+		node.allocated = false;
+		ret = i915_gem_object_put_fence(obj);
+		if (ret)
+			goto out_unpin;
+	}
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	if (ret)
+		goto out_unpin;
+
+	user_data = u64_to_user_ptr(data_ptr);
+	remain = size;
+	offset = data_offset;
+
+	mutex_unlock(&dev->struct_mutex);
+	if (likely(!i915.prefault_disable)) {
+		ret = fault_in_multipages_writeable(user_data, remain);
+		if (ret) {
+			mutex_lock(&dev->struct_mutex);
+			goto out_unpin;
+		}
+	}
+
+	while (remain > 0) {
+		/* Operation in this page
+		 *
+		 * page_base = page offset within aperture
+		 * page_offset = offset within page
+		 * page_length = bytes to copy for this page
+		 */
+		u32 page_base = node.start;
+		unsigned page_offset = offset_in_page(offset);
+		unsigned page_length = PAGE_SIZE - page_offset;
+		page_length = remain < page_length ? remain : page_length;
+		if (node.allocated) {
+			wmb();
+			ggtt->base.insert_page(&ggtt->base,
+					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
+					       node.start,
+					       I915_CACHE_NONE, 0);
+			wmb();
+		} else {
+			page_base += offset & PAGE_MASK;
+		}
+		/* This is a slow read/write as it tries to read from
+		 * and write to user memory which may result into page
+		 * faults, and so we cannot perform this under struct_mutex.
+		 */
+		if (slow_user_access(ggtt->mappable, page_base,
+				     page_offset, user_data,
+				     page_length, false)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		remain -= page_length;
+		user_data += page_length;
+		offset += page_length;
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
+		/* The user has modified the object whilst we tried
+		 * reading from it, and we now have no idea what domain
+		 * the pages should be in. As we have just been touching
+		 * them directly, flush everything back to the GTT
+		 * domain.
+		 */
+		ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	}
+
+out_unpin:
+	if (node.allocated) {
+		wmb();
+		ggtt->base.clear_range(&ggtt->base,
+				       node.start, node.size,
+				       true);
+		i915_gem_object_unpin_pages(obj);
+		remove_mappable_node(&node);
+	} else {
+		i915_gem_object_ggtt_unpin(obj);
+	}
+out:
+	return ret;
+}
+
 static int
 i915_gem_shmem_pread(struct drm_device *dev,
 		     struct drm_i915_gem_object *obj,
@@ -600,6 +760,9 @@
 	int needs_clflush = 0;
 	struct sg_page_iter sg_iter;
 
+	if (!obj->base.filp)
+		return -ENODEV;
+
 	user_data = u64_to_user_ptr(args->data_ptr);
 	remain = args->size;
 
@@ -672,6 +835,9 @@
 
 /**
  * Reads data from the object referenced by handle.
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
  *
  * On error, the contents of *data are undefined.
  */
@@ -708,18 +874,15 @@
 		goto out;
 	}
 
-	/* prime objects have no backing filp to GEM pread/pwrite
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	trace_i915_gem_object_pread(obj, args->offset, args->size);
 
 	ret = i915_gem_shmem_pread(dev, obj, args, file);
 
+	/* pread for non shmem backed objects */
+	if (ret == -EFAULT || ret == -ENODEV)
+		ret = i915_gem_gtt_pread(dev, obj, args->size,
+					args->offset, args->data_ptr);
+
 out:
 	drm_gem_object_unreference(&obj->base);
 unlock:
@@ -753,60 +916,99 @@
 /**
  * This is the fast pwrite path, where we copy the data directly from the
  * user into the GTT, uncached.
+ * @dev: drm device pointer
+ * @obj: i915 gem object
+ * @args: pwrite arguments structure
+ * @file: drm file pointer
  */
 static int
-i915_gem_gtt_pwrite_fast(struct drm_device *dev,
+i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 			 struct drm_i915_gem_object *obj,
 			 struct drm_i915_gem_pwrite *args,
 			 struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	ssize_t remain;
-	loff_t offset, page_base;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct drm_device *dev = obj->base.dev;
+	struct drm_mm_node node;
+	uint64_t remain, offset;
 	char __user *user_data;
-	int page_offset, page_length, ret;
+	int ret;
+	bool hit_slow_path = false;
+
+	if (obj->tiling_mode != I915_TILING_NONE)
+		return -EFAULT;
 
 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
-	if (ret)
-		goto out;
+	if (ret) {
+		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
+		if (ret)
+			goto out;
+
+		ret = i915_gem_object_get_pages(obj);
+		if (ret) {
+			remove_mappable_node(&node);
+			goto out;
+		}
+
+		i915_gem_object_pin_pages(obj);
+	} else {
+		node.start = i915_gem_obj_ggtt_offset(obj);
+		node.allocated = false;
+		ret = i915_gem_object_put_fence(obj);
+		if (ret)
+			goto out_unpin;
+	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
 		goto out_unpin;
 
-	ret = i915_gem_object_put_fence(obj);
-	if (ret)
-		goto out_unpin;
+	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+	obj->dirty = true;
 
 	user_data = u64_to_user_ptr(args->data_ptr);
+	offset = args->offset;
 	remain = args->size;
-
-	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
-
-	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
-
-	while (remain > 0) {
+	while (remain) {
 		/* Operation in this page
 		 *
 		 * page_base = page offset within aperture
 		 * page_offset = offset within page
 		 * page_length = bytes to copy for this page
 		 */
-		page_base = offset & PAGE_MASK;
-		page_offset = offset_in_page(offset);
-		page_length = remain;
-		if ((page_offset + remain) > PAGE_SIZE)
-			page_length = PAGE_SIZE - page_offset;
-
+		u32 page_base = node.start;
+		unsigned page_offset = offset_in_page(offset);
+		unsigned page_length = PAGE_SIZE - page_offset;
+		page_length = remain < page_length ? remain : page_length;
+		if (node.allocated) {
+			wmb(); /* flush the write before we modify the GGTT */
+			ggtt->base.insert_page(&ggtt->base,
+					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
+					       node.start, I915_CACHE_NONE, 0);
+			wmb(); /* flush modifications to the GGTT (insert_page) */
+		} else {
+			page_base += offset & PAGE_MASK;
+		}
 		/* If we get a fault while copying data, then (presumably) our
 		 * source page isn't available.  Return the error and we'll
 		 * retry in the slow path.
+		 * If the object is non-shmem backed, we retry again with the
+		 * path that handles page fault.
 		 */
 		if (fast_user_write(ggtt->mappable, page_base,
 				    page_offset, user_data, page_length)) {
-			ret = -EFAULT;
-			goto out_flush;
+			hit_slow_path = true;
+			mutex_unlock(&dev->struct_mutex);
+			if (slow_user_access(ggtt->mappable,
+					     page_base,
+					     page_offset, user_data,
+					     page_length, true)) {
+				ret = -EFAULT;
+				mutex_lock(&dev->struct_mutex);
+				goto out_flush;
+			}
+
+			mutex_lock(&dev->struct_mutex);
 		}
 
 		remain -= page_length;
@@ -815,9 +1017,31 @@
 	}
 
 out_flush:
+	if (hit_slow_path) {
+		if (ret == 0 &&
+		    (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
+			/* The user has modified the object whilst we tried
+			 * reading from it, and we now have no idea what domain
+			 * the pages should be in. As we have just been touching
+			 * them directly, flush everything back to the GTT
+			 * domain.
+			 */
+			ret = i915_gem_object_set_to_gtt_domain(obj, false);
+		}
+	}
+
 	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 out_unpin:
-	i915_gem_object_ggtt_unpin(obj);
+	if (node.allocated) {
+		wmb();
+		ggtt->base.clear_range(&ggtt->base,
+				       node.start, node.size,
+				       true);
+		i915_gem_object_unpin_pages(obj);
+		remove_mappable_node(&node);
+	} else {
+		i915_gem_object_ggtt_unpin(obj);
+	}
 out:
 	return ret;
 }
@@ -1016,6 +1240,9 @@
 
 /**
  * Writes data to the object referenced by handle.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
  *
  * On error, the contents of the buffer that were to be modified are undefined.
  */
@@ -1062,14 +1289,6 @@
 		goto out;
 	}
 
-	/* prime objects have no backing filp to GEM pread/pwrite
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
 	ret = -EFAULT;
@@ -1079,20 +1298,20 @@
 	 * pread/pwrite currently are reading and writing from the CPU
 	 * perspective, requiring manual detiling by the client.
 	 */
-	if (obj->tiling_mode == I915_TILING_NONE &&
-	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
-	    cpu_write_needs_clflush(obj)) {
-		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
+	if (!obj->base.filp || cpu_write_needs_clflush(obj)) {
+		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
 		 * textures). Fallback to the shmem path in that case. */
 	}
 
-	if (ret == -EFAULT || ret == -ENOSPC) {
+	if (ret == -EFAULT) {
 		if (obj->phys_handle)
 			ret = i915_gem_phys_pwrite(obj, args, file);
-		else
+		else if (obj->base.filp)
 			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+		else
+			ret = -ENODEV;
 	}
 
 out:
@@ -1213,6 +1432,7 @@
  * @req: duh!
  * @interruptible: do an interruptible wait (normally yes)
  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
+ * @rps: RPS client
  *
  * Note: It is of utmost importance that the passed in seqno and reset_counter
  * values have been read by the caller in an smp safe manner. Where read-side
@@ -1446,6 +1666,7 @@
 /**
  * Waits for a request to be signaled, and cleans up the
  * request and object lists appropriately for that event.
+ * @req: request to wait on
  */
 int
 i915_wait_request(struct drm_i915_gem_request *req)
@@ -1472,6 +1693,8 @@
 /**
  * Ensures that all rendering to the object has completed and the object is
  * safe to unbind from the GTT or access from the CPU.
+ * @obj: i915 gem object
+ * @readonly: waiting for read access or write
  */
 int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
@@ -1589,6 +1812,9 @@
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
  */
 int
 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
@@ -1652,6 +1878,9 @@
 
 /**
  * Called when user space has done writes to this buffer
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
  */
 int
 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
@@ -1682,8 +1911,11 @@
 }
 
 /**
- * Maps the contents of an object, returning the address it is mapped
- * into.
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ *			 it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
  *
  * While the mapping holds a reference on the contents of the object, it doesn't
  * imply a ref on the object itself.
@@ -2001,7 +2233,10 @@
 
 /**
  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
- * @obj: object to check
+ * @dev: drm device
+ * @size: object size
+ * @tiling_mode: tiling mode
+ * @fenced: is fenced alignemned required or not
  *
  * Return the required GTT alignment for an object, taking into account
  * potential fence register mapping.
@@ -2951,6 +3186,7 @@
 
 /**
  * This function clears the request list as sequence numbers are passed.
+ * @engine: engine to retire requests on
  */
 void
 i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
@@ -3074,6 +3310,7 @@
  * Ensures that an object will eventually get non-busy by flushing any required
  * write domains, emitting any outstanding lazy request and retiring and
  * completed requests.
+ * @obj: object to flush
  */
 static int
 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
@@ -3099,7 +3336,9 @@
 
 /**
  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
- * @DRM_IOCTL_ARGS: standard ioctl arguments
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
  *
  * Returns 0 if successful, else an error is returned with the remaining time in
  * the timeout parameter.
@@ -3489,6 +3728,11 @@
 /**
  * Finds free space in the GTT aperture and binds the object or a view of it
  * there.
+ * @obj: object to bind
+ * @vm: address space to bind into
+ * @ggtt_view: global gtt view if applicable
+ * @alignment: requested alignment
+ * @flags: mask of PIN_* flags to use
  */
 static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
@@ -3746,6 +3990,8 @@
 
 /**
  * Moves a single object to the GTT read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
  *
  * This function returns when the move is complete, including waiting on
  * flushes to occur.
@@ -3817,6 +4063,8 @@
 
 /**
  * Changes the cache-level of an object across all VMA.
+ * @obj: object to act on
+ * @cache_level: new cache level to set for the object
  *
  * After this function returns, the object will be in the new cache-level
  * across all GTT and the contents of the backing storage will be coherent,
@@ -3926,9 +4174,7 @@
 	 * object is now coherent at its new cache level (with respect
 	 * to the access domain).
 	 */
-	if (obj->cache_dirty &&
-	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
-	    cpu_write_needs_clflush(obj)) {
+	if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
 		if (i915_gem_clflush_object(obj, true))
 			i915_gem_chipset_flush(to_i915(obj->base.dev));
 	}
@@ -4098,6 +4344,8 @@
 
 /**
  * Moves a single object to the CPU read, and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
  *
  * This function returns when the move is complete, including waiting on
  * flushes to occur.
@@ -4886,11 +5134,9 @@
 	intel_mocs_init_l3cc_table(dev);
 
 	/* We can't enable contexts until all firmware is loaded */
-	if (HAS_GUC(dev)) {
-		ret = intel_guc_setup(dev);
-		if (ret)
-			goto out;
-	}
+	ret = intel_guc_setup(dev);
+	if (ret)
+		goto out;
 
 	/*
 	 * Increment the next seqno by 0x100 so we have a visible break
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index a3b11aa..30d9b4f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -295,6 +295,10 @@
 	ctx->remap_slice = ALL_L3_SLICES(dev_priv);
 
 	ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
+	ctx->ring_size = 4 * PAGE_SIZE;
+	ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
+			     GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);
 
 	return ctx;
 
@@ -339,6 +343,40 @@
 	return ctx;
 }
 
+/**
+ * i915_gem_context_create_gvt - create a GVT GEM context
+ * @dev: drm device *
+ *
+ * This function is used to create a GVT specific GEM context.
+ *
+ * Returns:
+ * pointer to i915_gem_context on success, error pointer if failed
+ *
+ */
+struct i915_gem_context *
+i915_gem_context_create_gvt(struct drm_device *dev)
+{
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
+		return ERR_PTR(-ENODEV);
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ctx = i915_gem_create_context(dev, NULL);
+	if (IS_ERR(ctx))
+		goto out;
+
+	ctx->execlists_force_single_submission = true;
+	ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
+out:
+	mutex_unlock(&dev->struct_mutex);
+	return ctx;
+}
+
 static void i915_gem_context_unpin(struct i915_gem_context *ctx,
 				   struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.h b/drivers/gpu/drm/i915/i915_gem_dmabuf.h
new file mode 100644
index 0000000..9131555
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _I915_GEM_DMABUF_H_
+#define _I915_GEM_DMABUF_H_
+
+#include <linux/dma-buf.h>
+
+static inline struct reservation_object *
+i915_gem_object_get_dmabuf_resv(struct drm_i915_gem_object *obj)
+{
+	struct dma_buf *dma_buf;
+
+	if (obj->base.dma_buf)
+		dma_buf = obj->base.dma_buf;
+	else if (obj->base.import_attach)
+		dma_buf = obj->base.import_attach->dmabuf;
+	else
+		return NULL;
+
+	return dma_buf->resv;
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4668477..5890017 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2355,6 +2355,28 @@
 #endif
 }
 
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level level,
+				  u32 unused)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	gen8_pte_t __iomem *pte =
+		(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
+		(offset >> PAGE_SHIFT);
+	int rpm_atomic_seq;
+
+	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
+
+	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
+
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+
+	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+}
+
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     uint64_t start,
@@ -2424,6 +2446,28 @@
 	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
 }
 
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level level,
+				  u32 flags)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	gen6_pte_t __iomem *pte =
+		(gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
+		(offset >> PAGE_SHIFT);
+	int rpm_atomic_seq;
+
+	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
+
+	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
+
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+
+	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+}
+
 /*
  * Binds an object into the global gtt with the specified cache level. The object
  * will be accessible to the GPU via commands whose operands reference offsets
@@ -2543,6 +2587,24 @@
 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
+static void i915_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level cache_level,
+				  u32 unused)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+	int rpm_atomic_seq;
+
+	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
+
+	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+
+	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+}
+
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *pages,
 				     uint64_t start,
@@ -2732,11 +2794,9 @@
 	i915_address_space_init(&ggtt->base, dev_priv);
 	ggtt->base.total += PAGE_SIZE;
 
-	if (intel_vgpu_active(dev_priv)) {
-		ret = intel_vgt_balloon(dev);
-		if (ret)
-			return ret;
-	}
+	ret = intel_vgt_balloon(dev_priv);
+	if (ret)
+		return ret;
 
 	if (!HAS_LLC(dev))
 		ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
@@ -2836,8 +2896,7 @@
 	i915_gem_cleanup_stolen(dev);
 
 	if (drm_mm_initialized(&ggtt->base.mm)) {
-		if (intel_vgpu_active(dev_priv))
-			intel_vgt_deballoon();
+		intel_vgt_deballoon(dev_priv);
 
 		drm_mm_takedown(&ggtt->base.mm);
 		list_del(&ggtt->base.global_link);
@@ -3076,7 +3135,7 @@
 
 	ggtt->base.bind_vma = ggtt_bind_vma;
 	ggtt->base.unbind_vma = ggtt_unbind_vma;
-
+	ggtt->base.insert_page = gen8_ggtt_insert_page;
 	ggtt->base.clear_range = nop_clear_range;
 	if (!USES_FULL_PPGTT(dev_priv))
 		ggtt->base.clear_range = gen8_ggtt_clear_range;
@@ -3116,6 +3175,7 @@
 	ret = ggtt_probe_common(dev, ggtt->size);
 
 	ggtt->base.clear_range = gen6_ggtt_clear_range;
+	ggtt->base.insert_page = gen6_ggtt_insert_page;
 	ggtt->base.insert_entries = gen6_ggtt_insert_entries;
 	ggtt->base.bind_vma = ggtt_bind_vma;
 	ggtt->base.unbind_vma = ggtt_unbind_vma;
@@ -3147,6 +3207,7 @@
 		      &ggtt->mappable_base, &ggtt->mappable_end);
 
 	ggtt->do_idle_maps = needs_idle_maps(dev_priv->dev);
+	ggtt->base.insert_page = i915_ggtt_insert_page;
 	ggtt->base.insert_entries = i915_ggtt_insert_entries;
 	ggtt->base.clear_range = i915_ggtt_clear_range;
 	ggtt->base.bind_vma = ggtt_bind_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 62be77c..163b564 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -319,6 +319,11 @@
 			    uint64_t start,
 			    uint64_t length,
 			    bool use_scratch);
+	void (*insert_page)(struct i915_address_space *vm,
+			    dma_addr_t addr,
+			    uint64_t offset,
+			    enum i915_cache_level cache_level,
+			    u32 flags);
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
 			       uint64_t start,
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 7c93327..b7c1b5f 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -94,6 +94,7 @@
 
 static int render_state_setup(struct render_state *so)
 {
+	struct drm_device *dev = so->obj->base.dev;
 	const struct intel_renderstate_rodata *rodata = so->rodata;
 	unsigned int i = 0, reloc_index = 0;
 	struct page *page;
@@ -135,6 +136,33 @@
 
 	so->aux_batch_offset = i * sizeof(u32);
 
+	if (HAS_POOLED_EU(dev)) {
+		/*
+		 * We always program 3x6 pool config but depending upon which
+		 * subslice is disabled HW drops down to appropriate config
+		 * shown below.
+		 *
+		 * In the below table 2x6 config always refers to
+		 * fused-down version, native 2x6 is not available and can
+		 * be ignored
+		 *
+		 * SNo  subslices config                eu pool configuration
+		 * -----------------------------------------------------------
+		 * 1    3 subslices enabled (3x6)  -    0x00777000  (9+9)
+		 * 2    ss0 disabled (2x6)         -    0x00777000  (3+9)
+		 * 3    ss1 disabled (2x6)         -    0x00770000  (6+6)
+		 * 4    ss2 disabled (2x6)         -    0x00007000  (9+3)
+		 */
+		u32 eu_pool_config = 0x00777000;
+
+		OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
+		OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
+		OUT_BATCH(d, i, eu_pool_config);
+		OUT_BATCH(d, i, 0);
+		OUT_BATCH(d, i, 0);
+		OUT_BATCH(d, i, 0);
+	}
+
 	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
 	so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index f9253f2..e9cd822 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -55,8 +55,10 @@
 		return -ENODEV;
 
 	/* See the comment at the drm_mm_init() call for more about this check.
-	 * WaSkipStolenMemoryFirstPage:bdw,chv (incomplete) */
-	if (IS_GEN8(dev_priv) && start < 4096)
+	 * WaSkipStolenMemoryFirstPage:bdw,chv,kbl (incomplete)
+	 */
+	if (start < 4096 && (IS_GEN8(dev_priv) ||
+			     IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)))
 		start = 4096;
 
 	mutex_lock(&dev_priv->mm.stolen_lock);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index ac72451..22a55ac 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -174,96 +174,90 @@
  * client object which contains the page being used for the doorbell
  */
 
-static void guc_init_doorbell(struct intel_guc *guc,
-			      struct i915_guc_client *client)
+static int guc_update_doorbell_id(struct intel_guc *guc,
+				  struct i915_guc_client *client,
+				  u16 new_id)
 {
+	struct sg_table *sg = guc->ctx_pool_obj->pages;
+	void *doorbell_bitmap = guc->doorbell_bitmap;
 	struct guc_doorbell_info *doorbell;
+	struct guc_context_desc desc;
+	size_t len;
 
 	doorbell = client->client_base + client->doorbell_offset;
 
-	doorbell->db_status = GUC_DOORBELL_ENABLED;
-	doorbell->cookie = 0;
-}
-
-static int guc_ring_doorbell(struct i915_guc_client *gc)
-{
-	struct guc_process_desc *desc;
-	union guc_doorbell_qw db_cmp, db_exc, db_ret;
-	union guc_doorbell_qw *db;
-	int attempt = 2, ret = -EAGAIN;
-
-	desc = gc->client_base + gc->proc_desc_offset;
-
-	/* Update the tail so it is visible to GuC */
-	desc->tail = gc->wq_tail;
-
-	/* current cookie */
-	db_cmp.db_status = GUC_DOORBELL_ENABLED;
-	db_cmp.cookie = gc->cookie;
-
-	/* cookie to be updated */
-	db_exc.db_status = GUC_DOORBELL_ENABLED;
-	db_exc.cookie = gc->cookie + 1;
-	if (db_exc.cookie == 0)
-		db_exc.cookie = 1;
-
-	/* pointer of current doorbell cacheline */
-	db = gc->client_base + gc->doorbell_offset;
-
-	while (attempt--) {
-		/* lets ring the doorbell */
-		db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
-			db_cmp.value_qw, db_exc.value_qw);
-
-		/* if the exchange was successfully executed */
-		if (db_ret.value_qw == db_cmp.value_qw) {
-			/* db was successfully rung */
-			gc->cookie = db_exc.cookie;
-			ret = 0;
-			break;
-		}
-
-		/* XXX: doorbell was lost and need to acquire it again */
-		if (db_ret.db_status == GUC_DOORBELL_DISABLED)
-			break;
-
-		DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n",
-			  db_cmp.cookie, db_ret.cookie);
-
-		/* update the cookie to newly read cookie from GuC */
-		db_cmp.cookie = db_ret.cookie;
-		db_exc.cookie = db_ret.cookie + 1;
-		if (db_exc.cookie == 0)
-			db_exc.cookie = 1;
+	if (client->doorbell_id != GUC_INVALID_DOORBELL_ID &&
+	    test_bit(client->doorbell_id, doorbell_bitmap)) {
+		/* Deactivate the old doorbell */
+		doorbell->db_status = GUC_DOORBELL_DISABLED;
+		(void)host2guc_release_doorbell(guc, client);
+		__clear_bit(client->doorbell_id, doorbell_bitmap);
 	}
 
-	return ret;
+	/* Update the GuC's idea of the doorbell ID */
+	len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
+			     sizeof(desc) * client->ctx_index);
+	if (len != sizeof(desc))
+		return -EFAULT;
+	desc.db_id = new_id;
+	len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
+			     sizeof(desc) * client->ctx_index);
+	if (len != sizeof(desc))
+		return -EFAULT;
+
+	client->doorbell_id = new_id;
+	if (new_id == GUC_INVALID_DOORBELL_ID)
+		return 0;
+
+	/* Activate the new doorbell */
+	__set_bit(new_id, doorbell_bitmap);
+	doorbell->cookie = 0;
+	doorbell->db_status = GUC_DOORBELL_ENABLED;
+	return host2guc_allocate_doorbell(guc, client);
+}
+
+static int guc_init_doorbell(struct intel_guc *guc,
+			      struct i915_guc_client *client,
+			      uint16_t db_id)
+{
+	return guc_update_doorbell_id(guc, client, db_id);
 }
 
 static void guc_disable_doorbell(struct intel_guc *guc,
 				 struct i915_guc_client *client)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct guc_doorbell_info *doorbell;
-	i915_reg_t drbreg = GEN8_DRBREGL(client->doorbell_id);
-	int value;
-
-	doorbell = client->client_base + client->doorbell_offset;
-
-	doorbell->db_status = GUC_DOORBELL_DISABLED;
-
-	I915_WRITE(drbreg, I915_READ(drbreg) & ~GEN8_DRB_VALID);
-
-	value = I915_READ(drbreg);
-	WARN_ON((value & GEN8_DRB_VALID) != 0);
-
-	I915_WRITE(GEN8_DRBREGU(client->doorbell_id), 0);
-	I915_WRITE(drbreg, 0);
+	(void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
 
 	/* XXX: wait for any interrupts */
 	/* XXX: wait for workqueue to drain */
 }
 
+static uint16_t
+select_doorbell_register(struct intel_guc *guc, uint32_t priority)
+{
+	/*
+	 * The bitmap tracks which doorbell registers are currently in use.
+	 * It is split into two halves; the first half is used for normal
+	 * priority contexts, the second half for high-priority ones.
+	 * Note that logically higher priorities are numerically less than
+	 * normal ones, so the test below means "is it high-priority?"
+	 */
+	const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH);
+	const uint16_t half = GUC_MAX_DOORBELLS / 2;
+	const uint16_t start = hi_pri ? half : 0;
+	const uint16_t end = start + half;
+	uint16_t id;
+
+	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
+	if (id == end)
+		id = GUC_INVALID_DOORBELL_ID;
+
+	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
+			hi_pri ? "high" : "normal", id);
+
+	return id;
+}
+
 /*
  * Select, assign and relase doorbell cachelines
  *
@@ -288,37 +282,6 @@
 	return offset;
 }
 
-static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
-{
-	/*
-	 * The bitmap is split into two halves; the first half is used for
-	 * normal priority contexts, the second half for high-priority ones.
-	 * Note that logically higher priorities are numerically less than
-	 * normal ones, so the test below means "is it high-priority?"
-	 */
-	const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH);
-	const uint16_t half = GUC_MAX_DOORBELLS / 2;
-	const uint16_t start = hi_pri ? half : 0;
-	const uint16_t end = start + half;
-	uint16_t id;
-
-	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
-	if (id == end)
-		id = GUC_INVALID_DOORBELL_ID;
-	else
-		bitmap_set(guc->doorbell_bitmap, id, 1);
-
-	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
-			hi_pri ? "high" : "normal", id);
-
-	return id;
-}
-
-static void release_doorbell(struct intel_guc *guc, uint16_t id)
-{
-	bitmap_clear(guc->doorbell_bitmap, id, 1);
-}
-
 /*
  * Initialise the process descriptor shared with the GuC firmware.
  */
@@ -543,6 +506,61 @@
 	kunmap_atomic(base);
 }
 
+static int guc_ring_doorbell(struct i915_guc_client *gc)
+{
+	struct guc_process_desc *desc;
+	union guc_doorbell_qw db_cmp, db_exc, db_ret;
+	union guc_doorbell_qw *db;
+	int attempt = 2, ret = -EAGAIN;
+
+	desc = gc->client_base + gc->proc_desc_offset;
+
+	/* Update the tail so it is visible to GuC */
+	desc->tail = gc->wq_tail;
+
+	/* current cookie */
+	db_cmp.db_status = GUC_DOORBELL_ENABLED;
+	db_cmp.cookie = gc->cookie;
+
+	/* cookie to be updated */
+	db_exc.db_status = GUC_DOORBELL_ENABLED;
+	db_exc.cookie = gc->cookie + 1;
+	if (db_exc.cookie == 0)
+		db_exc.cookie = 1;
+
+	/* pointer of current doorbell cacheline */
+	db = gc->client_base + gc->doorbell_offset;
+
+	while (attempt--) {
+		/* lets ring the doorbell */
+		db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
+			db_cmp.value_qw, db_exc.value_qw);
+
+		/* if the exchange was successfully executed */
+		if (db_ret.value_qw == db_cmp.value_qw) {
+			/* db was successfully rung */
+			gc->cookie = db_exc.cookie;
+			ret = 0;
+			break;
+		}
+
+		/* XXX: doorbell was lost and need to acquire it again */
+		if (db_ret.db_status == GUC_DOORBELL_DISABLED)
+			break;
+
+		DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n",
+			  db_cmp.cookie, db_ret.cookie);
+
+		/* update the cookie to newly read cookie from GuC */
+		db_cmp.cookie = db_ret.cookie;
+		db_exc.cookie = db_ret.cookie + 1;
+		if (db_exc.cookie == 0)
+			db_exc.cookie = 1;
+	}
+
+	return ret;
+}
+
 /**
  * i915_guc_submit() - Submit commands through GuC
  * @rq:		request associated with the commands
@@ -591,7 +609,7 @@
 
 /**
  * gem_allocate_guc_obj() - Allocate gem object for GuC usage
- * @dev:	drm device
+ * @dev_priv:	driver private data structure
  * @size:	size of object
  *
  * This is a wrapper to create a gem obj. In order to use it inside GuC, the
@@ -600,13 +618,12 @@
  *
  * Return:	A drm_i915_gem_object if successful, otherwise NULL.
  */
-static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev,
-							u32 size)
+static struct drm_i915_gem_object *
+gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 
-	obj = i915_gem_object_create(dev, size);
+	obj = i915_gem_object_create(dev_priv->dev, size);
 	if (IS_ERR(obj))
 		return NULL;
 
@@ -642,10 +659,10 @@
 	drm_gem_object_unreference(&obj->base);
 }
 
-static void guc_client_free(struct drm_device *dev,
-			    struct i915_guc_client *client)
+static void
+guc_client_free(struct drm_i915_private *dev_priv,
+		struct i915_guc_client *client)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc *guc = &dev_priv->guc;
 
 	if (!client)
@@ -658,17 +675,10 @@
 
 	if (client->client_base) {
 		/*
-		 * If we got as far as setting up a doorbell, make sure
-		 * we shut it down before unmapping & deallocating the
-		 * memory. So first disable the doorbell, then tell the
-		 * GuC that we've finished with it, finally deallocate
-		 * it in our bitmap
+		 * If we got as far as setting up a doorbell, make sure we
+		 * shut it down before unmapping & deallocating the memory.
 		 */
-		if (client->doorbell_id != GUC_INVALID_DOORBELL_ID) {
-			guc_disable_doorbell(guc, client);
-			host2guc_release_doorbell(guc, client);
-			release_doorbell(guc, client->doorbell_id);
-		}
+		guc_disable_doorbell(guc, client);
 
 		kunmap(kmap_to_page(client->client_base));
 	}
@@ -683,9 +693,51 @@
 	kfree(client);
 }
 
+/*
+ * Borrow the first client to set up & tear down every doorbell
+ * in turn, to ensure that all doorbell h/w is (re)initialised.
+ */
+static void guc_init_doorbell_hw(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct i915_guc_client *client = guc->execbuf_client;
+	uint16_t db_id, i;
+	int err;
+
+	db_id = client->doorbell_id;
+
+	for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
+		i915_reg_t drbreg = GEN8_DRBREGL(i);
+		u32 value = I915_READ(drbreg);
+
+		err = guc_update_doorbell_id(guc, client, i);
+
+		/* Report update failure or unexpectedly active doorbell */
+		if (err || (i != db_id && (value & GUC_DOORBELL_ENABLED)))
+			DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) was 0x%x, err %d\n",
+					  i, drbreg.reg, value, err);
+	}
+
+	/* Restore to original value */
+	err = guc_update_doorbell_id(guc, client, db_id);
+	if (err)
+		DRM_ERROR("Failed to restore doorbell to %d, err %d\n",
+			db_id, err);
+
+	for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
+		i915_reg_t drbreg = GEN8_DRBREGL(i);
+		u32 value = I915_READ(drbreg);
+
+		if (i != db_id && (value & GUC_DOORBELL_ENABLED))
+			DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n",
+					  i, drbreg.reg, value);
+
+	}
+}
+
 /**
  * guc_client_alloc() - Allocate an i915_guc_client
- * @dev:	drm device
+ * @dev_priv:	driver private data structure
  * @priority:	four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW
  * 		The kernel client to replace ExecList submission is created with
  * 		NORMAL priority. Priority of a client for scheduler can be HIGH,
@@ -695,14 +747,15 @@
  *
  * Return:	An i915_guc_client object if success, else NULL.
  */
-static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
-						uint32_t priority,
-						struct i915_gem_context *ctx)
+static struct i915_guc_client *
+guc_client_alloc(struct drm_i915_private *dev_priv,
+		 uint32_t priority,
+		 struct i915_gem_context *ctx)
 {
 	struct i915_guc_client *client;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc *guc = &dev_priv->guc;
 	struct drm_i915_gem_object *obj;
+	uint16_t db_id;
 
 	client = kzalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
@@ -721,7 +774,7 @@
 	}
 
 	/* The first page is doorbell/proc_desc. Two followed pages are wq. */
-	obj = gem_allocate_guc_obj(dev, GUC_DB_SIZE + GUC_WQ_SIZE);
+	obj = gem_allocate_guc_obj(dev_priv, GUC_DB_SIZE + GUC_WQ_SIZE);
 	if (!obj)
 		goto err;
 
@@ -731,6 +784,11 @@
 	client->wq_offset = GUC_DB_SIZE;
 	client->wq_size = GUC_WQ_SIZE;
 
+	db_id = select_doorbell_register(guc, client->priority);
+	if (db_id == GUC_INVALID_DOORBELL_ID)
+		/* XXX: evict a doorbell instead? */
+		goto err;
+
 	client->doorbell_offset = select_doorbell_cacheline(guc);
 
 	/*
@@ -743,29 +801,22 @@
 	else
 		client->proc_desc_offset = (GUC_DB_SIZE / 2);
 
-	client->doorbell_id = assign_doorbell(guc, client->priority);
-	if (client->doorbell_id == GUC_INVALID_DOORBELL_ID)
-		/* XXX: evict a doorbell instead */
-		goto err;
-
 	guc_init_proc_desc(guc, client);
 	guc_init_ctx_desc(guc, client);
-	guc_init_doorbell(guc, client);
-
-	/* XXX: Any cache flushes needed? General domain mgmt calls? */
-
-	if (host2guc_allocate_doorbell(guc, client))
+	if (guc_init_doorbell(guc, client, db_id))
 		goto err;
 
-	DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u db_id %u\n",
-		priority, client, client->ctx_index, client->doorbell_id);
+	DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u\n",
+		priority, client, client->ctx_index);
+	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n",
+		client->doorbell_id, client->doorbell_offset);
 
 	return client;
 
 err:
 	DRM_ERROR("FAILED to create priority %u GuC client!\n", priority);
 
-	guc_client_free(dev, client);
+	guc_client_free(dev_priv, client);
 	return NULL;
 }
 
@@ -790,7 +841,7 @@
 
 	obj = guc->log_obj;
 	if (!obj) {
-		obj = gem_allocate_guc_obj(dev_priv->dev, size);
+		obj = gem_allocate_guc_obj(dev_priv, size);
 		if (!obj) {
 			/* logging will be off */
 			i915.guc_log_level = -1;
@@ -850,7 +901,7 @@
 
 	obj = guc->ads_obj;
 	if (!obj) {
-		obj = gem_allocate_guc_obj(dev_priv->dev, PAGE_ALIGN(size));
+		obj = gem_allocate_guc_obj(dev_priv, PAGE_ALIGN(size));
 		if (!obj)
 			return;
 
@@ -904,41 +955,41 @@
  * Set up the memory resources to be shared with the GuC.  At this point,
  * we require just one object that can be mapped through the GGTT.
  */
-int i915_guc_submission_init(struct drm_device *dev)
+int i915_guc_submission_init(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	const size_t ctxsize = sizeof(struct guc_context_desc);
 	const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
 	const size_t gemsize = round_up(poolsize, PAGE_SIZE);
 	struct intel_guc *guc = &dev_priv->guc;
 
+	/* Wipe bitmap & delete client in case of reinitialisation */
+	bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
+	i915_guc_submission_disable(dev_priv);
+
 	if (!i915.enable_guc_submission)
 		return 0; /* not enabled  */
 
 	if (guc->ctx_pool_obj)
 		return 0; /* already allocated */
 
-	guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv->dev, gemsize);
+	guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv, gemsize);
 	if (!guc->ctx_pool_obj)
 		return -ENOMEM;
 
 	ida_init(&guc->ctx_ids);
-
 	guc_create_log(guc);
-
 	guc_create_ads(guc);
 
 	return 0;
 }
 
-int i915_guc_submission_enable(struct drm_device *dev)
+int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client;
 
 	/* client for execbuf submission */
-	client = guc_client_alloc(dev,
+	client = guc_client_alloc(dev_priv,
 				  GUC_CTX_PRIORITY_KMD_NORMAL,
 				  dev_priv->kernel_context);
 	if (!client) {
@@ -947,24 +998,22 @@
 	}
 
 	guc->execbuf_client = client;
-
 	host2guc_sample_forcewake(guc, client);
+	guc_init_doorbell_hw(guc);
 
 	return 0;
 }
 
-void i915_guc_submission_disable(struct drm_device *dev)
+void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc *guc = &dev_priv->guc;
 
-	guc_client_free(dev, guc->execbuf_client);
+	guc_client_free(dev_priv, guc->execbuf_client);
 	guc->execbuf_client = NULL;
 }
 
-void i915_guc_submission_fini(struct drm_device *dev)
+void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc *guc = &dev_priv->guc;
 
 	gem_release_guc_obj(dev_priv->guc.ads_obj);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 5c73783..4378a65 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -588,7 +588,7 @@
 
 /**
  * i915_enable_asle_pipestat - enable ASLE pipestat for OpRegion
- * @dev: drm device
+ * @dev_priv: i915 device private
  */
 static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv)
 {
@@ -2517,7 +2517,7 @@
 
 /**
  * i915_reset_and_wakeup - do process context error handling work
- * @dev: drm device
+ * @dev_priv: i915 device private
  *
  * Fire an error uevent so userspace can see that a hang or error
  * was detected.
@@ -2674,13 +2674,14 @@
 
 /**
  * i915_handle_error - handle a gpu error
- * @dev: drm device
+ * @dev_priv: i915 device private
  * @engine_mask: mask representing engines that are hung
  * Do some basic checking of register state at error time and
  * dump it to the syslog.  Also call i915_capture_error_state() to make
  * sure we get a record and make it available in debugfs.  Fire a uevent
  * so userspace knows something bad happened (should trigger collection
  * of a ring dump etc.).
+ * @fmt: Error message format string
  */
 void i915_handle_error(struct drm_i915_private *dev_priv,
 		       u32 engine_mask,
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 5e18cf9..7effe68 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -54,12 +54,13 @@
 	.verbose_state_checks = 1,
 	.nuclear_pageflip = 0,
 	.edp_vswing = 0,
-	.enable_guc_loading = 0,
-	.enable_guc_submission = 0,
+	.enable_guc_loading = -1,
+	.enable_guc_submission = -1,
 	.guc_log_level = -1,
 	.enable_dp_mst = true,
 	.inject_load_failure = 0,
 	.enable_dpcd_backlight = false,
+	.enable_gvt = false,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -202,12 +203,12 @@
 module_param_named_unsafe(enable_guc_loading, i915.enable_guc_loading, int, 0400);
 MODULE_PARM_DESC(enable_guc_loading,
 		"Enable GuC firmware loading "
-		"(-1=auto, 0=never [default], 1=if available, 2=required)");
+		"(-1=auto [default], 0=never, 1=if available, 2=required)");
 
 module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, int, 0400);
 MODULE_PARM_DESC(enable_guc_submission,
 		"Enable GuC submission "
-		"(-1=auto, 0=never [default], 1=if available, 2=required)");
+		"(-1=auto [default], 0=never, 1=if available, 2=required)");
 
 module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
 MODULE_PARM_DESC(guc_log_level,
@@ -222,3 +223,7 @@
 module_param_named(enable_dpcd_backlight, i915.enable_dpcd_backlight, bool, 0600);
 MODULE_PARM_DESC(enable_dpcd_backlight,
 	"Enable support for DPCD backlight control (default:false)");
+
+module_param_named(enable_gvt, i915.enable_gvt, bool, 0600);
+MODULE_PARM_DESC(enable_gvt,
+	"Enable support for Intel GVT-g graphics virtualization host support(default:false)");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 1323261..0ad020b 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -63,6 +63,7 @@
 	bool nuclear_pageflip;
 	bool enable_dp_mst;
 	bool enable_dpcd_backlight;
+	bool enable_gvt;
 };
 
 extern struct i915_params i915 __read_mostly;
diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
new file mode 100644
index 0000000..c0cb297
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _I915_PVINFO_H_
+#define _I915_PVINFO_H_
+
+/* The MMIO offset of the shared info between guest and host emulator */
+#define VGT_PVINFO_PAGE	0x78000
+#define VGT_PVINFO_SIZE	0x1000
+
+/*
+ * The following structure pages are defined in GEN MMIO space
+ * for virtualization. (One page for now)
+ */
+#define VGT_MAGIC         0x4776544776544776ULL	/* 'vGTvGTvG' */
+#define VGT_VERSION_MAJOR 1
+#define VGT_VERSION_MINOR 0
+
+#define INTEL_VGT_IF_VERSION_ENCODE(major, minor) ((major) << 16 | (minor))
+#define INTEL_VGT_IF_VERSION \
+	INTEL_VGT_IF_VERSION_ENCODE(VGT_VERSION_MAJOR, VGT_VERSION_MINOR)
+
+/*
+ * notifications from guest to vgpu device model
+ */
+enum vgt_g2v_type {
+	VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE = 2,
+	VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY,
+	VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE,
+	VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY,
+	VGT_G2V_EXECLIST_CONTEXT_CREATE,
+	VGT_G2V_EXECLIST_CONTEXT_DESTROY,
+	VGT_G2V_MAX,
+};
+
+struct vgt_if {
+	u64 magic;		/* VGT_MAGIC */
+	uint16_t version_major;
+	uint16_t version_minor;
+	u32 vgt_id;		/* ID of vGT instance */
+	u32 rsv1[12];		/* pad to offset 0x40 */
+	/*
+	 *  Data structure to describe the balooning info of resources.
+	 *  Each VM can only have one portion of continuous area for now.
+	 *  (May support scattered resource in future)
+	 *  (starting from offset 0x40)
+	 */
+	struct {
+		/* Aperture register balooning */
+		struct {
+			u32 base;
+			u32 size;
+		} mappable_gmadr;	/* aperture */
+		/* GMADR register balooning */
+		struct {
+			u32 base;
+			u32 size;
+		} nonmappable_gmadr;	/* non aperture */
+		/* allowed fence registers */
+		u32 fence_num;
+		u32 rsv2[3];
+	} avail_rs;		/* available/assigned resource */
+	u32 rsv3[0x200 - 24];	/* pad to half page */
+	/*
+	 * The bottom half page is for response from Gfx driver to hypervisor.
+	 */
+	u32 rsv4;
+	u32 display_ready;	/* ready for display owner switch */
+
+	u32 rsv5[4];
+
+	u32 g2v_notify;
+	u32 rsv6[7];
+
+	struct {
+		u32 lo;
+		u32 hi;
+	} pdp[4];
+
+	u32 execlist_context_descriptor_lo;
+	u32 execlist_context_descriptor_hi;
+
+	u32  rsv7[0x200 - 24];    /* pad to one page */
+} __packed;
+
+#define vgtif_reg(x) \
+	_MMIO((VGT_PVINFO_PAGE + offsetof(struct vgt_if, x)))
+
+/* vGPU display status to be used by the host side */
+#define VGT_DRV_DISPLAY_NOT_READY 0
+#define VGT_DRV_DISPLAY_READY     1  /* ready for display switch */
+
+#endif /* _I915_PVINFO_H_ */
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index dfb4c7a..c6bfbf8 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -220,6 +220,9 @@
 #define   ECOCHK_PPGTT_WT_HSW		(0x2<<3)
 #define   ECOCHK_PPGTT_WB_HSW		(0x3<<3)
 
+#define GEN8_CONFIG0			_MMIO(0xD00)
+#define  GEN9_DEFAULT_FIXES		(1 << 3 | 1 << 2 | 1 << 1)
+
 #define GAC_ECO_BITS			_MMIO(0x14090)
 #define   ECOBITS_SNB_BIT		(1<<13)
 #define   ECOBITS_PPGTT_CACHE64B	(3<<8)
@@ -442,6 +445,8 @@
  */
 #define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags))
 
+#define GEN9_MEDIA_POOL_STATE     ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4)
+#define   GEN9_MEDIA_POOL_ENABLE  (1 << 31)
 #define GFX_OP_RASTER_RULES    ((0x3<<29)|(0x7<<24))
 #define GFX_OP_SCISSOR         ((0x3<<29)|(0x1c<<24)|(0x10<<19))
 #define   SC_UPDATE_SCISSOR       (0x1<<1)
@@ -713,6 +718,9 @@
 	/* Not actual bit groups. Used as IDs for lookup_power_well() */
 	SKL_DISP_PW_ALWAYS_ON,
 	SKL_DISP_PW_DC_OFF,
+
+	BXT_DPIO_CMN_A,
+	BXT_DPIO_CMN_BC,
 };
 
 #define SKL_POWER_WELL_STATE(pw) (1 << ((pw) * 2))
@@ -1273,6 +1281,15 @@
 #define BXT_P_CR_GT_DISP_PWRON		_MMIO(0x138090)
 #define   GT_DISPLAY_POWER_ON(phy)	(1 << (phy))
 
+#define _BXT_PHY_CTL_DDI_A		0x64C00
+#define _BXT_PHY_CTL_DDI_B		0x64C10
+#define _BXT_PHY_CTL_DDI_C		0x64C20
+#define   BXT_PHY_CMNLANE_POWERDOWN_ACK	(1 << 10)
+#define   BXT_PHY_LANE_POWERDOWN_ACK	(1 << 9)
+#define   BXT_PHY_LANE_ENABLED		(1 << 8)
+#define BXT_PHY_CTL(port)		_MMIO_PORT(port, _BXT_PHY_CTL_DDI_A, \
+							 _BXT_PHY_CTL_DDI_B)
+
 #define _PHY_CTL_FAMILY_EDP		0x64C80
 #define _PHY_CTL_FAMILY_DDI		0x64C90
 #define   COMMON_RESET_DIS		(1 << 31)
@@ -1669,6 +1686,9 @@
 
 #define GEN7_TLB_RD_ADDR	_MMIO(0x4700)
 
+#define GAMT_CHKN_BIT_REG	_MMIO(0x4ab8)
+#define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1<<28)
+
 #if 0
 #define PRB0_TAIL	_MMIO(0x2030)
 #define PRB0_HEAD	_MMIO(0x2034)
@@ -1804,6 +1824,10 @@
 #define   GEN9_IZ_HASHING_MASK(slice)			(0x3 << ((slice) * 2))
 #define   GEN9_IZ_HASHING(slice, val)			((val) << ((slice) * 2))
 
+/* chicken reg for WaConextSwitchWithConcurrentTLBInvalidate */
+#define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20D4)
+#define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2)
+
 /* WaClearTdlStateAckDirtyBits */
 #define GEN8_STATE_ACK		_MMIO(0x20F0)
 #define GEN9_STATE_ACK_SLICE1	_MMIO(0x20F8)
@@ -2161,6 +2185,9 @@
 
 #define FBC_LL_SIZE		(1536)
 
+#define FBC_LLC_READ_CTRL	_MMIO(0x9044)
+#define   FBC_LLC_FULLY_OPEN	(1<<30)
+
 /* Framebuffer compression for GM45+ */
 #define DPFC_CB_BASE		_MMIO(0x3200)
 #define DPFC_CONTROL		_MMIO(0x3208)
@@ -2200,6 +2227,8 @@
 #define ILK_DPFC_STATUS		_MMIO(0x43210)
 #define ILK_DPFC_FENCE_YOFF	_MMIO(0x43218)
 #define ILK_DPFC_CHICKEN	_MMIO(0x43224)
+#define   ILK_DPFC_DISABLE_DUMMY0 (1<<8)
+#define   ILK_DPFC_NUKE_ON_ANY_MODIFICATION	(1<<23)
 #define ILK_FBC_RT_BASE		_MMIO(0x2128)
 #define   ILK_FBC_RT_VALID	(1<<0)
 #define   SNB_FBC_FRONT_BUFFER	(1<<1)
@@ -3022,6 +3051,18 @@
 /* Same as Haswell, but 72064 bytes now. */
 #define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)
 
+enum {
+	INTEL_ADVANCED_CONTEXT = 0,
+	INTEL_LEGACY_32B_CONTEXT,
+	INTEL_ADVANCED_AD_CONTEXT,
+	INTEL_LEGACY_64B_CONTEXT
+};
+
+#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
+#define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\
+				INTEL_LEGACY_64B_CONTEXT : \
+				INTEL_LEGACY_32B_CONTEXT)
+
 #define CHV_CLK_CTL1			_MMIO(0x101100)
 #define VLV_CLK_CTL2			_MMIO(0x101104)
 #define   CLK_CTL2_CZCOUNT_30NS_SHIFT	28
@@ -6035,6 +6076,9 @@
 #define  FORCE_ARB_IDLE_PLANES	(1 << 14)
 #define  SKL_EDP_PSR_FIX_RDWRAP	(1 << 3)
 
+#define CHICKEN_PAR2_1		_MMIO(0x42090)
+#define  KVM_CONFIG_CHANGE_NOTIFICATION_SELECT	(1 << 14)
+
 #define _CHICKEN_PIPESL_1_A	0x420b0
 #define _CHICKEN_PIPESL_1_B	0x420b4
 #define  HSW_FBCQ_DIS			(1 << 22)
@@ -6042,6 +6086,7 @@
 #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B)
 
 #define DISP_ARB_CTL	_MMIO(0x45000)
+#define  DISP_FBC_MEMORY_WAKE		(1<<31)
 #define  DISP_TILE_SURFACE_SWIZZLING	(1<<13)
 #define  DISP_FBC_WM_DIS		(1<<15)
 #define DISP_ARB_CTL2	_MMIO(0x45004)
@@ -6055,6 +6100,9 @@
 #define HSW_NDE_RSTWRN_OPT	_MMIO(0x46408)
 #define  RESET_PCH_HANDSHAKE_ENABLE	(1<<4)
 
+#define GEN8_CHICKEN_DCPR_1		_MMIO(0x46430)
+#define   MASK_WAKEMEM			(1<<13)
+
 #define SKL_DFSM			_MMIO(0x51000)
 #define SKL_DFSM_CDCLK_LIMIT_MASK	(3 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_675	(0 << 23)
@@ -6070,8 +6118,10 @@
 
 #define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
 #define  GEN9_TSG_BARRIER_ACK_DISABLE		(1<<8)
+#define  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE  (1<<10)
 
 #define GEN9_CS_DEBUG_MODE1		_MMIO(0x20ec)
+#define GEN9_CTX_PREEMPT_REG		_MMIO(0x2248)
 #define GEN8_CS_CHICKEN1		_MMIO(0x2580)
 
 /* GEN7 chicken */
@@ -6079,6 +6129,7 @@
 # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC	((1<<10) | (1<<26))
 # define GEN9_RHWO_OPTIMIZATION_DISABLE		(1<<14)
 #define COMMON_SLICE_CHICKEN2			_MMIO(0x7014)
+# define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8)
 # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE	(1<<0)
 
 #define HIZ_CHICKEN					_MMIO(0x7018)
@@ -6931,6 +6982,7 @@
 #define    EDRAM_SETS_IDX(cap)			(((cap) >> 8) & 0x3)
 
 #define GEN6_UCGCTL1				_MMIO(0x9400)
+# define GEN6_GAMUNIT_CLOCK_GATE_DISABLE		(1 << 22)
 # define GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE		(1 << 16)
 # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
@@ -6947,6 +6999,7 @@
 
 #define GEN7_UCGCTL4				_MMIO(0x940c)
 #define  GEN7_L3BANK2X_CLOCK_GATE_DISABLE	(1<<25)
+#define  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE	(1<<14)
 
 #define GEN6_RCGCTL1				_MMIO(0x9410)
 #define GEN6_RCGCTL2				_MMIO(0x9414)
@@ -8151,6 +8204,8 @@
 #define _MIPIA_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb05c)
 #define _MIPIC_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb85c)
 #define MIPI_EOT_DISABLE(port)		_MMIO_MIPI(port, _MIPIA_EOT_DISABLE, _MIPIC_EOT_DISABLE)
+#define  BXT_DEFEATURE_DPI_FIFO_CTR			(1 << 9)
+#define  BXT_DPHY_DEFEATURE_EN				(1 << 8)
 #define  LP_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 7)
 #define  HS_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 6)
 #define  LOW_CONTENTION_RECOVERY_DISABLE		(1 << 5)
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 0043262..f6acb5a 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -53,7 +53,7 @@
 
 /**
  * i915_check_vgpu - detect virtual GPU
- * @dev: drm device *
+ * @dev_priv: i915 device private
  *
  * This function is called at the initialization stage, to detect whether
  * running on a vGPU.
@@ -101,10 +101,13 @@
  * This function is called to deallocate the ballooned-out graphic memory, when
  * driver is unloaded or when ballooning fails.
  */
-void intel_vgt_deballoon(void)
+void intel_vgt_deballoon(struct drm_i915_private *dev_priv)
 {
 	int i;
 
+	if (!intel_vgpu_active(dev_priv))
+		return;
+
 	DRM_DEBUG("VGT deballoon.\n");
 
 	for (i = 0; i < 4; i++) {
@@ -135,7 +138,7 @@
 
 /**
  * intel_vgt_balloon - balloon out reserved graphics address trunks
- * @dev_priv: i915 device
+ * @dev: drm device
  *
  * This function is called at the initialization stage, to balloon out the
  * graphic address space allocated to other vGPUs, by marking these spaces as
@@ -177,9 +180,8 @@
  * Returns:
  * zero on success, non-zero if configuration invalid or ballooning failed
  */
-int intel_vgt_balloon(struct drm_device *dev)
+int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	unsigned long ggtt_end = ggtt->base.start + ggtt->base.total;
 
@@ -187,6 +189,9 @@
 	unsigned long unmappable_base, unmappable_size, unmappable_end;
 	int ret;
 
+	if (!intel_vgpu_active(dev_priv))
+		return 0;
+
 	mappable_base = I915_READ(vgtif_reg(avail_rs.mappable_gmadr.base));
 	mappable_size = I915_READ(vgtif_reg(avail_rs.mappable_gmadr.size));
 	unmappable_base = I915_READ(vgtif_reg(avail_rs.nonmappable_gmadr.base));
@@ -258,6 +263,6 @@
 
 err:
 	DRM_ERROR("VGT balloon fail\n");
-	intel_vgt_deballoon();
+	intel_vgt_deballoon(dev_priv);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h
index 21ffcfe..3c3b2d2 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.h
+++ b/drivers/gpu/drm/i915/i915_vgpu.h
@@ -24,94 +24,10 @@
 #ifndef _I915_VGPU_H_
 #define _I915_VGPU_H_
 
-/* The MMIO offset of the shared info between guest and host emulator */
-#define VGT_PVINFO_PAGE	0x78000
-#define VGT_PVINFO_SIZE	0x1000
+#include "i915_pvinfo.h"
 
-/*
- * The following structure pages are defined in GEN MMIO space
- * for virtualization. (One page for now)
- */
-#define VGT_MAGIC         0x4776544776544776ULL	/* 'vGTvGTvG' */
-#define VGT_VERSION_MAJOR 1
-#define VGT_VERSION_MINOR 0
-
-#define INTEL_VGT_IF_VERSION_ENCODE(major, minor) ((major) << 16 | (minor))
-#define INTEL_VGT_IF_VERSION \
-	INTEL_VGT_IF_VERSION_ENCODE(VGT_VERSION_MAJOR, VGT_VERSION_MINOR)
-
-/*
- * notifications from guest to vgpu device model
- */
-enum vgt_g2v_type {
-	VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE = 2,
-	VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY,
-	VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE,
-	VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY,
-	VGT_G2V_EXECLIST_CONTEXT_CREATE,
-	VGT_G2V_EXECLIST_CONTEXT_DESTROY,
-	VGT_G2V_MAX,
-};
-
-struct vgt_if {
-	uint64_t magic;		/* VGT_MAGIC */
-	uint16_t version_major;
-	uint16_t version_minor;
-	uint32_t vgt_id;	/* ID of vGT instance */
-	uint32_t rsv1[12];	/* pad to offset 0x40 */
-	/*
-	 *  Data structure to describe the balooning info of resources.
-	 *  Each VM can only have one portion of continuous area for now.
-	 *  (May support scattered resource in future)
-	 *  (starting from offset 0x40)
-	 */
-	struct {
-		/* Aperture register balooning */
-		struct {
-			uint32_t base;
-			uint32_t size;
-		} mappable_gmadr;	/* aperture */
-		/* GMADR register balooning */
-		struct {
-			uint32_t base;
-			uint32_t size;
-		} nonmappable_gmadr;	/* non aperture */
-		/* allowed fence registers */
-		uint32_t fence_num;
-		uint32_t rsv2[3];
-	} avail_rs;		/* available/assigned resource */
-	uint32_t rsv3[0x200 - 24];	/* pad to half page */
-	/*
-	 * The bottom half page is for response from Gfx driver to hypervisor.
-	 */
-	uint32_t rsv4;
-	uint32_t display_ready;	/* ready for display owner switch */
-
-	uint32_t rsv5[4];
-
-	uint32_t g2v_notify;
-	uint32_t rsv6[7];
-
-	struct {
-		uint32_t lo;
-		uint32_t hi;
-	} pdp[4];
-
-	uint32_t execlist_context_descriptor_lo;
-	uint32_t execlist_context_descriptor_hi;
-
-	uint32_t  rsv7[0x200 - 24];    /* pad to one page */
-} __packed;
-
-#define vgtif_reg(x) \
-	_MMIO((VGT_PVINFO_PAGE + (long)&((struct vgt_if *)NULL)->x))
-
-/* vGPU display status to be used by the host side */
-#define VGT_DRV_DISPLAY_NOT_READY 0
-#define VGT_DRV_DISPLAY_READY     1  /* ready for display switch */
-
-extern void i915_check_vgpu(struct drm_i915_private *dev_priv);
-extern int intel_vgt_balloon(struct drm_device *dev);
-extern void intel_vgt_deballoon(void);
+void i915_check_vgpu(struct drm_i915_private *dev_priv);
+int intel_vgt_balloon(struct drm_i915_private *dev_priv);
+void intel_vgt_deballoon(struct drm_i915_private *dev_priv);
 
 #endif /* _I915_VGPU_H_ */
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 713a02d..da5ed4a 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -1570,6 +1570,45 @@
 }
 
 /**
+ * intel_bios_is_port_present - is the specified digital port present
+ * @dev_priv:	i915 device instance
+ * @port:	port to check
+ *
+ * Return true if the device in %port is present.
+ */
+bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port)
+{
+	static const struct {
+		u16 dp, hdmi;
+	} port_mapping[] = {
+		[PORT_B] = { DVO_PORT_DPB, DVO_PORT_HDMIB, },
+		[PORT_C] = { DVO_PORT_DPC, DVO_PORT_HDMIC, },
+		[PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, },
+		[PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, },
+	};
+	int i;
+
+	/* FIXME maybe deal with port A as well? */
+	if (WARN_ON(port == PORT_A) || port >= ARRAY_SIZE(port_mapping))
+		return false;
+
+	if (!dev_priv->vbt.child_dev_num)
+		return false;
+
+	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
+		const union child_device_config *p_child =
+			&dev_priv->vbt.child_dev[i];
+		if ((p_child->common.dvo_port == port_mapping[port].dp ||
+		     p_child->common.dvo_port == port_mapping[port].hdmi) &&
+		    (p_child->common.device_type & (DEVICE_TYPE_TMDS_DVI_SIGNALING |
+						    DEVICE_TYPE_DISPLAYPORT_OUTPUT)))
+			return true;
+	}
+
+	return false;
+}
+
+/**
  * intel_bios_is_port_edp - is the device in given port eDP
  * @dev_priv:	i915 device instance
  * @port:	port to check
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 022b41d..ad3b0ee5e 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1342,6 +1342,14 @@
 	DRM_DEBUG_KMS("No pipe for ddi port %c found\n", port_name(port));
 
 out:
+	if (ret && IS_BROXTON(dev_priv)) {
+		tmp = I915_READ(BXT_PHY_CTL(port));
+		if ((tmp & (BXT_PHY_LANE_POWERDOWN_ACK |
+			    BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED)
+			DRM_ERROR("Port %c enabled but PHY powered down? "
+				  "(PHY_CTL %08x)\n", port_name(port), tmp);
+	}
+
 	intel_display_power_put(dev_priv, power_domain);
 
 	return ret;
@@ -1742,9 +1750,11 @@
 	}
 }
 
-static bool broxton_phy_is_enabled(struct drm_i915_private *dev_priv,
-				   enum dpio_phy phy)
+bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv,
+			    enum dpio_phy phy)
 {
+	enum port port;
+
 	if (!(I915_READ(BXT_P_CR_GT_DISP_PWRON) & GT_DISPLAY_POWER_ON(phy)))
 		return false;
 
@@ -1770,38 +1780,48 @@
 		return false;
 	}
 
+	for_each_port_masked(port,
+			     phy == DPIO_PHY0 ? BIT(PORT_B) | BIT(PORT_C) :
+						BIT(PORT_A)) {
+		u32 tmp = I915_READ(BXT_PHY_CTL(port));
+
+		if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) {
+			DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane "
+					 "for port %c powered down "
+					 "(PHY_CTL %08x)\n",
+					 phy, port_name(port), tmp);
+
+			return false;
+		}
+	}
+
 	return true;
 }
 
-static u32 broxton_get_grc(struct drm_i915_private *dev_priv, enum dpio_phy phy)
+static u32 bxt_get_grc(struct drm_i915_private *dev_priv, enum dpio_phy phy)
 {
 	u32 val = I915_READ(BXT_PORT_REF_DW6(phy));
 
 	return (val & GRC_CODE_MASK) >> GRC_CODE_SHIFT;
 }
 
-static void broxton_phy_wait_grc_done(struct drm_i915_private *dev_priv,
-				      enum dpio_phy phy)
+static void bxt_phy_wait_grc_done(struct drm_i915_private *dev_priv,
+				  enum dpio_phy phy)
 {
 	if (wait_for(I915_READ(BXT_PORT_REF_DW3(phy)) & GRC_DONE, 10))
 		DRM_ERROR("timeout waiting for PHY%d GRC\n", phy);
 }
 
-static bool broxton_phy_verify_state(struct drm_i915_private *dev_priv,
-				     enum dpio_phy phy);
-
-static void broxton_phy_init(struct drm_i915_private *dev_priv,
-			     enum dpio_phy phy)
+void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy)
 {
-	enum port port;
-	u32 ports, val;
+	u32 val;
 
-	if (broxton_phy_is_enabled(dev_priv, phy)) {
+	if (bxt_ddi_phy_is_enabled(dev_priv, phy)) {
 		/* Still read out the GRC value for state verification */
 		if (phy == DPIO_PHY0)
-			dev_priv->bxt_phy_grc = broxton_get_grc(dev_priv, phy);
+			dev_priv->bxt_phy_grc = bxt_get_grc(dev_priv, phy);
 
-		if (broxton_phy_verify_state(dev_priv, phy)) {
+		if (bxt_ddi_phy_verify_state(dev_priv, phy)) {
 			DRM_DEBUG_DRIVER("DDI PHY %d already enabled, "
 					 "won't reprogram it\n", phy);
 
@@ -1810,8 +1830,6 @@
 
 		DRM_DEBUG_DRIVER("DDI PHY %d enabled with invalid state, "
 				 "force reprogramming it\n", phy);
-	} else {
-		DRM_DEBUG_DRIVER("DDI PHY %d not enabled, enabling it\n", phy);
 	}
 
 	val = I915_READ(BXT_P_CR_GT_DISP_PWRON);
@@ -1831,28 +1849,6 @@
 		DRM_ERROR("timeout during PHY%d power on\n", phy);
 	}
 
-	if (phy == DPIO_PHY0)
-		ports = BIT(PORT_B) | BIT(PORT_C);
-	else
-		ports = BIT(PORT_A);
-
-	for_each_port_masked(port, ports) {
-		int lane;
-
-		for (lane = 0; lane < 4; lane++) {
-			val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane));
-			/*
-			 * Note that on CHV this flag is called UPAR, but has
-			 * the same function.
-			 */
-			val &= ~LATENCY_OPTIM;
-			if (lane != 1)
-				val |= LATENCY_OPTIM;
-
-			I915_WRITE(BXT_PORT_TX_DW14_LN(port, lane), val);
-		}
-	}
-
 	/* Program PLL Rcomp code offset */
 	val = I915_READ(BXT_PORT_CL1CM_DW9(phy));
 	val &= ~IREF0RC_OFFSET_MASK;
@@ -1899,10 +1895,7 @@
 		 * the corresponding calibrated value from PHY1, and disable
 		 * the automatic calibration on PHY0.
 		 */
-		broxton_phy_wait_grc_done(dev_priv, DPIO_PHY1);
-
-		val = dev_priv->bxt_phy_grc = broxton_get_grc(dev_priv,
-							      DPIO_PHY1);
+		val = dev_priv->bxt_phy_grc = bxt_get_grc(dev_priv, DPIO_PHY1);
 		grc_code = val << GRC_CODE_FAST_SHIFT |
 			   val << GRC_CODE_SLOW_SHIFT |
 			   val;
@@ -1912,31 +1905,16 @@
 		val |= GRC_DIS | GRC_RDY_OVRD;
 		I915_WRITE(BXT_PORT_REF_DW8(DPIO_PHY0), val);
 	}
-	/*
-	 * During PHY1 init delay waiting for GRC calibration to finish, since
-	 * it can happen in parallel with the subsequent PHY0 init.
-	 */
 
 	val = I915_READ(BXT_PHY_CTL_FAMILY(phy));
 	val |= COMMON_RESET_DIS;
 	I915_WRITE(BXT_PHY_CTL_FAMILY(phy), val);
+
+	if (phy == DPIO_PHY1)
+		bxt_phy_wait_grc_done(dev_priv, DPIO_PHY1);
 }
 
-void broxton_ddi_phy_init(struct drm_i915_private *dev_priv)
-{
-	/* Enable PHY1 first since it provides Rcomp for PHY0 */
-	broxton_phy_init(dev_priv, DPIO_PHY1);
-	broxton_phy_init(dev_priv, DPIO_PHY0);
-
-	/*
-	 * If BIOS enabled only PHY0 and not PHY1, we skipped waiting for the
-	 * PHY1 GRC calibration to finish, so wait for it here.
-	 */
-	broxton_phy_wait_grc_done(dev_priv, DPIO_PHY1);
-}
-
-static void broxton_phy_uninit(struct drm_i915_private *dev_priv,
-			       enum dpio_phy phy)
+void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy)
 {
 	uint32_t val;
 
@@ -1949,12 +1927,6 @@
 	I915_WRITE(BXT_P_CR_GT_DISP_PWRON, val);
 }
 
-void broxton_ddi_phy_uninit(struct drm_i915_private *dev_priv)
-{
-	broxton_phy_uninit(dev_priv, DPIO_PHY1);
-	broxton_phy_uninit(dev_priv, DPIO_PHY0);
-}
-
 static bool __printf(6, 7)
 __phy_reg_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy,
 		       i915_reg_t reg, u32 mask, u32 expected,
@@ -1982,11 +1954,9 @@
 	return false;
 }
 
-static bool broxton_phy_verify_state(struct drm_i915_private *dev_priv,
-				     enum dpio_phy phy)
+bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv,
+			      enum dpio_phy phy)
 {
-	enum port port;
-	u32 ports;
 	uint32_t mask;
 	bool ok;
 
@@ -1994,27 +1964,11 @@
 	__phy_reg_verify_state(dev_priv, phy, reg, mask, exp, fmt,	\
 			       ## __VA_ARGS__)
 
-	/* We expect the PHY to be always enabled */
-	if (!broxton_phy_is_enabled(dev_priv, phy))
+	if (!bxt_ddi_phy_is_enabled(dev_priv, phy))
 		return false;
 
 	ok = true;
 
-	if (phy == DPIO_PHY0)
-		ports = BIT(PORT_B) | BIT(PORT_C);
-	else
-		ports = BIT(PORT_A);
-
-	for_each_port_masked(port, ports) {
-		int lane;
-
-		for (lane = 0; lane < 4; lane++)
-			ok &= _CHK(BXT_PORT_TX_DW14_LN(port, lane),
-				    LATENCY_OPTIM,
-				    lane != 1 ? LATENCY_OPTIM : 0,
-				    "BXT_PORT_TX_DW14_LN(%d, %d)", port, lane);
-	}
-
 	/* PLL Rcomp code offset */
 	ok &= _CHK(BXT_PORT_CL1CM_DW9(phy),
 		    IREF0RC_OFFSET_MASK, 0xe4 << IREF0RC_OFFSET_SHIFT,
@@ -2058,11 +2012,65 @@
 #undef _CHK
 }
 
-void broxton_ddi_phy_verify_state(struct drm_i915_private *dev_priv)
+static uint8_t
+bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder,
+				     struct intel_crtc_state *pipe_config)
 {
-	if (!broxton_phy_verify_state(dev_priv, DPIO_PHY0) ||
-	    !broxton_phy_verify_state(dev_priv, DPIO_PHY1))
-		i915_report_error(dev_priv, "DDI PHY state mismatch\n");
+	switch (pipe_config->lane_count) {
+	case 1:
+		return 0;
+	case 2:
+		return BIT(2) | BIT(0);
+	case 4:
+		return BIT(3) | BIT(2) | BIT(0);
+	default:
+		MISSING_CASE(pipe_config->lane_count);
+
+		return 0;
+	}
+}
+
+static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder)
+{
+	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev);
+	enum port port = dport->port;
+	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
+	int lane;
+
+	for (lane = 0; lane < 4; lane++) {
+		u32 val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane));
+
+		/*
+		 * Note that on CHV this flag is called UPAR, but has
+		 * the same function.
+		 */
+		val &= ~LATENCY_OPTIM;
+		if (intel_crtc->config->lane_lat_optim_mask & BIT(lane))
+			val |= LATENCY_OPTIM;
+
+		I915_WRITE(BXT_PORT_TX_DW14_LN(port, lane), val);
+	}
+}
+
+static uint8_t
+bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder)
+{
+	struct intel_digital_port *dport = enc_to_dig_port(&encoder->base);
+	struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev);
+	enum port port = dport->port;
+	int lane;
+	uint8_t mask;
+
+	mask = 0;
+	for (lane = 0; lane < 4; lane++) {
+		u32 val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane));
+
+		if (val & LATENCY_OPTIM)
+			mask |= BIT(lane);
+	}
+
+	return mask;
 }
 
 void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
@@ -2236,13 +2244,19 @@
 	}
 
 	intel_ddi_clock_get(encoder, pipe_config);
+
+	if (IS_BROXTON(dev_priv))
+		pipe_config->lane_lat_optim_mask =
+			bxt_ddi_phy_get_lane_lat_optim_mask(encoder);
 }
 
 static bool intel_ddi_compute_config(struct intel_encoder *encoder,
 				     struct intel_crtc_state *pipe_config)
 {
+	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	int type = encoder->type;
 	int port = intel_ddi_get_encoder_port(encoder);
+	int ret;
 
 	WARN(type == INTEL_OUTPUT_UNKNOWN, "compute_config() on unknown output!\n");
 
@@ -2250,9 +2264,17 @@
 		pipe_config->cpu_transcoder = TRANSCODER_EDP;
 
 	if (type == INTEL_OUTPUT_HDMI)
-		return intel_hdmi_compute_config(encoder, pipe_config);
+		ret = intel_hdmi_compute_config(encoder, pipe_config);
 	else
-		return intel_dp_compute_config(encoder, pipe_config);
+		ret = intel_dp_compute_config(encoder, pipe_config);
+
+	if (IS_BROXTON(dev_priv) && ret)
+		pipe_config->lane_lat_optim_mask =
+			bxt_ddi_phy_calc_lane_lat_optim_mask(encoder,
+							     pipe_config);
+
+	return ret;
+
 }
 
 static const struct drm_encoder_funcs intel_ddi_funcs = {
@@ -2351,6 +2373,8 @@
 
 	intel_encoder->compute_config = intel_ddi_compute_config;
 	intel_encoder->enable = intel_enable_ddi;
+	if (IS_BROXTON(dev_priv))
+		intel_encoder->pre_pll_enable = bxt_ddi_pre_pll_enable;
 	intel_encoder->pre_enable = intel_ddi_pre_enable;
 	intel_encoder->disable = intel_disable_ddi;
 	intel_encoder->post_disable = intel_ddi_post_disable;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b12085a..0b2cd66 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -36,6 +36,7 @@
 #include "intel_drv.h"
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_dmabuf.h"
 #include "intel_dsi.h"
 #include "i915_trace.h"
 #include <drm/drm_atomic.h>
@@ -46,7 +47,6 @@
 #include <drm/drm_rect.h>
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
-#include <linux/dma-buf.h>
 
 static bool is_mmio_work(struct intel_flip_work *work)
 {
@@ -123,7 +123,7 @@
 static void intel_modeset_setup_hw_state(struct drm_device *dev);
 static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
 static int ilk_max_pixel_rate(struct drm_atomic_state *state);
-static int broxton_calc_cdclk(int max_pixclk);
+static int bxt_calc_cdclk(int max_pixclk);
 
 struct intel_limit {
 	struct {
@@ -4641,14 +4641,14 @@
 		struct intel_plane_state *old_primary_state =
 			to_intel_plane_state(old_pri_state);
 
-		intel_fbc_pre_update(crtc);
+		intel_fbc_pre_update(crtc, pipe_config, primary_state);
 
 		if (old_primary_state->visible &&
 		    (modeset || !primary_state->visible))
 			intel_pre_disable_primary(&crtc->base);
 	}
 
-	if (pipe_config->disable_cxsr) {
+	if (pipe_config->disable_cxsr && HAS_GMCH_DISPLAY(dev)) {
 		crtc->wm.cxsr_allowed = false;
 
 		/*
@@ -4841,6 +4841,10 @@
 		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
 						      false);
 
+	for_each_encoder_on_crtc(dev, crtc, encoder)
+		if (encoder->pre_pll_enable)
+			encoder->pre_pll_enable(encoder);
+
 	if (intel_crtc->config->shared_dpll)
 		intel_enable_shared_dpll(intel_crtc);
 
@@ -5416,7 +5420,7 @@
 	dev_priv->cdclk_pll.vco = vco;
 }
 
-static void broxton_set_cdclk(struct drm_i915_private *dev_priv, int cdclk)
+static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk)
 {
 	u32 val, divider;
 	int vco, ret;
@@ -5541,7 +5545,7 @@
 	dev_priv->cdclk_pll.vco = -1;
 }
 
-void broxton_init_cdclk(struct drm_i915_private *dev_priv)
+void bxt_init_cdclk(struct drm_i915_private *dev_priv)
 {
 	bxt_sanitize_cdclk(dev_priv);
 
@@ -5553,12 +5557,12 @@
 	 * - The initial CDCLK needs to be read from VBT.
 	 *   Need to make this change after VBT has changes for BXT.
 	 */
-	broxton_set_cdclk(dev_priv, broxton_calc_cdclk(0));
+	bxt_set_cdclk(dev_priv, bxt_calc_cdclk(0));
 }
 
-void broxton_uninit_cdclk(struct drm_i915_private *dev_priv)
+void bxt_uninit_cdclk(struct drm_i915_private *dev_priv)
 {
-	broxton_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref);
+	bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref);
 }
 
 static int skl_calc_cdclk(int max_pixclk, int vco)
@@ -5984,7 +5988,7 @@
 		return 200000;
 }
 
-static int broxton_calc_cdclk(int max_pixclk)
+static int bxt_calc_cdclk(int max_pixclk)
 {
 	if (max_pixclk > 576000)
 		return 624000;
@@ -6044,17 +6048,17 @@
 	return 0;
 }
 
-static int broxton_modeset_calc_cdclk(struct drm_atomic_state *state)
+static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state)
 {
 	int max_pixclk = ilk_max_pixel_rate(state);
 	struct intel_atomic_state *intel_state =
 		to_intel_atomic_state(state);
 
 	intel_state->cdclk = intel_state->dev_cdclk =
-		broxton_calc_cdclk(max_pixclk);
+		bxt_calc_cdclk(max_pixclk);
 
 	if (!intel_state->active_crtcs)
-		intel_state->dev_cdclk = broxton_calc_cdclk(0);
+		intel_state->dev_cdclk = bxt_calc_cdclk(0);
 
 	return 0;
 }
@@ -8430,12 +8434,9 @@
 	else
 		final |= DREF_NONSPREAD_SOURCE_ENABLE;
 
+	final &= ~DREF_SSC_SOURCE_MASK;
 	final &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
-
-	if (!using_ssc_source) {
-		final &= ~DREF_SSC_SOURCE_MASK;
-		final &= ~DREF_SSC1_ENABLE;
-	}
+	final &= ~DREF_SSC1_ENABLE;
 
 	if (has_panel) {
 		final |= DREF_SSC_SOURCE_ENABLE;
@@ -8450,9 +8451,9 @@
 				final |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
 		} else
 			final |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
-	} else {
-		final |= DREF_SSC_SOURCE_DISABLE;
-		final |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
+	} else if (using_ssc_source) {
+		final |= DREF_SSC_SOURCE_ENABLE;
+		final |= DREF_SSC1_ENABLE;
 	}
 
 	if (final == val)
@@ -9673,14 +9674,14 @@
 	}
 }
 
-static void broxton_modeset_commit_cdclk(struct drm_atomic_state *old_state)
+static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state)
 {
 	struct drm_device *dev = old_state->dev;
 	struct intel_atomic_state *old_intel_state =
 		to_intel_atomic_state(old_state);
 	unsigned int req_cdclk = old_intel_state->dev_cdclk;
 
-	broxton_set_cdclk(to_i915(dev), req_cdclk);
+	bxt_set_cdclk(to_i915(dev), req_cdclk);
 }
 
 /* compute the max rate for new configuration */
@@ -11428,6 +11429,8 @@
 static bool use_mmio_flip(struct intel_engine_cs *engine,
 			  struct drm_i915_gem_object *obj)
 {
+	struct reservation_object *resv;
+
 	/*
 	 * This is not being used for older platforms, because
 	 * non-availability of flip done interrupt forces us to use
@@ -11448,12 +11451,12 @@
 		return true;
 	else if (i915.enable_execlists)
 		return true;
-	else if (obj->base.dma_buf &&
-		 !reservation_object_test_signaled_rcu(obj->base.dma_buf->resv,
-						       false))
+
+	resv = i915_gem_object_get_dmabuf_resv(obj);
+	if (resv && !reservation_object_test_signaled_rcu(resv, false))
 		return true;
-	else
-		return engine != i915_gem_request_get_engine(obj->last_write_req);
+
+	return engine != i915_gem_request_get_engine(obj->last_write_req);
 }
 
 static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
@@ -11542,6 +11545,7 @@
 	struct intel_framebuffer *intel_fb =
 		to_intel_framebuffer(crtc->base.primary->fb);
 	struct drm_i915_gem_object *obj = intel_fb->obj;
+	struct reservation_object *resv;
 
 	if (work->flip_queued_req)
 		WARN_ON(__i915_wait_request(work->flip_queued_req,
@@ -11549,9 +11553,9 @@
 					    &dev_priv->rps.mmioflips));
 
 	/* For framebuffer backed by dmabuf, wait for fence */
-	if (obj->base.dma_buf)
-		WARN_ON(reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv,
-							    false, false,
+	resv = i915_gem_object_get_dmabuf_resv(obj);
+	if (resv)
+		WARN_ON(reservation_object_wait_timeout_rcu(resv, false, false,
 							    MAX_SCHEDULE_TIMEOUT) < 0);
 
 	intel_pipe_update_start(crtc);
@@ -11642,6 +11646,7 @@
 	spin_unlock(&dev->event_lock);
 }
 
+__maybe_unused
 static int intel_crtc_page_flip(struct drm_crtc *crtc,
 				struct drm_framebuffer *fb,
 				struct drm_pending_vblank_event *event,
@@ -11727,7 +11732,9 @@
 
 	crtc->primary->fb = fb;
 	update_state_fb(crtc->primary);
-	intel_fbc_pre_update(intel_crtc);
+
+	intel_fbc_pre_update(intel_crtc, intel_crtc->config,
+			     to_intel_plane_state(primary->state));
 
 	work->pending_flip_obj = obj;
 
@@ -12816,6 +12823,7 @@
 
 	PIPE_CONF_CHECK_I(has_dp_encoder);
 	PIPE_CONF_CHECK_I(lane_count);
+	PIPE_CONF_CHECK_X(lane_lat_optim_mask);
 
 	if (INTEL_INFO(dev)->gen < 8) {
 		PIPE_CONF_CHECK_M_N(dp_m_n);
@@ -13567,11 +13575,6 @@
 	struct drm_crtc *crtc;
 	int i, ret;
 
-	if (nonblock) {
-		DRM_DEBUG_KMS("i915 does not yet support nonblocking commit\n");
-		return -EINVAL;
-	}
-
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
 		if (state->legacy_cursor_update)
 			continue;
@@ -13690,46 +13693,36 @@
 	return false;
 }
 
-/**
- * intel_atomic_commit - commit validated state object
- * @dev: DRM device
- * @state: the top-level driver state object
- * @nonblock: nonblocking commit
- *
- * This function commits a top-level state object that has been validated
- * with drm_atomic_helper_check().
- *
- * FIXME:  Atomic modeset support for i915 is not yet complete.  At the moment
- * we can only handle plane-related operations and do not yet support
- * nonblocking commit.
- *
- * RETURNS
- * Zero for success or -errno.
- */
-static int intel_atomic_commit(struct drm_device *dev,
-			       struct drm_atomic_state *state,
-			       bool nonblock)
+static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 {
+	struct drm_device *dev = state->dev;
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc_state *old_crtc_state;
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *intel_cstate;
-	int ret = 0, i;
+	struct drm_plane *plane;
+	struct drm_plane_state *plane_state;
 	bool hw_check = intel_state->modeset;
 	unsigned long put_domains[I915_MAX_PIPES] = {};
 	unsigned crtc_vblank_mask = 0;
+	int i, ret;
 
-	ret = intel_atomic_prepare_commit(dev, state, nonblock);
-	if (ret) {
-		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
-		return ret;
+	for_each_plane_in_state(state, plane, plane_state, i) {
+		struct intel_plane_state *intel_plane_state =
+			to_intel_plane_state(plane_state);
+
+		if (!intel_plane_state->wait_req)
+			continue;
+
+		ret = __i915_wait_request(intel_plane_state->wait_req,
+					  true, NULL, NULL);
+		/* EIO should be eaten, and we can't get interrupted in the
+		 * worker, and blocking commits have waited already. */
+		WARN_ON(ret);
 	}
 
-	drm_atomic_helper_swap_state(state, true);
-	dev_priv->wm.distrust_bios_wm = false;
-	dev_priv->wm.skl_results = intel_state->wm_results;
-	intel_shared_dpll_commit(state);
+	drm_atomic_helper_wait_for_dependencies(state);
 
 	if (intel_state->modeset) {
 		memcpy(dev_priv->min_pixclk, intel_state->min_pixclk,
@@ -13797,30 +13790,44 @@
 		bool modeset = needs_modeset(crtc->state);
 		struct intel_crtc_state *pipe_config =
 			to_intel_crtc_state(crtc->state);
-		bool update_pipe = !modeset && pipe_config->update_pipe;
 
 		if (modeset && crtc->state->active) {
 			update_scanline_offset(to_intel_crtc(crtc));
 			dev_priv->display.crtc_enable(crtc);
 		}
 
+		/* Complete events for now disable pipes here. */
+		if (modeset && !crtc->state->active && crtc->state->event) {
+			spin_lock_irq(&dev->event_lock);
+			drm_crtc_send_vblank_event(crtc, crtc->state->event);
+			spin_unlock_irq(&dev->event_lock);
+
+			crtc->state->event = NULL;
+		}
+
 		if (!modeset)
 			intel_pre_plane_update(to_intel_crtc_state(old_crtc_state));
 
 		if (crtc->state->active &&
 		    drm_atomic_get_existing_plane_state(state, crtc->primary))
-			intel_fbc_enable(intel_crtc);
+			intel_fbc_enable(intel_crtc, pipe_config, to_intel_plane_state(crtc->primary->state));
 
-		if (crtc->state->active &&
-		    (crtc->state->planes_changed || update_pipe))
+		if (crtc->state->active)
 			drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
 
 		if (pipe_config->base.active && needs_vblank_wait(pipe_config))
 			crtc_vblank_mask |= 1 << i;
 	}
 
-	/* FIXME: add subpixel order */
-
+	/* FIXME: We should call drm_atomic_helper_commit_hw_done() here
+	 * already, but still need the state for the delayed optimization. To
+	 * fix this:
+	 * - wrap the optimization/post_plane_update stuff into a per-crtc work.
+	 * - schedule that vblank worker _before_ calling hw_done
+	 * - at the start of commit_tail, cancel it _synchrously
+	 * - switch over to the vblank wait helper in the core after that since
+	 *   we don't need out special handling any more.
+	 */
 	if (!state->legacy_cursor_update)
 		intel_atomic_wait_for_vblanks(dev, dev_priv, crtc_vblank_mask);
 
@@ -13847,6 +13854,8 @@
 		intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state);
 	}
 
+	drm_atomic_helper_commit_hw_done(state);
+
 	if (intel_state->modeset)
 		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
 
@@ -13854,6 +13863,8 @@
 	drm_atomic_helper_cleanup_planes(dev, state);
 	mutex_unlock(&dev->struct_mutex);
 
+	drm_atomic_helper_commit_cleanup_done(state);
+
 	drm_atomic_state_free(state);
 
 	/* As one of the primary mmio accessors, KMS has a high likelihood
@@ -13868,6 +13879,86 @@
 	 * can happen also when the device is completely off.
 	 */
 	intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
+}
+
+static void intel_atomic_commit_work(struct work_struct *work)
+{
+	struct drm_atomic_state *state = container_of(work,
+						      struct drm_atomic_state,
+						      commit_work);
+	intel_atomic_commit_tail(state);
+}
+
+static void intel_atomic_track_fbs(struct drm_atomic_state *state)
+{
+	struct drm_plane_state *old_plane_state;
+	struct drm_plane *plane;
+	struct drm_i915_gem_object *obj, *old_obj;
+	struct intel_plane *intel_plane;
+	int i;
+
+	mutex_lock(&state->dev->struct_mutex);
+	for_each_plane_in_state(state, plane, old_plane_state, i) {
+		obj = intel_fb_obj(plane->state->fb);
+		old_obj = intel_fb_obj(old_plane_state->fb);
+		intel_plane = to_intel_plane(plane);
+
+		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
+	}
+	mutex_unlock(&state->dev->struct_mutex);
+}
+
+/**
+ * intel_atomic_commit - commit validated state object
+ * @dev: DRM device
+ * @state: the top-level driver state object
+ * @nonblock: nonblocking commit
+ *
+ * This function commits a top-level state object that has been validated
+ * with drm_atomic_helper_check().
+ *
+ * FIXME:  Atomic modeset support for i915 is not yet complete.  At the moment
+ * nonblocking commits are only safe for pure plane updates. Everything else
+ * should work though.
+ *
+ * RETURNS
+ * Zero for success or -errno.
+ */
+static int intel_atomic_commit(struct drm_device *dev,
+			       struct drm_atomic_state *state,
+			       bool nonblock)
+{
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret = 0;
+
+	if (intel_state->modeset && nonblock) {
+		DRM_DEBUG_KMS("nonblocking commit for modeset not yet implemented.\n");
+		return -EINVAL;
+	}
+
+	ret = drm_atomic_helper_setup_commit(state, nonblock);
+	if (ret)
+		return ret;
+
+	INIT_WORK(&state->commit_work, intel_atomic_commit_work);
+
+	ret = intel_atomic_prepare_commit(dev, state, nonblock);
+	if (ret) {
+		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
+		return ret;
+	}
+
+	drm_atomic_helper_swap_state(state, true);
+	dev_priv->wm.distrust_bios_wm = false;
+	dev_priv->wm.skl_results = intel_state->wm_results;
+	intel_shared_dpll_commit(state);
+	intel_atomic_track_fbs(state);
+
+	if (nonblock)
+		queue_work(system_unbound_wq, &state->commit_work);
+	else
+		intel_atomic_commit_tail(state);
 
 	return 0;
 }
@@ -13917,7 +14008,7 @@
 	.set_config = drm_atomic_helper_set_config,
 	.set_property = drm_atomic_helper_crtc_set_property,
 	.destroy = intel_crtc_destroy,
-	.page_flip = intel_crtc_page_flip,
+	.page_flip = drm_atomic_helper_page_flip,
 	.atomic_duplicate_state = intel_crtc_duplicate_state,
 	.atomic_destroy_state = intel_crtc_destroy_state,
 };
@@ -13942,9 +14033,9 @@
 {
 	struct drm_device *dev = plane->dev;
 	struct drm_framebuffer *fb = new_state->fb;
-	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
+	struct reservation_object *resv;
 	int ret = 0;
 
 	if (!obj && !old_obj)
@@ -13974,12 +14065,15 @@
 		}
 	}
 
+	if (!obj)
+		return 0;
+
 	/* For framebuffer backed by dmabuf, wait for fence */
-	if (obj && obj->base.dma_buf) {
+	resv = i915_gem_object_get_dmabuf_resv(obj);
+	if (resv) {
 		long lret;
 
-		lret = reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv,
-							   false, true,
+		lret = reservation_object_wait_timeout_rcu(resv, false, true,
 							   MAX_SCHEDULE_TIMEOUT);
 		if (lret == -ERESTARTSYS)
 			return lret;
@@ -13987,9 +14081,7 @@
 		WARN(lret < 0, "waiting returns %li\n", lret);
 	}
 
-	if (!obj) {
-		ret = 0;
-	} else if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
 	    INTEL_INFO(dev)->cursor_needs_physical) {
 		int align = IS_I830(dev) ? 16 * 1024 : 256;
 		ret = i915_gem_object_attach_phys(obj, align);
@@ -14000,15 +14092,11 @@
 	}
 
 	if (ret == 0) {
-		if (obj) {
-			struct intel_plane_state *plane_state =
-				to_intel_plane_state(new_state);
+		struct intel_plane_state *plane_state =
+			to_intel_plane_state(new_state);
 
-			i915_gem_request_assign(&plane_state->wait_req,
-						obj->last_write_req);
-		}
-
-		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
+		i915_gem_request_assign(&plane_state->wait_req,
+					obj->last_write_req);
 	}
 
 	return ret;
@@ -14028,7 +14116,6 @@
 		       const struct drm_plane_state *old_state)
 {
 	struct drm_device *dev = plane->dev;
-	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct intel_plane_state *old_intel_state;
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
 	struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
@@ -14042,11 +14129,6 @@
 	    !INTEL_INFO(dev)->cursor_needs_physical))
 		intel_unpin_fb_obj(old_state->fb, old_state->rotation);
 
-	/* prepare_fb aborted? */
-	if ((old_obj && (old_obj->frontbuffer_bits & intel_plane->frontbuffer_bit)) ||
-	    (obj && !(obj->frontbuffer_bits & intel_plane->frontbuffer_bit)))
-		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
-
 	i915_gem_request_assign(&old_intel_state->wait_req, NULL);
 }
 
@@ -14704,7 +14786,7 @@
 		if (I915_READ(PCH_DP_D) & DP_DETECTED)
 			intel_dp_init(dev, PCH_DP_D, PORT_D);
 	} else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
-		bool has_edp;
+		bool has_edp, has_port;
 
 		/*
 		 * The DP_DETECTED bit is the latched state of the DDC
@@ -14714,25 +14796,37 @@
 		 * Thus we can't rely on the DP_DETECTED bit alone to detect
 		 * eDP ports. Consult the VBT as well as DP_DETECTED to
 		 * detect eDP ports.
+		 *
+		 * Sadly the straps seem to be missing sometimes even for HDMI
+		 * ports (eg. on Voyo V3 - CHT x7-Z8700), so check both strap
+		 * and VBT for the presence of the port. Additionally we can't
+		 * trust the port type the VBT declares as we've seen at least
+		 * HDMI ports that the VBT claim are DP or eDP.
 		 */
 		has_edp = intel_dp_is_edp(dev, PORT_B);
-		if (I915_READ(VLV_DP_B) & DP_DETECTED || has_edp)
+		has_port = intel_bios_is_port_present(dev_priv, PORT_B);
+		if (I915_READ(VLV_DP_B) & DP_DETECTED || has_port)
 			has_edp &= intel_dp_init(dev, VLV_DP_B, PORT_B);
-		if (I915_READ(VLV_HDMIB) & SDVO_DETECTED && !has_edp)
+		if ((I915_READ(VLV_HDMIB) & SDVO_DETECTED || has_port) && !has_edp)
 			intel_hdmi_init(dev, VLV_HDMIB, PORT_B);
 
 		has_edp = intel_dp_is_edp(dev, PORT_C);
-		if (I915_READ(VLV_DP_C) & DP_DETECTED || has_edp)
+		has_port = intel_bios_is_port_present(dev_priv, PORT_C);
+		if (I915_READ(VLV_DP_C) & DP_DETECTED || has_port)
 			has_edp &= intel_dp_init(dev, VLV_DP_C, PORT_C);
-		if (I915_READ(VLV_HDMIC) & SDVO_DETECTED && !has_edp)
+		if ((I915_READ(VLV_HDMIC) & SDVO_DETECTED || has_port) && !has_edp)
 			intel_hdmi_init(dev, VLV_HDMIC, PORT_C);
 
 		if (IS_CHERRYVIEW(dev)) {
-			/* eDP not supported on port D, so don't check VBT */
-			if (I915_READ(CHV_HDMID) & SDVO_DETECTED)
-				intel_hdmi_init(dev, CHV_HDMID, PORT_D);
-			if (I915_READ(CHV_DP_D) & DP_DETECTED)
+			/*
+			 * eDP not supported on port D,
+			 * so no need to worry about it
+			 */
+			has_port = intel_bios_is_port_present(dev_priv, PORT_D);
+			if (I915_READ(CHV_DP_D) & DP_DETECTED || has_port)
 				intel_dp_init(dev, CHV_DP_D, PORT_D);
+			if (I915_READ(CHV_HDMID) & SDVO_DETECTED || has_port)
+				intel_hdmi_init(dev, CHV_HDMID, PORT_D);
 		}
 
 		intel_dsi_init(dev);
@@ -15214,9 +15308,9 @@
 			valleyview_modeset_calc_cdclk;
 	} else if (IS_BROXTON(dev_priv)) {
 		dev_priv->display.modeset_commit_cdclk =
-			broxton_modeset_commit_cdclk;
+			bxt_modeset_commit_cdclk;
 		dev_priv->display.modeset_calc_cdclk =
-			broxton_modeset_calc_cdclk;
+			bxt_modeset_calc_cdclk;
 	} else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		dev_priv->display.modeset_commit_cdclk =
 			skl_modeset_commit_cdclk;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 65e60ff..7d0e071 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -571,6 +571,12 @@
 
 	uint8_t lane_count;
 
+	/*
+	 * Used by platforms having DP/HDMI PHY with programmable lane
+	 * latency optimization.
+	 */
+	uint8_t lane_lat_optim_mask;
+
 	/* Panel fitter controls for gen2-gen4 + VLV */
 	struct {
 		u32 control;
@@ -1252,11 +1258,14 @@
 void intel_finish_reset(struct drm_i915_private *dev_priv);
 void hsw_enable_pc8(struct drm_i915_private *dev_priv);
 void hsw_disable_pc8(struct drm_i915_private *dev_priv);
-void broxton_init_cdclk(struct drm_i915_private *dev_priv);
-void broxton_uninit_cdclk(struct drm_i915_private *dev_priv);
-void broxton_ddi_phy_init(struct drm_i915_private *dev_priv);
-void broxton_ddi_phy_uninit(struct drm_i915_private *dev_priv);
-void broxton_ddi_phy_verify_state(struct drm_i915_private *dev_priv);
+void bxt_init_cdclk(struct drm_i915_private *dev_priv);
+void bxt_uninit_cdclk(struct drm_i915_private *dev_priv);
+void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy);
+void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy);
+bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv,
+			    enum dpio_phy phy);
+bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv,
+			      enum dpio_phy phy);
 void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv);
 void bxt_enable_dc9(struct drm_i915_private *dev_priv);
 void bxt_disable_dc9(struct drm_i915_private *dev_priv);
@@ -1414,11 +1423,15 @@
 void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
 			   struct drm_atomic_state *state);
 bool intel_fbc_is_active(struct drm_i915_private *dev_priv);
-void intel_fbc_pre_update(struct intel_crtc *crtc);
+void intel_fbc_pre_update(struct intel_crtc *crtc,
+			  struct intel_crtc_state *crtc_state,
+			  struct intel_plane_state *plane_state);
 void intel_fbc_post_update(struct intel_crtc *crtc);
 void intel_fbc_init(struct drm_i915_private *dev_priv);
 void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv);
-void intel_fbc_enable(struct intel_crtc *crtc);
+void intel_fbc_enable(struct intel_crtc *crtc,
+		      struct intel_crtc_state *crtc_state,
+		      struct intel_plane_state *plane_state);
 void intel_fbc_disable(struct intel_crtc *crtc);
 void intel_fbc_global_disable(struct drm_i915_private *dev_priv);
 void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index e9eda69..b444d0e3 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -1172,6 +1172,12 @@
 	if (intel_dsi->clock_stop)
 		tmp |= CLOCKSTOP;
 
+	if (IS_BROXTON(dev_priv)) {
+		tmp |= BXT_DPHY_DEFEATURE_EN;
+		if (!is_cmd_mode(intel_dsi))
+			tmp |= BXT_DEFEATURE_DPI_FIFO_CTR;
+	}
+
 	for_each_dsi_port(port, intel_dsi->ports) {
 		I915_WRITE(MIPI_DSI_FUNC_PRG(port), val);
 
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 45ee07b..ecabd59 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -481,10 +481,10 @@
 		intel_fbc_hw_deactivate(dev_priv);
 }
 
-static bool multiple_pipes_ok(struct intel_crtc *crtc)
+static bool multiple_pipes_ok(struct intel_crtc *crtc,
+			      struct intel_plane_state *plane_state)
 {
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct drm_plane *primary = crtc->base.primary;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	enum pipe pipe = crtc->pipe;
 
@@ -492,9 +492,7 @@
 	if (!no_fbc_on_multiple_pipes(dev_priv))
 		return true;
 
-	WARN_ON(!drm_modeset_is_locked(&primary->mutex));
-
-	if (to_intel_plane_state(primary->state)->visible)
+	if (plane_state->visible)
 		fbc->visible_pipes_mask |= (1 << pipe);
 	else
 		fbc->visible_pipes_mask &= ~(1 << pipe);
@@ -709,21 +707,16 @@
 	return effective_w <= max_w && effective_h <= max_h;
 }
 
-static void intel_fbc_update_state_cache(struct intel_crtc *crtc)
+static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
+					 struct intel_crtc_state *crtc_state,
+					 struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	struct intel_fbc_state_cache *cache = &fbc->state_cache;
-	struct intel_crtc_state *crtc_state =
-		to_intel_crtc_state(crtc->base.state);
-	struct intel_plane_state *plane_state =
-		to_intel_plane_state(crtc->base.primary->state);
 	struct drm_framebuffer *fb = plane_state->base.fb;
 	struct drm_i915_gem_object *obj;
 
-	WARN_ON(!drm_modeset_is_locked(&crtc->base.mutex));
-	WARN_ON(!drm_modeset_is_locked(&crtc->base.primary->mutex));
-
 	cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags;
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		cache->crtc.hsw_bdw_pixel_rate =
@@ -888,7 +881,9 @@
 	return memcmp(params1, params2, sizeof(*params1)) == 0;
 }
 
-void intel_fbc_pre_update(struct intel_crtc *crtc)
+void intel_fbc_pre_update(struct intel_crtc *crtc,
+			  struct intel_crtc_state *crtc_state,
+			  struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_fbc *fbc = &dev_priv->fbc;
@@ -898,7 +893,7 @@
 
 	mutex_lock(&fbc->lock);
 
-	if (!multiple_pipes_ok(crtc)) {
+	if (!multiple_pipes_ok(crtc, plane_state)) {
 		fbc->no_fbc_reason = "more than one pipe active";
 		goto deactivate;
 	}
@@ -906,7 +901,7 @@
 	if (!fbc->enabled || fbc->crtc != crtc)
 		goto unlock;
 
-	intel_fbc_update_state_cache(crtc);
+	intel_fbc_update_state_cache(crtc, crtc_state, plane_state);
 
 deactivate:
 	intel_fbc_deactivate(dev_priv);
@@ -1090,7 +1085,9 @@
  * intel_fbc_enable multiple times for the same pipe without an
  * intel_fbc_disable in the middle, as long as it is deactivated.
  */
-void intel_fbc_enable(struct intel_crtc *crtc)
+void intel_fbc_enable(struct intel_crtc *crtc,
+		      struct intel_crtc_state *crtc_state,
+		      struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_fbc *fbc = &dev_priv->fbc;
@@ -1103,19 +1100,19 @@
 	if (fbc->enabled) {
 		WARN_ON(fbc->crtc == NULL);
 		if (fbc->crtc == crtc) {
-			WARN_ON(!crtc->config->enable_fbc);
+			WARN_ON(!crtc_state->enable_fbc);
 			WARN_ON(fbc->active);
 		}
 		goto out;
 	}
 
-	if (!crtc->config->enable_fbc)
+	if (!crtc_state->enable_fbc)
 		goto out;
 
 	WARN_ON(fbc->active);
 	WARN_ON(fbc->crtc != NULL);
 
-	intel_fbc_update_state_cache(crtc);
+	intel_fbc_update_state_cache(crtc, crtc_state, plane_state);
 	if (intel_fbc_alloc_cfb(crtc)) {
 		fbc->no_fbc_reason = "not enough stolen memory";
 		goto out;
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 4c725ad..4babefc 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -552,8 +552,6 @@
 	drm_fb_helper_fini(&ifbdev->helper);
 
 	if (ifbdev->fb) {
-		drm_framebuffer_unregister_private(&ifbdev->fb->base);
-
 		mutex_lock(&dev->struct_mutex);
 		intel_unpin_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0));
 		mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 41601c7..4df80cc 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -156,11 +156,11 @@
 extern int intel_guc_resume(struct drm_device *dev);
 
 /* i915_guc_submission.c */
-int i915_guc_submission_init(struct drm_device *dev);
-int i915_guc_submission_enable(struct drm_device *dev);
+int i915_guc_submission_init(struct drm_i915_private *dev_priv);
+int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
 int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
 int i915_guc_submit(struct drm_i915_gem_request *rq);
-void i915_guc_submission_disable(struct drm_device *dev);
-void i915_guc_submission_fini(struct drm_device *dev);
+void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
+void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index f2b88c7..8fe96a2 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -425,9 +425,13 @@
 	if (!i915.enable_guc_loading) {
 		err = 0;
 		goto fail;
-	} else if (fw_path == NULL || *fw_path == '\0') {
-		if (*fw_path == '\0')
-			DRM_INFO("No GuC firmware known for this platform\n");
+	} else if (fw_path == NULL) {
+		/* Device is known to have no uCode (e.g. no GuC) */
+		err = -ENXIO;
+		goto fail;
+	} else if (*fw_path == '\0') {
+		/* Device has a GuC but we don't know what f/w to load? */
+		DRM_INFO("No GuC firmware known for this platform\n");
 		err = -ENODEV;
 		goto fail;
 	}
@@ -449,7 +453,7 @@
 		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
 		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
 
-	err = i915_guc_submission_init(dev);
+	err = i915_guc_submission_init(dev_priv);
 	if (err)
 		goto fail;
 
@@ -488,10 +492,7 @@
 		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
 
 	if (i915.enable_guc_submission) {
-		/* The execbuf_client will be recreated. Release it first. */
-		i915_guc_submission_disable(dev);
-
-		err = i915_guc_submission_enable(dev);
+		err = i915_guc_submission_enable(dev_priv);
 		if (err)
 			goto fail;
 		direct_interrupts_to_guc(dev_priv);
@@ -504,8 +505,8 @@
 		guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;
 
 	direct_interrupts_to_host(dev_priv);
-	i915_guc_submission_disable(dev);
-	i915_guc_submission_fini(dev);
+	i915_guc_submission_disable(dev_priv);
+	i915_guc_submission_fini(dev_priv);
 
 	/*
 	 * We've failed to load the firmware :(
@@ -524,18 +525,20 @@
 		ret = 0;
 	}
 
-	if (err == 0)
+	if (err == 0 && !HAS_GUC_UCODE(dev))
+		;	/* Don't mention the GuC! */
+	else if (err == 0)
 		DRM_INFO("GuC firmware load skipped\n");
-	else if (ret == -EIO)
-		DRM_ERROR("GuC firmware load failed: %d\n", err);
-	else
+	else if (ret != -EIO)
 		DRM_INFO("GuC firmware load failed: %d\n", err);
+	else
+		DRM_ERROR("GuC firmware load failed: %d\n", err);
 
 	if (i915.enable_guc_submission) {
 		if (fw_path == NULL)
 			DRM_INFO("GuC submission without firmware not supported\n");
 		if (ret == 0)
-			DRM_INFO("Falling back to execlist mode\n");
+			DRM_INFO("Falling back from GuC submission to execlist mode\n");
 		else
 			DRM_ERROR("GuC init failed: %d\n", ret);
 	}
@@ -730,8 +733,8 @@
 
 	mutex_lock(&dev->struct_mutex);
 	direct_interrupts_to_host(dev_priv);
-	i915_guc_submission_disable(dev);
-	i915_guc_submission_fini(dev);
+	i915_guc_submission_disable(dev_priv);
+	i915_guc_submission_fini(dev_priv);
 
 	if (guc_fw->guc_fw_obj)
 		drm_gem_object_unreference(&guc_fw->guc_fw_obj->base);
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
new file mode 100644
index 0000000..9fa458c
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "i915_drv.h"
+#include "intel_gvt.h"
+
+/**
+ * DOC: Intel GVT-g host support
+ *
+ * Intel GVT-g is a graphics virtualization technology which shares the
+ * GPU among multiple virtual machines on a time-sharing basis. Each
+ * virtual machine is presented a virtual GPU (vGPU), which has equivalent
+ * features as the underlying physical GPU (pGPU), so i915 driver can run
+ * seamlessly in a virtual machine. This file provides the englightments
+ * of GVT and the necessary components used by GVT in i915 driver.
+ */
+
+static bool is_supported_device(struct drm_i915_private *dev_priv)
+{
+	if (IS_BROADWELL(dev_priv))
+		return true;
+	return false;
+}
+
+/**
+ * intel_gvt_init - initialize GVT components
+ * @dev_priv: drm i915 private data
+ *
+ * This function is called at the initialization stage to create a GVT device.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_gvt_init(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	if (!i915.enable_gvt) {
+		DRM_DEBUG_DRIVER("GVT-g is disabled by kernel params\n");
+		return 0;
+	}
+
+	if (!is_supported_device(dev_priv)) {
+		DRM_DEBUG_DRIVER("Unsupported device. GVT-g is disabled\n");
+		return 0;
+	}
+
+	/*
+	 * We're not in host or fail to find a MPT module, disable GVT-g
+	 */
+	ret = intel_gvt_init_host();
+	if (ret) {
+		DRM_DEBUG_DRIVER("Not in host or MPT modules not found\n");
+		return 0;
+	}
+
+	ret = intel_gvt_init_device(dev_priv);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Fail to init GVT device\n");
+		return 0;
+	}
+
+	return 0;
+}
+
+/**
+ * intel_gvt_cleanup - cleanup GVT components when i915 driver is unloading
+ * @dev_priv: drm i915 private *
+ *
+ * This function is called at the i915 driver unloading stage, to shutdown
+ * GVT components and release the related resources.
+ */
+void intel_gvt_cleanup(struct drm_i915_private *dev_priv)
+{
+	if (!intel_gvt_active(dev_priv))
+		return;
+
+	intel_gvt_clean_device(dev_priv);
+}
diff --git a/drivers/gpu/drm/i915/intel_gvt.h b/drivers/gpu/drm/i915/intel_gvt.h
new file mode 100644
index 0000000..960211d
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_gvt.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _INTEL_GVT_H_
+#define _INTEL_GVT_H_
+
+#include "gvt/gvt.h"
+
+#ifdef CONFIG_DRM_I915_GVT
+int intel_gvt_init(struct drm_i915_private *dev_priv);
+void intel_gvt_cleanup(struct drm_i915_private *dev_priv);
+int intel_gvt_init_device(struct drm_i915_private *dev_priv);
+void intel_gvt_clean_device(struct drm_i915_private *dev_priv);
+int intel_gvt_init_host(void);
+#else
+static inline int intel_gvt_init(struct drm_i915_private *dev_priv)
+{
+	return 0;
+}
+static inline void intel_gvt_cleanup(struct drm_i915_private *dev_priv)
+{
+}
+#endif
+
+#endif /* _INTEL_GVT_H_ */
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index d4fc3d4..fb21626 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1810,6 +1810,9 @@
 	enum port port = intel_dig_port->port;
 	uint8_t alternate_ddc_pin;
 
+	DRM_DEBUG_KMS("Adding HDMI connector on port %c\n",
+		      port_name(port));
+
 	if (WARN(intel_dig_port->max_lanes < 4,
 		 "Not enough lanes (%d) for HDMI on port %c\n",
 		 intel_dig_port->max_lanes, port_name(port)))
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5c191a1..debed01 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -208,16 +208,6 @@
 } while (0)
 
 enum {
-	ADVANCED_CONTEXT = 0,
-	LEGACY_32B_CONTEXT,
-	ADVANCED_AD_CONTEXT,
-	LEGACY_64B_CONTEXT
-};
-#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
-#define GEN8_CTX_ADDRESSING_MODE(dev)  (USES_FULL_48BIT_PPGTT(dev) ?\
-		LEGACY_64B_CONTEXT :\
-		LEGACY_32B_CONTEXT)
-enum {
 	FAULT_AND_HANG = 0,
 	FAULT_AND_HALT, /* Debug only */
 	FAULT_AND_STREAM,
@@ -238,7 +228,7 @@
 
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
- * @dev: DRM device.
+ * @dev_priv: i915 device private
  * @enable_execlists: value of i915.enable_execlists module parameter.
  *
  * Only certain platforms support Execlists (the prerequisites being
@@ -281,8 +271,6 @@
 					(engine->id == VCS || engine->id == VCS2);
 
 	engine->ctx_desc_template = GEN8_CTX_VALID;
-	engine->ctx_desc_template |= GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
-				   GEN8_CTX_ADDRESSING_MODE_SHIFT;
 	if (IS_GEN8(dev_priv))
 		engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
 	engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE;
@@ -325,7 +313,8 @@
 
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH));
 
-	desc = engine->ctx_desc_template;			/* bits  0-11 */
+	desc = ctx->desc_template;				/* bits  3-4  */
+	desc |= engine->ctx_desc_template;			/* bits  0-11 */
 	desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE;
 								/* bits 12-31 */
 	desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;		/* bits 32-52 */
@@ -415,6 +404,20 @@
 	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
+static inline void execlists_context_status_change(
+		struct drm_i915_gem_request *rq,
+		unsigned long status)
+{
+	/*
+	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
+	 * The compiler should eliminate this function as dead-code.
+	 */
+	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
+		return;
+
+	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
+}
+
 static void execlists_context_unqueue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
@@ -441,6 +444,20 @@
 			i915_gem_request_unreference(req0);
 			req0 = cursor;
 		} else {
+			if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
+				/*
+				 * req0 (after merged) ctx requires single
+				 * submission, stop picking
+				 */
+				if (req0->ctx->execlists_force_single_submission)
+					break;
+				/*
+				 * req0 ctx doesn't require single submission,
+				 * but next req ctx requires, stop picking
+				 */
+				if (cursor->ctx->execlists_force_single_submission)
+					break;
+			}
 			req1 = cursor;
 			WARN_ON(req1->elsp_submitted);
 			break;
@@ -450,6 +467,12 @@
 	if (unlikely(!req0))
 		return;
 
+	execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN);
+
+	if (req1)
+		execlists_context_status_change(req1,
+						INTEL_CONTEXT_SCHEDULE_IN);
+
 	if (req0->elsp_submitted & engine->idle_lite_restore_wa) {
 		/*
 		 * WaIdleLiteRestore: make sure we never cause a lite restore
@@ -488,6 +511,8 @@
 	if (--head_req->elsp_submitted > 0)
 		return 0;
 
+	execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
+
 	list_del(&head_req->execlist_link);
 	i915_gem_request_unreference(head_req);
 
@@ -516,7 +541,7 @@
 
 /**
  * intel_lrc_irq_handler() - handle Context Switch interrupts
- * @engine: Engine Command Streamer to handle.
+ * @data: tasklet handler passed in unsigned long
  *
  * Check the unread Context Status Buffers and manage the submission of new
  * contexts to the ELSP accordingly.
@@ -786,15 +811,9 @@
 
 /**
  * execlists_submission() - submit a batchbuffer for execution, Execlists style
- * @dev: DRM device.
- * @file: DRM file.
- * @ring: Engine Command Streamer to submit to.
- * @ctx: Context to employ for this submission.
+ * @params: execbuffer call parameters.
  * @args: execbuffer call arguments.
  * @vmas: list of vmas.
- * @batch_obj: the batchbuffer to submit.
- * @exec_start: batchbuffer start virtual address pointer.
- * @dispatch_flags: translated execbuffer call flags.
  *
  * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
  * away the submission details of the execbuffer ioctl call.
@@ -1081,12 +1100,13 @@
 	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 	/*
-	 * WaDisableLSQCROPERFforOCL:skl
+	 * WaDisableLSQCROPERFforOCL:skl,kbl
 	 * This WA is implemented in skl_init_clock_gating() but since
 	 * this batch updates GEN8_L3SQCREG4 with default value we need to
 	 * set this bit here to retain the WA during flush.
 	 */
-	if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_E0))
+	if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_E0) ||
+	    IS_KBL_REVID(engine->i915, 0, KBL_REVID_E0))
 		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
 
 	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
@@ -1138,7 +1158,7 @@
 /**
  * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
  *
- * @ring: only applicable for RCS
+ * @engine: only applicable for RCS
  * @wa_ctx: structure representing wa_ctx
  *  offset: specifies start of the batch, should be cache-aligned. This is updated
  *    with the offset value received as input.
@@ -1212,7 +1232,7 @@
 /**
  * gen8_init_perctx_bb() - initialize per ctx batch with WA
  *
- * @ring: only applicable for RCS
+ * @engine: only applicable for RCS
  * @wa_ctx: structure representing wa_ctx
  *  offset: specifies start of the batch, should be cache-aligned.
  *  size: size of the batch in DWORDS but HW expects in terms of cachelines
@@ -1260,6 +1280,22 @@
 		return ret;
 	index = ret;
 
+	/* WaClearSlmSpaceAtContextSwitch:kbl */
+	/* Actual scratch location is at 128 bytes offset */
+	if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
+		uint32_t scratch_addr
+			= engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
+
+		wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
+		wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
+					   PIPE_CONTROL_GLOBAL_GTT_IVB |
+					   PIPE_CONTROL_CS_STALL |
+					   PIPE_CONTROL_QW_WRITE));
+		wa_ctx_emit(batch, index, scratch_addr);
+		wa_ctx_emit(batch, index, 0);
+		wa_ctx_emit(batch, index, 0);
+		wa_ctx_emit(batch, index, 0);
+	}
 	/* Pad to end of cacheline */
 	while (index % CACHELINE_DWORDS)
 		wa_ctx_emit(batch, index, MI_NOOP);
@@ -1657,9 +1693,10 @@
 	struct intel_ringbuffer *ringbuf = request->ringbuf;
 	struct intel_engine_cs *engine = ringbuf->engine;
 	u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
-	bool vf_flush_wa = false;
+	bool vf_flush_wa = false, dc_flush_wa = false;
 	u32 flags = 0;
 	int ret;
+	int len;
 
 	flags |= PIPE_CONTROL_CS_STALL;
 
@@ -1686,9 +1723,21 @@
 		 */
 		if (IS_GEN9(request->i915))
 			vf_flush_wa = true;
+
+		/* WaForGAMHang:kbl */
+		if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0))
+			dc_flush_wa = true;
 	}
 
-	ret = intel_ring_begin(request, vf_flush_wa ? 12 : 6);
+	len = 6;
+
+	if (vf_flush_wa)
+		len += 6;
+
+	if (dc_flush_wa)
+		len += 12;
+
+	ret = intel_ring_begin(request, len);
 	if (ret)
 		return ret;
 
@@ -1701,12 +1750,31 @@
 		intel_logical_ring_emit(ringbuf, 0);
 	}
 
+	if (dc_flush_wa) {
+		intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+		intel_logical_ring_emit(ringbuf, PIPE_CONTROL_DC_FLUSH_ENABLE);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+	}
+
 	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
 	intel_logical_ring_emit(ringbuf, flags);
 	intel_logical_ring_emit(ringbuf, scratch_addr);
 	intel_logical_ring_emit(ringbuf, 0);
 	intel_logical_ring_emit(ringbuf, 0);
 	intel_logical_ring_emit(ringbuf, 0);
+
+	if (dc_flush_wa) {
+		intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+		intel_logical_ring_emit(ringbuf, PIPE_CONTROL_CS_STALL);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+		intel_logical_ring_emit(ringbuf, 0);
+	}
+
 	intel_logical_ring_advance(ringbuf);
 
 	return 0;
@@ -1860,7 +1928,7 @@
 /**
  * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
  *
- * @ring: Engine Command Streamer.
+ * @engine: Engine Command Streamer.
  *
  */
 void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
@@ -2413,7 +2481,7 @@
 
 /**
  * intel_lr_context_size() - return the size of the context for an engine
- * @ring: which engine to find the context size for
+ * @engine: which engine to find the context size for
  *
  * Each engine may require a different amount of space for a context image,
  * so when allocating (or copying) an image, this function can be used to
@@ -2484,7 +2552,7 @@
 		return PTR_ERR(ctx_obj);
 	}
 
-	ringbuf = intel_engine_create_ringbuffer(engine, 4 * PAGE_SIZE);
+	ringbuf = intel_engine_create_ringbuffer(engine, ctx->ring_size);
 	if (IS_ERR(ringbuf)) {
 		ret = PTR_ERR(ringbuf);
 		goto error_deref_obj;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a8db42a..2b8255c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -57,6 +57,11 @@
 #define GEN8_CSB_READ_PTR(csb_status) \
 	(((csb_status) & GEN8_CSB_READ_PTR_MASK) >> 8)
 
+enum {
+	INTEL_CONTEXT_SCHEDULE_IN = 0,
+	INTEL_CONTEXT_SCHEDULE_OUT,
+};
+
 /* Logical Rings */
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request);
 int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request);
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index b765c75..3c1482b 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -156,6 +156,16 @@
 			  "Platform that should have a MOCS table does not.\n");
 	}
 
+	/* WaDisableSkipCaching:skl,bxt,kbl */
+	if (IS_GEN9(dev_priv)) {
+		int i;
+
+		for (i = 0; i < table->size; i++)
+			if (WARN_ON(table->table[i].l3cc_value &
+				    (L3_ESC(1) | L3_SCC(0x7))))
+				return false;
+	}
+
 	return result;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0827459..658a7565 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -55,13 +55,37 @@
 #define INTEL_RC6p_ENABLE			(1<<1)
 #define INTEL_RC6pp_ENABLE			(1<<2)
 
+static void gen9_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
+	I915_WRITE(CHICKEN_PAR1_1,
+		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
+
+	I915_WRITE(GEN8_CONFIG0,
+		   I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);
+
+	/* WaEnableChickenDCPR:skl,bxt,kbl */
+	I915_WRITE(GEN8_CHICKEN_DCPR_1,
+		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
+
+	/* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */
+	/* WaFbcWakeMemOn:skl,bxt,kbl */
+	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
+		   DISP_FBC_WM_DIS |
+		   DISP_FBC_MEMORY_WAKE);
+
+	/* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */
+	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
+		   ILK_DPFC_DISABLE_DUMMY0);
+}
+
 static void bxt_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	/* See Bspec note for PSR2_CTL bit 31, Wa#828:bxt */
-	I915_WRITE(CHICKEN_PAR1_1,
-		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
+	gen9_init_clock_gating(dev);
 
 	/* WaDisableSDEUnitClockGating:bxt */
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
@@ -6963,13 +6987,40 @@
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
 }
 
+static void kabylake_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	gen9_init_clock_gating(dev);
+
+	/* WaDisableSDEUnitClockGating:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaDisableGamClockGating:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
+			   GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaFbcNukeOnHostModify:kbl */
+	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
+		   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
+}
+
 static void skylake_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,kbl */
-	I915_WRITE(CHICKEN_PAR1_1,
-		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
+	gen9_init_clock_gating(dev);
+
+	/* WAC6entrylatency:skl */
+	I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
+		   FBC_LLC_FULLY_OPEN);
+
+	/* WaFbcNukeOnHostModify:skl */
+	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
+		   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
 }
 
 static void broadwell_init_clock_gating(struct drm_device *dev)
@@ -7016,6 +7067,10 @@
 	 */
 	I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
 
+	/* WaKVMNotificationOnConfigChange:bdw */
+	I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
+		   | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
+
 	lpt_init_clock_gating(dev);
 }
 
@@ -7433,7 +7488,7 @@
 	if (IS_SKYLAKE(dev_priv))
 		dev_priv->display.init_clock_gating = skylake_init_clock_gating;
 	else if (IS_KABYLAKE(dev_priv))
-		dev_priv->display.init_clock_gating = skylake_init_clock_gating;
+		dev_priv->display.init_clock_gating = kabylake_init_clock_gating;
 	else if (IS_BROXTON(dev_priv))
 		dev_priv->display.init_clock_gating = bxt_init_clock_gating;
 	else if (IS_BROADWELL(dev_priv))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8d35a39..fedd270 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -908,24 +908,26 @@
 static int gen9_init_workarounds(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	uint32_t tmp;
 	int ret;
 
-	/* WaEnableLbsSlaRetryTimerDecrement:skl */
+	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */
+	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
+
+	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */
 	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
 		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
-	/* WaDisableKillLogic:bxt,skl */
+	/* WaDisableKillLogic:bxt,skl,kbl */
 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
 		   ECOCHK_DIS_TLB);
 
-	/* WaClearFlowControlGpgpuContextSave:skl,bxt */
-	/* WaDisablePartialInstShootdown:skl,bxt */
+	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */
+	/* WaDisablePartialInstShootdown:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 			  FLOW_CONTROL_ENABLE |
 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
-	/* Syncing dependencies between camera and graphics:skl,bxt */
+	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
 
@@ -947,18 +949,18 @@
 		 */
 	}
 
-	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
-	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt */
+	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl */
+	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 			  GEN9_ENABLE_YV12_BUGFIX |
 			  GEN9_ENABLE_GPGPU_PREEMPTION);
 
-	/* Wa4x4STCOptimizationDisable:skl,bxt */
-	/* WaDisablePartialResolveInVc:skl,bxt */
+	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl */
+	/* WaDisablePartialResolveInVc:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
 					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
 
-	/* WaCcsTlbPrefetchDisable:skl,bxt */
+	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl */
 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 
@@ -968,31 +970,57 @@
 		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
 				  PIXEL_MASK_CAMMING_DISABLE);
 
-	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
-	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
-	if (IS_SKL_REVID(dev_priv, SKL_REVID_F0, REVID_FOREVER) ||
-	    IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
-		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
-	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
+	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
 
-	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
-	if (IS_SKYLAKE(dev_priv) || IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
+	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
+	 * both tied to WaForceContextSaveRestoreNonCoherent
+	 * in some hsds for skl. We keep the tie for all gen9. The
+	 * documentation is a bit hazy and so we want to get common behaviour,
+	 * even though there is no clear evidence we would need both on kbl/bxt.
+	 * This area has been source of system hangs so we play it safe
+	 * and mimic the skl regardless of what bspec says.
+	 *
+	 * Use Force Non-Coherent whenever executing a 3D context. This
+	 * is a workaround for a possible hang in the unlikely event
+	 * a TLB invalidation occurs during a PSD flush.
+	 */
+
+	/* WaForceEnableNonCoherent:skl,bxt,kbl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_NON_COHERENT);
+
+	/* WaDisableHDCInvalidation:skl,bxt,kbl */
+	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+		   BDW_DISABLE_HDC_INVALIDATION);
+
+	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */
+	if (IS_SKYLAKE(dev_priv) ||
+	    IS_KABYLAKE(dev_priv) ||
+	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
 
-	/* WaDisableSTUnitPowerOptimization:skl,bxt */
+	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 
-	/* WaOCLCoherentLineFlush:skl,bxt */
+	/* WaOCLCoherentLineFlush:skl,bxt,kbl */
 	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
 				    GEN8_LQSC_FLUSH_COHERENT_LINES));
 
-	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
+	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */
+	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
+	if (ret)
+		return ret;
+
+	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */
 	ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
 	if (ret)
 		return ret;
 
-	/* WaAllowUMDToModifyHDCChicken1:skl,bxt */
+	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
 	if (ret)
 		return ret;
@@ -1060,7 +1088,7 @@
 			   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
 	}
 
-	if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_D0)) {
+	if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0)) {
 		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
 		I915_WRITE(FF_SLICE_CS_CHICKEN2,
 			   _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
@@ -1085,22 +1113,6 @@
 		WA_SET_BIT_MASKED(HIZ_CHICKEN,
 				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
-	/* This is tied to WaForceContextSaveRestoreNonCoherent */
-	if (IS_SKL_REVID(dev_priv, 0, REVID_FOREVER)) {
-		/*
-		 *Use Force Non-Coherent whenever executing a 3D context. This
-		 * is a workaround for a possible hang in the unlikely event
-		 * a TLB invalidation occurs during a PSD flush.
-		 */
-		/* WaForceEnableNonCoherent:skl */
-		WA_SET_BIT_MASKED(HDC_CHICKEN0,
-				  HDC_FORCE_NON_COHERENT);
-
-		/* WaDisableHDCInvalidation:skl */
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-			   BDW_DISABLE_HDC_INVALIDATION);
-	}
-
 	/* WaBarrierPerformanceFixDisable:skl */
 	if (IS_SKL_REVID(dev_priv, SKL_REVID_C0, SKL_REVID_D0))
 		WA_SET_BIT_MASKED(HDC_CHICKEN0,
@@ -1113,6 +1125,9 @@
 			GEN7_HALF_SLICE_CHICKEN1,
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 
+	/* WaDisableGafsUnitClkGating:skl */
+	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
 	/* WaDisableLSQCROPERFforOCL:skl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
 	if (ret)
@@ -1145,6 +1160,12 @@
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 			  STALL_DOP_GATING_DISABLE);
 
+	/* WaDisablePooledEuLoadBalancingFix:bxt */
+	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
+		WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
+				  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
+	}
+
 	/* WaDisableSbeCacheDispatchPortSharing:bxt */
 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
 		WA_SET_BIT_MASKED(
@@ -1171,6 +1192,63 @@
 		I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
 					   L3_HIGH_PRIO_CREDITS(2));
 
+	/* WaInsertDummyPushConstPs:bxt */
+	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	return 0;
+}
+
+static int kbl_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = gen9_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WaEnableGapsTsvCreditFix:kbl */
+	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
+				   GEN9_GAPS_TSV_CREDIT_DISABLE));
+
+	/* WaDisableDynamicCreditSharing:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		WA_SET_BIT(GAMT_CHKN_BIT_REG,
+			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+
+	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
+		WA_SET_BIT_MASKED(HDC_CHICKEN0,
+				  HDC_FENCE_DEST_SLM_DISABLE);
+
+	/* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
+	 * involving this register should also be added to WA batch as required.
+	 */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
+		/* WaDisableLSQCROPERFforOCL:kbl */
+		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
+			   GEN8_LQSC_RO_PERF_DIS);
+
+	/* WaInsertDummyPushConstPs:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableGafsUnitClkGating:kbl */
+	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaDisableSbeCacheDispatchPortSharing:kbl */
+	WA_SET_BIT_MASKED(
+		GEN7_HALF_SLICE_CHICKEN1,
+		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
+	/* WaDisableLSQCROPERFforOCL:kbl */
+	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -1195,6 +1273,9 @@
 	if (IS_BROXTON(dev_priv))
 		return bxt_init_workarounds(engine);
 
+	if (IS_KABYLAKE(dev_priv))
+		return kbl_init_workarounds(engine);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index fe8faf3..e856d49 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -65,6 +65,9 @@
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
 				    int power_well_id);
 
+static struct i915_power_well *
+lookup_power_well(struct drm_i915_private *dev_priv, int power_well_id);
+
 const char *
 intel_display_power_domain_str(enum intel_display_power_domain domain)
 {
@@ -151,6 +154,23 @@
 	power_well->ops->disable(dev_priv, power_well);
 }
 
+static void intel_power_well_get(struct drm_i915_private *dev_priv,
+				 struct i915_power_well *power_well)
+{
+	if (!power_well->count++)
+		intel_power_well_enable(dev_priv, power_well);
+}
+
+static void intel_power_well_put(struct drm_i915_private *dev_priv,
+				 struct i915_power_well *power_well)
+{
+	WARN(!power_well->count, "Use count on power well %s is already zero",
+	     power_well->name);
+
+	if (!--power_well->count)
+		intel_power_well_disable(dev_priv, power_well);
+}
+
 /*
  * We should only use the power well if we explicitly asked the hardware to
  * enable it, so check if it's enabled and also check if we've requested it to
@@ -419,6 +439,16 @@
 	BIT(POWER_DOMAIN_MODESET) |			\
 	BIT(POWER_DOMAIN_AUX_A) |			\
 	BIT(POWER_DOMAIN_INIT))
+#define BXT_DPIO_CMN_A_POWER_DOMAINS (			\
+	BIT(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
+	BIT(POWER_DOMAIN_AUX_A) |			\
+	BIT(POWER_DOMAIN_INIT))
+#define BXT_DPIO_CMN_BC_POWER_DOMAINS (			\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT(POWER_DOMAIN_AUX_B) |			\
+	BIT(POWER_DOMAIN_AUX_C) |			\
+	BIT(POWER_DOMAIN_INIT))
 
 static void assert_can_enable_dc9(struct drm_i915_private *dev_priv)
 {
@@ -800,6 +830,72 @@
 	skl_set_power_well(dev_priv, power_well, false);
 }
 
+static enum dpio_phy bxt_power_well_to_phy(struct i915_power_well *power_well)
+{
+	enum skl_disp_power_wells power_well_id = power_well->data;
+
+	return power_well_id == BXT_DPIO_CMN_A ? DPIO_PHY1 : DPIO_PHY0;
+}
+
+static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	enum skl_disp_power_wells power_well_id = power_well->data;
+	struct i915_power_well *cmn_a_well;
+
+	if (power_well_id == BXT_DPIO_CMN_BC) {
+		/*
+		 * We need to copy the GRC calibration value from the eDP PHY,
+		 * so make sure it's powered up.
+		 */
+		cmn_a_well = lookup_power_well(dev_priv, BXT_DPIO_CMN_A);
+		intel_power_well_get(dev_priv, cmn_a_well);
+	}
+
+	bxt_ddi_phy_init(dev_priv, bxt_power_well_to_phy(power_well));
+
+	if (power_well_id == BXT_DPIO_CMN_BC)
+		intel_power_well_put(dev_priv, cmn_a_well);
+}
+
+static void bxt_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
+					    struct i915_power_well *power_well)
+{
+	bxt_ddi_phy_uninit(dev_priv, bxt_power_well_to_phy(power_well));
+}
+
+static bool bxt_dpio_cmn_power_well_enabled(struct drm_i915_private *dev_priv,
+					    struct i915_power_well *power_well)
+{
+	return bxt_ddi_phy_is_enabled(dev_priv,
+				      bxt_power_well_to_phy(power_well));
+}
+
+static void bxt_dpio_cmn_power_well_sync_hw(struct drm_i915_private *dev_priv,
+					    struct i915_power_well *power_well)
+{
+	if (power_well->count > 0)
+		bxt_dpio_cmn_power_well_enable(dev_priv, power_well);
+	else
+		bxt_dpio_cmn_power_well_disable(dev_priv, power_well);
+}
+
+
+static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_well *power_well;
+
+	power_well = lookup_power_well(dev_priv, BXT_DPIO_CMN_A);
+	if (power_well->count > 0)
+		bxt_ddi_phy_verify_state(dev_priv,
+					 bxt_power_well_to_phy(power_well));
+
+	power_well = lookup_power_well(dev_priv, BXT_DPIO_CMN_BC);
+	if (power_well->count > 0)
+		bxt_ddi_phy_verify_state(dev_priv,
+					 bxt_power_well_to_phy(power_well));
+}
+
 static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv,
 					   struct i915_power_well *power_well)
 {
@@ -826,7 +922,7 @@
 	gen9_assert_dbuf_enabled(dev_priv);
 
 	if (IS_BROXTON(dev_priv))
-		broxton_ddi_phy_verify_state(dev_priv);
+		bxt_verify_ddi_phy_power_wells(dev_priv);
 }
 
 static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv,
@@ -1518,10 +1614,8 @@
 	struct i915_power_well *power_well;
 	int i;
 
-	for_each_power_well(i, power_well, BIT(domain), power_domains) {
-		if (!power_well->count++)
-			intel_power_well_enable(dev_priv, power_well);
-	}
+	for_each_power_well(i, power_well, BIT(domain), power_domains)
+		intel_power_well_get(dev_priv, power_well);
 
 	power_domains->domain_use_count[domain]++;
 }
@@ -1615,14 +1709,8 @@
 	     intel_display_power_domain_str(domain));
 	power_domains->domain_use_count[domain]--;
 
-	for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
-		WARN(!power_well->count,
-		     "Use count on power well %s is already zero",
-		     power_well->name);
-
-		if (!--power_well->count)
-			intel_power_well_disable(dev_priv, power_well);
-	}
+	for_each_power_well_rev(i, power_well, BIT(domain), power_domains)
+		intel_power_well_put(dev_priv, power_well);
 
 	mutex_unlock(&power_domains->lock);
 
@@ -1793,6 +1881,13 @@
 	.is_enabled = gen9_dc_off_power_well_enabled,
 };
 
+static const struct i915_power_well_ops bxt_dpio_cmn_power_well_ops = {
+	.sync_hw = bxt_dpio_cmn_power_well_sync_hw,
+	.enable = bxt_dpio_cmn_power_well_enable,
+	.disable = bxt_dpio_cmn_power_well_disable,
+	.is_enabled = bxt_dpio_cmn_power_well_enabled,
+};
+
 static struct i915_power_well hsw_power_wells[] = {
 	{
 		.name = "always-on",
@@ -2029,6 +2124,18 @@
 		.ops = &skl_power_well_ops,
 		.data = SKL_DISP_PW_2,
 	},
+	{
+		.name = "dpio-common-a",
+		.domains = BXT_DPIO_CMN_A_POWER_DOMAINS,
+		.ops = &bxt_dpio_cmn_power_well_ops,
+		.data = BXT_DPIO_CMN_A,
+	},
+	{
+		.name = "dpio-common-bc",
+		.domains = BXT_DPIO_CMN_BC_POWER_DOMAINS,
+		.ops = &bxt_dpio_cmn_power_well_ops,
+		.data = BXT_DPIO_CMN_BC,
+	},
 };
 
 static int
@@ -2294,14 +2401,10 @@
 
 	mutex_unlock(&power_domains->lock);
 
-	broxton_init_cdclk(dev_priv);
+	bxt_init_cdclk(dev_priv);
 
 	gen9_dbuf_enable(dev_priv);
 
-	broxton_ddi_phy_init(dev_priv);
-
-	broxton_ddi_phy_verify_state(dev_priv);
-
 	if (resume && dev_priv->csr.dmc_payload)
 		intel_csr_load_program(dev_priv);
 }
@@ -2313,11 +2416,9 @@
 
 	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
 
-	broxton_ddi_phy_uninit(dev_priv);
-
 	gen9_dbuf_disable(dev_priv);
 
-	broxton_uninit_cdclk(dev_priv);
+	bxt_uninit_cdclk(dev_priv);
 
 	/* The spec doesn't call for removing the reset handshake flag */
 
@@ -2448,6 +2549,7 @@
 /**
  * intel_power_domains_init_hw - initialize hardware power domain state
  * @dev_priv: i915 device instance
+ * @resume: Called from resume code paths or not
  *
  * This function initializes the hardware power domain state and enables all
  * power domains using intel_display_set_init_power().
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 324ccb0..fc65417 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -166,6 +166,20 @@
 
 	trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end);
 
+	/* We're still in the vblank-evade critical section, this can't race.
+	 * Would be slightly nice to just grab the vblank count and arm the
+	 * event outside of the critical section - the spinlock might spin for a
+	 * while ... */
+	if (crtc->base.state->event) {
+		WARN_ON(drm_crtc_vblank_get(&crtc->base) != 0);
+
+		spin_lock(&crtc->base.dev->event_lock);
+		drm_crtc_arm_vblank_event(&crtc->base, crtc->base.state->event);
+		spin_unlock(&crtc->base.dev->event_lock);
+
+		crtc->base.state->event = NULL;
+	}
+
 	local_irq_enable();
 
 	if (crtc->debug.start_vbl_count &&
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 9e9bddaa5..f49edec 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -13,6 +13,9 @@
 bool intel_enable_gtt(void);
 
 void intel_gtt_chipset_flush(void);
+void intel_gtt_insert_page(dma_addr_t addr,
+			   unsigned int pg,
+			   unsigned int flags);
 void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags);