Merge tag 'topic/drm-misc-2016-10-11' of git://anongit.freedesktop.org/drm-intel into drm-next

Just flushing out my -misc queue. Slightly important are the prime
refcount/unload fixes from Chris.

There's also the reservation stuff from Chris still pending, and Sumit
hasn't landed that yet. Might get another pull for that, but please don't
hold up the main pull for it ;-)

* tag 'topic/drm-misc-2016-10-11' of git://anongit.freedesktop.org/drm-intel:
  drm/crtc: constify drm_crtc_index parameter
  drm: use the right function name in documentation
  drm: Release resources with a safer function
  drm: Fix up kerneldoc for new drm_gem_dmabuf_export()
  drm/bridge: Drop drm_connector_unregister and call drm_connector_cleanup directly
  drm/fb-helper: fix sphinx markup for DRM_FB_HELPER_DEFAULT_OPS
  drm/bridge: Add RGB to VGA bridge support
  drm/prime: Take a ref on the drm_dev when exporting a dma_buf
  drm/prime: Pass the right module owner through to dma_buf_export()
  drm/bridge: Call drm_connector_cleanup directly
  drm: simple_kms_helper: Add prepare_fb and cleanup_fb hooks
  drm: Release resources with a safer function
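
A quick note on the prime fixes, since they're the important bit: an
exported dma_buf can outlive the last GEM handle and even the device
unload, so the export path now pins the owning drm_dev (and passes the
right module owner) until the buffer's release callback runs. A minimal
userspace sketch of the refcount pattern, with illustrative names rather
than the actual DRM entry points:

#include <stdio.h>
#include <stdlib.h>

struct device { int refcount; };
struct dmabuf { struct device *dev; };

static void dev_get(struct device *dev) { dev->refcount++; }

static void dev_put(struct device *dev)
{
	if (--dev->refcount == 0)
		printf("device freed\n");	/* unload may now proceed */
}

static struct dmabuf *buf_export(struct device *dev)
{
	struct dmabuf *buf = malloc(sizeof(*buf));

	buf->dev = dev;
	dev_get(dev);		/* keep the device alive while shared */
	return buf;
}

static void buf_release(struct dmabuf *buf)
{
	dev_put(buf->dev);	/* paired with the get in buf_export() */
	free(buf);
}

int main(void)
{
	struct device dev = { .refcount = 1 };
	struct dmabuf *buf = buf_export(&dev);

	dev_put(&dev);		/* driver lets go of its reference... */
	buf_release(buf);	/* ...device outlives it until last buf */
	return 0;
}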
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 53cf397..61360e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -32,5 +32,4 @@
 	  Selecting this option creates a debugfs file to inspect the mapped
 	  pages. Uses more memory for housekeeping, enable only for debugging.
 
-source "drivers/gpu/drm/amd/powerplay/Kconfig"
 source "drivers/gpu/drm/amd/acp/Kconfig"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 9ec262d..248a05d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -111,14 +111,10 @@
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
 amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
 
-ifneq ($(CONFIG_DRM_AMD_POWERPLAY),)
-
 include $(FULL_AMD_PATH)/powerplay/Makefile
 
 amdgpu-y += $(AMD_POWERPLAY_FILES)
 
-endif
-
 obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
 
 CFLAGS_amdgpu_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 235f390..039b57e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1943,6 +1943,7 @@
 	bool valid;
 	bool sw;
 	bool hw;
+	bool late_initialized;
 	bool hang;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 22c11e7..2e3a054 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -769,8 +769,10 @@
 {
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
-	if (amdgpu_connector->ddc_bus->has_aux)
+	if (amdgpu_connector->ddc_bus->has_aux) {
 		drm_dp_aux_unregister(&amdgpu_connector->ddc_bus->aux);
+		amdgpu_connector->ddc_bus->has_aux = false;
+	}
 	amdgpu_connector_free_edid(connector);
 	kfree(amdgpu_connector->con_priv);
 	drm_connector_unregister(connector);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 99a15ca..a58513f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1424,6 +1424,7 @@
 				DRM_ERROR("late_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
 				return r;
 			}
+			adev->ip_block_status[i].late_initialized = true;
 		}
 	}
 
@@ -1469,8 +1470,11 @@
 	}
 
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+		if (!adev->ip_block_status[i].late_initialized)
+			continue;
 		if (adev->ip_blocks[i].funcs->late_fini)
 			adev->ip_blocks[i].funcs->late_fini((void *)adev);
+		adev->ip_block_status[i].late_initialized = false;
 	}
 
 	return 0;
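
The late_initialized flag extends the existing valid/sw/hw bookkeeping:
late_fini must only run for IP blocks whose late_init actually
succeeded, and clearing the flag afterwards makes a second fini pass a
no-op. Roughly the pattern, with a hypothetical block table instead of
the real amdgpu structures:

#include <stdbool.h>
#include <stdio.h>

#define NBLOCKS 2

struct ip_block {
	int (*late_init)(void);
	void (*late_fini)(void);
	bool late_initialized;
};

static int ok_init(void)  { return 0; }
static void do_fini(void) { printf("late_fini\n"); }

static int late_init_all(struct ip_block *b)
{
	for (int i = 0; i < NBLOCKS; i++) {
		if (b[i].late_init && b[i].late_init())
			return -1;	/* flag stays false on failure */
		b[i].late_initialized = true;
	}
	return 0;
}

static void late_fini_all(struct ip_block *b)
{
	for (int i = NBLOCKS - 1; i >= 0; i--) {
		if (!b[i].late_initialized)
			continue;	/* never brought up: skip */
		if (b[i].late_fini)
			b[i].late_fini();
		b[i].late_initialized = false;	/* fini is idempotent */
	}
}

int main(void)
{
	struct ip_block blocks[NBLOCKS] = {
		{ ok_init, do_fini, false },
		{ NULL, do_fini, false },
	};

	late_init_all(blocks);
	late_fini_all(blocks);
	late_fini_all(blocks);	/* harmless second teardown */
	return 0;
}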
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index dbe89fb..71ed27e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -174,7 +174,6 @@
 MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
 module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
 
-#ifdef CONFIG_DRM_AMD_POWERPLAY
 MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);
 
@@ -183,7 +182,6 @@
 
 MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
 module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, int, 0444);
-#endif
 
 MODULE_PARM_DESC(sclkdeepsleep, "SCLK Deep Sleep (1 = enable (default), 0 = disable)");
 module_param_named(sclkdeepsleep, amdgpu_sclk_deep_sleep_en, int, 0444);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index 34bab61..91d3673 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -220,6 +220,7 @@
 {
 	if (!i2c)
 		return;
+	WARN_ON(i2c->has_aux);
 	i2c_del_adapter(&i2c->adapter);
 	kfree(i2c);
 }
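
Together with the connector change above, the WARN_ON spells out a
teardown-ordering contract: the AUX channel must be unregistered, and
has_aux cleared, before the i2c adapter is destroyed. In miniature
(illustrative types, not the amdgpu ones):

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>

struct i2c_chan { bool has_aux; };

static void aux_unregister(struct i2c_chan *c)
{
	/* ...tell the DP core the AUX channel is going away... */
	c->has_aux = false;	/* record that the AUX side is gone */
}

static void i2c_destroy(struct i2c_chan *c)
{
	assert(!c->has_aux);	/* catches destroy-before-unregister */
	free(c);
}

int main(void)
{
	struct i2c_chan *c = calloc(1, sizeof(*c));

	c->has_aux = true;
	aux_unregister(c);
	i2c_destroy(c);		/* fine: flag already cleared */
	return 0;
}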
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 68ad241..7532ff8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -42,7 +42,6 @@
 	amd_pp = &(adev->powerplay);
 
 	if (adev->pp_enabled) {
-#ifdef CONFIG_DRM_AMD_POWERPLAY
 		struct amd_pp_init *pp_init;
 
 		pp_init = kzalloc(sizeof(struct amd_pp_init), GFP_KERNEL);
@@ -55,7 +54,6 @@
 		pp_init->device = amdgpu_cgs_create_device(adev);
 		ret = amd_powerplay_init(pp_init, amd_pp);
 		kfree(pp_init);
-#endif
 	} else {
 		amd_pp->pp_handle = (void *)adev;
 
@@ -97,7 +95,6 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret = 0;
 
-#ifdef CONFIG_DRM_AMD_POWERPLAY
 	switch (adev->asic_type) {
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
@@ -120,9 +117,6 @@
 		adev->pp_enabled = false;
 		break;
 	}
-#else
-	adev->pp_enabled = false;
-#endif
 
 	ret = amdgpu_powerplay_init(adev);
 	if (ret)
@@ -144,12 +138,11 @@
 		ret = adev->powerplay.ip_funcs->late_init(
 					adev->powerplay.pp_handle);
 
-#ifdef CONFIG_DRM_AMD_POWERPLAY
 	if (adev->pp_enabled && adev->pm.dpm_enabled) {
 		amdgpu_pm_sysfs_init(adev);
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL);
 	}
-#endif
+
 	return ret;
 }
 
@@ -162,10 +155,8 @@
 		ret = adev->powerplay.ip_funcs->sw_init(
 					adev->powerplay.pp_handle);
 
-#ifdef CONFIG_DRM_AMD_POWERPLAY
 	if (adev->pp_enabled)
 		adev->pm.dpm_enabled = true;
-#endif
 
 	return ret;
 }
@@ -216,7 +207,6 @@
 
 static void amdgpu_pp_late_fini(void *handle)
 {
-#ifdef CONFIG_DRM_AMD_POWERPLAY
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->pp_enabled) {
@@ -227,7 +217,6 @@
 	if (adev->powerplay.ip_funcs->late_fini)
 		adev->powerplay.ip_funcs->late_fini(
 			  adev->powerplay.pp_handle);
-#endif
 }
 
 static int amdgpu_pp_suspend(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 7a05f79..cb3d252 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -273,7 +273,6 @@
 
 	amdgpu_bo_unreserve(*bo);
 
-	fw_offset = 0;
 	for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) {
 		ucode = &adev->firmware.ucode[i];
 		if (ucode->fw) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 3b03558d..7fe8fd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -699,6 +699,20 @@
 		case 0x05000009: /* clock table */
 			break;
 
+		case 0x0500000c: /* hw config */
+			switch (p->adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_CIK
+			case CHIP_KAVERI:
+			case CHIP_MULLINS:
+#endif
+			case CHIP_CARRIZO:
+				break;
+			default:
+				r = -EINVAL;
+				goto out;
+			}
+			break;
+
 		case 0x03000001: /* encode */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
 						*size, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bc4b22c..06f2432 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -878,13 +878,13 @@
 	 * allocation size to the fragment size.
 	 */
 
-	const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
+	/* SI and newer are optimized for 64KB */
+	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
+	uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
 
 	uint64_t frag_start = ALIGN(start, frag_align);
 	uint64_t frag_end = end & ~(frag_align - 1);
 
-	uint32_t frag;
-
 	/* system pages are non continuously */
 	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
 	    (frag_start >= frag_end)) {
@@ -893,10 +893,6 @@
 		return;
 	}
 
-	/* use more than 64KB fragment size if possible */
-	frag = lower_32_bits(frag_start | frag_end);
-	frag = likely(frag) ? __ffs(frag) : 31;
-
 	/* handle the 4K area at the beginning */
 	if (start != frag_start) {
 		amdgpu_vm_update_ptes(params, vm, start, frag_start,
@@ -906,7 +902,7 @@
 
 	/* handle the area in the middle */
 	amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
-			      flags | AMDGPU_PTE_FRAG(frag));
+			      flags | frag_flags);
 
 	/* handle the 4K area at the end */
 	if (frag_end != end) {
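
With the per-range fragment computation gone, the update is always split
the same way: an unaligned 4K head, a middle mapped with 64KB fragments,
and a 4K tail. A standalone sketch of that split, with illustrative
constants rather than the real page-table update:

#include <stdint.h>
#include <stdio.h>

#define LOG2_PAGES_PER_FRAG 4	/* 16 pages x 4K = 64KB fragments */

static void update_ptes(uint64_t s, uint64_t e, const char *how)
{
	printf("[%llu, %llu) via %s\n",
	       (unsigned long long)s, (unsigned long long)e, how);
}

static void frag_split(uint64_t start, uint64_t end)
{
	uint64_t frag_align = 1ull << LOG2_PAGES_PER_FRAG;
	uint64_t frag_start = (start + frag_align - 1) & ~(frag_align - 1);
	uint64_t frag_end = end & ~(frag_align - 1);

	if (frag_start >= frag_end) {	/* too small to fragment */
		update_ptes(start, end, "4K pages");
		return;
	}
	if (start != frag_start)	/* unaligned head */
		update_ptes(start, frag_start, "4K pages");
	update_ptes(frag_start, frag_end, "64KB fragments");
	if (frag_end != end)		/* unaligned tail */
		update_ptes(frag_end, end, "4K pages");
}

int main(void)
{
	/* head [3,16), middle [16,64), tail [64,70) in page units */
	frag_split(3, 70);
	return 0;
}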
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 678f5eb..f264b8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -3159,6 +3159,7 @@
 
 	dce_v11_0_afmt_fini(adev);
 
+	drm_mode_config_cleanup(adev->ddev);
 	adev->mode_info.mode_config_initialized = false;
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index a754f25..c2bd9f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -99,15 +99,15 @@
 			      struct amdgpu_mode_mc_save *save)
 {
 	switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_BONAIRE:
 	case CHIP_HAWAII:
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
 	case CHIP_MULLINS:
-#ifdef CONFIG_DRM_AMDGPU_CIK
 		dce_v8_0_disable_dce(adev);
-#endif
 		break;
+#endif
 	case CHIP_FIJI:
 	case CHIP_TONGA:
 		dce_v10_0_disable_dce(adev);
diff --git a/drivers/gpu/drm/amd/powerplay/Kconfig b/drivers/gpu/drm/amd/powerplay/Kconfig
deleted file mode 100644
index af38033..0000000
--- a/drivers/gpu/drm/amd/powerplay/Kconfig
+++ /dev/null
@@ -1,6 +0,0 @@
-config DRM_AMD_POWERPLAY
-	bool  "Enable AMD powerplay component"
-	depends on DRM_AMDGPU
-	default n
-	help
-	  select this option will enable AMD powerplay component.
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 7f4e8ad..bfb2efd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1786,15 +1786,6 @@
 		goto error;
 	}
 
-	/*
-	 * rps/rc6 re-init is necessary to restore state lost after the
-	 * reset and the re-install of gt irqs. Skip for ironlake per
-	 * previous concerns that it doesn't respond well to some forms
-	 * of re-init after reset.
-	 */
-	intel_sanitize_gt_powersave(dev_priv);
-	intel_autoenable_gt_powersave(dev_priv);
-
 wakeup:
 	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
 	return;
@@ -1872,7 +1863,17 @@
 /* freeze: before creating the hibernation_image */
 static int i915_pm_freeze(struct device *kdev)
 {
-	return i915_pm_suspend(kdev);
+	int ret;
+
+	ret = i915_pm_suspend(kdev);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_freeze(kdev_to_i915(kdev));
+	if (ret)
+		return ret;
+
+	return 0;
 }
 
 static int i915_pm_freeze_late(struct device *kdev)
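
The shape of the change: hibernation freeze is now a regular suspend
plus an extra GEM pass that swaps everything out, keeping the
hibernation image small. A stub sketch of that staging (not the real
callbacks):

#include <stdio.h>

static int pm_suspend(void)
{
	printf("suspend device\n");
	return 0;
}

static int gem_freeze(void)
{
	/* stands in for i915_gem_freeze(): shrink all GEM objects */
	printf("swap out GEM objects\n");
	return 0;
}

static int pm_freeze(void)
{
	int ret;

	ret = pm_suspend();	/* same as an ordinary suspend... */
	if (ret)
		return ret;

	return gem_freeze();	/* ...then page everything out */
}

int main(void)
{
	return pm_freeze();
}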
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4dd307e..8b9ee4e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1984,11 +1984,11 @@
 	struct vlv_s0ix_state vlv_s0ix_state;
 
 	enum {
-		I915_SKL_SAGV_UNKNOWN = 0,
-		I915_SKL_SAGV_DISABLED,
-		I915_SKL_SAGV_ENABLED,
-		I915_SKL_SAGV_NOT_CONTROLLED
-	} skl_sagv_status;
+		I915_SAGV_UNKNOWN = 0,
+		I915_SAGV_DISABLED,
+		I915_SAGV_ENABLED,
+		I915_SAGV_NOT_CONTROLLED
+	} sagv_status;
 
 	struct {
 		/*
@@ -2276,21 +2276,19 @@
 	/** Record of address bit 17 of each page at last unbind. */
 	unsigned long *bit_17;
 
-	union {
-		/** for phy allocated objects */
-		struct drm_dma_handle *phys_handle;
-
-		struct i915_gem_userptr {
-			uintptr_t ptr;
-			unsigned read_only :1;
-			unsigned workers :4;
+	struct i915_gem_userptr {
+		uintptr_t ptr;
+		unsigned read_only :1;
+		unsigned workers :4;
 #define I915_GEM_USERPTR_MAX_WORKERS 15
 
-			struct i915_mm_struct *mm;
-			struct i915_mmu_object *mmu_object;
-			struct work_struct *work;
-		} userptr;
-	};
+		struct i915_mm_struct *mm;
+		struct i915_mmu_object *mmu_object;
+		struct work_struct *work;
+	} userptr;
+
+	/** for phys allocated objects */
+	struct drm_dma_handle *phys_handle;
 };
 
 static inline struct drm_i915_gem_object *
@@ -3076,6 +3074,7 @@
 void i915_gem_load_init(struct drm_device *dev);
 void i915_gem_load_cleanup(struct drm_device *dev);
 void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
+int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
 
 void *i915_gem_object_alloc(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2c81067..1418c1c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2616,8 +2616,6 @@
 	list_for_each_entry_continue(request, &engine->request_list, link)
 		if (request->ctx == incomplete_ctx)
 			reset_request(request);
-
-	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
@@ -2628,9 +2626,15 @@
 
 	for_each_engine(engine, dev_priv)
 		i915_gem_reset_engine(engine);
-	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 
 	i915_gem_restore_fences(&dev_priv->drm);
+
+	if (dev_priv->gt.awake) {
+		intel_sanitize_gt_powersave(dev_priv);
+		intel_enable_gt_powersave(dev_priv);
+		if (INTEL_GEN(dev_priv) >= 6)
+			gen6_rps_busy(dev_priv);
+	}
 }
 
 static void nop_submit_request(struct drm_i915_gem_request *request)
@@ -4589,6 +4593,19 @@
 	rcu_barrier();
 }
 
+int i915_gem_freeze(struct drm_i915_private *dev_priv)
+{
+	intel_runtime_pm_get(dev_priv);
+
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	i915_gem_shrink_all(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
+	intel_runtime_pm_put(dev_priv);
+
+	return 0;
+}
+
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj;
@@ -4612,7 +4629,8 @@
 	 * the objects as well.
 	 */
 
-	i915_gem_shrink_all(dev_priv);
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
 
 	for (p = phases; *p; p++) {
 		list_for_each_entry(obj, *p, global_list) {
@@ -4620,6 +4638,7 @@
 			obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 		}
 	}
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 33c8522..222796f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -451,8 +451,8 @@
 				 0, ggtt->mappable_end,
 				 DRM_MM_SEARCH_DEFAULT,
 				 DRM_MM_CREATE_DEFAULT);
-			if (ret)
-				return ERR_PTR(ret);
+			if (ret) /* no inactive aperture space, use cpu reloc */
+				return NULL;
 		} else {
 			ret = i915_vma_put_fence(vma);
 			if (ret) {
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 40978bc..8832f8e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -328,6 +328,7 @@
 
 	switch (state) {
 	case FENCE_COMPLETE:
+		request->engine->last_submitted_seqno = request->fence.seqno;
 		request->engine->submit_request(request);
 		break;
 
@@ -641,8 +642,8 @@
 					     &request->submitq);
 
 	request->emitted_jiffies = jiffies;
-	request->previous_seqno = engine->last_submitted_seqno;
-	engine->last_submitted_seqno = request->fence.seqno;
+	request->previous_seqno = engine->last_pending_seqno;
+	engine->last_pending_seqno = request->fence.seqno;
 	i915_gem_active_set(&engine->last_request, request);
 	list_add_tail(&request->link, &engine->request_list);
 	list_add_tail(&request->ring_link, &ring->request_list);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 43358e1..3106dcc 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -451,6 +451,18 @@
 	return ret;
 }
 
+void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
+{
+	const size_t wqi_size = sizeof(struct guc_wq_item);
+	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
+
+	GEM_BUG_ON(READ_ONCE(gc->wq_rsvd) < wqi_size);
+
+	spin_lock(&gc->wq_lock);
+	gc->wq_rsvd -= wqi_size;
+	spin_unlock(&gc->wq_lock);
+}
+
 /* Construct a Work Item and append it to the GuC's Work Queue */
 static void guc_wq_item_append(struct i915_guc_client *gc,
 			       struct drm_i915_gem_request *rq)
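
i915_guc_wq_unreserve is the error-path counterpart of
i915_guc_wq_reserve: if request construction fails after space was
reserved, the reserved-byte counter has to be wound back. The accounting
in isolation, with a pthread mutex standing in for the spinlock:

#include <assert.h>
#include <pthread.h>
#include <stddef.h>

struct wq {
	pthread_mutex_t lock;
	size_t rsvd;		/* bytes reserved but not yet submitted */
};

static void wq_reserve(struct wq *wq, size_t n)
{
	pthread_mutex_lock(&wq->lock);
	wq->rsvd += n;
	pthread_mutex_unlock(&wq->lock);
}

static void wq_unreserve(struct wq *wq, size_t n)
{
	assert(wq->rsvd >= n);	/* mirrors the GEM_BUG_ON underflow check */
	pthread_mutex_lock(&wq->lock);
	wq->rsvd -= n;
	pthread_mutex_unlock(&wq->lock);
}

int main(void)
{
	struct wq wq = { .lock = PTHREAD_MUTEX_INITIALIZER, .rsvd = 0 };

	wq_reserve(&wq, 64);
	wq_unreserve(&wq, 64);	/* construction failed: give it back */
	return 0;
}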
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c128fdb..3fc286cd 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -350,6 +350,9 @@
 
 void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
+	if (READ_ONCE(dev_priv->rps.interrupts_enabled))
+		return;
+
 	spin_lock_irq(&dev_priv->irq_lock);
 	WARN_ON_ONCE(dev_priv->rps.pm_iir);
 	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
@@ -368,6 +371,9 @@
 
 void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
+	if (!READ_ONCE(dev_priv->rps.interrupts_enabled))
+		return;
+
 	spin_lock_irq(&dev_priv->irq_lock);
 	dev_priv->rps.interrupts_enabled = false;
 
@@ -2816,7 +2822,7 @@
 			if (engine == signaller)
 				continue;
 
-			if (offset == signaller->semaphore.signal_ggtt[engine->id])
+			if (offset == signaller->semaphore.signal_ggtt[engine->hw_id])
 				return signaller;
 		}
 	} else {
@@ -2826,13 +2832,13 @@
 			if(engine == signaller)
 				continue;
 
-			if (sync_bits == signaller->semaphore.mbox.wait[engine->id])
+			if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
 				return signaller;
 		}
 	}
 
-	DRM_DEBUG_DRIVER("No signaller ring found for ring %i, ipehr 0x%08x, offset 0x%016llx\n",
-			 engine->id, ipehr, offset);
+	DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n",
+			 engine->name, ipehr, offset);
 
 	return ERR_PTR(-ENODEV);
 }
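
The READ_ONCE guards make the RPS interrupt enable/disable paths
idempotent, which lets the reset and init paths call them without
tracking who went first. The pattern reduced to a toggle:

#include <stdbool.h>
#include <stdio.h>

static bool rps_enabled;

static void rps_enable(void)
{
	if (rps_enabled)	/* already on: nothing to do */
		return;
	rps_enabled = true;
	printf("unmask PM interrupts\n");
}

static void rps_disable(void)
{
	if (!rps_enabled)	/* already off: nothing to do */
		return;
	rps_enabled = false;
	printf("mask PM interrupts\n");
}

int main(void)
{
	rps_enable();
	rps_enable();		/* no-op */
	rps_disable();
	rps_disable();		/* no-op */
	return 0;
}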
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 9bad14d..495611b 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -578,6 +578,36 @@
 	return 0;
 }
 
+static void cancel_fake_irq(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	del_timer_sync(&b->hangcheck);
+	del_timer_sync(&b->fake_irq);
+	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+}
+
+void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	cancel_fake_irq(engine);
+	spin_lock(&b->lock);
+
+	__intel_breadcrumbs_disable_irq(b);
+	if (intel_engine_has_waiter(engine)) {
+		b->timeout = wait_timeout();
+		__intel_breadcrumbs_enable_irq(b);
+		if (READ_ONCE(b->irq_posted))
+			wake_up_process(b->first_wait->tsk);
+	} else {
+		/* sanitize the IMR and unmask any auxiliary interrupts */
+		irq_disable(engine);
+	}
+
+	spin_unlock(&b->lock);
+}
+
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
@@ -585,8 +615,7 @@
 	if (!IS_ERR_OR_NULL(b->signaler))
 		kthread_stop(b->signaler);
 
-	del_timer_sync(&b->hangcheck);
-	del_timer_sync(&b->fake_irq);
+	cancel_fake_irq(engine);
 }
 
 unsigned int intel_kick_waiters(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ad8d712..fbcfed6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3408,6 +3408,8 @@
 	dst_w--;
 	dst_h--;
 
+	intel_crtc->dspaddr_offset = surf_addr;
+
 	intel_crtc->adjusted_x = src_x;
 	intel_crtc->adjusted_y = src_y;
 
@@ -3629,6 +3631,7 @@
 		intel_runtime_pm_disable_interrupts(dev_priv);
 		intel_runtime_pm_enable_interrupts(dev_priv);
 
+		intel_pps_unlock_regs_wa(dev_priv);
 		intel_modeset_init_hw(dev);
 
 		spin_lock_irq(&dev_priv->irq_lock);
@@ -9509,6 +9512,24 @@
 	if (intel_crtc_has_dp_encoder(crtc_state))
 		dpll |= DPLL_SDVO_HIGH_SPEED;
 
+	/*
+	 * The high speed IO clock is only really required for
+	 * SDVO/HDMI/DP, but we also enable it for CRT to make it
+	 * possible to share the DPLL between CRT and HDMI. Enabling
+	 * the clock needlessly does no real harm, beyond potentially
+	 * using up a bit of power.
+	 *
+	 * We'll limit this to IVB with 3 pipes, since it has only two
+	 * DPLLs and so DPLL sharing is the only way to get three pipes
+	 * driving PCH ports at the same time. On SNB we could do this,
+	 * and potentially avoid enabling the second DPLL, but it's not
+	 * clear if it's a win or loss power wise. No point in doing
+	 * this on ILK at all since it has a fixed DPLL<->pipe mapping.
+	 */
+	if (INTEL_INFO(dev_priv)->num_pipes == 3 &&
+	    intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG))
+		dpll |= DPLL_SDVO_HIGH_SPEED;
+
 	/* compute bitmask from p1 value */
 	dpll |= (1 << (crtc_state->dpll.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
 	/* also FPA1 */
@@ -14364,8 +14385,8 @@
 		 * SKL workaround: bspec recommends we disable the SAGV when we
 		 * have more then one pipe enabled
 		 */
-		if (IS_SKYLAKE(dev_priv) && !skl_can_enable_sagv(state))
-			skl_disable_sagv(dev_priv);
+		if (!intel_can_enable_sagv(state))
+			intel_disable_sagv(dev_priv);
 
 		intel_modeset_verify_disabled(dev);
 	}
@@ -14422,9 +14443,8 @@
 		intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state);
 	}
 
-	if (IS_SKYLAKE(dev_priv) && intel_state->modeset &&
-	    skl_can_enable_sagv(state))
-		skl_enable_sagv(dev_priv);
+	if (intel_state->modeset && intel_can_enable_sagv(state))
+		intel_enable_sagv(dev_priv);
 
 	drm_atomic_helper_commit_hw_done(state);
 
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index acd0c51..14a3cf0 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4337,7 +4337,7 @@
 	intel_dp->has_audio = false;
 }
 
-static void
+static enum drm_connector_status
 intel_dp_long_pulse(struct intel_connector *intel_connector)
 {
 	struct drm_connector *connector = &intel_connector->base;
@@ -4361,7 +4361,7 @@
 	else
 		status = connector_status_disconnected;
 
-	if (status != connector_status_connected) {
+	if (status == connector_status_disconnected) {
 		intel_dp->compliance_test_active = 0;
 		intel_dp->compliance_test_type = 0;
 		intel_dp->compliance_test_data = 0;
@@ -4423,8 +4423,8 @@
 	intel_dp->aux.i2c_defer_count = 0;
 
 	intel_dp_set_edid(intel_dp);
-
-	status = connector_status_connected;
+	if (is_edp(intel_dp) || intel_connector->detect_edid)
+		status = connector_status_connected;
 	intel_dp->detect_done = true;
 
 	/* Try to read the source of the interrupt */
@@ -4443,12 +4443,11 @@
 	}
 
 out:
-	if ((status != connector_status_connected) &&
-	    (intel_dp->is_mst == false))
+	if (status != connector_status_connected && !intel_dp->is_mst)
 		intel_dp_unset_edid(intel_dp);
 
 	intel_display_power_put(to_i915(dev), power_domain);
-	return;
+	return status;
 }
 
 static enum drm_connector_status
@@ -4457,7 +4456,7 @@
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct intel_encoder *intel_encoder = &intel_dig_port->base;
-	struct intel_connector *intel_connector = to_intel_connector(connector);
+	enum drm_connector_status status = connector->status;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
@@ -4472,14 +4471,11 @@
 
 	/* If full detect is not performed yet, do a full detect */
 	if (!intel_dp->detect_done)
-		intel_dp_long_pulse(intel_dp->attached_connector);
+		status = intel_dp_long_pulse(intel_dp->attached_connector);
 
 	intel_dp->detect_done = false;
 
-	if (is_edp(intel_dp) || intel_connector->detect_edid)
-		return connector_status_connected;
-	else
-		return connector_status_disconnected;
+	return status;
 }
 
 static void
@@ -4831,36 +4827,34 @@
 		      port_name(intel_dig_port->port),
 		      long_hpd ? "long" : "short");
 
+	if (long_hpd) {
+		intel_dp->detect_done = false;
+		return IRQ_NONE;
+	}
+
 	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_get(dev_priv, power_domain);
 
-	if (long_hpd) {
-		intel_dp_long_pulse(intel_dp->attached_connector);
-		if (intel_dp->is_mst)
-			ret = IRQ_HANDLED;
-		goto put_power;
-
-	} else {
-		if (intel_dp->is_mst) {
-			if (intel_dp_check_mst_status(intel_dp) == -EINVAL) {
-				/*
-				 * If we were in MST mode, and device is not
-				 * there, get out of MST mode
-				 */
-				DRM_DEBUG_KMS("MST device may have disappeared %d vs %d\n",
-					      intel_dp->is_mst, intel_dp->mst_mgr.mst_state);
-				intel_dp->is_mst = false;
-				drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr,
-								intel_dp->is_mst);
-				goto put_power;
-			}
+	if (intel_dp->is_mst) {
+		if (intel_dp_check_mst_status(intel_dp) == -EINVAL) {
+			/*
+			 * If we were in MST mode, and device is not
+			 * there, get out of MST mode
+			 */
+			DRM_DEBUG_KMS("MST device may have disappeared %d vs %d\n",
+				      intel_dp->is_mst, intel_dp->mst_mgr.mst_state);
+			intel_dp->is_mst = false;
+			drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr,
+							intel_dp->is_mst);
+			intel_dp->detect_done = false;
+			goto put_power;
 		}
+	}
 
-		if (!intel_dp->is_mst) {
-			if (!intel_dp_short_pulse(intel_dp)) {
-				intel_dp_long_pulse(intel_dp->attached_connector);
-				goto put_power;
-			}
+	if (!intel_dp->is_mst) {
+		if (!intel_dp_short_pulse(intel_dp)) {
+			intel_dp->detect_done = false;
+			goto put_power;
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index c26d18a..1c59ca5 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -1694,21 +1694,32 @@
 	return bxt_ddi_set_dpll_hw_state(clock, &clk_div, dpll_hw_state);
 }
 
+static bool
+bxt_ddi_hdmi_set_dpll_hw_state(struct intel_crtc *intel_crtc,
+			       struct intel_crtc_state *crtc_state, int clock,
+			       struct intel_dpll_hw_state *dpll_hw_state)
+{
+	struct bxt_clk_div clk_div = { };
+
+	bxt_ddi_hdmi_pll_dividers(intel_crtc, crtc_state, clock, &clk_div);
+
+	return bxt_ddi_set_dpll_hw_state(clock, &clk_div, dpll_hw_state);
+}
+
 static struct intel_shared_dpll *
 bxt_get_dpll(struct intel_crtc *crtc,
 		struct intel_crtc_state *crtc_state,
 		struct intel_encoder *encoder)
 {
-	struct bxt_clk_div clk_div = {0};
-	struct intel_dpll_hw_state dpll_hw_state = {0};
+	struct intel_dpll_hw_state dpll_hw_state = { };
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_digital_port *intel_dig_port;
 	struct intel_shared_dpll *pll;
 	int i, clock = crtc_state->port_clock;
 
-	if (encoder->type == INTEL_OUTPUT_HDMI
-	    && !bxt_ddi_hdmi_pll_dividers(crtc, crtc_state,
-					  clock, &clk_div))
+	if (encoder->type == INTEL_OUTPUT_HDMI &&
+	    !bxt_ddi_hdmi_set_dpll_hw_state(crtc, crtc_state, clock,
+					    &dpll_hw_state))
 		return NULL;
 
 	if ((encoder->type == INTEL_OUTPUT_DP ||
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 8fd16ad..a19ec06 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -263,6 +263,7 @@
 		bool enabled;
 		bool combination_mode;	/* gen 2/4 only */
 		bool active_low_pwm;
+		bool alternate_pwm_increment;	/* lpt+ */
 
 		/* PWM chip */
 		bool util_pin_active_low;	/* bxt+ */
@@ -1741,9 +1742,9 @@
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 			  struct skl_ddb_allocation *ddb /* out */);
-bool skl_can_enable_sagv(struct drm_atomic_state *state);
-int skl_enable_sagv(struct drm_i915_private *dev_priv);
-int skl_disable_sagv(struct drm_i915_private *dev_priv);
+bool intel_can_enable_sagv(struct drm_atomic_state *state);
+int intel_enable_sagv(struct drm_i915_private *dev_priv);
+int intel_disable_sagv(struct drm_i915_private *dev_priv);
 bool skl_ddb_allocation_equals(const struct skl_ddb_allocation *old,
 			       const struct skl_ddb_allocation *new,
 			       enum pipe pipe);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e405f10..025e232 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -210,9 +210,6 @@
 void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
 {
 	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
-	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
-	if (intel_engine_has_waiter(engine))
-		i915_queue_hangcheck(engine->i915);
 }
 
 static void intel_engine_init_requests(struct intel_engine_cs *engine)
@@ -307,18 +304,6 @@
 	return 0;
 }
 
-void intel_engine_reset_irq(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (intel_engine_has_waiter(engine))
-		engine->irq_enable(engine);
-	else
-		engine->irq_disable(engine);
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
 /**
  * intel_engines_cleanup_common - cleans up the engine state created by
  *                                the common initiailizers.
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index b1ba869..5cdf7aa 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -160,6 +160,7 @@
 int i915_guc_submission_init(struct drm_i915_private *dev_priv);
 int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
 int i915_guc_wq_reserve(struct drm_i915_gem_request *rq);
+void i915_guc_wq_unreserve(struct drm_i915_gem_request *request);
 void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
 void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 25114336..0adb879 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -226,10 +226,16 @@
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 
+#define WA_TAIL_DWORDS 2
+
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine);
 static int intel_lr_context_pin(struct i915_gem_context *ctx,
 				struct intel_engine_cs *engine);
+static void execlists_init_reg_state(u32 *reg_state,
+				     struct i915_gem_context *ctx,
+				     struct intel_engine_cs *engine,
+				     struct intel_ring *ring);
 
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
@@ -621,6 +627,10 @@
 
 	request->ring = ce->ring;
 
+	ret = intel_lr_context_pin(request->ctx, engine);
+	if (ret)
+		return ret;
+
 	if (i915.enable_guc_submission) {
 		/*
 		 * Check that the GuC has space for the request before
@@ -629,21 +639,17 @@
 		 */
 		ret = i915_guc_wq_reserve(request);
 		if (ret)
-			return ret;
+			goto err_unpin;
 	}
 
-	ret = intel_lr_context_pin(request->ctx, engine);
-	if (ret)
-		return ret;
-
 	ret = intel_ring_begin(request, 0);
 	if (ret)
-		goto err_unpin;
+		goto err_unreserve;
 
 	if (!ce->initialised) {
 		ret = engine->init_context(request);
 		if (ret)
-			goto err_unpin;
+			goto err_unreserve;
 
 		ce->initialised = true;
 	}
@@ -658,6 +664,9 @@
 	request->reserved_space -= EXECLISTS_REQUEST_SIZE;
 	return 0;
 
+err_unreserve:
+	if (i915.enable_guc_submission)
+		i915_guc_wq_unreserve(request);
 err_unpin:
 	intel_lr_context_unpin(request->ctx, engine);
 	return ret;
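
The reordering above is the classic goto-unwind shape: pin first,
reserve second, and on failure release in the reverse order, with the
new GuC reservation getting its own label. The skeleton, with stub
functions rather than the i915 calls:

#include <stdio.h>

static int pin_context(void)    { return 0; }
static void unpin_context(void) { printf("unpin context\n"); }
static int reserve_wq(void)     { return 0; }
static void unreserve_wq(void)  { printf("unreserve wq\n"); }
static int ring_begin(void)     { return -12; }	/* force the unwind */

static int request_alloc(void)
{
	int ret;

	ret = pin_context();	/* acquired first... */
	if (ret)
		return ret;

	ret = reserve_wq();
	if (ret)
		goto err_unpin;

	ret = ring_begin();
	if (ret)
		goto err_unreserve;

	return 0;

err_unreserve:
	unreserve_wq();		/* ...released last-to-first */
err_unpin:
	unpin_context();
	return ret;
}

int main(void)
{
	printf("request_alloc: %d\n", request_alloc());
	return 0;
}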
@@ -708,7 +717,6 @@
 {
 	struct intel_context *ce = &ctx->engine[engine->id];
 	void *vaddr;
-	u32 *lrc_reg_state;
 	int ret;
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
@@ -727,17 +735,16 @@
 		goto unpin_vma;
 	}
 
-	lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
-
 	ret = intel_ring_pin(ce->ring);
 	if (ret)
 		goto unpin_map;
 
 	intel_lr_context_descriptor_update(ctx, engine);
 
-	lrc_reg_state[CTX_RING_BUFFER_START+1] =
+	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
 		i915_ggtt_offset(ce->ring->vma);
-	ce->lrc_reg_state = lrc_reg_state;
+
 	ce->state->obj->dirty = true;
 
 	/* Invalidate GuC TLB. */
@@ -1231,7 +1238,7 @@
 
 	lrc_init_hws(engine);
 
-	intel_engine_reset_irq(engine);
+	intel_engine_reset_breadcrumbs(engine);
 
 	I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
 
@@ -1289,8 +1296,21 @@
 	struct execlist_port *port = engine->execlist_port;
 	struct intel_context *ce = &request->ctx->engine[engine->id];
 
+	/* We want a simple context + ring to execute the breadcrumb update.
+	 * We cannot rely on the context being intact across the GPU hang,
+	 * so clear it and rebuild just what we need for the breadcrumb.
+	 * All pending requests for this context will be zapped, and any
+	 * future request will be after userspace has had the opportunity
+	 * to recreate its own state.
+	 */
+	execlists_init_reg_state(ce->lrc_reg_state,
+				 request->ctx, engine, ce->ring);
+
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
+	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
+		i915_ggtt_offset(ce->ring->vma);
 	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+
 	request->ring->head = request->postfix;
 	request->ring->last_retired_head = -1;
 	intel_ring_update_space(request->ring);
@@ -1310,6 +1330,9 @@
 	GEM_BUG_ON(request->ctx != port[0].request->ctx);
 	port[0].count = 0;
 	port[1].count = 0;
+
+	/* Reset WaIdleLiteRestore:bdw,skl as well */
+	request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
 }
 
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
@@ -1547,7 +1570,6 @@
  * used as a workaround for not being allowed to do lite
  * restore with HEAD==TAIL (WaIdleLiteRestore).
  */
-#define WA_TAIL_DWORDS 2
 
 static int gen8_emit_request(struct drm_i915_gem_request *request)
 {
@@ -1894,38 +1916,13 @@
 	return indirect_ctx_offset;
 }
 
-static int
-populate_lr_context(struct i915_gem_context *ctx,
-		    struct drm_i915_gem_object *ctx_obj,
-		    struct intel_engine_cs *engine,
-		    struct intel_ring *ring)
+static void execlists_init_reg_state(u32 *reg_state,
+				     struct i915_gem_context *ctx,
+				     struct intel_engine_cs *engine,
+				     struct intel_ring *ring)
 {
-	struct drm_i915_private *dev_priv = ctx->i915;
-	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
-	void *vaddr;
-	u32 *reg_state;
-	int ret;
-
-	if (!ppgtt)
-		ppgtt = dev_priv->mm.aliasing_ppgtt;
-
-	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
-		return ret;
-	}
-
-	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
-	if (IS_ERR(vaddr)) {
-		ret = PTR_ERR(vaddr);
-		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
-		return ret;
-	}
-	ctx_obj->dirty = true;
-
-	/* The second page of the context object contains some fields which must
-	 * be set up prior to the first execution. */
-	reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;
 
 	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
 	 * commands followed by (reg, value) pairs. The values we are setting here are
@@ -1939,14 +1936,11 @@
 		       _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
 					  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
 					  (HAS_RESOURCE_STREAMER(dev_priv) ?
-					    CTX_CTRL_RS_CTX_ENABLE : 0)));
+					   CTX_CTRL_RS_CTX_ENABLE : 0)));
 	ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base),
 		       0);
 	ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base),
 		       0);
-	/* Ring buffer start address is not known until the buffer is pinned.
-	 * It is written to the context image in execlists_update_context()
-	 */
 	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START,
 		       RING_START(engine->mmio_base), 0);
 	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL,
@@ -2029,6 +2023,36 @@
 		ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
 			       make_rpcs(dev_priv));
 	}
+}
+
+static int
+populate_lr_context(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_object *ctx_obj,
+		    struct intel_engine_cs *engine,
+		    struct intel_ring *ring)
+{
+	void *vaddr;
+	int ret;
+
+	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
+		return ret;
+	}
+
+	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
+		return ret;
+	}
+	ctx_obj->dirty = true;
+
+	/* The second page of the context object contains some fields which must
+	 * be set up prior to the first execution. */
+
+	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+				 ctx, engine, ring);
 
 	i915_gem_object_unpin_map(ctx_obj);
 
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index c10e9b0..be4b4d5 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -841,7 +841,7 @@
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_panel *panel = &connector->panel;
-	u32 pch_ctl1, pch_ctl2;
+	u32 pch_ctl1, pch_ctl2, schicken;
 
 	pch_ctl1 = I915_READ(BLC_PWM_PCH_CTL1);
 	if (pch_ctl1 & BLM_PCH_PWM_ENABLE) {
@@ -850,6 +850,22 @@
 		I915_WRITE(BLC_PWM_PCH_CTL1, pch_ctl1);
 	}
 
+	if (HAS_PCH_LPT(dev_priv)) {
+		schicken = I915_READ(SOUTH_CHICKEN2);
+		if (panel->backlight.alternate_pwm_increment)
+			schicken |= LPT_PWM_GRANULARITY;
+		else
+			schicken &= ~LPT_PWM_GRANULARITY;
+		I915_WRITE(SOUTH_CHICKEN2, schicken);
+	} else {
+		schicken = I915_READ(SOUTH_CHICKEN1);
+		if (panel->backlight.alternate_pwm_increment)
+			schicken |= SPT_PWM_GRANULARITY;
+		else
+			schicken &= ~SPT_PWM_GRANULARITY;
+		I915_WRITE(SOUTH_CHICKEN1, schicken);
+	}
+
 	pch_ctl2 = panel->backlight.max << 16;
 	I915_WRITE(BLC_PWM_PCH_CTL2, pch_ctl2);
 
@@ -1242,10 +1258,10 @@
  */
 static u32 spt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
 {
-	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+	struct intel_panel *panel = &connector->panel;
 	u32 mul;
 
-	if (I915_READ(SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY)
+	if (panel->backlight.alternate_pwm_increment)
 		mul = 128;
 	else
 		mul = 16;
@@ -1261,9 +1277,10 @@
 static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
+	struct intel_panel *panel = &connector->panel;
 	u32 mul, clock;
 
-	if (I915_READ(SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY)
+	if (panel->backlight.alternate_pwm_increment)
 		mul = 16;
 	else
 		mul = 128;
@@ -1414,6 +1431,13 @@
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_panel *panel = &connector->panel;
 	u32 pch_ctl1, pch_ctl2, val;
+	bool alt;
+
+	if (HAS_PCH_LPT(dev_priv))
+		alt = I915_READ(SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY;
+	else
+		alt = I915_READ(SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY;
+	panel->backlight.alternate_pwm_increment = alt;
 
 	pch_ctl1 = I915_READ(BLC_PWM_PCH_CTL1);
 	panel->backlight.active_low_pwm = pch_ctl1 & BLM_PCH_POLARITY;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 2df06b7..a2f751c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2127,32 +2127,34 @@
 				GEN9_MEM_LATENCY_LEVEL_MASK;
 
 		/*
+		 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
+		 * need to be disabled. We make sure to sanitize the values out
+		 * of the punit to satisfy this requirement.
+		 */
+		for (level = 1; level <= max_level; level++) {
+			if (wm[level] == 0) {
+				for (i = level + 1; i <= max_level; i++)
+					wm[i] = 0;
+				break;
+			}
+		}
+
+		/*
 		 * WaWmMemoryReadLatency:skl
 		 *
 		 * punit doesn't take into account the read latency so we need
-		 * to add 2us to the various latency levels we retrieve from
-		 * the punit.
-		 *   - W0 is a bit special in that it's the only level that
-		 *   can't be disabled if we want to have display working, so
-		 *   we always add 2us there.
-		 *   - For levels >=1, punit returns 0us latency when they are
-		 *   disabled, so we respect that and don't add 2us then
-		 *
-		 * Additionally, if a level n (n > 1) has a 0us latency, all
-		 * levels m (m >= n) need to be disabled. We make sure to
-		 * sanitize the values out of the punit to satisfy this
-		 * requirement.
+		 * to add 2us to the various latency levels we retrieve from the
+		 * punit when level 0 response data is 0us.
 		 */
-		wm[0] += 2;
-		for (level = 1; level <= max_level; level++)
-			if (wm[level] != 0)
+		if (wm[0] == 0) {
+			wm[0] += 2;
+			for (level = 1; level <= max_level; level++) {
+				if (wm[level] == 0)
+					break;
 				wm[level] += 2;
-			else {
-				for (i = level + 1; i <= max_level; i++)
-					wm[i] = 0;
-
-				break;
 			}
+		}
+
 	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
 		uint64_t sskpd = I915_READ64(MCH_SSKPD);
 
@@ -2877,6 +2879,19 @@
 	}
 }
 
+static bool
+intel_has_sagv(struct drm_i915_private *dev_priv)
+{
+	if (IS_KABYLAKE(dev_priv))
+		return true;
+
+	if (IS_SKYLAKE(dev_priv) &&
+	    dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED)
+		return true;
+
+	return false;
+}
+
 /*
  * SAGV dynamically adjusts the system agent voltage and clock frequencies
  * depending on power and performance requirements. The display engine access
@@ -2889,12 +2904,14 @@
  *  - We're not using an interlaced display configuration
  */
 int
-skl_enable_sagv(struct drm_i915_private *dev_priv)
+intel_enable_sagv(struct drm_i915_private *dev_priv)
 {
 	int ret;
 
-	if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED ||
-	    dev_priv->skl_sagv_status == I915_SKL_SAGV_ENABLED)
+	if (!intel_has_sagv(dev_priv))
+		return 0;
+
+	if (dev_priv->sagv_status == I915_SAGV_ENABLED)
 		return 0;
 
 	DRM_DEBUG_KMS("Enabling the SAGV\n");
@@ -2910,21 +2927,21 @@
 	 * Some skl systems, pre-release machines in particular,
 	 * don't actually have an SAGV.
 	 */
-	if (ret == -ENXIO) {
+	if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
 		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
-		dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED;
+		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
 		return 0;
 	} else if (ret < 0) {
 		DRM_ERROR("Failed to enable the SAGV\n");
 		return ret;
 	}
 
-	dev_priv->skl_sagv_status = I915_SKL_SAGV_ENABLED;
+	dev_priv->sagv_status = I915_SAGV_ENABLED;
 	return 0;
 }
 
 static int
-skl_do_sagv_disable(struct drm_i915_private *dev_priv)
+intel_do_sagv_disable(struct drm_i915_private *dev_priv)
 {
 	int ret;
 	uint32_t temp = GEN9_SAGV_DISABLE;
@@ -2938,19 +2955,21 @@
 }
 
 int
-skl_disable_sagv(struct drm_i915_private *dev_priv)
+intel_disable_sagv(struct drm_i915_private *dev_priv)
 {
 	int ret, result;
 
-	if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED ||
-	    dev_priv->skl_sagv_status == I915_SKL_SAGV_DISABLED)
+	if (!intel_has_sagv(dev_priv))
+		return 0;
+
+	if (dev_priv->sagv_status == I915_SAGV_DISABLED)
 		return 0;
 
 	DRM_DEBUG_KMS("Disabling the SAGV\n");
 	mutex_lock(&dev_priv->rps.hw_lock);
 
 	/* bspec says to keep retrying for at least 1 ms */
-	ret = wait_for(result = skl_do_sagv_disable(dev_priv), 1);
+	ret = wait_for(result = intel_do_sagv_disable(dev_priv), 1);
 	mutex_unlock(&dev_priv->rps.hw_lock);
 
 	if (ret == -ETIMEDOUT) {
@@ -2962,20 +2981,20 @@
 	 * Some skl systems, pre-release machines in particular,
 	 * don't actually have an SAGV.
 	 */
-	if (result == -ENXIO) {
+	if (IS_SKYLAKE(dev_priv) && result == -ENXIO) {
 		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
-		dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED;
+		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
 		return 0;
 	} else if (result < 0) {
 		DRM_ERROR("Failed to disable the SAGV\n");
 		return result;
 	}
 
-	dev_priv->skl_sagv_status = I915_SKL_SAGV_DISABLED;
+	dev_priv->sagv_status = I915_SAGV_DISABLED;
 	return 0;
 }
 
-bool skl_can_enable_sagv(struct drm_atomic_state *state)
+bool intel_can_enable_sagv(struct drm_atomic_state *state)
 {
 	struct drm_device *dev = state->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -2984,6 +3003,9 @@
 	enum pipe pipe;
 	int level, plane;
 
+	if (!intel_has_sagv(dev_priv))
+		return false;
+
 	/*
 	 * SKL workaround: bspec recommends we disable the SAGV when we have
 	 * more then one pipe enabled
@@ -3472,29 +3494,14 @@
 }
 
 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
-			       uint32_t horiz_pixels, uint8_t cpp,
-			       uint64_t tiling, uint32_t latency)
+			       uint32_t latency, uint32_t plane_blocks_per_line)
 {
 	uint32_t ret;
-	uint32_t plane_bytes_per_line, plane_blocks_per_line;
 	uint32_t wm_intermediate_val;
 
 	if (latency == 0)
 		return UINT_MAX;
 
-	plane_bytes_per_line = horiz_pixels * cpp;
-
-	if (tiling == I915_FORMAT_MOD_Y_TILED ||
-	    tiling == I915_FORMAT_MOD_Yf_TILED) {
-		plane_bytes_per_line *= 4;
-		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
-		plane_blocks_per_line /= 4;
-	} else if (tiling == DRM_FORMAT_MOD_NONE) {
-		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
-	} else {
-		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
-	}
-
 	wm_intermediate_val = latency * pixel_rate;
 	ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
 				plane_blocks_per_line;
@@ -3545,6 +3552,7 @@
 	uint8_t cpp;
 	uint32_t width = 0, height = 0;
 	uint32_t plane_pixel_rate;
+	uint32_t y_tile_minimum, y_min_scanlines;
 
 	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) {
 		*enabled = false;
@@ -3560,38 +3568,51 @@
 	cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 	plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
 
+	if (intel_rotation_90_or_270(pstate->rotation)) {
+		int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ?
+			drm_format_plane_cpp(fb->pixel_format, 1) :
+			drm_format_plane_cpp(fb->pixel_format, 0);
+
+		switch (cpp) {
+		case 1:
+			y_min_scanlines = 16;
+			break;
+		case 2:
+			y_min_scanlines = 8;
+			break;
+		default:
+			WARN(1, "Unsupported pixel depth for rotation");
+		case 4:
+			y_min_scanlines = 4;
+			break;
+		}
+	} else {
+		y_min_scanlines = 4;
+	}
+
+	plane_bytes_per_line = width * cpp;
+	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
+	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
+		plane_blocks_per_line =
+		      DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512);
+		plane_blocks_per_line /= y_min_scanlines;
+	} else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) {
+		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512)
+					+ 1;
+	} else {
+		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
+	}
+
 	method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
 	method2 = skl_wm_method2(plane_pixel_rate,
 				 cstate->base.adjusted_mode.crtc_htotal,
-				 width,
-				 cpp,
-				 fb->modifier[0],
-				 latency);
+				 latency,
+				 plane_blocks_per_line);
 
-	plane_bytes_per_line = width * cpp;
-	plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
+	y_tile_minimum = plane_blocks_per_line * y_min_scanlines;
 
 	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
 	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
-		uint32_t min_scanlines = 4;
-		uint32_t y_tile_minimum;
-		if (intel_rotation_90_or_270(pstate->rotation)) {
-			int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ?
-				drm_format_plane_cpp(fb->pixel_format, 1) :
-				drm_format_plane_cpp(fb->pixel_format, 0);
-
-			switch (cpp) {
-			case 1:
-				min_scanlines = 16;
-				break;
-			case 2:
-				min_scanlines = 8;
-				break;
-			case 8:
-				WARN(1, "Unsupported pixel depth for rotation");
-			}
-		}
-		y_tile_minimum = plane_blocks_per_line * min_scanlines;
 		selected_result = max(method2, y_tile_minimum);
 	} else {
 		if ((ddb_allocation / plane_blocks_per_line) >= 1)
@@ -3605,10 +3626,12 @@
 
 	if (level >= 1 && level <= 7) {
 		if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
-		    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)
-			res_lines += 4;
-		else
+		    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
+			res_blocks += y_tile_minimum;
+			res_lines += y_min_scanlines;
+		} else {
 			res_blocks++;
+		}
 	}
 
 	if (res_blocks >= ddb_allocation || res_lines > 31) {
@@ -3939,6 +3962,41 @@
 	return ret;
 }
 
+int
+skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
+{
+	struct drm_atomic_state *state = cstate->base.state;
+	struct drm_device *dev = state->dev;
+	struct drm_crtc *crtc = cstate->base.crtc;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
+	struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
+	struct drm_plane_state *plane_state;
+	struct drm_plane *plane;
+	enum pipe pipe = intel_crtc->pipe;
+	int id;
+
+	WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc));
+
+	drm_for_each_plane_mask(plane, dev, crtc->state->plane_mask) {
+		id = skl_wm_plane_id(to_intel_plane(plane));
+
+		if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][id],
+					&new_ddb->plane[pipe][id]) &&
+		    skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][id],
+					&new_ddb->y_plane[pipe][id]))
+			continue;
+
+		plane_state = drm_atomic_get_plane_state(state, plane);
+		if (IS_ERR(plane_state))
+			return PTR_ERR(plane_state);
+	}
+
+	return 0;
+}
+
 static int
 skl_compute_ddb(struct drm_atomic_state *state)
 {
@@ -4003,7 +4061,7 @@
 		if (ret)
 			return ret;
 
-		ret = drm_atomic_add_affected_planes(state, &intel_crtc->base);
+		ret = skl_ddb_add_affected_planes(cstate);
 		if (ret)
 			return ret;
 	}
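
The hoisted plane_blocks_per_line computation is the quantity that both
skl_wm_method2 and the new y_tile_minimum share. As plain arithmetic,
using the constants from the diff (512-byte blocks, rotation-dependent
y_min_scanlines):

#include <stdint.h>
#include <stdio.h>

#define BLOCK_BYTES 512

enum tiling { TILE_NONE, TILE_X, TILE_Y };

static uint32_t div_round_up(uint32_t a, uint32_t b)
{
	return (a + b - 1) / b;
}

static uint32_t blocks_per_line(uint32_t width, uint32_t cpp,
				enum tiling t, uint32_t y_min_scanlines)
{
	uint32_t bytes = width * cpp;

	switch (t) {
	case TILE_Y:	/* amortize a y_min_scanlines-high tile row */
		return div_round_up(bytes * y_min_scanlines, BLOCK_BYTES) /
		       y_min_scanlines;
	case TILE_NONE:	/* linear gets one extra block of slack */
		return div_round_up(bytes, BLOCK_BYTES) + 1;
	default:	/* X-tiled */
		return div_round_up(bytes, BLOCK_BYTES);
	}
}

int main(void)
{
	/* 3840 px wide, 4 bytes/px, Y-tiled, unrotated: 30 blocks */
	printf("%u\n", blocks_per_line(3840, 4, TILE_Y, 4));
	/* same plane, linear: 31 blocks */
	printf("%u\n", blocks_per_line(3840, 4, TILE_NONE, 4));
	return 0;
}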
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7a74750..ed9955d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -564,7 +564,7 @@
 	else
 		intel_ring_setup_status_page(engine);
 
-	intel_engine_reset_irq(engine);
+	intel_engine_reset_breadcrumbs(engine);
 
 	/* Enforce ordering by reading HEAD register back */
 	I915_READ_HEAD(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7f64d61..ec0b4a0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -328,6 +328,7 @@
 	 * inspecting request list.
 	 */
 	u32 last_submitted_seqno;
+	u32 last_pending_seqno;
 
 	/* An RCU guarded pointer to the last request. No reference is
 	 * held to the request, users must carefully acquire a reference to
@@ -492,7 +493,6 @@
 void intel_ring_update_space(struct intel_ring *ring);
 
 void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
-void intel_engine_reset_irq(struct intel_engine_cs *engine);
 
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
@@ -584,6 +584,7 @@
 	return wakeup;
 }
 
+void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 unsigned int intel_kick_waiters(struct drm_i915_private *i915);
 unsigned int intel_kick_signalers(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index a9b6c93..ee2306a 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -796,10 +796,9 @@
 		      const bool read,
 		      const bool before)
 {
-	if (WARN(check_for_unclaimed_mmio(dev_priv),
-		 "Unclaimed register detected %s %s register 0x%x\n",
-		 before ? "before" : "after",
-		 read ? "reading" : "writing to",
+	if (WARN(check_for_unclaimed_mmio(dev_priv) && !before,
+		 "Unclaimed %s register 0x%x\n",
+		 read ? "read from" : "write to",
 		 i915_mmio_reg_offset(reg)))
 		i915.mmio_debug--; /* Only report the first N failures */
 }
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index a4e9f35..74f99ba 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1638,8 +1638,8 @@
 	WREG32(AVIVO_D1MODE_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
 	       (viewport_w << 16) | viewport_h);
 
-	/* set pageflip to happen anywhere in vblank interval */
-	WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0);
+	/* set pageflip to happen only at start of vblank interval (front porch) */
+	WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
 		radeon_fb = to_radeon_framebuffer(fb);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 890171f..b8ab30a 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -321,16 +321,30 @@
 	update_pending = radeon_page_flip_pending(rdev, crtc_id);
 
 	/* Has the pageflip already completed in crtc, or is it certain
-	 * to complete in this vblank?
+	 * to complete in this vblank? GET_DISTANCE_TO_VBLANKSTART provides
+	 * distance to start of "fudged earlier" vblank in vpos, distance to
+	 * start of real vblank in hpos. vpos >= 0 && hpos < 0 means we are in
+	 * the last few scanlines before start of real vblank, where the vblank
+	 * irq can fire, so we have sampled update_pending a bit too early and
+	 * know the flip will complete at leading edge of the upcoming real
+	 * vblank. On pre-AVIVO hardware, flips also complete inside the real
+	 * vblank, not only at leading edge, so if update_pending is seen while
+	 * hpos >= 0, i.e. inside the real vblank, the flip will complete
+	 * almost immediately.
+	 * Note that this method of completion handling is still not 100% race
+	 * free, as we could execute before the radeon_flip_work_func managed
+	 * to run and set the RADEON_FLIP_SUBMITTED status, in which case we
+	 * no-op, but the flip still gets programmed into hw and completed
+	 * during vblank, leading to a delayed emission of the flip completion
+	 * event. This applies at least to pre-AVIVO hardware, where flips
+	 * always complete inside vblank, not only at its leading edge.
 	 */
 	if (update_pending &&
-	    (DRM_SCANOUTPOS_VALID & radeon_get_crtc_scanoutpos(rdev->ddev,
-							       crtc_id,
-							       USE_REAL_VBLANKSTART,
-							       &vpos, &hpos, NULL, NULL,
-							       &rdev->mode_info.crtcs[crtc_id]->base.hwmode)) &&
-	    ((vpos >= (99 * rdev->mode_info.crtcs[crtc_id]->base.hwmode.crtc_vdisplay)/100) ||
-	     (vpos < 0 && !ASIC_IS_AVIVO(rdev)))) {
+	    (DRM_SCANOUTPOS_VALID &
+	     radeon_get_crtc_scanoutpos(rdev->ddev, crtc_id,
+					GET_DISTANCE_TO_VBLANKSTART,
+					&vpos, &hpos, NULL, NULL,
+					&rdev->mode_info.crtcs[crtc_id]->base.hwmode)) &&
+	    ((vpos >= 0 && hpos < 0) || (hpos >= 0 && !ASIC_IS_AVIVO(rdev)))) {
 		/* crtc didn't flip in this target vblank interval,
 		 * but flip is pending in crtc. Based on the current
 		 * scanout position we know that the current frame is
@@ -438,16 +452,19 @@
 	}
 
 	/* Wait until we're out of the vertical blank period before the one
-	 * targeted by the flip
+	 * targeted by the flip. Always wait on pre DCE4 to avoid races with
+	 * flip completion handling from vblank irq, as these old asics don't
+	 * have reliable pageflip completion interrupts.
 	 */
 	while (radeon_crtc->enabled &&
-	       (radeon_get_crtc_scanoutpos(dev, work->crtc_id, 0,
-					   &vpos, &hpos, NULL, NULL,
-					   &crtc->hwmode)
+		(radeon_get_crtc_scanoutpos(dev, work->crtc_id, 0,
+					    &vpos, &hpos, NULL, NULL,
+					    &crtc->hwmode)
 		& (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
-	       (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
-	       (int)(work->target_vblank -
-		     dev->driver->get_vblank_counter(dev, work->crtc_id)) > 0)
+		(DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
+		(!ASIC_IS_AVIVO(rdev) ||
+		((int) (work->target_vblank -
+		dev->driver->get_vblank_counter(dev, work->crtc_id)) > 0)))
 		usleep_range(1000, 2000);
 
 	/* We borrow the event spin lock for protecting flip_status */
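
The (int)(work->target_vblank - dev->driver->get_vblank_counter(...)) > 0 test above is the standard wrap-safe ordering check for a free-running 32-bit counter: the subtraction wraps modulo 2^32 and the result is reinterpreted as signed, so the comparison keeps working across counter wrap-around. A self-contained sketch (hypothetical helper name):

	/* Wrap-safe "is target still in the future?" for u32 counters. */
	static bool vblank_in_future(u32 target, u32 now)
	{
		return (int)(target - now) > 0;
	}

	/* Example: target = 2, now = 0xfffffffe (counter about to wrap).
	 * 2 - 0xfffffffe == 4 (mod 2^32), and (int)4 > 0, so the target is
	 * correctly seen as 4 vblanks in the future.
	 */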
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 76c55c5..c55d653 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -406,8 +406,9 @@
 	for (i = 0; i < rdev->num_crtc; i++) {
 		if (save->crtc_enabled[i]) {
 			tmp = RREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + crtc_offsets[i]);
-			if ((tmp & 0x7) != 0) {
+			if ((tmp & 0x7) != 3) {
 				tmp &= ~0x7;
+				tmp |= 0x3;
 				WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + crtc_offsets[i], tmp);
 			}
 			tmp = RREG32(AVIVO_D1GRPH_UPDATE + crtc_offsets[i]);
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 2682f07..7f08d68 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -229,7 +229,7 @@
 	 * and need to make things up in an approximate but consistent way.
 	 */
 	ret |= DRM_SCANOUTPOS_IN_VBLANK;
-	vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay;
+	vblank_lines = mode->vtotal - mode->vdisplay;
 
 	if (flags & DRM_CALLED_FROM_VBLIRQ) {
 		/*
@@ -378,7 +378,7 @@
 	struct drm_crtc_state *state = crtc->state;
 	struct drm_display_mode *mode = &state->adjusted_mode;
 	bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE;
-	u32 vactive = (mode->vdisplay >> (interlace ? 1 : 0));
+	u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1;
 	u32 format = PV_CONTROL_FORMAT_24;
 	bool debug_dump_regs = false;
 	int clock_select = vc4_get_clock_select(crtc);
@@ -394,47 +394,65 @@
 	CRTC_WRITE(PV_CONTROL, 0);
 
 	CRTC_WRITE(PV_HORZA,
-		   VC4_SET_FIELD(mode->htotal - mode->hsync_end,
+		   VC4_SET_FIELD((mode->htotal -
+				  mode->hsync_end) * pixel_rep,
 				 PV_HORZA_HBP) |
-		   VC4_SET_FIELD(mode->hsync_end - mode->hsync_start,
+		   VC4_SET_FIELD((mode->hsync_end -
+				  mode->hsync_start) * pixel_rep,
 				 PV_HORZA_HSYNC));
 	CRTC_WRITE(PV_HORZB,
-		   VC4_SET_FIELD(mode->hsync_start - mode->hdisplay,
+		   VC4_SET_FIELD((mode->hsync_start -
+				  mode->hdisplay) * pixel_rep,
 				 PV_HORZB_HFP) |
-		   VC4_SET_FIELD(mode->hdisplay, PV_HORZB_HACTIVE));
+		   VC4_SET_FIELD(mode->hdisplay * pixel_rep, PV_HORZB_HACTIVE));
 
 	CRTC_WRITE(PV_VERTA,
-		   VC4_SET_FIELD(mode->vtotal - mode->vsync_end,
+		   VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end,
 				 PV_VERTA_VBP) |
-		   VC4_SET_FIELD(mode->vsync_end - mode->vsync_start,
+		   VC4_SET_FIELD(mode->crtc_vsync_end - mode->crtc_vsync_start,
 				 PV_VERTA_VSYNC));
 	CRTC_WRITE(PV_VERTB,
-		   VC4_SET_FIELD(mode->vsync_start - mode->vdisplay,
+		   VC4_SET_FIELD(mode->crtc_vsync_start - mode->crtc_vdisplay,
 				 PV_VERTB_VFP) |
-		   VC4_SET_FIELD(vactive, PV_VERTB_VACTIVE));
+		   VC4_SET_FIELD(mode->crtc_vdisplay, PV_VERTB_VACTIVE));
 
 	if (interlace) {
 		CRTC_WRITE(PV_VERTA_EVEN,
-			   VC4_SET_FIELD(mode->vtotal - mode->vsync_end - 1,
+			   VC4_SET_FIELD(mode->crtc_vtotal -
+					 mode->crtc_vsync_end - 1,
 					 PV_VERTA_VBP) |
-			   VC4_SET_FIELD(mode->vsync_end - mode->vsync_start,
+			   VC4_SET_FIELD(mode->crtc_vsync_end -
+					 mode->crtc_vsync_start,
 					 PV_VERTA_VSYNC));
 		CRTC_WRITE(PV_VERTB_EVEN,
-			   VC4_SET_FIELD(mode->vsync_start - mode->vdisplay,
+			   VC4_SET_FIELD(mode->crtc_vsync_start -
+					 mode->crtc_vdisplay,
 					 PV_VERTB_VFP) |
-			   VC4_SET_FIELD(vactive, PV_VERTB_VACTIVE));
+			   VC4_SET_FIELD(mode->crtc_vdisplay, PV_VERTB_VACTIVE));
+
+		/* We set up first field even mode for HDMI.  VEC's
+		 * NTSC mode would want first field odd instead, once
+		 * we support it (to do so, set ODD_FIRST and put the
+		 * delay in VSYNCD_EVEN instead).
+		 */
+		CRTC_WRITE(PV_V_CONTROL,
+			   PV_VCONTROL_CONTINUOUS |
+			   PV_VCONTROL_INTERLACE |
+			   VC4_SET_FIELD(mode->htotal * pixel_rep / 2,
+					 PV_VCONTROL_ODD_DELAY));
+		CRTC_WRITE(PV_VSYNCD_EVEN, 0);
+	} else {
+		CRTC_WRITE(PV_V_CONTROL, PV_VCONTROL_CONTINUOUS);
 	}
 
-	CRTC_WRITE(PV_HACT_ACT, mode->hdisplay);
+	CRTC_WRITE(PV_HACT_ACT, mode->hdisplay * pixel_rep);
 
-	CRTC_WRITE(PV_V_CONTROL,
-		   PV_VCONTROL_CONTINUOUS |
-		   (interlace ? PV_VCONTROL_INTERLACE : 0));
 
 	CRTC_WRITE(PV_CONTROL,
 		   VC4_SET_FIELD(format, PV_CONTROL_FORMAT) |
 		   VC4_SET_FIELD(vc4_get_fifo_full_level(format),
 				 PV_CONTROL_FIFO_LEVEL) |
+		   VC4_SET_FIELD(pixel_rep - 1, PV_CONTROL_PIXEL_REP) |
 		   PV_CONTROL_CLR_AT_START |
 		   PV_CONTROL_TRIGGER_UNDERFLOW |
 		   PV_CONTROL_WAIT_HSTART |
@@ -544,16 +562,6 @@
 		return false;
 	}
 
-	/*
-	 * Interlaced video modes got CRTC_INTERLACE_HALVE_V applied when
-	 * coming from user space. We don't want this, as it screws up
-	 * vblank timestamping, so fix it up.
-	 */
-	drm_mode_set_crtcinfo(adjusted_mode, 0);
-
-	DRM_DEBUG_KMS("[CRTC:%d] adjusted_mode :\n", crtc->base.id);
-	drm_mode_debug_printmodeline(adjusted_mode);
-
 	return true;
 }
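
Background on the pixel_rep changes in this file (and the matching HDMI hunks below): double-clocked CEA modes such as 720x480i carry DRM_MODE_FLAG_DBLCLK and store their timings in non-repeated pixels, while the hardware scans each pixel out twice. Every horizontal parameter programmed into the PV, plus the pixel clock, therefore has to be doubled. A worked example using the usual CEA 720x480i timings (numbers quoted from memory, treat them as illustrative):

	mode->clock = 13500 kHz, hdisplay = 720, hsync_start = 739,
	hsync_end = 801, htotal = 858, flags include DBLCLK => pixel_rep = 2

	HACTIVE = 720 * 2          = 1440
	HFP     = (739 - 720) * 2  =   38
	HSYNC   = (801 - 739) * 2  =  124
	HBP     = (858 - 801) * 2  =  114
	total   = 1716 = 858 * 2 clocks per line, at 27000 kHz

The vertical fields switch to the crtc_* variants at the same time because, for interlaced modes, drm_mode_set_crtcinfo() halves the vertical timings into crtc_vdisplay and friends, which replaces the open-coded "vdisplay >> 1" this patch removes.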
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 428e249..7c1e4d9 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -122,9 +122,16 @@
 struct vc4_bo {
 	struct drm_gem_cma_object base;
 
-	/* seqno of the last job to render to this BO. */
+	/* seqno of the last job to render using this BO. */
 	uint64_t seqno;
 
+	/* seqno of the last job to use the RCL to write to this BO.
+	 *
+	 * Note that this doesn't include binner overflow memory
+	 * writes.
+	 */
+	uint64_t write_seqno;
+
 	/* List entry for the BO's position in either
 	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
 	 */
@@ -216,6 +223,9 @@
 	/* Sequence number for this bin/render job. */
 	uint64_t seqno;
 
+	/* Latest write_seqno of any BO that binning depends on. */
+	uint64_t bin_dep_seqno;
+
 	/* Last current addresses the hardware was processing when the
 	 * hangcheck timer checked on us.
 	 */
@@ -230,6 +240,13 @@
 	struct drm_gem_cma_object **bo;
 	uint32_t bo_count;
 
+	/* List of BOs that are being written by the RCL.  Other than
+	 * the binner temporary storage, this is all the BOs written
+	 * by the job.
+	 */
+	struct drm_gem_cma_object *rcl_write_bo[4];
+	uint32_t rcl_write_bo_count;
+
 	/* Pointers for our position in vc4->job_list */
 	struct list_head head;
 
@@ -307,18 +324,15 @@
 static inline struct vc4_exec_info *
 vc4_first_bin_job(struct vc4_dev *vc4)
 {
-	if (list_empty(&vc4->bin_job_list))
-		return NULL;
-	return list_first_entry(&vc4->bin_job_list, struct vc4_exec_info, head);
+	return list_first_entry_or_null(&vc4->bin_job_list,
+					struct vc4_exec_info, head);
 }
 
 static inline struct vc4_exec_info *
 vc4_first_render_job(struct vc4_dev *vc4)
 {
-	if (list_empty(&vc4->render_job_list))
-		return NULL;
-	return list_first_entry(&vc4->render_job_list,
-				struct vc4_exec_info, head);
+	return list_first_entry_or_null(&vc4->render_job_list,
+					struct vc4_exec_info, head);
 }
 
 static inline struct vc4_exec_info *
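
The two new seqno fields implement a small producer/consumer dependency scheme between jobs: when a job is queued, every BO its RCL writes is stamped with that job's seqno, and when a later job's bin CL is validated, bin_dep_seqno is raised to the highest write_seqno among the BOs the binner will read (index buffers, vertex buffers, coordinate-shader textures). A condensed sketch of the two halves, pulled together from the vc4_gem.c and vc4_validate.c hunks below:

	/* Producer (at job submission, after the job's seqno is assigned): */
	for (i = 0; i < exec->rcl_write_bo_count; i++)
		to_vc4_bo(&exec->rcl_write_bo[i]->base)->write_seqno = seqno;

	/* Consumer (during bin CL validation, per BO the binner reads): */
	exec->bin_dep_seqno = max(exec->bin_dep_seqno, bo->write_seqno);

	/* Consumer (before queueing): block until those writes have landed. */
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);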
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 77daea6..47a095f 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -467,6 +467,11 @@
 	list_for_each_entry(bo, &exec->unref_list, unref_head) {
 		bo->seqno = seqno;
 	}
+
+	for (i = 0; i < exec->rcl_write_bo_count; i++) {
+		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
+		bo->write_seqno = seqno;
+	}
 }
 
 /* Queues a struct vc4_exec_info for execution.  If no job is
@@ -669,6 +674,14 @@
 		goto fail;
 
 	ret = vc4_validate_shader_recs(dev, exec);
+	if (ret)
+		goto fail;
+
+	/* Block waiting on any previous rendering into the CS's VBO,
+	 * IB, or textures, so that pixels are actually written by the
+	 * time we try to read them.
+	 */
+	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
 
 fail:
 	drm_free_large(temp);
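
Two details worth noting in the hunk above: ~0ull makes the wait effectively unbounded, and interruptible == true lets a signal abort it with -ERESTARTSYS. There is deliberately no error check after the call; ret falls through to the fail: label so the common cleanup path handles it, the same way the earlier validation failures do. In outline:

	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
	/* fall through: ret reaches the caller via the shared cleanup */
fail:
	drm_free_large(temp);
	return ret;	/* assumption: the function ends by returning ret */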
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 68ad106..c4cb2e2 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -62,6 +62,8 @@
 struct vc4_hdmi_encoder {
 	struct vc4_encoder base;
 	bool hdmi_monitor;
+	bool limited_rgb_range;
+	bool rgb_range_selectable;
 };
 
 static inline struct vc4_hdmi_encoder *
@@ -174,6 +176,9 @@
 			return connector_status_disconnected;
 	}
 
+	if (drm_probe_ddc(vc4->hdmi->ddc))
+		return connector_status_connected;
+
 	if (HDMI_READ(VC4_HDMI_HOTPLUG) & VC4_HDMI_HOTPLUG_CONNECTED)
 		return connector_status_connected;
 	else
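
Presumably this covers boards where the hotplug signal is not wired up: a successful EDID probe over the DDC i2c bus is now treated as proof of a connected sink, with the HOTPLUG register bit only consulted as a fallback. Condensed, the detect order after this change is (illustrative restatement only):

	if (drm_probe_ddc(vc4->hdmi->ddc))
		return connector_status_connected;
	return (HDMI_READ(VC4_HDMI_HOTPLUG) & VC4_HDMI_HOTPLUG_CONNECTED) ?
		connector_status_connected : connector_status_disconnected;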
@@ -202,41 +207,22 @@
 		return -ENODEV;
 
 	vc4_encoder->hdmi_monitor = drm_detect_hdmi_monitor(edid);
+
+	if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) {
+		vc4_encoder->rgb_range_selectable =
+			drm_rgb_quant_range_selectable(edid);
+	}
+
 	drm_mode_connector_update_edid_property(connector, edid);
 	ret = drm_add_edid_modes(connector, edid);
 
 	return ret;
 }
 
-/*
- * drm_helper_probe_single_connector_modes() applies drm_mode_set_crtcinfo to
- * all modes with flag CRTC_INTERLACE_HALVE_V. We don't want this, as it
- * screws up vblank timestamping for interlaced modes, so fix it up.
- */
-static int vc4_hdmi_connector_probe_modes(struct drm_connector *connector,
-					  uint32_t maxX, uint32_t maxY)
-{
-	struct drm_display_mode *mode;
-	int count;
-
-	count = drm_helper_probe_single_connector_modes(connector, maxX, maxY);
-	if (count == 0)
-		return 0;
-
-	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] probed adapted modes :\n",
-		      connector->base.id, connector->name);
-	list_for_each_entry(mode, &connector->modes, head) {
-		drm_mode_set_crtcinfo(mode, 0);
-		drm_mode_debug_printmodeline(mode);
-	}
-
-	return count;
-}
-
 static const struct drm_connector_funcs vc4_hdmi_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
 	.detect = vc4_hdmi_connector_detect,
-	.fill_modes = vc4_hdmi_connector_probe_modes,
+	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = vc4_hdmi_connector_destroy,
 	.reset = drm_atomic_helper_connector_reset,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
@@ -294,25 +280,143 @@
 	.destroy = vc4_hdmi_encoder_destroy,
 };
 
+static int vc4_hdmi_stop_packet(struct drm_encoder *encoder,
+				enum hdmi_infoframe_type type)
+{
+	struct drm_device *dev = encoder->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	u32 packet_id = type - 0x80;
+
+	HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG,
+		   HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) & ~BIT(packet_id));
+
+	return wait_for(!(HDMI_READ(VC4_HDMI_RAM_PACKET_STATUS) &
+			  BIT(packet_id)), 100);
+}
+
+static void vc4_hdmi_write_infoframe(struct drm_encoder *encoder,
+				     union hdmi_infoframe *frame)
+{
+	struct drm_device *dev = encoder->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	u32 packet_id = frame->any.type - 0x80;
+	u32 packet_reg = VC4_HDMI_GCP_0 + VC4_HDMI_PACKET_STRIDE * packet_id;
+	uint8_t buffer[VC4_HDMI_PACKET_STRIDE];
+	ssize_t len, i;
+	int ret;
+
+	WARN_ONCE(!(HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) &
+		    VC4_HDMI_RAM_PACKET_ENABLE),
+		  "Packet RAM has to be on to store the packet.");
+
+	len = hdmi_infoframe_pack(frame, buffer, sizeof(buffer));
+	if (len < 0)
+		return;
+
+	ret = vc4_hdmi_stop_packet(encoder, frame->any.type);
+	if (ret) {
+		DRM_ERROR("Failed to wait for infoframe to go idle: %d\n", ret);
+		return;
+	}
+
+	for (i = 0; i < len; i += 7) {
+		HDMI_WRITE(packet_reg,
+			   buffer[i + 0] << 0 |
+			   buffer[i + 1] << 8 |
+			   buffer[i + 2] << 16);
+		packet_reg += 4;
+
+		HDMI_WRITE(packet_reg,
+			   buffer[i + 3] << 0 |
+			   buffer[i + 4] << 8 |
+			   buffer[i + 5] << 16 |
+			   buffer[i + 6] << 24);
+		packet_reg += 4;
+	}
+
+	HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG,
+		   HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) | BIT(packet_id));
+	ret = wait_for((HDMI_READ(VC4_HDMI_RAM_PACKET_STATUS) &
+			BIT(packet_id)), 100);
+	if (ret)
+		DRM_ERROR("Failed to wait for infoframe to start: %d\n", ret);
+}
+
+static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder)
+{
+	struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder);
+	struct drm_crtc *crtc = encoder->crtc;
+	const struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+	union hdmi_infoframe frame;
+	int ret;
+
+	ret = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi, mode);
+	if (ret < 0) {
+		DRM_ERROR("couldn't fill AVI infoframe\n");
+		return;
+	}
+
+	if (vc4_encoder->rgb_range_selectable) {
+		if (vc4_encoder->limited_rgb_range) {
+			frame.avi.quantization_range =
+				HDMI_QUANTIZATION_RANGE_LIMITED;
+		} else {
+			frame.avi.quantization_range =
+				HDMI_QUANTIZATION_RANGE_FULL;
+		}
+	}
+
+	vc4_hdmi_write_infoframe(encoder, &frame);
+}
+
+static void vc4_hdmi_set_spd_infoframe(struct drm_encoder *encoder)
+{
+	union hdmi_infoframe frame;
+	int ret;
+
+	ret = hdmi_spd_infoframe_init(&frame.spd, "Broadcom", "Videocore");
+	if (ret < 0) {
+		DRM_ERROR("couldn't fill SPD infoframe\n");
+		return;
+	}
+
+	frame.spd.sdi = HDMI_SPD_SDI_PC;
+
+	vc4_hdmi_write_infoframe(encoder, &frame);
+}
+
+static void vc4_hdmi_set_infoframes(struct drm_encoder *encoder)
+{
+	vc4_hdmi_set_avi_infoframe(encoder);
+	vc4_hdmi_set_spd_infoframe(encoder);
+}
+
 static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder,
 				      struct drm_display_mode *unadjusted_mode,
 				      struct drm_display_mode *mode)
 {
+	struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	bool debug_dump_regs = false;
 	bool hsync_pos = mode->flags & DRM_MODE_FLAG_PHSYNC;
 	bool vsync_pos = mode->flags & DRM_MODE_FLAG_PVSYNC;
-	u32 vactive = (mode->vdisplay >>
-		       ((mode->flags & DRM_MODE_FLAG_INTERLACE) ? 1 : 0));
-	u32 verta = (VC4_SET_FIELD(mode->vsync_end - mode->vsync_start,
+	bool interlaced = mode->flags & DRM_MODE_FLAG_INTERLACE;
+	u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1;
+	u32 verta = (VC4_SET_FIELD(mode->crtc_vsync_end - mode->crtc_vsync_start,
 				   VC4_HDMI_VERTA_VSP) |
-		     VC4_SET_FIELD(mode->vsync_start - mode->vdisplay,
+		     VC4_SET_FIELD(mode->crtc_vsync_start - mode->crtc_vdisplay,
 				   VC4_HDMI_VERTA_VFP) |
-		     VC4_SET_FIELD(vactive, VC4_HDMI_VERTA_VAL));
+		     VC4_SET_FIELD(mode->crtc_vdisplay, VC4_HDMI_VERTA_VAL));
 	u32 vertb = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) |
-		     VC4_SET_FIELD(mode->vtotal - mode->vsync_end,
+		     VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end,
 				   VC4_HDMI_VERTB_VBP));
+	u32 vertb_even = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) |
+			  VC4_SET_FIELD(mode->crtc_vtotal -
+					mode->crtc_vsync_end -
+					interlaced,
+					VC4_HDMI_VERTB_VBP));
+	u32 csc_ctl;
 
 	if (debug_dump_regs) {
 		DRM_INFO("HDMI regs before:\n");
@@ -321,7 +425,8 @@
 
 	HD_WRITE(VC4_HD_VID_CTL, 0);
 
-	clk_set_rate(vc4->hdmi->pixel_clock, mode->clock * 1000);
+	clk_set_rate(vc4->hdmi->pixel_clock, mode->clock * 1000 *
+		     ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1));
 
 	HDMI_WRITE(VC4_HDMI_SCHEDULER_CONTROL,
 		   HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) |
@@ -331,29 +436,62 @@
 	HDMI_WRITE(VC4_HDMI_HORZA,
 		   (vsync_pos ? VC4_HDMI_HORZA_VPOS : 0) |
 		   (hsync_pos ? VC4_HDMI_HORZA_HPOS : 0) |
-		   VC4_SET_FIELD(mode->hdisplay, VC4_HDMI_HORZA_HAP));
+		   VC4_SET_FIELD(mode->hdisplay * pixel_rep,
+				 VC4_HDMI_HORZA_HAP));
 
 	HDMI_WRITE(VC4_HDMI_HORZB,
-		   VC4_SET_FIELD(mode->htotal - mode->hsync_end,
+		   VC4_SET_FIELD((mode->htotal -
+				  mode->hsync_end) * pixel_rep,
 				 VC4_HDMI_HORZB_HBP) |
-		   VC4_SET_FIELD(mode->hsync_end - mode->hsync_start,
+		   VC4_SET_FIELD((mode->hsync_end -
+				  mode->hsync_start) * pixel_rep,
 				 VC4_HDMI_HORZB_HSP) |
-		   VC4_SET_FIELD(mode->hsync_start - mode->hdisplay,
+		   VC4_SET_FIELD((mode->hsync_start -
+				  mode->hdisplay) * pixel_rep,
 				 VC4_HDMI_HORZB_HFP));
 
 	HDMI_WRITE(VC4_HDMI_VERTA0, verta);
 	HDMI_WRITE(VC4_HDMI_VERTA1, verta);
 
-	HDMI_WRITE(VC4_HDMI_VERTB0, vertb);
+	HDMI_WRITE(VC4_HDMI_VERTB0, vertb_even);
 	HDMI_WRITE(VC4_HDMI_VERTB1, vertb);
 
 	HD_WRITE(VC4_HD_VID_CTL,
 		 (vsync_pos ? 0 : VC4_HD_VID_CTL_VSYNC_LOW) |
 		 (hsync_pos ? 0 : VC4_HD_VID_CTL_HSYNC_LOW));
 
+	csc_ctl = VC4_SET_FIELD(VC4_HD_CSC_CTL_ORDER_BGR,
+				VC4_HD_CSC_CTL_ORDER);
+
+	if (vc4_encoder->hdmi_monitor && drm_match_cea_mode(mode) > 1) {
+		/* CEA VICs other than #1 require limited range RGB
+		 * output unless overridden by an AVI infoframe.
+		 * Apply a colorspace conversion to squash 0-255 down
+		 * to 16-235.  The matrix here is:
+		 *
+		 * [ 0      0      0.8594 16]
+		 * [ 0      0.8594 0      16]
+		 * [ 0.8594 0      0      16]
+		 * [ 0      0      0       1]
+		 */
+		csc_ctl |= VC4_HD_CSC_CTL_ENABLE;
+		csc_ctl |= VC4_HD_CSC_CTL_RGB2YCC;
+		csc_ctl |= VC4_SET_FIELD(VC4_HD_CSC_CTL_MODE_CUSTOM,
+					 VC4_HD_CSC_CTL_MODE);
+
+		HD_WRITE(VC4_HD_CSC_12_11, (0x000 << 16) | 0x000);
+		HD_WRITE(VC4_HD_CSC_14_13, (0x100 << 16) | 0x6e0);
+		HD_WRITE(VC4_HD_CSC_22_21, (0x6e0 << 16) | 0x000);
+		HD_WRITE(VC4_HD_CSC_24_23, (0x100 << 16) | 0x000);
+		HD_WRITE(VC4_HD_CSC_32_31, (0x000 << 16) | 0x6e0);
+		HD_WRITE(VC4_HD_CSC_34_33, (0x100 << 16) | 0x000);
+		vc4_encoder->limited_rgb_range = true;
+	} else {
+		vc4_encoder->limited_rgb_range = false;
+	}
+
 	/* The RGB order applies even when CSC is disabled. */
-	HD_WRITE(VC4_HD_CSC_CTL, VC4_SET_FIELD(VC4_HD_CSC_CTL_ORDER_BGR,
-					       VC4_HD_CSC_CTL_ORDER));
+	HD_WRITE(VC4_HD_CSC_CTL, csc_ctl);
 
 	HDMI_WRITE(VC4_HDMI_FIFO_CTL, VC4_HDMI_FIFO_CTL_MASTER_SLAVE_N);
 
@@ -368,6 +506,8 @@
 	struct drm_device *dev = encoder->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
+	HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, 0);
+
 	HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16);
 	HD_WRITE(VC4_HD_VID_CTL,
 		 HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE);
@@ -394,7 +534,7 @@
 			   VC4_HDMI_SCHEDULER_CONTROL_MODE_HDMI);
 
 		ret = wait_for(HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) &
-			       VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE, 1);
+			       VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE, 1000);
 		WARN_ONCE(ret, "Timeout waiting for "
 			  "VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE\n");
 	} else {
@@ -406,7 +546,7 @@
 			   ~VC4_HDMI_SCHEDULER_CONTROL_MODE_HDMI);
 
 		ret = wait_for(!(HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) &
-				 VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE), 1);
+				 VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE), 1000);
 		WARN_ONCE(ret, "Timeout waiting for "
 			  "!VC4_HDMI_SCHEDULER_CONTROL_HDMI_ACTIVE\n");
 	}
@@ -420,9 +560,10 @@
 			   HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) |
 			   VC4_HDMI_SCHEDULER_CONTROL_VERT_ALWAYS_KEEPOUT);
 
-		/* XXX: Set HDMI_RAM_PACKET_CONFIG (1 << 16) and set
-		 * up the infoframe.
-		 */
+		HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG,
+			   VC4_HDMI_RAM_PACKET_ENABLE);
+
+		vc4_hdmi_set_infoframes(encoder);
 
 		drift = HDMI_READ(VC4_HDMI_FIFO_CTL);
 		drift &= VC4_HDMI_FIFO_VALID_WRITE_MASK;
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 160942a..1aa44c2 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -175,6 +175,8 @@
 # define PV_CONTROL_CLR_AT_START		BIT(14)
 # define PV_CONTROL_TRIGGER_UNDERFLOW		BIT(13)
 # define PV_CONTROL_WAIT_HSTART			BIT(12)
+# define PV_CONTROL_PIXEL_REP_MASK		VC4_MASK(5, 4)
+# define PV_CONTROL_PIXEL_REP_SHIFT		4
 # define PV_CONTROL_CLK_SELECT_DSI_VEC		0
 # define PV_CONTROL_CLK_SELECT_DPI_SMI_HDMI	1
 # define PV_CONTROL_CLK_SELECT_MASK		VC4_MASK(3, 2)
@@ -183,6 +185,9 @@
 # define PV_CONTROL_EN				BIT(0)
 
 #define PV_V_CONTROL				0x04
+# define PV_VCONTROL_ODD_DELAY_MASK		VC4_MASK(22, 6)
+# define PV_VCONTROL_ODD_DELAY_SHIFT		6
+# define PV_VCONTROL_ODD_FIRST			BIT(5)
 # define PV_VCONTROL_INTERLACE			BIT(4)
 # define PV_VCONTROL_CONTINUOUS			BIT(1)
 # define PV_VCONTROL_VIDEN			BIT(0)
@@ -438,6 +443,8 @@
 #define VC4_HDMI_RAM_PACKET_CONFIG		0x0a0
 # define VC4_HDMI_RAM_PACKET_ENABLE		BIT(16)
 
+#define VC4_HDMI_RAM_PACKET_STATUS		0x0a4
+
 #define VC4_HDMI_HORZA				0x0c4
 # define VC4_HDMI_HORZA_VPOS			BIT(14)
 # define VC4_HDMI_HORZA_HPOS			BIT(13)
@@ -499,6 +506,9 @@
 
 #define VC4_HDMI_TX_PHY_RESET_CTL		0x2c0
 
+#define VC4_HDMI_GCP_0				0x400
+#define VC4_HDMI_PACKET_STRIDE			0x24
+
 #define VC4_HD_M_CTL				0x00c
 # define VC4_HD_M_REGISTER_FILE_STANDBY		(3 << 6)
 # define VC4_HD_M_RAM_STANDBY			(3 << 4)
@@ -528,10 +538,17 @@
 # define VC4_HD_CSC_CTL_MODE_SHIFT		2
 # define VC4_HD_CSC_CTL_MODE_RGB_TO_SD_YPRPB	0
 # define VC4_HD_CSC_CTL_MODE_RGB_TO_HD_YPRPB	1
-# define VC4_HD_CSC_CTL_MODE_CUSTOM		2
+# define VC4_HD_CSC_CTL_MODE_CUSTOM		3
 # define VC4_HD_CSC_CTL_RGB2YCC			BIT(1)
 # define VC4_HD_CSC_CTL_ENABLE			BIT(0)
 
+#define VC4_HD_CSC_12_11			0x044
+#define VC4_HD_CSC_14_13			0x048
+#define VC4_HD_CSC_22_21			0x04c
+#define VC4_HD_CSC_24_23			0x050
+#define VC4_HD_CSC_32_31			0x054
+#define VC4_HD_CSC_34_33			0x058
+
 #define VC4_HD_FRAME_COUNT			0x068
 
 /* HVS display list information. */
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
index 0f12418..08886a3 100644
--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -45,6 +45,8 @@
 
 	struct drm_gem_cma_object *rcl;
 	u32 next_offset;
+
+	u32 next_write_bo_index;
 };
 
 static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
@@ -407,6 +409,8 @@
 	if (!*obj)
 		return -EINVAL;
 
+	exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
+
 	if (surf->offset & 0xf) {
 		DRM_ERROR("MSAA write must be 16b aligned.\n");
 		return -EINVAL;
@@ -417,7 +421,8 @@
 
 static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
 				 struct drm_gem_cma_object **obj,
-				 struct drm_vc4_submit_rcl_surface *surf)
+				 struct drm_vc4_submit_rcl_surface *surf,
+				 bool is_write)
 {
 	uint8_t tiling = VC4_GET_FIELD(surf->bits,
 				       VC4_LOADSTORE_TILE_BUFFER_TILING);
@@ -440,6 +445,9 @@
 	if (!*obj)
 		return -EINVAL;
 
+	if (is_write)
+		exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
+
 	if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
 		if (surf == &exec->args->zs_write) {
 			DRM_ERROR("general zs write may not be a full-res.\n");
@@ -542,6 +550,8 @@
 	if (!*obj)
 		return -EINVAL;
 
+	exec->rcl_write_bo[exec->rcl_write_bo_count++] = *obj;
+
 	if (tiling > VC4_TILING_FORMAT_LT) {
 		DRM_ERROR("Bad tiling format\n");
 		return -EINVAL;
@@ -599,15 +609,18 @@
 	if (ret)
 		return ret;
 
-	ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
+	ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read,
+				    false);
 	if (ret)
 		return ret;
 
-	ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);
+	ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read,
+				    false);
 	if (ret)
 		return ret;
 
-	ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);
+	ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write,
+				    true);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
index 9ce1d0a..26503e3 100644
--- a/drivers/gpu/drm/vc4/vc4_validate.c
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -267,6 +267,9 @@
 	if (!ib)
 		return -EINVAL;
 
+	exec->bin_dep_seqno = max(exec->bin_dep_seqno,
+				  to_vc4_bo(&ib->base)->write_seqno);
+
 	if (offset > ib->base.size ||
 	    (ib->base.size - offset) / index_size < length) {
 		DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
@@ -555,8 +558,7 @@
 reloc_tex(struct vc4_exec_info *exec,
 	  void *uniform_data_u,
 	  struct vc4_texture_sample_info *sample,
-	  uint32_t texture_handle_index)
-
+	  uint32_t texture_handle_index, bool is_cs)
 {
 	struct drm_gem_cma_object *tex;
 	uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
@@ -714,6 +716,11 @@
 
 	*validated_p0 = tex->paddr + p0;
 
+	if (is_cs) {
+		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
+					  to_vc4_bo(&tex->base)->write_seqno);
+	}
+
 	return true;
  fail:
 	DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
@@ -835,7 +842,8 @@
 			if (!reloc_tex(exec,
 				       uniform_data_u,
 				       &validated_shader->texture_samples[tex],
-				       texture_handles_u[tex])) {
+				       texture_handles_u[tex],
+				       i == 2)) {
 				return -EINVAL;
 			}
 		}
@@ -867,6 +875,9 @@
 		uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
 		uint32_t max_index;
 
+		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
+					  to_vc4_bo(&vbo->base)->write_seqno);
+
 		if (state->addr & 0x8)
 			stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;